Hive4.0.1

hive4.0.1安装部署 #

下载安装 #

  • hadoop的鉴权+文件权限要注意
# Let the "hadoop" user impersonate other users (required by HiveServer2's doAs).
# Inserted at line 20 of core-site.xml so the property lands inside <configuration>;
# NOTE(review): line 20 is assumed to be inside the configuration element — verify.
sed -i "20 i <property><name>hadoop.proxyuser.hadoop.hosts</name><value>*</value></property>" ${HADOOP_HOME}/etc/hadoop/core-site.xml
sed -i "20 i <property><name>hadoop.proxyuser.hadoop.groups</name><value>*</value></property>" ${HADOOP_HOME}/etc/hadoop/core-site.xml
# Download and unpack Hive 4.0.1, install under /opt/hive.
wget https://mirrors.aliyun.com/apache/hive/hive-4.0.1/apache-hive-4.0.1-bin.tar.gz
tar -zxvf apache-hive-4.0.1-bin.tar.gz
mv apache-hive-4.0.1-bin hive
mv hive /opt/
# Append HIVE_HOME, PATH and a beeline alias to /etc/profile ($a = append after last line).
sudo sed -i '$aexport HIVE_HOME=/opt/hive' /etc/profile
sudo sed -i '$aPATH=\$PATH:\$HIVE_HOME/bin/' /etc/profile
sudo sed -i '$aalias beelinehive="beeline -u jdbc:hive2://127.0.0.1:10000/"' /etc/profile
source /etc/profile
# Create the HDFS directories Hive needs and open up their permissions.
hadoop fs -mkdir /tmp
hadoop fs -mkdir -p /user/hive/
# NOTE(review): the official docs use "chmod g+w"; confirm "+rwx"/"+w" symbolic
# modes grant what is intended here.
hadoop fs -chmod +rwx /tmp
hadoop fs -chmod +w /user/hive/

使用MySQL #

  • host:ali.mm username:zian passwd:zian
# Fetch the MySQL JDBC driver into Hive's lib directory.
# NOTE(review): 5.1.49 / com.mysql.jdbc.Driver is the legacy Connector/J; for
# MySQL 8+ the 8.x connector (com.mysql.cj.jdbc.Driver) is usually required — verify.
wget -P $HIVE_HOME/lib https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar 
# Start from the shipped template and rewrite it in place for a MySQL metastore.
cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-site.xml
# Point javax.jdo.option.ConnectionURL at MySQL instead of embedded Derby.
sed -i "s|jdbc:derby:;databaseName=metastore_db;create=true|jdbc:mysql://ali.mm:3306/zian|g" $HIVE_HOME/conf/hive-site.xml
# Swap the JDBC driver class accordingly.
sed -i "s|org.apache.derby.jdbc.EmbeddedDriver|com.mysql.jdbc.Driver|g" $HIVE_HOME/conf/hive-site.xml
# Template defaults: ConnectionUserName is "APP", ConnectionPassword is "mine".
# NOTE(review): these are blanket global replaces across the whole file — any
# other occurrence of "APP" or "mine<" is rewritten too; confirm no collateral hits.
sed -i "s|APP|zian|g" $HIVE_HOME/conf/hive-site.xml
sed -i "s|mine<|zian<|g" $HIVE_HOME/conf/hive-site.xml
# Strip the "system:" prefix so placeholders like ${system:java.io.tmpdir}
# become resolvable ${java.io.tmpdir} (known template workaround).
sed -i "s|system:||g" $HIVE_HOME/conf/hive-site.xml
# Create the metastore schema in MySQL, then launch HiveServer2 in the background.
schematool -dbType mysql -initSchema
nohup hiveserver2 &

使用Derby #

  • 要注意文件目录的位置,schematool在当前目录下生成文件
# Embedded-Derby alternative: schematool creates metastore_db in the CURRENT
# working directory — run it (and hiveserver2) from the same directory every time.
schematool -dbType derby -initSchema
nohup hiveserver2 &
1亿数据生成代码
  • 导入csv文件
-- Staging table for the generated "id,md5" CSV pairs (plain text, comma-delimited).
CREATE TABLE IF NOT EXISTS md5 (
    id  STRING,
    md5 STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- Moves /md5.txt out of its HDFS location into the table's warehouse directory.
LOAD DATA INPATH '/md5.txt' INTO TABLE md5;
  • 生成代码
import org.apache.commons.codec.digest.DigestUtils;

import java.io.BufferedWriter;
import java.io.FileWriter;

public class MD5 {
    /**
     * Generates {@code number} unique "id,md5hex" CSV lines and appends them to
     * z:/md5.txt, flushing in batches of {@code round} lines.
     *
     * <p>Each id is the row index zero-padded to 8 digits; the second column is
     * the MD5 hex digest of that id string.
     *
     * @param args unused
     * @throws Exception on any I/O failure
     */
    public static void main(String[] args) throws Exception {
        final int number = 10000 * 10000; // total rows: 100 million
        final int round = 1000;           // rows buffered per write

        // try-with-resources closes (and flushes) the writer even on error;
        // the original never closed it. FileWriter's `true` flag means append.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("z:/md5.txt", true))) {
            StringBuilder batch = new StringBuilder();
            for (int y = 0; y <= number / round; y++) {
                // BUG FIX: the original used `x < round + 1`, so x == round was
                // emitted too — duplicating every multiple of `round` (that id
                // reappears as x == 0 of the next batch). `x < round` yields
                // exactly `number` unique ids.
                for (int x = 0; x < round && x + y * round < number; x++) {
                    String id = "00000000" + (x + y * round);
                    id = id.substring(id.length() - 8); // keep last 8 chars -> zero padded
                    batch.append(id).append(',').append(DigestUtils.md5Hex(id)).append('\n');
                }
                writer.append(batch);
                batch.setLength(0); // reuse the buffer instead of reallocating
            }
        }
    }
}

HQL #

  • 默认日志位置 /tmp/启动的用户名/hive.log
  • 需要注意在日志中出现的文件权限问题

-- Smoke-test table: create, seed a few rows, read them back.
CREATE TABLE IF NOT EXISTS testtable (
    id   INT,
    name STRING
);
INSERT INTO testtable VALUES (1, 'li'), (2, 'xiang'), (3, 'he');
SELECT * FROM testtable;
  • 保存结果到本地磁盘local
-- LOCAL: the result lands on the node's local filesystem, not HDFS.
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/out/1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
SELECT * FROM testtable;
  • 保存结果到hdfs(无local)
-- Without LOCAL the directory path is interpreted as an HDFS path.
INSERT OVERWRITE DIRECTORY '/tmp/out/2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
SELECT * FROM testtable;

tez 引擎(官方推荐的引擎,MR可以用但过时) #

# Download Tez 0.10.4 and unpack it straight into /opt/tez
# (--transform renames the top-level directory during extraction).
wget https://mirrors.aliyun.com/apache/tez/0.10.4/apache-tez-0.10.4-bin.tar.gz
tar -zxvf apache-tez-0.10.4-bin.tar.gz -C /opt/ --transform="s|apache-tez-0.10.4-bin|tez|g"
sudo sed -i '$aexport TEZ_HOME=/opt/tez' /etc/profile
source /etc/profile
# Minimal tez-site.xml: tez.lib.uris must point at the Tez tarball in HDFS
# (uploaded in a later step); heredoc body is written verbatim to the file.
cat > $TEZ_HOME/conf/tez-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <!-- 指定在hdfs上的tez包文件 -->
  <property>
    <name>tez.lib.uris</name>
    <value>hdfs://master:9000/user/tez/tez.tar.gz</value>
  </property>
</configuration>
EOF

将 hive.execution.engine 修改为 tez,将 mapreduce.framework.name 修改为 yarn-tez

mr修改为tez

  • 修改 /opt/hive/conf/hive-site.xml(注意:全局替换 mr 可能误伤其他配置项,建议只改 hive.execution.engine)
sed -i "s|mr|tez|g" $HIVE_HOME/conf/hive-site.xml

上传 $TEZ_HOME/share/tez.tar.gz 到 HDFS:
hadoop fs -mkdir -p /user/tez/
hadoop fs -put $TEZ_HOME/share/tez.tar.gz /user/tez

sudo sed -i '$aexport HADOOP_CLASSPATH=`hadoop classpath`' /etc/profile
sudo sed -i '$aexport TEZ_CONF_DIR=$HADOOP_CONF_DIR' /etc/profile
sudo sed -i '$aexport TEZ_JARS=$TEZ_HOME/*.jar:$TEZ_HOME/lib/*.jar' /etc/profile
sudo sed -i '$aexport HADOOP_CLASSPATH=$TEZ_CONF_DIR:$TEZ_JARS:$HADOOP_CLASSPATH' /etc/profile

https://blog.csdn.net/cc__cc___/article/details/134690366

tez+hadoop https://blog.csdn.net/qq_38628046/article/details/124775470

临时使用可在beeline执行 set hive.execution.engine=tez; set hive.execution.engine=mr;