Hive 4.0.1 Installation and Deployment #
- Currently running hive 4.0.1 + hadoop 3.4.1 + jdk 11.0.24; works fine
- Pay close attention to user-permission problems reported in the logs
- The official image pairs hive 4.0.1 with hadoop 3.3.6
- https://hive.apache.org/docs/latest/manual-installation_283118363/
Download and Install #
- Watch Hadoop impersonation (proxy user) settings and HDFS file permissions
sed -i "20 i <property><name>hadoop.proxyuser.hadoop.hosts</name><value>*</value></property>" ${HADOOP_HOME}/etc/hadoop/core-site.xml
sed -i "20 i <property><name>hadoop.proxyuser.hadoop.groups</name><value>*</value></property>" ${HADOOP_HOME}/etc/hadoop/core-site.xml
wget https://mirrors.aliyun.com/apache/hive/hive-4.0.1/apache-hive-4.0.1-bin.tar.gz
tar -zxvf apache-hive-4.0.1-bin.tar.gz
mv apache-hive-4.0.1-bin hive
mv hive /opt/
sudo sed -i '$aexport HIVE_HOME=/opt/hive' /etc/profile
sudo sed -i '$aPATH=\$PATH:\$HIVE_HOME/bin/' /etc/profile
sudo sed -i '$aalias beelinehive="beeline -u jdbc:hive2://127.0.0.1:10000/"' /etc/profile
source /etc/profile
hadoop fs -mkdir /tmp
hadoop fs -mkdir -p /user/hive/
hadoop fs -chmod +rwx /tmp
hadoop fs -chmod +w /user/hive/
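- Optional sanity check before moving on: confirm the HDFS directories exist and the Hive binaries are on PATH (a minimal sketch)
hadoop fs -ls -d /tmp /user/hive
hive --version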
Using MySQL #
- MySQL connection: host ali.mm, username zian, password zian
wget -P $HIVE_HOME/lib https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar
cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-site.xml
sed -i "s|jdbc:derby:;databaseName=metastore_db;create=true|jdbc:mysql://ali.mm:3306/zian|g" $HIVE_HOME/conf/hive-site.xml
sed -i "s|org.apache.derby.jdbc.EmbeddedDriver|com.mysql.jdbc.Driver|g" $HIVE_HOME/conf/hive-site.xml
sed -i "s|APP|zian|g" $HIVE_HOME/conf/hive-site.xml
sed -i "s|mine<|zian<|g" $HIVE_HOME/conf/hive-site.xml
sed -i "s|system:||g" $HIVE_HOME/conf/hive-site.xml
schematool -dbType mysql -initSchema
nohup hiveserver2 &
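- Once HiveServer2 is up (it can take a minute or two to open the port), a smoke test; a sketch, assuming the hadoop user and the default port 10000:
schematool -dbType mysql -info
beeline -u jdbc:hive2://127.0.0.1:10000/ -n hadoop -e "show databases;"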
Using Derby #
- Mind the working directory: schematool creates the Derby metastore files in the directory it is run from
schematool -dbType derby -initSchema
nohup hiveserver2 &
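- Since embedded Derby writes metastore_db/ and derby.log into the current directory, start hiveserver2 from the same directory where schematool was run; a quick check (a sketch):
ls -d metastore_db derby.log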
Generating 100 Million Test Rows #
- Import the CSV file
CREATE TABLE IF NOT EXISTS md5 (id STRING,md5 STRING)
row format delimited fields terminated by ',' stored as textfile;
load data inpath '/md5.txt' into table md5;
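- LOAD DATA INPATH (without LOCAL) reads from HDFS, so the file produced by the generator below has to be uploaded first; a sketch, assuming the generated file has been copied onto the Hadoop node as md5.txt and HiveServer2 listens on the default port:
hadoop fs -put md5.txt /md5.txt
beeline -u jdbc:hive2://127.0.0.1:10000/ -e "select count(*) from md5;"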
- Generator code
import org.apache.commons.codec.digest.DigestUtils;

import java.io.BufferedWriter;
import java.io.FileWriter;

public class MD5 {
    public static void main(String[] args) throws Exception {
        final int number = 10000 * 10000; // total rows: 100 million
        final int round = 1000;           // rows buffered per write
        StringBuilder buffer = new StringBuilder();
        // FileWriter needs the second argument (true) to append instead of overwrite
        BufferedWriter writer = new BufferedWriter(new FileWriter("z:/md5.txt", true));
        for (int y = 0; y < number / round; y++) {
            for (int x = 0; x < round; x++) {
                // zero-pad the id to 8 digits, e.g. 00000042
                String data = "00000000" + (x + y * round);
                data = data.substring(data.length() - 8);
                buffer.append(data).append(',').append(DigestUtils.md5Hex(data)).append('\n');
            }
            writer.append(buffer);
            buffer.setLength(0); // reuse the buffer for the next batch
        }
        writer.close(); // flushes the last batch and releases the file
    }
}
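- To compile and run the generator, a sketch; the commons-codec jar name/version is an assumption, use whichever you have locally, swap the : classpath separator for ; on Windows, and adjust the output path in the code for your OS:
javac -cp commons-codec-1.15.jar MD5.java
java -cp .:commons-codec-1.15.jar MD5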
HQL #
- Default log location: /tmp/<user who started the service>/hive.log
- Watch for file-permission problems reported in that log
CREATE TABLE IF NOT EXISTS testtable (id INT,name STRING);
insert into testtable VALUES (1,"li"),(2,"xiang"),(3,"he");
select * from testtable;
- Save query results to the local filesystem (LOCAL)
insert overwrite local directory "/tmp/out/1"
row format delimited fields terminated by "|"
select * from testtable;
- Save query results to HDFS (without LOCAL)
insert overwrite directory "/tmp/out/2"
row format delimited fields terminated by "|"
select * from testtable;
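- A quick look at the exported files (paths taken from the two statements above; the part-file names depend on the engine, hence the wildcards):
cat /tmp/out/1/*
hadoop fs -cat '/tmp/out/2/*'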
Tez Engine (the officially recommended engine; MR still works but is deprecated) #
- https://blog.csdn.net/cc__cc___/article/details/134690366
- https://mirrors.aliyun.com/apache/tez/0.10.4/apache-tez-0.10.4-bin.tar.gz
wget https://mirrors.aliyun.com/apache/tez/0.10.4/apache-tez-0.10.4-bin.tar.gz
tar -zxvf apache-tez-0.10.4-bin.tar.gz -C /opt/ --transform="s|apache-tez-0.10.4-bin|tez|g"
sudo sed -i '$aexport TEZ_HOME=/opt/tez' /etc/profile
source /etc/profile
cat > $TEZ_HOME/conf/tez-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <!-- Location of the Tez tarball on HDFS -->
  <property>
    <name>tez.lib.uris</name>
    <value>hdfs://master:9000/user/tez/tez.tar.gz</value>
  </property>
</configuration>
EOF
- In $HIVE_HOME/conf/hive-site.xml change hive.execution.engine from mr to tez, and set mapreduce.framework.name (normally in Hadoop's mapred-site.xml) to yarn-tez
sed -i "s|mr|tez|g" $HIVE_HOME/conf/hive-site.xml
- Upload the Tez tarball to HDFS
hadoop fs -mkdir -p /user/tez/
hadoop fs -put $TEZ_HOME/share/tez.tar.gz /user/tez
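- Confirm the upload matches tez.lib.uris in tez-site.xml above (hdfs://master:9000/user/tez/tez.tar.gz):
hadoop fs -ls /user/tez/tez.tar.gz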
sudo sed -i '$aexport HADOOP_CLASSPATH=$(hadoop classpath)' /etc/profile
sudo sed -i '$aexport TEZ_CONF_DIR=$HADOOP_CONF_DIR' /etc/profile
sudo sed -i '$aexport TEZ_JARS=$TEZ_HOME/*.jar:$TEZ_HOME/lib/*.jar' /etc/profile
sudo sed -i '$aexport HADOOP_CLASSPATH=$TEZ_CONF_DIR:$TEZ_JARS:$HADOOP_CLASSPATH' /etc/profile
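- Reload the profile so the Tez variables take effect (the earlier source only picked up TEZ_HOME), then check:
source /etc/profile
echo $HADOOP_CLASSPATH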
- Tez + Hadoop: https://blog.csdn.net/qq_38628046/article/details/124775470
- For a temporary switch, run in beeline: set hive.execution.engine=tez; or set hive.execution.engine=mr;
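- To confirm queries actually run on Tez after restarting HiveServer2, print the current engine and run something small (a sketch; assumes the testtable created earlier):
beeline -u jdbc:hive2://127.0.0.1:10000/ -e "set hive.execution.engine; select count(*) from testtable;"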