Hadoop 3.4.1 Cluster Edition

HDFS for the Hadoop 3.4.1 Cluster #

https://www.runoob.com/w3cnote/hadoop-tutorial.html

Steps as root (recommended: bake these into a Docker image) #

sed -i "s|bookworm-updates|bookworm-updates bullseye|g" /etc/apt/sources.list.d/debian.sources
apt update && apt install -y openjdk-11-jdk
sudo apt install openjdk-11-jdk-headless -y
# persist PATH, JAVA_HOME and HADOOP_HOME for all login shells
sed -i '$aPATH=\$PATH:/usr/games:/opt/hadoop/bin:/opt/hadoop/sbin' /etc/profile
sed -i '$aexport JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64/' /etc/profile
sed -i '$aexport HADOOP_HOME=/opt/hadoop/' /etc/profile
source /etc/profile
sudo useradd -m -s /bin/bash -k /etc/skel hadoop
sudo usermod -aG sudo hadoop
echo 'hadoop:.' | chpasswd   # example password "." only; change it for real deployments
cd /opt
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz
tar -zxvf hadoop-3.4.1.tar.gz
mv /opt/hadoop-3.4.1 /opt/hadoop
sed -i "1 i export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" /opt/hadoop/etc/hadoop/hadoop-env.sh
mv /opt/hadoop-3.4.1.tar.gz /opt/hadoop
find /opt/hadoop/ -name '*.cmd' -delete   # remove the Windows launcher scripts
mkdir -p /opt/data   # backing dir for hadoop.tmp.dir and dfs.datanode.data.dir configured below
chown -R hadoop /opt
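
As a sanity check before moving on (a minimal sketch; it assumes the PATH and JAVA_HOME entries appended to /etc/profile above are picked up by login shells):
su - hadoop -c 'java -version && hadoop version'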

Passwordless SSH #

  • Run this on every machine as the hadoop user. The key pair below is only an example (generated with ssh-keygen -t rsa -b 1024); generate your own for anything beyond testing.
mkdir -p ~/.ssh
echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQDR5HpVwM8VvMcfS+gJBkW/IS6n7Pgcd1Rkq6AbxP7aShCFJewuPhQM6FslaARkWoAgH4wC3qtMxLkZ5fNgAPFewYEYuJXARGOIuBoAtwJYc7KYoDeYwkhFcnsWWAWezYOtqG3IapRfIgOcx3+j7HditZAn5nCsZit4WZAD+VdW3w== " >> ~/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 644 ~/.ssh/authorized_keys
cat > ~/.ssh/id_rsa <<EOF
-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAlwAAAAdzc2gtcn
NhAAAAAwEAAQAAAIEA0eR6VcDPFbzHH0voCQZFvyEup+z4HHdUZKugG8T+2koQhSXsLj4U
DOhbJWgEZFqAIB+MAt6rTMS5GeXzYADxXsGBGLiVwERjiLgaALcCWHOymKA3mMJIRXJ7Fl
gFns2DrahtyGqUXyIDnMd/o+x3YrWQJ+ZwrGYreFmQA/lXVt8AAAIIeMsZxnjLGcYAAAAH
c3NoLXJzYQAAAIEA0eR6VcDPFbzHH0voCQZFvyEup+z4HHdUZKugG8T+2koQhSXsLj4UDO
hbJWgEZFqAIB+MAt6rTMS5GeXzYADxXsGBGLiVwERjiLgaALcCWHOymKA3mMJIRXJ7FlgF
ns2DrahtyGqUXyIDnMd/o+x3YrWQJ+ZwrGYreFmQA/lXVt8AAAADAQABAAAAgEsyyIGgqp
DnKZAU6fURwjtP8B9JzzqjpiDLPHm2gnhCwNhzVcLbiuN0+/LS6X/qzKy1oLwGeujuMkMN
F7z1mYPqqcZmpVj97BF7ITQuzVJdEs6ujva/arp3V/FQYECLGHJ+pz+IBkJhVaXpKeLJOD
aeESEOy3jCp66fH6mFiFvBAAAAQGNHUW02lSyOGdoIPvxVk+boTXuZNYENCKOtBHTxX1Fd
vQBFMuWWvk0dEHZ1sSbGds+CHrrMGEJN6aIxKXSuKdoAAABBAPyYTB4PV6WpKiJPAy/F6m
MViOnfjfiX9/KqFYENd5EElm8FjjJ0lmC6YwJsSOPaB18BSmi2LpG9+QB7ujMHcgUAAABB
ANS40Jx9hA9HJ3gFQ06GS5Xr3sPjoOqkmCPFC+nhNBvdOxUwxfVsKly3qI5jjs9zZ3sben
5ge2hwzEq0xZMtxpMAAAAMcm9vdEB0ZXN0Lm1tAQIDBAUGBw==
-----END OPENSSH PRIVATE KEY-----
EOF
chmod 600 ~/.ssh/id_rsa
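
With the same key pair installed on every machine, a one-off test from the master confirms passwordless login works (hostnames as used throughout this page; accept-new just skips the interactive host-key prompt):
for h in 200.mm 201.mm 202.mm; do ssh -o StrictHostKeyChecking=accept-new "$h" hostname; done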

Edit the configuration #

  • /opt/hadoop/etc/hadoop/core-site.xml
  • /opt/hadoop/etc/hadoop/hdfs-site.xml
  • /opt/hadoop/etc/hadoop/yarn-site.xml
  • /opt/hadoop/etc/hadoop/mapred-site.xml
  • /opt/hadoop/etc/hadoop/workers
sed -i "20 i <property><name>fs.defaultFS</name><value>hdfs://200.mm:9000</value></property>" /opt/hadoop/etc/hadoop/core-site.xml
sed -i "20 i <property><name>hadoop.http.staticuser.user</name><value>hadoop</value></property>" /opt/hadoop/etc/hadoop/core-site.xml
sed -i "20 i <property><name>hadoop.tmp.dir</name><value>/opt/data/</value></property>" /opt/hadoop/etc/hadoop/core-site.xml
sed -i "20 i <property><name>dfs.replication</name><value>2</value></property>" /opt/hadoop/etc/hadoop/hdfs-site.xml
sed -i "20 i <property><name>dfs.datanode.data.dir</name><value>/opt/data/</value></property>" /opt/hadoop/etc/hadoop/hdfs-site.xml
sed -i "16 i <property><name>yarn.resourcemanager.hostname</name><value>200.mm</value></property>" /opt/hadoop/etc/hadoop/yarn-site.xml
sed -i "16 i <property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>" /opt/hadoop/etc/hadoop/yarn-site.xml
sed -i "16 i <property><name>yarn.application.classpath</name><value>`hadoop classpath`</value></property>" /opt/hadoop/etc/hadoop/yarn-site.xml
sed -i "20 i <property><name>mapreduce.framework.name</name><value>yarn</value></property>" /opt/hadoop/etc/hadoop/mapred-site.xml
echo -e '200.mm\n201.mm\n202.mm' > /opt/hadoop/etc/hadoop/workers
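
To confirm Hadoop actually picked these values up, hdfs getconf reads the same *-site.xml files (expected values shown as comments):
hdfs getconf -confKey fs.defaultFS   # hdfs://200.mm:9000
hdfs getconf -confKey dfs.replication   # 2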

Starting the master #

  • Format the NameNode first
hdfs namenode -format
start-dfs.sh
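
A few quick checks after start-dfs.sh returns (assuming default ports; the NameNode web UI listens on 9870 in Hadoop 3.x):
jps                     # expect NameNode and SecondaryNameNode on the master, DataNode on the workers
hdfs dfsadmin -report   # every worker should appear as a live datanode
# web UI: http://200.mm:9870/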

Adding a node #

  • Repeat the steps above on the new node
  • Log in from the master to the new node once to make sure passwordless SSH works
  • Update the workers file on the master
  • Run on the new machine:
hdfs --daemon start datanode
# then, on the master, extend the workers list:
echo -e '200.mm\n201.mm\n202.mm\n203.mm' > /opt/hadoop/etc/hadoop/workers
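
The new DataNode should register with the NameNode within a few seconds; this check, run on the master, is a minimal sketch (each live datanode prints one "Name:" line in the report):
hdfs dfsadmin -report | grep -c 'Name:'   # should now print 4
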
Docker base configuration #

version: "3.8"
#https://docs.docker.com/compose/compose-file/compose-file-v3/
networks:
  macvlan:
    driver: macvlan
    driver_opts:
      #parent: enp8s0
      parent: enp6s0
    ipam:
      config:
        - subnet: 10.0.0.0/24
          ip_range: 10.0.0.192/28
          gateway: 10.0.0.1
services:
  master:
    container_name: master
    hostname: 200.mm
    image: debian12.x
    tty: true
    mac_address: 02:00:00:00:00:00
    networks:
      macvlan:
        ipv4_address: 10.0.0.200

  slave1:
    container_name: slave1
    hostname: 201.mm
    image: debian12.x
    tty: true
    mac_address: 02:01:00:00:00:00
    networks:
      macvlan:
        ipv4_address: 10.0.0.201

  slave2:
    container_name: slave2
    hostname: 202.mm
    image: debian12.x
    tty: true
    mac_address: 02:02:00:00:00:00
    networks:
      macvlan:
        ipv4_address: 10.0.0.202

  slave3:
    container_name: slave3
    hostname: 203.mm
    image: debian12.x
    tty: true
    mac_address: 02:03:00:00:00:00
    networks:
      macvlan:
        ipv4_address: 10.0.0.203
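
To bring the cluster up (assuming the debian12.x image exists locally and enp6s0 really is the host NIC):
docker compose up -d
docker exec -it master bash   # then continue with the root steps above
Note that with macvlan the docker host itself cannot reach the containers; test connectivity from another machine on the LAN.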