TF in Practice | Installing Tungsten Fabric with Vagrant




    This article was originally written by Chen Gang, a network architect at Suning.

    01 Prepare the Test Machine

    It wouldn't run on my 16 GB laptop, so I pieced together a gaming-studio-grade machine: dual E5-2680v3 CPUs (24 cores / 48 threads), 128 GB of DDR4 ECC memory, and a 512 GB NVMe drive. On it I run five VMs that stand in for physical servers:

    · 192.16.35.110 deployer

    · 192.16.35.111 TF controller

    · 192.16.35.112 OpenStack server, also a compute node

    · 192.16.35.113 k8s master

    · 192.16.35.114 k8s node k01, also an OpenStack (ops) compute node

    Pulling the box image directly through vagrant is very slow, so download it first:

    https://cloud.centos.org/centos/7/vagrant/x86_64/images/

    Download the corresponding VirtualBox .box file.

    Then register it as a vagrant box named centos/7:

    vagrant box add centos/7 CentOS-7-x86_64-Vagrant-2004_01.VirtualBox.box
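
    For example, to fetch the 2004_01 image and verify the registration afterwards (file name taken from the mirror listing above; adjust to whatever version you actually downloaded):

    curl -LO https://cloud.centos.org/centos/7/vagrant/x86_64/images/CentOS-7-x86_64-Vagrant-2004_01.VirtualBox.box
    vagrant box list   # should now show centos/7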

    cat << 'EEOOFF' > vagrantfile
    ### start 
    # -*- mode: ruby -*-
    # vi: set ft=ruby :
    Vagrant.require_version ">=2.0.3"
    
    # All Vagrant configuration is done below. The "2" in Vagrant.configure
    # configures the configuration version (we support older styles for
    # backwards compatibility). Please don't change it unless you know what
    # you're doing.
    
    ENV["LC_ALL"] = "en_US.UTF-8"
    
    VAGRANTFILE_API_VERSION = "2"
    
    Vagrant.configure("2") do |config|
      # The most common configuration options are documented and commented below.
      # For a complete reference, please see the online documentation at
      # https://docs.vagrantup.com.
    
      # Every Vagrant development environment requires a box. You can search for
      # boxes at https://atlas.hashicorp.com/search.
    
      config.vm.box = "centos/7"   # the box registered above
      # config.vbguest.auto_update = false
      # config.vbguest.no_remote = true  
    
      config.vm.define "deployer" do | dp |
        dp.vm.provider "virtualbox" do | v |
          v.memory = "8000"
          v.cpus = 2
        end
        dp.vm.network "private_network", ip: "192.16.35.110", auto_config: true
        dp.vm.hostname = "deployer"
      end
    
      config.vm.define "tf" do | tf |
        tf.vm.provider "virtualbox" do | v |
          v.memory = "64000"
          v.cpus = 16
        end
        tf.vm.network "private_network", ip: "192.16.35.111", auto_config: true
        tf.vm.hostname = "tf"
      end
    
      config.vm.define "ops" do | os |
        os.vm.provider "virtualbox" do | v |
          v.memory = "16000"
          v.cpus = 4
        end
        os.vm.network "private_network", ip: "192.16.35.112", auto_config: true
        os.vm.hostname = "ops"
      end
    
      config.vm.define "k8s" do | k8 |
        k8.vm.provider "virtualbox" do | v |
          v.memory = "8000"
          v.cpus = 2
        end
        k8.vm.network "private_network", ip: "192.16.35.113", auto_config: true
        k8.vm.hostname = "k8s"
      end
    
      config.vm.define "k01" do | k1 |
        k1.vm.provider "virtualbox" do | v |
          v.memory = "4000"
          v.cpus = 2
        end
        k1.vm.network "private_network", ip: "192.16.35.114", auto_config: true
        k1.vm.hostname = "k01"
      end
    
      config.vm.provision "shell", privileged: true, path: "./setup.sh"
    
    end
    
    
    EEOOFF
    
    cat << 'EEOOFF' > setup.sh
    #!/bin/bash
    #
    # Setup vagrant vms.
    #
    
    set -eu
    
    # Copy hosts info
    cat << EOF > /etc/hosts
    127.0.0.1 localhost
    127.0.1.1 vagrant.vm vagrant
    
    192.16.35.110 deployer
    192.16.35.111 tf
    192.16.35.112 ops
    192.16.35.113 k8s
    192.16.35.114 k01
    
    
    # The following lines are desirable for IPv6 capable hosts
    ::1     localhost ip6-localhost ip6-loopback
    ff02::1 ip6-allnodes
    ff02::2 ip6-allrouters
    EOF
    
    systemctl stop firewalld
    systemctl disable firewalld
    iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat
    iptables -P FORWARD ACCEPT
    
    swapoff -a 
    sed -i 's/.*swap.*/#&/' /etc/fstab
    # swapoff -a && sysctl -w vm.swappiness=0
    
    # setenforce  0 
    sed -i "s/^SELINUX=enforcing/SELINUX=disabled/g" /etc/sysconfig/selinux 
    sed -i "s/^SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config 
    sed -i "s/^SELINUX=permissive/SELINUX=disabled/g" /etc/sysconfig/selinux 
    sed -i "s/^SELINUX=permissive/SELINUX=disabled/g" /etc/selinux/config  
    
    # modprobe ip_vs_rr
    modprobe br_netfilter
    
    yum -y update
    
    # sysctl: cannot stat /proc/sys/net/bridge/bridge-nf-call-ip6tables: No such file or directory
    # sysctl: cannot stat /proc/sys/net/bridge/bridge-nf-call-iptables: No such file or directory
    # yum install -y bridge-utils.x86_64
    # modprobe bridge
    # modprobe br_netfilter
    # Setup system vars
    
    yum install -y epel-release
    yum install -y yum-utils device-mapper-persistent-data lvm2 net-tools vim chrony python python-setuptools python-pip iproute lrzsz tree git
    
    yum install -y libguestfs-tools libvirt-python virt-install libvirt ansible
    
    pip install wheel --upgrade -i https://mirrors.aliyun.com/pypi/simple/
    pip install pip --upgrade -i https://mirrors.aliyun.com/pypi/simple/
    pip install ansible  netaddr --upgrade -i https://mirrors.aliyun.com/pypi/simple/
    
    # python-urllib3 should be installed before "pip install requests"
    # if install failed, pip uninstall urllib3, then reinstall python-urllib3
    # pip uninstall -y urllib3 | true
    # yum install -y python-urllib3 
    pip install requests -i https://mirrors.aliyun.com/pypi/simple/
    
    systemctl disable libvirtd.service
    systemctl disable dnsmasq
    systemctl stop libvirtd.service
    systemctl stop dnsmasq
    
    if [  -d "/root/.ssh" ]; then
          rm -rf /root/.ssh
    fi
    
    ssh-keygen -q -t rsa -N "" -f ~/.ssh/id_rsa
    
    cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
    chmod go-rwx ~/.ssh/authorized_keys
    
    # 
    # timedatectl set-timezone Asia/Shanghai
    
    if [ -f "/etc/chrony.conf" ]; then
       mv /etc/chrony.conf /etc/chrony.conf.bak
    fi
    
    cat << EOF > /etc/chrony.conf
          allow 192.16.35.0/24
          server ntp1.aliyun.com iburst
          local stratum 10
          logdir /var/log/chrony
          rtcsync
          makestep 1.0 3
          driftfile /var/lib/chrony/drift
    EOF
    
    systemctl restart chronyd.service
    systemctl enable chronyd.service
    
    echo "* soft nofile 65536" >> /etc/security/limits.conf
    echo "* hard nofile 65536" >> /etc/security/limits.conf
    echo "* soft nproc 65536"  >> /etc/security/limits.conf
    echo "* hard nproc 65536"  >> /etc/security/limits.conf
    echo "* soft  memlock  unlimited"  >> /etc/security/limits.conf
    echo "* hard memlock  unlimited"  >> /etc/security/limits.conf
    
    if [ ! -d "/var/log/journal" ]; then
      mkdir /var/log/journal
    fi
    
    if [ ! -d "/etc/systemd/journald.conf.d" ]; then
      mkdir /etc/systemd/journald.conf.d
    fi
    
    cat << EOF > /etc/systemd/journald.conf.d/99-prophet.conf
    [Journal]
    Storage=persistent
    
    Compress=yes
    
    SyncIntervalSec=5m
    RateLimitInterval=30s
    RateLimitBurst=1000
    
    SystemMaxUse=10G
    
    SystemMaxFileSize=200M
    
    ForwardToSyslog=no
    
    EOF
    
    systemctl restart systemd-journald
    
    
    EEOOFF
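
    With the Vagrantfile and setup.sh in place, bring up the five VMs (assumes Vagrant and VirtualBox are already installed on the host):

    vagrant up            # create and provision all five VMs
    vagrant status        # every machine should report "running"
    vagrant ssh deployer  # spot-check SSH access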
    

    02 Install Docker on All Nodes
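
    A minimal sketch of the docker installation on CentOS 7, assuming the docker-ce packages from the aliyun mirror:

    yum install -y yum-utils
    yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
    yum install -y docker-ce
    systemctl enable docker
    systemctl start docker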

    For example, if pip is slow installing packages on CentOS, consider the aliyun pip mirror.

    · Configure pip acceleration on each node:

    mkdir -p ~/.pip && tee ~/.pip/pip.conf << EOF
    [global]
    index-url = https://mirrors.aliyun.com/pypi/simple/
    EOF
    Next, describe the deployment topology in instances.yaml and copy it into the deployer container:

    cat << EOF > instances.yaml
    provider_config:
      bms:
        ssh_pwd: vagrant
        ssh_user: root
        ntpserver: ntp1.aliyun.com
        domainsuffix: local
    instances:
      tf:
        provider: bms
        ip: 192.16.35.111
        roles:
          config_database:
          config:
          control:
          analytics_database:
          analytics:
          webui:
      ops:
        provider: bms
        ip: 192.16.35.112
        roles:
          openstack:
          openstack_compute:
          vrouter:
            PHYSICAL_INTERFACE: enp0s8
      k8s:
        provider: bms
        ip: 192.16.35.113
        roles:
          k8s_master:
          k8s_node:
          kubemanager:
          vrouter:
            PHYSICAL_INTERFACE: enp0s8
      k01:
        provider: bms
        ip: 192.16.35.114
        roles:
          openstack_compute:
          k8s_node:
          vrouter:
            PHYSICAL_INTERFACE: enp0s8
    contrail_configuration:
      AUTH_MODE: keystone
      KEYSTONE_AUTH_URL_VERSION: /v3
      KEYSTONE_AUTH_ADMIN_PASSWORD: vagrant
      CLOUD_ORCHESTRATOR: openstack
      CONTRAIL_VERSION: latest
      UPGRADE_KERNEL: true
      ENCAP_PRIORITY: "VXLAN,MPLSoUDP,MPLSoGRE"
      PHYSICAL_INTERFACE: enp0s8
    global_configuration:
      CONTAINER_REGISTRY: opencontrailnightly
    kolla_config:
      kolla_globals:
        enable_haproxy: no
        enable_ironic: "no"
        enable_swift: "no"
        network_interface: "enp0s8"
      kolla_passwords:
        keystone_admin_password: vagrant
    EOF
    
    export INSTANCES_FILE=instances.yaml
    docker cp $INSTANCES_FILE contrail_kolla_ansible_deployer:/root/contrail-ansible-deployer/config/instances.yaml
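
    As a sanity check, confirm the file landed inside the container:

    docker exec contrail_kolla_ansible_deployer cat /root/contrail-ansible-deployer/config/instances.yaml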
    

    05 Prepare the Environment on All Nodes

    Except for the deployer, I went through this on every node.

    The proper approach is to set up your own registry to hold the various images; with only a few nodes in a lab environment, pulling directly from domestic mirrors is fast enough.

    Note that the pip packages docker and docker-py conflict; only one of them can be installed. It is best to uninstall both first, then install the one you need:

    pip uninstall -y docker-py docker
    pip install docker
    
    yum -y install python-devel python-subprocess32 python-setuptools python-pip
    
    pip install --upgrade pip
    
    # remove stale subprocess32 egg-info that breaks the install
    find / -name '*subpro*.egg-info' | xargs rm -rf
    
    pip install -I six
    pip install -I docker-compose
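
    To confirm the right packages won out, a quick optional check (the import name docker refers to the pip docker SDK):

    python -c 'import docker; print(docker.__version__)'
    docker-compose --version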
    

    Point the k8s package repository at the aliyun mirror; the default Google repo is too slow or unreachable. Edit:

    vi playbooks/roles/k8s/tasks/RedHat.yml

    yum_repository:
      name: Kubernetes
      description: k8s repo
      baseurl: https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
      gpgkey: https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
      repo_gpgcheck: yes
      gpgcheck: yes
    when: k8s_package_version is defined
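
    After this change, you can verify from a node that the aliyun repo is reachable (an optional check, my addition):

    yum repolist | grep -i kubernetes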
    

    The playbook installs these images from sites that are slow or blocked in China; you can pull equivalents from a domestic registry and then re-tag them:

    k8s.gcr.io/kube-apiserver:v1.14.8
    k8s.gcr.io/kube-controller-manager:v1.14.8
    k8s.gcr.io/kube-scheduler:v1.14.8
    k8s.gcr.io/kube-proxy:v1.14.8
    k8s.gcr.io/pause:3.1
    k8s.gcr.io/etcd:3.3.10
    k8s.gcr.io/coredns:1.3.1
    

    Work around it as follows:

    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.14.8
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.14.8
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.14.8
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.14.8
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.1
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.3.10
    docker pull coredns/coredns:1.3.1
    docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kubernetes-dashboard-amd64:v1.8.3
    

    Then re-tag the downloaded images:

    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.14.8 k8s.gcr.io/kube-apiserver:v1.14.8
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.14.8 k8s.gcr.io/kube-controller-manager:v1.14.8
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.14.8 k8s.gcr.io/kube-scheduler:v1.14.8
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.14.8 k8s.gcr.io/kube-proxy:v1.14.8
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.1 k8s.gcr.io/pause:3.1
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.3.10 k8s.gcr.io/etcd:3.3.10
    docker tag docker.io/coredns/coredns:1.3.1 k8s.gcr.io/coredns:1.3.1
    docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kubernetes-dashboard-amd64:v1.8.3  k8s.gcr.io/kubernetes-dashboard-amd64:v1.8.3
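
    Equivalently, the google_containers pulls and re-tags can be collapsed into one loop (a sketch of the same commands above; only coredns comes from a different namespace, so handle it separately):

    MIRROR=registry.cn-hangzhou.aliyuncs.com/google_containers
    for img in kube-apiserver:v1.14.8 kube-controller-manager:v1.14.8 \
               kube-scheduler:v1.14.8 kube-proxy:v1.14.8 pause:3.1 \
               etcd:3.3.10 kubernetes-dashboard-amd64:v1.8.3; do
        docker pull ${MIRROR}/${img}
        docker tag ${MIRROR}/${img} k8s.gcr.io/${img}
    done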
    

    06 Start the Deployer Container and Deploy from Inside It

    docker start contrail_kolla_ansible_deployer
    

    Enter the deployer container:

    docker exec -it contrail_kolla_ansible_deployer bash
    cd /root/contrail-ansible-deployer
    ansible-playbook -i inventory/ -e orchestrator=openstack playbooks/provision_instances.yml
    ansible-playbook -i inventory/ -e orchestrator=openstack playbooks/configure_instances.yml
    ansible-playbook -i inventory/ -e orchestrator=openstack playbooks/install_openstack.yml
    ansible-playbook -i inventory/ -e orchestrator=openstack playbooks/install_k8s.yml
    ansible-playbook -i inventory/ -e orchestrator=openstack playbooks/install_contrail.yml
    
    
    
    Remove the master taint so that pods can also be scheduled on the k8s master:
    
    kubectl taint nodes k8s node-role.kubernetes.io/master-
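
    A couple of optional sanity checks at this point (my additions, not part of the original steps):

    contrail-status             # on the tf node: services should report active
    kubectl get nodes -o wide   # on the k8s master: nodes should be Ready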
    

    The last time I upgraded kubelet to the latest version, I hit a CSI bug; editing the config file and restarting kubelet fixes it:

    After hitting the issue, edit /var/lib/kubelet/config.yaml to add:
    
    featureGates:
      CSIMigration: false
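
    Then restart kubelet to apply the change:

    systemctl daemon-reload
    systemctl restart kubelet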
    

    07 After Installation, Test with Two VMs and Containers

    yum install -y gcc python-devel
    pip install python-openstackclient
    pip install python-ironicclient
    
    source /etc/kolla/kolla-toolbox/admin-openrc.sh
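
    An optional check that the client can now authenticate:

    openstack token issue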
    

    If the openstack command fails with the following "queue" error, python3 is required:

    File "/usr/lib/python2.7/site-packages/openstack/utils.py", line 13, in 
        import queue
    ImportError: No module named queue
    
    # Run all yum commands first: yum itself needs python2,
    # so install python3-pip and the build deps before re-pointing /usr/bin/python
    yum install -y python3-pip
    yum install -y gcc python-devel wget
    
    rm -f /usr/bin/python
    ln -s /usr/bin/python3 /usr/bin/python
    
    pip install --upgrade setuptools
    pip install --ignore-installed python-openstackclient
    pip install python-ironicclient
    
    I need python3 every time anyway, so I simply installed these as well:
    pip3 install python-openstackclient -i https://mirrors.aliyun.com/pypi/simple/
    pip3 install python-ironicclient -i https://mirrors.aliyun.com/pypi/simple/
    

    To reach the Tungsten Fabric web UI, browse to: https://192.16.35.111:8143

    To reach the OpenStack dashboard, browse to: https://192.16.35.112

    On the k8s master (192.16.35.113):

    scp root@192.16.35.114:/opt/cni/bin/contrail-k8s-cni /opt/cni/bin/
    mkdir -p /etc/cni/net.d
    scp root@192.16.35.114:/etc/cni/net.d/10-contrail.conf /etc/cni/net.d/10-contrail.conf
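
    With the CNI binary and config in place, the master node should go Ready shortly (an optional check):

    kubectl get nodes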
    

    Download a cirros image:

    wget https://github.com/cirros-dev/cirros/releases/download/0.4.0/cirros-0.4.0-x86_64-disk.img

    Official download site:

    https://download.cirros-cloud.net/

    Any of these also works:

    curl -O https://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img
    wget http://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img
    wget http://download.cirros-cloud.net/daily/20161201/cirros-d161201-x86_64-disk.img

    (I could not find a build that includes tcpdump.)

    reboot

    source /etc/kolla/kolla-toolbox/admin-openrc.sh

    openstack image create cirros --disk-format qcow2 --public --container-format bare --file cirros-0.4.0-x86_64-disk.img
    nova flavor-create m1.tiny auto 512 1 1
    openstack network create net1
    openstack subnet create --subnet-range 10.1.1.0/24 --network net1 mysubnet1
    NET_ID=`openstack network list | grep net1 | awk -F '|' '{print $2}' | tr -d ' '`
     
    nova boot --image cirros --flavor m1.tiny --nic net-id=${NET_ID} VM1
    nova boot --image cirros --flavor m1.tiny --nic net-id=${NET_ID} VM2
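
    Both VMs should reach ACTIVE once scheduled (an optional check):

    openstack server list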
    

    Log in to the k8s master, 192.16.35.113:

    yum install -y git
    git clone https://github.com/virtualhops/k8s-demo
    kubectl create -f k8s-demo/po-ubuntuapp.yml
    kubectl create -f k8s-demo/rc-frontend.yml
    kubectl expose rc/frontend
    kubectl exec -it ubuntuapp curl frontend # many times
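
    To see the demo pods and their overlay IPs (an optional check):

    kubectl get pods -o wide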
    

    Reference:
    https://github.com/Juniper/contrail-ansible-deployer/wiki/[-Container-Workflow]-Deploying-Contrail-with-OpenStack

    Recommended reading

    Tungsten Fabric in Practice: Pitfalls of Integrating the vMX Virtual Routing Platform
    Tungsten Fabric in Practice: Pitfalls of a Kubernetes-Based Deployment
    TF Q&A | If You Don't Understand It Thoroughly, You Won't Know What to Do When Things Break
    TF Q&A | In This Very Network, Yet Hidden Deep in the Clouds

