First, gather the required files on the parent Ubuntu OS into a directory named docker, as shown below.
root@firestone:~/docker# ls -l
total 2369580
-rwxr-xr-x 1 root root 284629257 Oct 31 22:27 Anaconda2-4.4.0.1-Linux-ppc64le.sh
-rwxr-xr-x 1 root root 299425582 Oct 31 22:28 Anaconda3-4.4.0.1-Linux-ppc64le.sh
-rw-r--r-- 1 root root 1321330418 Oct 31 22:35 cuda-repo-ubuntu1604-8-0-local-ga2v2_8.0.61-1_ppc64el.deb
-rwxr-xr-x 1 root root 8788 Oct 31 21:40 debootstrap.sh
-rw-r--r-- 1 root root 68444212 Oct 31 22:35 libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
-rw-r--r-- 1 root root 59820704 Oct 31 22:35 libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb
-rw-r--r-- 1 root root 6575300 Oct 31 22:35 libcudnn6-doc_6.0.21-1+cuda8.0_ppc64el.deb
-rw-r--r-- 1 root root 386170568 Oct 31 22:36 mldl-repo-local_4.0.0_ppc64el.deb
drwxr-xr-x 21 root root 4096 Oct 31 21:55 ubuntu
The nvidia/cuda-ppc64le image has already been pulled with the docker pull command.
root@firestone:~/docker# docker images | grep nvidia
nvidia-docker build 405ee913a07e About an hour ago 1.02GB
nvidia/cuda-ppc64le 8.0-cudnn6-runtime-ubuntu16.04 bf28cd22ff84 6 weeks ago 974MB
nvidia/cuda-ppc64le latest 9b0a21e35c66 6 weeks ago 1.72GB
Now start nvidia/cuda-ppc64le:latest in interactive mode, mounting the docker directory into the container as /docker.
root@firestone:~/docker# docker run -ti -v ~/docker:/docker nvidia/cuda-ppc64le:latest bash
We are now inside nvidia/cuda-ppc64le:latest. Go to /docker and check that the same files are visible.
root@deeed8ce922f:/# cd /docker
root@deeed8ce922f:/docker# ls
Anaconda2-4.4.0.1-Linux-ppc64le.sh libcudnn6-doc_6.0.21-1+cuda8.0_ppc64el.deb
Anaconda3-4.4.0.1-Linux-ppc64le.sh libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
cuda-repo-ubuntu1604-8-0-local-ga2v2_8.0.61-1_ppc64el.deb mldl-repo-local_4.0.0_ppc64el.deb
debootstrap.sh ubuntu
libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb
Install libcudnn6 first. Since we may also use NCCL, bazel, and so on, install the PowerAI 4.0 local repo (mldl-repo-local_4.0.0_ppc64el.deb) as well.
root@deeed8ce922f:/docker# dpkg -i libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb mldl-repo-local_4.0.0_ppc64el.deb
root@deeed8ce922f:/docker# apt-get update
Now install cuda, nccl, openblas, and related packages.
root@deeed8ce922f:/docker# apt-get install cuda
root@deeed8ce922f:/docker# apt-get install -y libnccl-dev libnccl1 python-ncclient bazel libopenblas-dev libopenblas libopenblas-base
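Before going further, it is worth a quick sanity check that cuDNN, the PowerAI repo packages, and the CUDA toolkit really landed in the container. The commands below are an extra check added here, not part of the original session:

# cuDNN runtime/dev packages should show up as installed
dpkg -l | grep libcudnn6
# NCCL from the PowerAI local repo
dpkg -l | grep libnccl
# the CUDA 8.0 toolkit should now be under /usr/local
/usr/local/cuda-8.0/bin/nvcc --version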
Now, in a separate ssh session on the parent OS, check the ID of the container we are using with the docker ps command.
root@firestone:~# docker ps | grep -v k8s
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
deeed8ce922f nvidia/cuda-ppc64le:latest "bash" About an hour ago Up About an hour gracious_bhaskara
Run docker commit against that container ID as follows.
root@firestone:~# docker commit deeed8ce922f bsyu/libcudnn6-ppc64le:xenial
You can now see that a new docker image has been created.
root@firestone:~# docker images | grep -v ibm
REPOSITORY TAG IMAGE ID CREATED SIZE
bsyu/libcudnn6-ppc64le xenial 6d621d9d446b 48 seconds ago 7.52GB
nvidia-docker build 405ee913a07e 2 hours ago 1.02GB
nvidia/cuda-ppc64le 8.0-cudnn6-runtime-ubuntu16.04 bf28cd22ff84 6 weeks ago 974MB
nvidia/cuda-ppc64le latest 9b0a21e35c66 6 weeks ago 1.72GB
ppc64le/golang 1.6.3 6a579d02d32f 14 months ago 705MB
After tagging it appropriately, log in to docker and push it to Docker Hub.
root@firestone:~# docker tag bsyu/libcudnn6-ppc64le:xenial bsyu/libcudnn6-ppc64le:latest
root@firestone:~# docker login -u bsyu
Password:
Login Succeeded
root@firestone:~# docker push bsyu/libcudnn6-ppc64le:xenial
The push refers to a repository [docker.io/bsyu/libcudnn6-ppc64le]
de3b55a17936: Pushed
9eb05620c635: Mounted from nvidia/cuda-ppc64le
688827f0a03b: Mounted from nvidia/cuda-ppc64le
a36322f4fa68: Mounted from nvidia/cuda-ppc64le
6665818dfb83: Mounted from nvidia/cuda-ppc64le
4cad4acd0601: Mounted from nvidia/cuda-ppc64le
f12b406a6a23: Mounted from nvidia/cuda-ppc64le
bb179c8bb840: Mounted from nvidia/cuda-ppc64le
cd51df595e0c: Mounted from nvidia/cuda-ppc64le
4a7a95d650cf: Mounted from nvidia/cuda-ppc64le
22c3301fbf0b: Mounted from nvidia/cuda-ppc64le
xenial: digest: sha256:3993ac50b857979694cdc41cf12d672cc078583f1babb79f6c25e0688ed603ed size: 2621
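Each tag is pushed individually, so if you also want the latest tag we created above to be available on Docker Hub, it needs its own push; pulling the image back is an easy way to confirm the push worked. Both commands below are optional extras, not part of the original session:

# push the additional tag as well
docker push bsyu/libcudnn6-ppc64le:latest
# optional round-trip check of the pushed image
docker pull bsyu/libcudnn6-ppc64le:xenial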
Now install caffe2 on top of this. In an earlier post (http://hwengineer.blogspot.kr/2017/10/minsky-caffe2-jupyter-notebook-mnist.html) I built caffe2 and tarred up the whole /opt/caffe2 directory; here I extract that tarball.
root@deeed8ce922f:/docker# ls
Anaconda2-4.4.0.1-Linux-ppc64le.sh libcudnn6-doc_6.0.21-1+cuda8.0_ppc64el.deb
Anaconda3-4.4.0.1-Linux-ppc64le.sh libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
caffe2.tgz mldl-repo-local_4.0.0_ppc64el.deb
cuda-repo-ubuntu1604-8-0-local-ga2v2_8.0.61-1_ppc64el.deb site-packages.tgz
debootstrap.sh ubuntu
libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb
root@deeed8ce922f:/docker# cd /opt
root@deeed8ce922f:/opt# tar -zxf /docker/caffe2.tgz
root@deeed8ce922f:/opt# vi ~/.bashrc
...
export LD_LIBRARY_PATH=/opt/DL/nccl/lib:/opt/DL/openblas/lib:/usr/local/cuda-8.0/lib64:/usr/lib:/usr/local/lib:/opt/caffe2/lib:/usr/lib/powerpc64le-linux-gnu
export PATH=/opt/anaconda2/bin:/opt/caffe2/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
export PYTHONPATH=/opt/caffe2
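Note that edits to ~/.bashrc only take effect in a new shell; to pick them up in the current session, source the file (an explicit step not shown in the original transcript):

# reload the environment variables in the current shell
source ~/.bashrc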
Install the additional packages that caffe2 needs in order to work properly.
root@deeed8ce922f:/opt# conda install protobuf future
root@deeed8ce922f:/opt# apt-get install libprotobuf-dev python-protobuf libgoogle-glog-dev libopenmpi-dev liblmdb-dev python-lmdb libleveldb-dev python-leveldb libopencv-core-dev libopencv-gpu-dev python-opencv libopencv-highgui-dev libopencv-dev
Now, back on the parent OS, do a docker commit under a different name.
root@firestone:~# docker commit deeed8ce922f bsyu/caffe2-ppc64le-xenial:v0.1
Now run it with nvidia-docker so the GPUs can be used. To do that, nvidia-docker-plugin must first be running in the background (if it is not already).
root@firestone:~# nohup nvidia-docker-plugin &
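nvidia-docker-plugin serves a small REST API (port 3476 by default in nvidia-docker 1.0), so you can optionally confirm it is up before launching containers. Treat the port and path below as the 1.0 defaults and adjust if your installation differs:

# should return the GPU inventory if the plugin is running
curl -s http://localhost:3476/v1.0/gpu/info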
root@firestone:~# nvidia-docker run -ti --rm -v ~/docker:/docker bsyu/caffe2-ppc64le-xenial:v0.1 bash
Confirm that caffe2 imports successfully in the bsyu/caffe2-ppc64le-xenial:v0.1 container.
root@0e58f6f69c44:/# python -c 'from caffe2.python import core' 2>/dev/null && echo "Success" || echo "Failure"
Success
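Because this container was started with nvidia-docker, the GPUs themselves can also be checked. This is an extra sanity check added here, assuming Caffe2's workspace.NumCudaDevices() call and the driver volume that nvidia-docker 1.0 mounts:

# nvidia-smi comes from the mounted driver volume; add /usr/local/nvidia/bin to PATH if it is not found
nvidia-smi
# Caffe2 should report the same number of CUDA devices
python -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())'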
Using this image, we will also build a docker image that contains tensorflow 1.3 and pytorch 0.2.0.
root@firestone:~# docker run -ti --rm -v ~/docker:/docker bsyu/caffe2-ppc64le-xenial:v0.1 bash
root@8cfeaf93f28b:/# cd /opt
root@8cfeaf93f28b:/opt# ls
DL anaconda2 anaconda3 caffe2
root@8cfeaf93f28b:/opt# rm -rf caffe2
root@8cfeaf93f28b:/opt# vi ~/.bashrc
...
export LD_LIBRARY_PATH=/opt/DL/nccl/lib:/opt/DL/openblas/lib:/usr/local/cuda-8.0/lib64:/usr/lib:/usr/local/lib:/usr/lib/powerpc64le-linux-gnu
export PATH=/opt/anaconda3/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
export PYTHONPATH=/opt/anaconda3/lib/python3.6/site-packages
root@8cfeaf93f28b:~# apt-get install libcupti-dev openjdk-8-jdk openjdk-8-jdk-headless git
root@8cfeaf93f28b:~# conda install bazel numpy
root@8cfeaf93f28b:~# git clone --recursive https://github.com/tensorflow/tensorflow.git
root@8cfeaf93f28b:~# cd tensorflow/
root@8cfeaf93f28b:~/tensorflow# git checkout r1.3
root@8cfeaf93f28b:~/tensorflow# ./configure
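./configure is interactive: it asks for the python path, whether to enable CUDA, the CUDA/cuDNN versions and install paths, and the GPU compute capability (6.0 for the P100s in a Minsky). If you prefer to script the answers, TF r1.3's configure honors environment variables roughly like the ones below; the exact variable names are my assumption here, so verify them against the configure script before relying on them:

# rough non-interactive equivalent of ./configure for TF r1.3 (variable names assumed)
export PYTHON_BIN_PATH=/opt/anaconda3/bin/python
export TF_NEED_CUDA=1
export TF_CUDA_VERSION=8.0
export CUDA_TOOLKIT_PATH=/usr/local/cuda-8.0
export TF_CUDNN_VERSION=6
export CUDNN_INSTALL_PATH=/usr/lib/powerpc64le-linux-gnu
export TF_CUDA_COMPUTE_CAPABILITIES=6.0
./configure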
root@8cfeaf93f28b:~/tensorflow# bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
root@8cfeaf93f28b:~/tensorflow# bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
root@8cfeaf93f28b:~/tensorflow# pip install /tmp/tensorflow_pkg/tensorflow-1.3.1-cp36-cp36m-linux_ppc64le.whl
root@8cfeaf93f28b:~/tensorflow# conda list | grep tensor
tensorflow 1.3.1 <pip>
tensorflow-tensorboard 0.1.8 <pip>
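To confirm that the wheel was really built with CUDA support, the checks below can be run (another addition here, not in the original transcript); note that actually listing the GPU devices only works when the container runs under nvidia-docker:

# should print True for a CUDA-enabled build
python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda())"
# under nvidia-docker this should list the GPUs alongside the CPU device
python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"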
Now that tensorflow 1.3 is installed, save the container with docker commit.
root@firestone:~# docker ps | grep -v k8s
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
8cfeaf93f28b bsyu/caffe2-ppc64le-xenial:v0.1 "bash" 2 hours ago Up 2 hours vigilant_ptolemy
root@firestone:~# docker commit 8cfeaf93f28b bsyu/tf1.3-caffe2-ppc64le-xenial:v0.1
Next, install pytorch in this image.
root@8cfeaf93f28b:~# git clone --recursive https://github.com/pytorch/pytorch.git
root@8cfeaf93f28b:~# cd pytorch
root@8cfeaf93f28b:~/pytorch# export CMAKE_PREFIX_PATH=/opt/pytorch
root@8cfeaf93f28b:~/pytorch# conda install numpy pyyaml setuptools cmake cffi openblas
root@8cfeaf93f28b:~/pytorch# python setup.py install
root@8cfeaf93f28b:~# python
Python 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 15:31:35)
[GCC 4.8.4] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from __future__ import print_function
>>> import torch
>>>
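The import alone only proves that the build is intact; checking CUDA from pytorch requires the container to be running under nvidia-docker. A minimal check along these lines:

# prints True and the GPU count when CUDA is usable (requires nvidia-docker)
python -c "import torch; print(torch.cuda.is_available(), torch.cuda.device_count())"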
Finally, commit the image as bsyu/pytorch-tf1.3-caffe2-ppc64le-xenial:v0.1 and push it to Docker Hub.
root@firestone:~# docker commit 8cfeaf93f28b bsyu/pytorch-tf1.3-caffe2-ppc64le-xenial:v0.1
root@firestone:~# docker push bsyu/pytorch-tf1.3-caffe2-ppc64le-xenial:v0.1