This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Use our own base images to build allennlp Docker images #5366

Merged 3 commits on Aug 19, 2021
51 changes: 22 additions & 29 deletions .github/workflows/ci.yml
@@ -17,9 +17,10 @@ on:
- cron: '37 11 * * 1,2,3,4,5' # early morning (11:37 UTC / 4:37 AM PDT) Monday - Friday

env:
# NOTE: Need to update `TORCH_VERSION` and `TORCH_VISION_VERSION` for new torch releases.
# NOTE: Need to update `TORCH_VERSION`, `TORCH_CPU_INSTALL` and `TORCH_GPU_INSTALL` for new torch releases.
TORCH_VERSION: 1.9.0
TORCH_VISION_VERSION: 0.10.0
TORCH_CPU_INSTALL: pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
TORCH_GPU_INSTALL: pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html # Our self-hosted GPU runners currently support CUDA 11.*
# Change this to invalidate existing cache.
CACHE_PREFIX: v0
# Disable tokenizers parallelism because this doesn't help, and can cause issues in distributed tests.
@@ -94,9 +95,9 @@ jobs:
run: make test

- name: GPU Tests
runs_on: [self-hosted, GPU]
runs_on: [self-hosted, GPU, Multi GPU]
coverage_report: true
torch_platform: cu111 # Our self-hosted GPU runners currently support CUDA 11.*
torch_platform: gpu
run: make gpu-tests

- name: Model Tests
@@ -132,6 +133,16 @@ jobs:
# Use week number in cache key so we can refresh the cache weekly.
echo "WEEK_NUMBER=$(date +%V)" >> $GITHUB_ENV

- name: Set build variables (CPU only)
if: matrix.task.torch_platform == 'cpu'
run: |
echo "TORCH_INSTALL=$TORCH_CPU_INSTALL" >> $GITHUB_ENV

- name: Set build variables (GPU only)
if: matrix.task.torch_platform == 'gpu'
run: |
echo "TORCH_INSTALL=$TORCH_GPU_INSTALL" >> $GITHUB_ENV

- uses: actions/cache@v2
id: virtualenv-cache
with:
@@ -145,7 +156,7 @@ jobs:
run: |
test -d .venv || virtualenv -p $(which python) --copies --reset-app-data .venv
. .venv/bin/activate
make install TORCH_VERSION="torch==${TORCH_VERSION}+${{ matrix.task.torch_platform }} torchvision==${TORCH_VISION_VERSION}+${{ matrix.task.torch_platform }} -f https://download.pytorch.org/whl/torch_stable.html"
make install TORCH_INSTALL="$TORCH_INSTALL"

- name: Setup virtual environment (cache hit)
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
@@ -280,7 +291,7 @@ jobs:
run: |
test -d .venv || virtualenv -p $(which python) --copies --reset-app-data .venv
. .venv/bin/activate
make install TORCH_VERSION="torch==${TORCH_VERSION}+cpu torchvision==${TORCH_VISION_VERSION}+cpu -f https://download.pytorch.org/whl/torch_stable.html"
make install TORCH_INSTALL="$TORCH_CPU_INSTALL"

- name: Setup virtual environment (cache hit)
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
@@ -385,37 +396,19 @@ jobs:
name: Docker (CUDA ${{ matrix.cuda }})
timeout-minutes: 18
if: github.repository == 'allenai/allennlp'
# Run on self-hosted to utilize layer caching.
runs-on: [self-hosted, Docker-enabled]
runs-on: ubuntu-latest
strategy:
matrix:
cuda: ['10.1', '10.2', '11.1']
cuda: ['10.2', '11.1']

steps:
- uses: actions/checkout@v2

- name: Set torch version
env:
CUDA: ${{ matrix.cuda }}
run: |
# Check the install instructions on https://pytorch.org/ to keep these up-to-date.
if [[ $CUDA == '10.1' ]]; then
# NOTE: We need to use an older version of torch to support CUDA 10.1
echo "DOCKER_TORCH_VERSION='torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
elif [[ $CUDA == '10.2' ]]; then
# NOTE: 10.2 is still the default but that could change in the next release.
echo "DOCKER_TORCH_VERSION='torch==${{ env.TORCH_VERSION }} torchvision==${{ env.TORCH_VISION_VERSION }}'" >> $GITHUB_ENV;
elif [[ $CUDA == '11.1' ]]; then
echo "DOCKER_TORCH_VERSION='torch==${{ env.TORCH_VERSION }}+cu111 torchvision==${{ env.TORCH_VISION_VERSION }}+cu111 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
else
echo "Unhandled CUDA version $CUDA";
exit 1;
fi

- name: Set image name
- name: Set image name and torch version
env:
CUDA: ${{ matrix.cuda }}
run: |
echo "DOCKER_TORCH_VERSION=${TORCH_VERSION}-cuda${CUDA}" >> $GITHUB_ENV;
if [[ $GITHUB_EVENT_NAME == 'release' ]]; then
echo "DOCKER_IMAGE_NAME=allennlp/allennlp:${GITHUB_REF#refs/tags/}-cuda${CUDA}" >> $GITHUB_ENV;
else
@@ -503,7 +496,7 @@ jobs:
run: |
test -d .venv || virtualenv -p $(which python) --copies --reset-app-data .venv
. .venv/bin/activate
make install TORCH_VERSION="torch==${TORCH_VERSION}+cpu torchvision==${TORCH_VISION_VERSION}+cpu -f https://download.pytorch.org/whl/torch_stable.html"
make install TORCH_INSTALL="$TORCH_CPU_INSTALL"

- name: Setup virtual environment (cache hit)
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
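Taken together, the workflow changes above swap a per-platform torch version string for a full install command that gets selected per runner type and passed straight through to `make install`. A condensed sketch of what the selected command looks like on each runner type (the pip commands are copied from the env block in this diff; the condensed form itself is illustrative, not part of the workflow):

```bash
# CPU runners (matrix.task.torch_platform == 'cpu')
TORCH_INSTALL="pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html"
make install TORCH_INSTALL="$TORCH_INSTALL"

# GPU runners (matrix.task.torch_platform == 'gpu'), which currently support CUDA 11.*
TORCH_INSTALL="pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html"
make install TORCH_INSTALL="$TORCH_INSTALL"
```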
25 changes: 5 additions & 20 deletions Dockerfile
@@ -1,28 +1,13 @@
# This Dockerfile creates an environment suitable for downstream usage of AllenNLP.
# It's built from a wheel installation of allennlp.
# It's built from a wheel installation of allennlp using the base images from
# https://github.com/allenai/docker-images/pkgs/container/pytorch

FROM python:3.8

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

# Tell nvidia-docker the driver spec that we need as well as to
# use all available devices, which are mounted at /usr/local/nvidia.
# The LABEL supports an older version of nvidia-docker, the env
# variables a newer one.
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
LABEL com.nvidia.volumes.needed="nvidia_driver"
ARG TORCH=1.9.0-cuda10.2
ARG PYTHON=3.9
FROM ghcr.io/allenai/pytorch:${TORCH}-python${PYTHON}-v0.0.1

WORKDIR /stage/allennlp

# Install torch ecosystem first. This build arg should be in the form of a version requirement,
# like 'torch==1.7' or 'torch==1.7+cu102 -f https://download.pytorch.org/whl/torch_stable.html'.
ARG TORCH
RUN pip install --no-cache-dir ${TORCH}

# Installing AllenNLP's dependencies is the most time-consuming part of building
# this Docker image, so we make use of layer caching here by adding the minimal files
# necessary to install the dependencies.
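Because the `TORCH` and `PYTHON` build args are declared before `FROM`, they only parameterize the base-image tag; they are not visible to later instructions unless re-declared after `FROM`. A minimal sketch of building a non-default variant (the tag values are examples; valid combinations come from the allenai/docker-images registry linked above):

```bash
# Build against the CUDA 11.1 / Python 3.9 base image instead of the defaults
docker build \
    -f Dockerfile \
    --build-arg TORCH=1.9.0-cuda11.1 \
    --build-arg PYTHON=3.9 \
    -t allennlp/allennlp:cuda11.1 .
```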
22 changes: 3 additions & 19 deletions Dockerfile.test
@@ -1,19 +1,8 @@
# Used to build an image for running tests.

FROM python:3.8

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

# Tell nvidia-docker the driver spec that we need as well as to
# use all available devices, which are mounted at /usr/local/nvidia.
# The LABEL supports an older version of nvidia-docker, the env
# variables a newer one.
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
LABEL com.nvidia.volumes.needed="nvidia_driver"
ARG TORCH=1.9.0-cuda10.2
ARG PYTHON=3.9
FROM ghcr.io/allenai/pytorch:${TORCH}-python${PYTHON}-v0.0.1

# These environment variables are helpful for debugging.
# See https://pytorch.org/docs/stable/distributed.html#common-environment-variables for more info.
@@ -22,11 +11,6 @@ ENV NCCL_DEBUG_SUBSYS ALL

WORKDIR /stage/allennlp

# Install torch ecosystem first. This build arg should be in the form of a version requirement,
# like 'torch==1.7' or 'torch==1.7+cu102 -f https://download.pytorch.org/whl/torch_stable.html'.
ARG TORCH
RUN pip install --no-cache-dir ${TORCH}

# Installing AllenNLP's dependencies is the most time-consuming part of building
# this Docker image, so we make use of layer caching here by adding the minimal files
# necessary to install the dependencies.
10 changes: 6 additions & 4 deletions Makefile
@@ -10,13 +10,14 @@ MD_DOCS_CONF_SRC = mkdocs-skeleton.yml
MD_DOCS_TGT = site/
MD_DOCS_EXTRAS = $(addprefix $(MD_DOCS_ROOT),README.md CHANGELOG.md CONTRIBUTING.md)

TORCH_VERSION = torch==1.9.0 torchvision==0.10.0
TORCH_INSTALL = pip install torch torchvision
DOCKER_TORCH_VERSION = 1.9.0-cuda10.2
DOCKER_TEST_TORCH_VERSION = 1.9.0-cuda10.2
DOCKER_PYTHON_VERSION = 3.9

DOCKER_TAG = latest
DOCKER_IMAGE_NAME = allennlp/allennlp:$(DOCKER_TAG)
DOCKER_TEST_IMAGE_NAME = allennlp/test:$(DOCKER_TAG)
DOCKER_TORCH_VERSION = $(TORCH_VERSION)
DOCKER_TEST_TORCH_VERSION = $(TORCH_VERSION)
DOCKER_RUN_CMD = docker run --rm \
-v $$HOME/.allennlp:/root/.allennlp \
-v $$HOME/.cache/huggingface:/root/.cache/huggingface \
@@ -96,7 +97,7 @@ install :
# See https://github.com/pypa/pip/issues/4537.
# python setup.py install_egg_info
# Install torch ecosystem first.
pip install $(TORCH_VERSION)
$(TORCH_INSTALL)
pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
# These nltk packages are used by the 'checklist' module.
$(NLTK_DOWNLOAD_CMD)
@@ -158,6 +159,7 @@ docker-image :
--pull \
-f Dockerfile \
--build-arg TORCH=$(DOCKER_TORCH_VERSION) \
--build-arg PYTHON=$(DOCKER_PYTHON_VERSION) \
-t $(DOCKER_IMAGE_NAME) .

DOCKER_GPUS = --gpus all
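With this change, `make install` no longer builds a pip requirement string from `TORCH_VERSION`; it simply runs whatever command `TORCH_INSTALL` holds. A hedged example of overriding it from the command line (the exact wheel spec is an assumption; any valid pip command works):

```bash
# Install CPU-only torch wheels before installing allennlp itself
make install TORCH_INSTALL="pip install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html"
```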
10 changes: 5 additions & 5 deletions README.md
@@ -259,12 +259,12 @@ For various reasons you may need to create your own AllenNLP Docker image, such
of PyTorch. To do so, just run `make docker-image` from the root of your local clone of AllenNLP.

By default this builds an image with the tag `allennlp/allennlp`, but you can change this to anything you want
by setting the `DOCKER_TAG` flag when you call `make`. For example,
`make docker-image DOCKER_TAG=my-allennlp`.
by setting the `DOCKER_IMAGE_NAME` flag when you call `make`. For example,
`make docker-image DOCKER_IMAGE_NAME=my-allennlp`.

If you want to use a different version of PyTorch, set the flag `DOCKER_TORCH_VERSION` to something like
`torch==1.7.0` or `torch==1.7.0+cu110 -f https://download.pytorch.org/whl/torch_stable.html`.
The value of this flag will passed directly to `pip install`.
If you want to use a different version of Python or PyTorch, set the flags `DOCKER_PYTHON_VERSION` and `DOCKER_TORCH_VERSION` to something like
`3.9` and `1.9.0-cuda10.2`, respectively. These flags together determine the base image that is used. You can see the list of valid
combinations in this GitHub Container Registry: [github.com/allenai/docker-images/pkgs/container/pytorch](https://github.com/allenai/docker-images/pkgs/container/pytorch).
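For example, a hypothetical invocation (check the registry for tags that actually exist):

```bash
make docker-image DOCKER_TORCH_VERSION=1.9.0-cuda11.1 DOCKER_PYTHON_VERSION=3.9
```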

After building the image you should be able to see it listed by running `docker images allennlp`.
