Commit cd7ab5b

Library Interface Overhaul (#61)
* Start of new library interface
* Don't always use arrow to avoid importing gobject
* more testing
* Tensor(T, S) implementation
* better backend abstraction
* more abstraction
* CPU Backend nearly complete
* OpenCL Backend with broadcasting
* linear algebra and grad
* Working through OpenCL Grad
* Continue working through OpenCL grad
* A lot more testing
* Custom kernel is an abomination
* NN Primitives
* Complete OpenCL activation + reduction
* Beginning NN layers
* More gates and axis reductions for Tensor(T, OCL(T))
* Why is this not checked in anymore
* Fixing NN
* Fix tri iterator
* Einsum work + various improvements
* Test coverage for einsum
* Einsum working with plenty of test coverage
* Run reformatting on docstrings + start working with mkdocs
* More docs
* Update ignore to include certain doc files
* More documentation
* Working with last couple OpenCL NN related fns
* OpenCL NN Work
* Move kernels to singletons start
* Actual singleton implementations for OCL methods
* All OpenCL Kernels moved to singletons except custom
* More Kernel work
* Most kernels completed
* Completely moved to OpenCL Singletons
* I always do that with relu loss
* Extending more functionality to OpenCL Backend
* Finish documentation
* Greater test coverage
* Test build pipeline with mkdocs
* Final commit for 1.0
* Numpy comparison
* Re-enable branch check for docs
1 parent 3133fd3 commit cd7ab5b
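
The commit message above names the headline change: a single `Tensor(T, S)` type parameterized by a storage backend, replacing the old `Tensor(T)` / `ClTensor(T)` split. A minimal sketch only, built from the constructors shown in the README diff below; the `require` path is an assumption, not something this commit shows:

```crystal
# Assumes the shard is required as "num"
require "num"

# CPU-backed storage: the second type parameter is CPU(T)
a = Tensor(Float32, CPU(Float32)).zeros([3, 3])

# OpenCL-backed storage: same API, with OCL(T) as the backend
b = Tensor(Float32, OCL(Float32)).zeros([3, 3])

# Array conversion still produces a CPU-backed Tensor
c = [1, 2, 3].to_tensor
```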

File tree

200 files changed, +17574 / -18191 lines


.github/workflows/crystal.yml (+4 -4)

@@ -8,18 +8,18 @@ jobs:
     runs-on: ubuntu-latest
 
     container:
-      image: crystallang/crystal
+      image: crystaldata/numci
 
     steps:
     - uses: actions/checkout@v1
     - name: Install dependencies
       run: shards install --ignore-crystal-version
     - name: Run tests
-      run: crystal spec
+      run: crystal spec -v
     - name: Build docs
-      run: crystal docs
+      run: mkdocs build
     - uses: peaceiris/actions-gh-pages@v3
       if: github.event_name == 'push' && github.ref == 'refs/heads/master'
       with:
         github_token: ${{ secrets.DOCS_TOKEN }}
-        publish_dir: ./docs
+        publish_dir: ./site

.gitignore (+6 -11)

@@ -1,6 +1,6 @@
 /lib/
-/docs/
 /bin/
+/site/
 /.shards/
 *.dwarf
 .DS_Store
@@ -10,13 +10,8 @@ debug.cr
 # Dependencies will be locked in applications that use them
 /shard.lock
 
-# docs
-/_build
-/doc/_build
-
-elementwise_arraymancer
-elementwise_num
-matmul_arraymancer
-matmul_num
-
-*.o
+# Docs
+!docs
+docs/*
+!docs/gen_doc_stubs.py
+!docs/index.md

Makefile (-10)

This file was deleted.

README.md (+42 -18)

@@ -48,6 +48,7 @@ They are:
 - LAPACK
 - OpenCL
 - ClBlast
+- NNPACK
 
 While not at all required, they provide additional functionality than is
 provided by the basic library.
@@ -67,11 +68,11 @@ allocate a `Tensor` backed by either `CPU` or `GPU` based storage.
 ```crystal
 [1, 2, 3].to_tensor
 Tensor.from_array [1, 2, 3]
-Tensor(UInt8).zeros([3, 3, 2])
+Tensor(UInt8, CPU(UInt8)).zeros([3, 3, 2])
 Tensor.random(0.0...1.0, [2, 2, 2])
 
-ClTensor(Float32).zeros([3, 2, 2])
-ClTensor(Float64).full([3, 4, 5], 3.8)
+Tensor(Float32, OCL(Float32)).zeros([3, 2, 2])
+Tensor(Float64, OCL(Float64)).full([3, 4, 5], 3.8)
 ```
 
 ### Operations
@@ -84,10 +85,7 @@ one or more `Tensor`s using sophisticated broadcasted mapping routines.
 a = [1, 2, 3, 4].to_tensor
 b = [[3, 4, 5, 6], [5, 6, 7, 8]].to_tensor
 
-# Convert a Tensor to a GPU backed Tensor
-acl = a.astype(Float64).gpu
-
-puts Num.add(a, b)
+puts a + b
 
 # a is broadcast to b's shape
 # [[ 4, 6, 8, 10],
@@ -173,18 +171,44 @@ puts a.eigvals
 
 # [-0.372281, 5.37228 ]
 
-acl = a.opencl
-bcl = a.opencl
-
-puts acl.gemm(bcl).cpu
+puts a.matmul(a)
 
 # [[7 , 10],
 # [15, 22]]
+```
 
-puts a.matmul(a)
+### Einstein Notation
 
-# [[7 , 10],
-# [15, 22]]
+For representing certain complex contractions of `Tensor`s, Einstein notation
+can be used to simplify the operation. For example, the following matrix
+multiplication + summation operation:
+
+```crystal
+a = Tensor.new([30, 40, 50]) { |i| i * 1_f32 }
+b = Tensor.new([40, 30, 20]) { |i| i * 1_f32 }
+
+result = Float32Tensor.zeros([50, 20])
+ny, nx = result.shape
+b2 = b.swap_axes(0, 1)
+ny.times do |k|
+  nx.times do |l|
+    result[k, l] = (a[..., ..., k] * b2[..., ..., l]).sum
+  end
+end
+```
+
+Can instead be represented in Einstein notation as the following:
+
+```crystal
+Num::Einsum.einsum("ijk,jil->kl", a, b)
+```
+
+This can lead to performance improvements due to optimized contractions
+on `Tensor`s.
+
+```
+einsum 2.22k (450.41µs) (± 0.86%) 350kB/op fastest
+manual 117.52 ( 8.51ms) (± 0.98%) 5.66MB/op 18.89× slower
 ```
 
 ### Machine Learning
@@ -194,10 +218,10 @@ mathematical functions. Use a `Num::Grad::Variable` with a `Num::Grad::Context`
 to easily compute these derivatives.
 
 ```crystal
-ctx = Num::Grad::Context(Tensor(Float64)).new
+ctx = Num::Grad::Context(Tensor(Float64, CPU(Float64))).new
 
-x = ctx.variable([3.0])
-y = ctx.variable([2.0])
+x = ctx.variable([3.0].to_tensor)
+y = ctx.variable([2.0].to_tensor)
 
 # f(x) = x ** y
 f = x ** y
@@ -214,7 +238,7 @@ interface to assist in creating neural networks. Designing and creating
 a network is simple using Crystal's block syntax.
 
 ```crystal
-ctx = Num::Grad::Context(Tensor(Float64)).new
+ctx = Num::Grad::Context(Tensor(Float64, CPU(Float64))).new
 
 x_train = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]].to_tensor
 y_train = [[0.0], [1.0], [1.0], [0.0]].to_tensor
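
The Einstein notation section added above pairs a contraction string with an equivalent manual loop. As an illustration only (not part of this diff), the same `Num::Einsum.einsum` call can express a plain matrix product, which should agree with `matmul`; the shapes and values here are arbitrary:

```crystal
# Hypothetical check using only calls shown in the README diff above:
# Tensor.new with a block, Num::Einsum.einsum, and Tensor#matmul.
a = Tensor.new([2, 3]) { |i| i * 1_f32 }
b = Tensor.new([3, 4]) { |i| i * 1_f32 }

# "ij,jk->ik" contracts the shared index j, i.e. an ordinary matrix product
c1 = Num::Einsum.einsum("ij,jk->ik", a, b)
c2 = a.matmul(b)

puts c1
puts c2 # expected to match c1
```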

ci/Dockerfile (+15)

@@ -0,0 +1,15 @@
+FROM crystallang/crystal
+
+RUN apt-get update && apt-get install \
+    curl \
+    libopenblas-dev \
+    gnupg \
+    clang \
+    build-essential \
+    git \
+    python3 \
+    python3-pip \
+    -y
+
+COPY requirements.txt requirements.txt
+RUN pip install -r requirements.txt

ci/requirements.txt (+9)

@@ -0,0 +1,9 @@
+mkdocs==1.2.3
+mkdocs-autorefs==0.3.0
+mkdocs-gen-files==0.3.3
+mkdocs-literate-nav==0.4.0
+mkdocs-material==7.3.4
+mkdocs-material-extensions==1.0.3
+mkdocs-section-index==0.3.2
+mkdocstrings==0.16.2
+mkdocstrings-crystal==0.3.3

docs/gen_doc_stubs.py (+19)

@@ -0,0 +1,19 @@
+# Generate virtual doc files for the mkdocs site.
+# You can also run this script directly to actually write out those files, as a preview.
+
+import mkdocs_gen_files
+
+# Get the documentation root object
+root = mkdocs_gen_files.config["plugins"]["mkdocstrings"].get_handler("crystal").collector.root
+
+# For each type (e.g. "Foo::Bar")
+for typ in root.walk_types():
+    # Use the file name "Foo/Bar/index.md"
+    filename = "/".join(typ.abs_id.split("::") + ["index.md"])
+    # Make a file with the content "# ::: Foo::Bar\n"
+    with mkdocs_gen_files.open(filename, "w") as f:
+        print(f"# ::: {typ.abs_id}", file=f)
+
+    # Link to the type itself when clicking the "edit" button on the page.
+    if typ.locations:
+        mkdocs_gen_files.set_edit_path(filename, typ.locations[0].url)
