Document valid config options for ECS / Fargate (#156)

jameslamb · web-flow · commit fad780ea6dcc · 2020-11-09T10:45:45.000Z
* [docs] document valid config options for ECS / Fargate

* add link to troubleshooting guide

* linting
diff --git a/dask_cloudprovider/aws/ecs.py b/dask_cloudprovider/aws/ecs.py
@@ -453,10 +453,12 @@ class ECSCluster(SpecCluster):
         The amount of CPU to request for the scheduler in milli-cpu (1/1024).
 
         Defaults to ``1024`` (one vCPU).
+        See the `troubleshooting guide`_ for information on the valid values for this argument.
     scheduler_mem: int (optional)
         The amount of memory to request for the scheduler in MB.
 
         Defaults to ``4096`` (4GB).
+        See the `troubleshooting guide`_ for information on the valid values for this argument.
     scheduler_timeout: str (optional)
         The scheduler task will exit after this amount of time if there are no clients connected.
 
@@ -471,10 +473,12 @@ class ECSCluster(SpecCluster):
         The amount of CPU to request for worker tasks in milli-cpu (1/1024).
 
         Defaults to ``4096`` (four vCPUs).
+        See the `troubleshooting guide`_ for information on the valid values for this argument.
     worker_mem: int (optional)
         The amount of memory to request for worker tasks in MB.
 
         Defaults to ``16384`` (16GB).
+        See the `troubleshooting guide`_ for information on the valid values for this argument.
     worker_gpu: int (optional)
         The number of GPUs to expose to the worker.
 
@@ -636,6 +640,7 @@ class ECSCluster(SpecCluster):
     you must ensure the NVIDIA CUDA toolkit is installed with a version that matches the host machine
     along with ``dask-cuda``.
 
+    .. _troubleshooting guide: ./troubleshooting.html#invalid-cpu-or-memory
     """
 
     def __init__(
diff --git a/doc/source/troubleshooting.rst b/doc/source/troubleshooting.rst
@@ -15,4 +15,35 @@ If you are unable to connect to this address it is likely that there is somethin
 for example you may have corporate policies implementing additional firewall rules on your account.
 
 To reduce the chances of this happening it is often simplest to run Dask Cloudprovider from within the cloud you are trying
-to use and configure private networking only. See your specific cluster manager docs for info.
+to use and configure private networking only. See your specific cluster manager docs for info.
+
+Invalid CPU or Memory
+---------------------
+
+When working with ``FargateCluster`` or ``ECSCluster``, CPU and memory arguments can only take values from a fixed set of combinations.
+
+So, for example, code like this will result in an error:
+
+.. code-block:: python
+
+    from dask_cloudprovider import FargateCluster
+    cluster = FargateCluster(
+        image="daskdev/dask:latest",
+        worker_cpu=256,
+        worker_mem=30720,
+        n_workers=2,
+        fargate_use_private_ip=False,
+        scheduler_timeout="15 minutes"
+    )
+    client = Client(cluster)
+    cluster
+
+    # botocore.errorfactory.ClientException:
+    # An error occurred (ClientException) when calling the RegisterTaskDefinition operation:
+    # No Fargate configuration exists for given values.
+
+
+This is because ECS and Fargate task definitions with ``CPU=256`` (0.25 vCPU) cannot be given as much memory as that code requests (``30720`` MB, i.e. 30 GB).
+
+The AWS-accepted set of combinations is documented at
+https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html.