Commit 0c8a193

(1/n) Support 2D Parallelism (#19846)
1 parent 0f12271 commit 0c8a193

File tree: 17 files changed, +1821 -71 lines changed

docs/source-pytorch/conf.py (+2 -2)

@@ -356,8 +356,6 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     "torchmetrics": ("https://lightning.ai/docs/torchmetrics/stable/", None),
     "lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
     "tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
-    # needed for referencing App from lightning scope
-    "lightning.app": ("https://lightning.ai/docs/app/stable/", None),
     # needed for referencing Fabric from lightning scope
     "lightning.fabric": ("https://lightning.ai/docs/fabric/stable/", None),
     # TODO: these are missing objects.inv
@@ -637,4 +635,6 @@ def package_list_from_file(file):
     "https://www.intel.com/content/www/us/en/products/docs/processors/what-is-a-gpu.html",
     "https://www.microsoft.com/en-us/research/blog/zero-infinity-and-deepspeed-unlocking-unprecedented-model-scale-for-deep-learning-training/", # noqa: E501
     "https://stackoverflow.com/questions/66640705/how-can-i-install-grpcio-on-an-apple-m1-silicon-laptop",
+    "https://openai.com/blog/.*",
+    "https://tinyurl.com/.*", # has a human verification check on redirect
 ]
New file (+45 lines)

## Tensor Parallel and 2D Parallel

This example shows how to apply tensor-parallelism to your model (here Llama 2 7B) with the `ModelParallelStrategy`, and how it can be combined with FSDP (2D parallelism).
PyTorch 2.3+ and a machine with at least 4 GPUs and 24 GB memory each are required to run this example.

```bash
pip install 'torch>=2.3'
```

Navigate to this example folder and run the training script:

```bash
cd examples/fabric/tensor_parallel
python train.py
```

You should see an output like this:

```
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

Number of model parameters: 6.7 B
Starting training ...
Iteration 0 complete
Iteration 1 complete
Iteration 2 complete
Iteration 3 complete
Iteration 4 complete
Iteration 5 complete
Iteration 6 complete
Iteration 7 complete
Saving a (distributed) checkpoint ...
Training successfully completed!
Peak memory usage: 17.95 GB
```

> \[!NOTE\]
> The `ModelParallelStrategy` is experimental and subject to change. Report issues on [GitHub](https://github.com/Lightning-AI/pytorch-lightning/issues).
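
For readers who want a feel for how the pieces fit together before opening the example's `train.py`, here is a minimal sketch of 2D parallelism with Fabric's `ModelParallelStrategy`. It is not code from this commit: the `FeedForward` module, the parallelization plan, the mesh dimension names (`"data_parallel"`, `"tensor_parallel"`), and the exact `ModelParallelStrategy` arguments are assumptions based on how the strategy is documented elsewhere and may differ from what this PR adds.

```python
import torch
import torch.nn as nn
from torch.distributed._composable.fsdp import fully_shard  # FSDP2, experimental in PyTorch 2.3
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor.parallel import ColwiseParallel, RowwiseParallel, parallelize_module

import lightning as L
from lightning.fabric.strategies import ModelParallelStrategy


class FeedForward(nn.Module):
    """Stand-in for a single transformer MLP block (hypothetical, not the example's Llama 2 model)."""

    def __init__(self, dim: int = 1024, hidden: int = 4096) -> None:
        super().__init__()
        self.w1 = nn.Linear(dim, hidden, bias=False)
        self.w2 = nn.Linear(hidden, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w2(torch.relu(self.w1(x)))


def parallelize(model: FeedForward, device_mesh: DeviceMesh) -> FeedForward:
    # Tensor parallelism: shard w1 column-wise and w2 row-wise across the
    # tensor-parallel ranks (Megatron-style split of the MLP block).
    tp_mesh = device_mesh["tensor_parallel"]
    model = parallelize_module(model, tp_mesh, {"w1": ColwiseParallel(), "w2": RowwiseParallel()})
    # Data parallelism: additionally shard the parameters with FSDP2 across
    # the data-parallel ranks, giving the 2D layout.
    fully_shard(model, mesh=device_mesh["data_parallel"])
    return model


if __name__ == "__main__":
    # 2 data-parallel groups x 2 tensor-parallel ranks = 4 GPUs.
    strategy = ModelParallelStrategy(
        parallelize_fn=parallelize,
        data_parallel_size=2,
        tensor_parallel_size=2,
    )
    fabric = L.Fabric(accelerator="cuda", devices=4, strategy=strategy)
    fabric.launch()

    model = fabric.setup(FeedForward())  # the strategy applies parallelize() when the model is set up
    x = torch.randn(8, 1024, device=fabric.device)
    fabric.print(model(x).shape)
```

Splitting `w1` column-wise and `w2` row-wise keeps the intermediate activations sharded so the block needs only a single all-reduce, while `fully_shard` spreads the resulting parameters over the remaining mesh dimension, which is the usual way tensor parallelism and FSDP are composed into 2D parallelism.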
New file (+21 lines)

import torch
from torch.utils.data import Dataset


class RandomTokenDataset(Dataset):
    def __init__(self, vocab_size: int, seq_length: int):
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.tokens = torch.randint(
            self.vocab_size,
            size=(len(self), self.seq_length + 1),
            # Set a seed to make this toy dataset the same on each rank
            # Fabric will add a `DistributedSampler` to shard the data correctly
            generator=torch.Generator().manual_seed(42),
        )

    def __len__(self) -> int:
        return 128

    def __getitem__(self, item: int):
        return self.tokens[item]
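
As a quick, hedged usage sketch (not part of this commit): each item has `seq_length + 1` tokens, which is the usual layout for next-token prediction where inputs and labels are shifted by one position. The `vocab_size` and `seq_length` values below are illustrative, not taken from the example's training script.

```python
from torch.utils.data import DataLoader

# Illustrative values only; the actual train.py may configure this differently.
dataset = RandomTokenDataset(vocab_size=32_000, seq_length=128)
dataloader = DataLoader(dataset, batch_size=8)

for batch in dataloader:
    # Shift by one token: predict position t+1 from positions <= t.
    inputs, labels = batch[:, :-1], batch[:, 1:]
    print(inputs.shape, labels.shape)  # torch.Size([8, 128]) torch.Size([8, 128])
    break
```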
