This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit de9165e

rename 'node_rank' to 'global_rank' in dataset reader 'DistributedInfo' (#4608)
* rename 'node_rank' to 'global_rank'
* Clarify doc comments
* fix line length
1 parent 3d11419 commit de9165e

File tree

2 files changed: +11 -10 lines changed


allennlp/data/dataset_readers/dataset_reader.py

+7 -6
@@ -38,20 +38,21 @@ class WorkerInfo:
 @dataclass
 class DistributedInfo:
     """
-    Contains information about the node rank and world size when the reader is being
+    Contains information about the global process rank and total world size when the reader is being
     used within distributed training.
 
     From a `DatasetReader` this can be accessed with the [`get_distributed_info()`](#get_distributed_info) method.
     """
 
     world_size: int
     """
-    The total number of distributed nodes.
+    The total number of processes in the distributed group.
     """
 
-    node_rank: int
+    global_rank: int
     """
-    The 0-indexed ID of the current node.
+    The 0-indexed ID of the current process within the distributed group.
+    This will be between 0 and `world_size - 1`, inclusive.
     """
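For context, `get_distributed_info()` returns one of these `DistributedInfo` objects (or `None` outside distributed training), and readers that set `manual_distributed_sharding` can use the renamed field to shard for themselves. A minimal sketch, assuming a hypothetical line-per-instance reader (the class and file format are illustrative, not part of this commit):

from allennlp.data.dataset_readers import DatasetReader


class LineReader(DatasetReader):
    def _read(self, file_path: str):
        info = self.get_distributed_info()  # None outside distributed training
        with open(file_path) as f:
            for i, line in enumerate(f):
                # Keep only the lines this process owns: every world_size-th
                # line, offset by this process's global_rank.
                if info is not None and i % info.world_size != info.global_rank:
                    continue
                yield self.text_to_instance(line.strip())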

@@ -313,7 +314,7 @@ def _multi_worker_islice(
         if max_instances is not None:
             # Need to scale down max_instances because otherwise each node would read self.max_instances,
             # but we really want self.max_instances total across all nodes.
-            if self._distributed_info.node_rank < (
+            if self._distributed_info.global_rank < (
                 max_instances % self._distributed_info.world_size
             ):
                 max_instances = max_instances // self._distributed_info.world_size + 1
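The branch above spreads the global `max_instances` cap as evenly as possible across processes: the first `max_instances % world_size` ranks take one extra instance, so the per-rank caps sum back to the total. A standalone illustration of the arithmetic (the numbers are made up):

max_instances, world_size = 10, 3
caps = [
    # Ranks below the remainder take the ceiling of the even share,
    # the rest take the floor.
    max_instances // world_size + (1 if rank < max_instances % world_size else 0)
    for rank in range(world_size)
]
assert caps == [4, 3, 3] and sum(caps) == max_instances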
@@ -323,7 +324,7 @@ def _multi_worker_islice(
         if not self.manual_distributed_sharding:
             sharded_slice = itertools.islice(
                 sharded_slice,
-                self._distributed_info.node_rank,
+                self._distributed_info.global_rank,
                 None,
                 self._distributed_info.world_size,
             )
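The `itertools.islice(iterable, start, stop, step)` call above does round-robin sharding: each process starts at its own `global_rank` and then takes every `world_size`-th instance, so every instance lands on exactly one rank. A self-contained illustration:

import itertools

instances = range(10)
world_size = 3
shards = [
    list(itertools.islice(iter(instances), global_rank, None, world_size))
    for global_rank in range(world_size)
]
# Disjoint shards that together cover all ten instances.
assert shards == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]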

tests/data/dataset_readers/dataset_reader_test.py

+4 -4
@@ -123,9 +123,9 @@ def test_instance_slicing(
         minimum_expected_result_size //= world_size
         minimum_expected_result_size //= num_workers
         maximum_expected_result_size = minimum_expected_result_size + 1
-        for node_rank in range(world_size):
+        for global_rank in range(world_size):
             monkeypatch.setattr(common_util, "is_distributed", lambda: True)
-            monkeypatch.setattr(dist, "get_rank", lambda: node_rank)
+            monkeypatch.setattr(dist, "get_rank", lambda: global_rank)
             monkeypatch.setattr(dist, "get_world_size", lambda: world_size)
             for worker_id in range(num_workers):
                 reader = reader_class(max_instances=max_instances)
@@ -137,9 +137,9 @@ def test_instance_slicing(
     elif world_size is not None:
         minimum_expected_result_size //= world_size
         maximum_expected_result_size = minimum_expected_result_size + 1
-        for node_rank in range(world_size):
+        for global_rank in range(world_size):
             monkeypatch.setattr(common_util, "is_distributed", lambda: True)
-            monkeypatch.setattr(dist, "get_rank", lambda: node_rank)
+            monkeypatch.setattr(dist, "get_rank", lambda: global_rank)
             monkeypatch.setattr(dist, "get_world_size", lambda: world_size)
             reader = reader_class(max_instances=max_instances)
             result = set(
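The test fakes each rank inside a single process by monkeypatching `torch.distributed`'s rank and world-size queries along with AllenNLP's `is_distributed()` check. Extracted as a helper, the pattern looks roughly like this (`simulate_rank` is a hypothetical name, not in the codebase; `monkeypatch` is pytest's fixture):

import torch.distributed as dist

from allennlp.common import util as common_util


def simulate_rank(monkeypatch, global_rank: int, world_size: int) -> None:
    # Make the reader believe it is process `global_rank` of `world_size`
    # without launching real distributed workers. The lambdas close over the
    # function arguments, so each simulated rank keeps its own values.
    monkeypatch.setattr(common_util, "is_distributed", lambda: True)
    monkeypatch.setattr(dist, "get_rank", lambda: global_rank)
    monkeypatch.setattr(dist, "get_world_size", lambda: world_size)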
