@@ -1488,7 +1488,7 @@ def save_to_disk(
        parent_cache_files_paths = {
            Path(cache_filename["filename"]).resolve().parent for cache_filename in self.cache_files
        }
-        # Check that the dataset doesn't overwrite iself. It can cause a permission error on Windows and a segfault on linux.
+        # Check that the dataset doesn't overwrite itself. It can cause a permission error on Windows and a segfault on linux.
        if Path(dataset_path).expanduser().resolve() in parent_cache_files_paths:
            raise PermissionError(
                f"Tried to overwrite {Path(dataset_path).expanduser().resolve()} but a dataset can't overwrite itself."
@@ -2867,7 +2867,7 @@ def map(
          Note that the last batch may have less than `n` examples.
          A batch is a dictionary, e.g. a batch of `n` examples is `{"text": ["Hello there !"] * n}`.

-        If the function is asynchronous, then `map` will run your function in parallel, with up to one thousand simulatenous calls.
+        If the function is asynchronous, then `map` will run your function in parallel, with up to one thousand simultaneous calls.
        It is recommended to use a `asyncio.Semaphore` in your function if you want to set a maximum number of operations that can run at the same time.

        Args:
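The docstring above recommends an `asyncio.Semaphore` for async functions. A minimal sketch of what that might look like; the toy dataset, the limit of 8, and the `asyncio.sleep` stand-in for real async I/O are all illustrative, not library defaults:

import asyncio

from datasets import Dataset

ds = Dataset.from_dict({"text": ["Hello there !", "General Kenobi !"]})

sem = asyncio.Semaphore(8)  # illustrative cap on concurrent operations

async def add_length(example):
    async with sem:                # at most 8 coroutines run this body at once
        await asyncio.sleep(0.01)  # stand-in for real async I/O (e.g. an HTTP call)
        return {"length": len(example["text"])}

ds = ds.map(add_length)  # map schedules the coroutines concurrently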
@@ -3475,7 +3475,7 @@ def iter_outputs(shard_iterable):
                    yield i, apply_function(example, i, offset=offset)

        num_examples_progress_update = 0
-        # If `update_data` is True after processing the first example/batch, initalize these resources with `init_buffer_and_writer`
+        # If `update_data` is True after processing the first example/batch, initialize these resources with `init_buffer_and_writer`
        buf_writer, writer, tmp_file = None, None, None

        # Check if Polars is available and import it if so
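The comment in this hunk describes a lazy-initialization pattern: the writer resources stay `None` until the first processed example/batch proves there is data to write. A generic sketch of that pattern, not the library's actual `init_buffer_and_writer` code:

import tempfile

def process(items):
    writer = None  # created lazily, mirroring buf_writer/writer/tmp_file above
    for item in items:
        result = item.upper()  # stand-in for apply_function
        if writer is None:     # first output: now we know resources are needed
            writer = tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt")
        writer.write(result + "\n")
    if writer is not None:
        writer.close()

process(["a", "b", "c"])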
@@ -3659,7 +3659,7 @@ def filter(
        """Apply a filter function to all the elements in the table in batches
        and update the table so that the dataset only includes examples according to the filter function.

-        If the function is asynchronous, then `filter` will run your function in parallel, with up to one thousand simulatenous calls (configurable).
+        If the function is asynchronous, then `filter` will run your function in parallel, with up to one thousand simultaneous calls (configurable).
        It is recommended to use a `asyncio.Semaphore` in your function if you want to set a maximum number of operations that can run at the same time.

        Args:
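Same recommendation as for `map`: a hedged sketch of an async predicate gated by a semaphore (the threshold and the limit of 100 are made up for illustration):

import asyncio

from datasets import Dataset

ds = Dataset.from_dict({"text": ["short", "a much longer sentence"]})

sem = asyncio.Semaphore(100)  # illustrative, not the library's default

async def keep(example):
    async with sem:
        await asyncio.sleep(0.01)          # stand-in for an async check
        return len(example["text"]) > 10   # predicate must return a bool

ds = ds.filter(keep)
print(ds["text"])  # ['a much longer sentence']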
@@ -4277,7 +4277,7 @@ def sort(
                    f"Column '{column}' not found in the dataset. Please provide a column selected in: {self._data.column_names}"
                )

-        # Change null_placement to conform to pyarrow's sort_indices() while ensuring backwards compatability
+        # Change null_placement to conform to pyarrow's sort_indices() while ensuring backwards compatibility
        if null_placement not in ["at_start", "at_end"]:
            if null_placement == "first":
                null_placement = "at_start"
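For context, a small sketch of the pyarrow call this mapping feeds into: pyarrow's `sort_indices` only accepts "at_start"/"at_end", which is why the legacy "first"/"last" values are translated (toy table for illustration):

import pyarrow as pa
import pyarrow.compute as pc

table = pa.table({"x": [3, None, 1]})
# Passing "first"/"last" here would raise; only "at_start"/"at_end" are accepted.
indices = pc.sort_indices(table, sort_keys=[("x", "ascending")], null_placement="at_end")
print(table.take(indices)["x"])  # [1, 3, null]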
@@ -5345,7 +5345,7 @@ def _push_parquet_shards_to_hub(
        Returns:
            additions (`List[CommitOperation]`): list of the `CommitOperationAdd` of the uploaded shards
            uploaded_size (`int`): number of uploaded bytes to the repository
-            dataset_nbytes (`int`): approximate size in bytes of the uploaded dataset afer uncompression
+            dataset_nbytes (`int`): approximate size in bytes of the uploaded dataset after uncompression
        """
        # Find decodable columns, because if there are any, we need to:
        # embed the bytes from the files in the shards
@@ -6178,7 +6178,7 @@ def _concatenate_map_style_datasets(
        # Return first dataset if all datasets are empty
        return dsets[0]

-    # Perform checks (and a potentional cast if axis=0)
+    # Perform checks (and a potential cast if axis=0)
    if axis == 0:
        _check_if_features_can_be_aligned([dset.features for dset in dsets])
    else:
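`_concatenate_map_style_datasets` backs the public `concatenate_datasets` helper; a brief sketch of the two axes the check above distinguishes (toy data, illustrative only):

from datasets import Dataset, concatenate_datasets

a = Dataset.from_dict({"id": [1, 2]})
b = Dataset.from_dict({"id": [3, 4]})
c = Dataset.from_dict({"label": ["x", "y"]})

rows = concatenate_datasets([a, b])          # axis=0: features must be alignable
cols = concatenate_datasets([a, c], axis=1)  # axis=1: row counts must match
print(rows.num_rows, cols.column_names)      # 4 ['id', 'label']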