Skip to content

Commit b90e0d9

Browse files
author
pytorchbot
committed
2025-05-29 nightly release (86f148b)
1 parent 8ac6821 commit b90e0d9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed, with 634 additions and 140 deletions.

recipes/configs/gemma/2B_full.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma.gemma_2b

recipes/configs/gemma/2B_lora.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,21 @@ tokenizer:
2222
_component_: torchtune.models.gemma.gemma_tokenizer
2323
path: /tmp/gemma-2b/tokenizer.model
2424

25-
# Dataset
25+
# Dataset and Sampler
2626
dataset:
27-
_component_: torchtune.datasets.alpaca_dataset
27+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2828
packed: False # True increases speed
29+
split: train[:95%]
2930
seed: null
3031
shuffle: True
3132

33+
# Validation
34+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
35+
dataset_val:
36+
_component_: torchtune.datasets.alpaca_cleaned_dataset
37+
split: train[95%:]
38+
batch_size_val: ${batch_size}
39+
3240
# Model Arguments
3341
model:
3442
_component_: torchtune.models.gemma.lora_gemma_2b

recipes/configs/gemma/7B_full.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-7b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma.gemma_7b

recipes/configs/gemma/7B_lora.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-7b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma.lora_gemma_7b

recipes/configs/gemma2/27B_full.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2-27b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma2.gemma2_27b

recipes/configs/gemma2/27B_lora.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2-27b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma2.lora_gemma2_27b

recipes/configs/gemma2/2B_full.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2-2b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma2.gemma2_2b

recipes/configs/gemma2/2B_lora.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,21 @@ tokenizer:
2222
_component_: torchtune.models.gemma.gemma_tokenizer
2323
path: /tmp/gemma-2-2b/tokenizer.model
2424

25-
# Dataset
25+
# Dataset and Sampler
2626
dataset:
27-
_component_: torchtune.datasets.alpaca_dataset
27+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2828
packed: False # True increases speed
29+
split: train[:95%]
2930
seed: null
3031
shuffle: True
3132

33+
# Validation
34+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
35+
dataset_val:
36+
_component_: torchtune.datasets.alpaca_cleaned_dataset
37+
split: train[95%:]
38+
batch_size_val: ${batch_size}
39+
3240
# Model Arguments
3341
model:
3442
_component_: torchtune.models.gemma2.lora_gemma2_2b

recipes/configs/gemma2/9B_full.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2-9b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma2.gemma2_9b

recipes/configs/gemma2/9B_lora.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,21 @@ tokenizer:
2323
_component_: torchtune.models.gemma.gemma_tokenizer
2424
path: /tmp/gemma-2-9b/tokenizer.model
2525

26-
# Dataset
26+
# Dataset and Sampler
2727
dataset:
28-
_component_: torchtune.datasets.alpaca_dataset
28+
_component_: torchtune.datasets.alpaca_cleaned_dataset
2929
packed: False # True increases speed
30+
split: train[:95%]
3031
seed: null
3132
shuffle: True
3233

34+
# Validation
35+
run_val_every_n_steps: null # Change to an integer to enable validation every N steps
36+
dataset_val:
37+
_component_: torchtune.datasets.alpaca_cleaned_dataset
38+
split: train[95%:]
39+
batch_size_val: ${batch_size}
40+
3341
# Model Arguments
3442
model:
3543
_component_: torchtune.models.gemma2.lora_gemma2_9b

0 commit comments

Comments
 (0)