
Commit 51248ef

ai-edge-bot authored and copybara-github committed
Modified GemmaWrapper for Gemma2 to pass local mask cache to original model and added filename for model weights.
PiperOrigin-RevId: 766626383
1 parent 787e48c · commit 51248ef

2 files changed: +9 −0 lines changed


ai_edge_torch/generative/examples/gemma/verify_gemma2.py

Lines changed: 6 additions & 0 deletions
@@ -42,12 +42,18 @@
     True,
    "Transpose the KV cache to reduce memory usage.",
 )
+_WEIGHT_FILENAME = flags.DEFINE_string(
+    "weight_filename",
+    "model.ckpt",
+    "Name of the weight file in the checkpoint directory.",
+)
 
 def main(_):
   checkpoint = kagglehub.model_download("google/gemma-2/pyTorch/gemma-2-2b-it")
 
   verify_util.verify_gemma2(
       checkpoint,
+      _WEIGHT_FILENAME.value,
       _PROMPTS.value,
       _MAX_NEW_TOKENS.value,
       _MASK_AS_INPUT.value,
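The new weight_filename flag is a standard absl string flag with a default of "model.ckpt", so it can be overridden when invoking the verification script. As a rough illustration of how verify_gemma2 might use the value, the sketch below joins it with the downloaded checkpoint directory; the helper name and the path-joining step are assumptions for illustration, not the actual verify_util internals.

# Hypothetical sketch only: the real verify_util.verify_gemma2 internals are
# not shown in this diff.
import os

def resolve_weight_path(checkpoint_dir: str, weight_filename: str = "model.ckpt") -> str:
  # Assumed layout: the weight file lives directly inside the checkpoint
  # directory downloaded by kagglehub.
  weight_path = os.path.join(checkpoint_dir, weight_filename)
  if not os.path.exists(weight_path):
    raise FileNotFoundError(f"No weight file found at {weight_path}")
  return weight_path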

ai_edge_torch/generative/examples/gemma/verify_util.py

Lines changed: 3 additions & 0 deletions
@@ -62,6 +62,8 @@ def forward(self, tokens: torch.Tensor) -> torch.Tensor:
     actual_input_len = self._get_actual_input_len(tokens)
     input_pos = torch.arange(0, actual_input_len, dtype=torch.long)
     mask_cache = attn_utils.build_causal_mask_cache(tokens.shape[1])
+    local_mask_cache = attn_utils.build_sliding_window_mask_cache(
+        tokens.shape[1], self.model.config.sliding_window_size)
     _, logits = self.model.forward(
         input_token_ids=tokens[0, :actual_input_len].unsqueeze(0),
         input_positions=input_pos,
@@ -72,6 +74,7 @@ def forward(self, tokens: torch.Tensor) -> torch.Tensor:
         temperatures=None,
         top_ps=torch.tensor([1.0], dtype=torch.float),
         top_ks=torch.tensor([1], dtype=torch.long),
+        local_mask=local_mask_cache.index_select(2, input_pos)
     )
     return logits
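For context on what the local mask cache represents: a sliding-window (local) mask is a causal mask that additionally blocks keys more than sliding_window_size positions behind the query, and index_select(2, input_pos) slices out the query rows for the positions actually present in the prompt. The sketch below is an assumed re-creation of that behavior, not the actual attn_utils.build_sliding_window_mask_cache implementation.

# Illustrative sketch (assumed shapes and mask convention), not the real
# attn_utils implementation.
import torch

def sliding_window_mask_sketch(size: int, window: int) -> torch.Tensor:
  # Returns a [1, 1, size, size] additive mask: 0.0 where attention is
  # allowed, -inf where it is blocked. A query at position i may attend to
  # key positions j with i - window < j <= i (causal and within the window).
  q = torch.arange(size).unsqueeze(-1)   # query positions as a column
  k = torch.arange(size).unsqueeze(0)    # key positions as a row
  allowed = (k <= q) & (k > q - window)
  mask = torch.full((size, size), float("-inf")).masked_fill(allowed, 0.0)
  return mask.reshape(1, 1, size, size)

# Mirroring the diff: keep only the mask rows for the actual input positions.
cache = sliding_window_mask_sketch(size=8, window=4)
input_pos = torch.arange(0, 3, dtype=torch.long)
local_mask = cache.index_select(2, input_pos)   # shape [1, 1, 3, 8]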
