HoangHa
/

smoldlm-144m

@@ -319,8 +319,10 @@ def _add_gumbel_noise(logits, temperature):
     """Gumbel-max sampling for stochastic token selection."""
     if temperature == 0:
         return logits
-    logits = logits.to(torch.float64)
-    noise = torch.rand_like(logits, dtype=torch.float64)
     gumbel_noise = (-torch.log(noise.clamp(min=1e-20))) ** temperature
     return logits.exp() / gumbel_noise

     """Gumbel-max sampling for stochastic token selection."""
     if temperature == 0:
         return logits
+    # Prefer float64 for precision on CUDA/CPU; the MPS backend has no float64 support, so fall back to float32 there
+    dtype = torch.float32 if logits.device.type == "mps" else torch.float64
+    logits = logits.to(dtype)
+    noise = torch.rand_like(logits, dtype=dtype)
     gumbel_noise = (-torch.log(noise.clamp(min=1e-20))) ** temperature
     return logits.exp() / gumbel_noise