We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b73985c · commit cba2e4e (Copy full SHA for cba2e4e)
app.py
@@ -176,6 +176,18 @@ def load_model():
176
print("Warming up (40 tokens)…")
177
with torch.inference_mode():
178
_ = model.generate(**dummy, max_new_tokens=40)
179
+ print("Warming up (100 tokens)…")
180
+ with torch.inference_mode():
181
+ _ = model.generate(**dummy, max_new_tokens=100)
182
+ print("Warming up (200 tokens)…")
183
184
+ _ = model.generate(**dummy, max_new_tokens=200)
185
+ print("Warming up (400 tokens)…")
186
187
+ _ = model.generate(**dummy, max_new_tokens=400)
188
+ print("Warming up (800 tokens)…")
189
190
+ _ = model.generate(**dummy, max_new_tokens=800)
191
print("Warm-up complete.")
192
193
0 commit comments