diff --git a/YouCut_20250526_184250539 github.mp4 b/YouCut_20250526_184250539 github.mp4
deleted file mode 100644
index efa13f83..00000000
Binary files a/YouCut_20250526_184250539 github.mp4 and /dev/null differ
diff --git a/app-release-signed.apk b/app-release-signed.apk
deleted file mode 100644
index 4c3ade4f..00000000
Binary files a/app-release-signed.apk and /dev/null differ
diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt
index 00489d4f..3a858a5f 100644
--- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt
@@ -15,6 +15,7 @@ import kotlinx.serialization.json.JsonClassDiscriminator
 import kotlinx.serialization.modules.SerializersModule
 import kotlinx.serialization.modules.polymorphic
 import kotlinx.serialization.modules.subclass
+import com.google.ai.sample.network.MistralRequestCoordinator
 import okhttp3.MediaType.Companion.toMediaType
 import okhttp3.OkHttpClient
 import okhttp3.Request
@@ -70,7 +71,7 @@ data class ServiceMistralResponseMessage(
     val content: String
 )
 
-internal suspend fun callMistralApi(modelName: String, apiKey: String, chatHistory: List<Content>, inputContent: Content): Pair<String?, String?> {
+internal suspend fun callMistralApi(modelName: String, apiKeys: List<String>, chatHistory: List<Content>, inputContent: Content): Pair<String?, String?> {
     var responseText: String? = null
     var errorMessage: String? = null
 
@@ -126,10 +127,18 @@ internal suspend fun callMistralApi(modelName: String, apiKey: String, chatHisto
             .url("https://api.mistral.ai/v1/chat/completions")
             .post(jsonBody.toRequestBody(mediaType))
             .addHeader("Content-Type", "application/json")
-            .addHeader("Authorization", "Bearer $apiKey")
+            .addHeader("Authorization", "Bearer ${apiKeys.first()}")
             .build()
 
-        client.newCall(request).execute().use { response ->
+        val coordinated = MistralRequestCoordinator.execute(apiKeys = apiKeys, maxAttempts = apiKeys.size * 4 + 8) { key ->
+            client.newCall(
+                request.newBuilder()
+                    .header("Authorization", "Bearer $key")
+                    .build()
+            ).execute()
+        }
+
+        coordinated.response.use { response ->
             val responseBody = response.body?.string()
             if (!response.isSuccessful) {
                 Log.e("ScreenCaptureService", "Mistral API Error ($response.code): $responseBody")
diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
index 4551070a..c268458e 100644
--- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
@@ -297,7 +297,11 @@ class ScreenCaptureService : Service() {
                             if (apiProvider == ApiProvider.VERCEL) {
                                 responseText = callVercelApi(applicationContext, modelName, apiKey, chatHistoryDtos, inputContentDto)
                             } else if (apiProvider == ApiProvider.MISTRAL) {
-                                val result = callMistralApi(modelName, apiKey, chatHistory, inputContent)
+                                val apiKeyManager = ApiKeyManager.getInstance(applicationContext)
+                                val availableKeys = apiKeyManager.getApiKeys(ApiProvider.MISTRAL)
+                                    .filter { it.isNotBlank() }
+                                    .distinct()
+                                val result = callMistralApi(modelName, availableKeys, chatHistory, inputContent)
                                 responseText = result.first
                                 errorMessage = result.second
                             } else if (apiProvider == ApiProvider.PUTER) {
diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
index 002cb2a6..a68aa30d 100644
--- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
@@ -34,6 +34,7 @@ import com.google.ai.sample.feature.multimodal.ModelDownloadManager
 import com.google.ai.sample.ModelOption
 import com.google.ai.sample.GenerativeAiViewModelFactory
 import com.google.ai.sample.InferenceBackend
+import com.google.ai.sample.network.MistralRequestCoordinator
 import com.google.ai.sample.feature.multimodal.dtos.toDto
 import com.google.ai.sample.feature.multimodal.dtos.TempFilePathCollector
 import kotlinx.coroutines.Dispatchers
@@ -70,7 +71,6 @@ import kotlinx.serialization.modules.subclass
 import com.google.ai.sample.webrtc.WebRTCSender
 import com.google.ai.sample.webrtc.SignalingClient
 import org.webrtc.IceCandidate
-import kotlin.math.max
 
 class PhotoReasoningViewModel(
     application: Application,
@@ -183,11 +183,14 @@ class PhotoReasoningViewModel(
     // to avoid re-executing already-executed commands
     private var incrementalCommandCount = 0
 
-    // Mistral rate limiting per API key (1.1 seconds between requests with same key)
-    private val mistralNextAllowedRequestAtMsByKey = mutableMapOf<String, Long>()
-    private var lastMistralTokenTimeMs = 0L
-    private var lastMistralTokenKey: String? = null
-    private val MISTRAL_MIN_INTERVAL_MS = 1100L
+    private data class QueuedMistralScreenshotRequest(
+        val bitmap: Bitmap,
+        val screenshotUri: String,
+        val screenInfo: String?
+    )
+    private val mistralAutoScreenshotQueueLock = Any()
+    private var mistralAutoScreenshotInFlight = false
+    private var queuedMistralScreenshotRequest: QueuedMistralScreenshotRequest? = null
 
     // Accumulated full text during streaming for incremental command parsing
     private var streamingAccumulatedText = StringBuilder()
@@ -609,6 +612,7 @@ class PhotoReasoningViewModel(
         val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel()
 
         clearStaleErrorState()
+        stopExecutionFlag.set(false)
 
         // Check for Human Expert model
         if (currentModel == ModelOption.HUMAN_EXPERT) {
@@ -1024,15 +1028,16 @@ class PhotoReasoningViewModel(
         )
     }
     
-private fun reasonWithMistral(
-    userInput: String,
-    selectedImages: List<Bitmap>,
-    screenInfoForPrompt: String? = null,
-    imageUrisForChat: List<String>? = null
-) {
-    _uiState.value = PhotoReasoningUiState.Loading
-    val context = appContext
-    val apiKeyManager = ApiKeyManager.getInstance(context)
+    private fun reasonWithMistral(
+        userInput: String,
+        selectedImages: List<Bitmap>,
+        screenInfoForPrompt: String? = null,
+        imageUrisForChat: List<String>? = null
+    ) {
+        _uiState.value = PhotoReasoningUiState.Loading
+        _showStopNotificationFlow.value = true
+        val context = appContext
+        val apiKeyManager = ApiKeyManager.getInstance(context)
 
     val initialApiKey = apiKeyManager.getCurrentApiKey(ApiProvider.MISTRAL)
     if (initialApiKey.isNullOrEmpty()) {
@@ -1054,7 +1059,8 @@ private fun reasonWithMistral(
 
     resetStreamingCommandState()
 
-    viewModelScope.launch(Dispatchers.IO) {
+    currentReasoningJob?.cancel()
+    currentReasoningJob = viewModelScope.launch(Dispatchers.IO) {
         try {
             val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel()
             val genSettings = com.google.ai.sample.util.GenerationSettingsPreferences.loadSettings(context, currentModel.modelName)
@@ -1132,124 +1138,32 @@ private fun reasonWithMistral(
 
             // Validate that we have at least one key before proceeding
             require(availableKeys.isNotEmpty()) { "No valid Mistral API keys available after filtering" }
-
-            fun markKeyCooldown(key: String, referenceTimeMs: Long) {
-                val nextAllowedAt = referenceTimeMs + MISTRAL_MIN_INTERVAL_MS
-                val existing = mistralNextAllowedRequestAtMsByKey[key] ?: 0L
-                mistralNextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt)
-            }
-
-            fun remainingWaitForKeyMs(key: String, nowMs: Long): Long {
-                val nextAllowedAt = mistralNextAllowedRequestAtMsByKey[key] ?: 0L
-                return (nextAllowedAt - nowMs).coerceAtLeast(0L)
-            }
-
-            fun isRetryableMistralFailure(code: Int): Boolean {
-                return code == 429 || code >= 500
-            }
-
-            var response: okhttp3.Response? = null
-            var selectedKeyForResponse: String? = null
-            var consecutiveFailures = 0
-            var blockedKeysThisRound = mutableSetOf<String>()
-
-            val maxAttempts = availableKeys.size * 2 + 3 // Allow cycling through all keys at least twice
-            while (response == null && consecutiveFailures < maxAttempts) {
-                if (stopExecutionFlag.get()) break
-
-                val now = System.currentTimeMillis()
-                val keyPool = availableKeys.filter { it !in blockedKeysThisRound }.ifEmpty {
-                    blockedKeysThisRound.clear()
-                    availableKeys
-                }
-
-                val keyWithLeastWait = keyPool.minByOrNull { remainingWaitForKeyMs(it, now) } ?: availableKeys.first()
-                val waitMs = remainingWaitForKeyMs(keyWithLeastWait, now)
-                if (waitMs > 0L) {
-                    delay(waitMs)
+            val maxAttempts = availableKeys.size * 4 + 8
+            val coordinated = MistralRequestCoordinator.execute(
+                apiKeys = availableKeys,
+                maxAttempts = maxAttempts
+            ) { selectedKey ->
+                if (stopExecutionFlag.get()) {
+                    throw IOException("Mistral request aborted.")
                 }
-
-                val selectedKey = keyWithLeastWait
-                selectedKeyForResponse = selectedKey
-
-                try {
-                    val attemptResponse = client.newCall(buildRequest(selectedKey)).execute()
-                    val requestEndMs = System.currentTimeMillis()
-                    markKeyCooldown(selectedKey, requestEndMs)
-
-                    if (attemptResponse.isSuccessful) {
-                        response = attemptResponse
-                        break
-                    }
-
-                    val isRetryable = isRetryableMistralFailure(attemptResponse.code)
-                    if (!isRetryable) {
-                        val errBody = attemptResponse.body?.string()
-                        attemptResponse.close()
-                        throw IllegalStateException("Mistral Error ${attemptResponse.code}: $errBody")
-                    }
-
-                    attemptResponse.close()
-                    blockedKeysThisRound.add(selectedKey)
-                    consecutiveFailures++
-                    withContext(Dispatchers.Main) {
-                        replaceAiMessageText(
-                            "Mistral temporär nicht verfügbar (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...",
-                            isPending = true
-                        )
-                    }
-                } catch (e: IOException) {
-                    val requestEndMs = System.currentTimeMillis()
-                    markKeyCooldown(selectedKey, requestEndMs)
-                    blockedKeysThisRound.add(selectedKey)
-                    consecutiveFailures++
-                    if (consecutiveFailures >= 5) {
-                        throw IOException("Mistral request failed after 5 attempts: ${e.message}", e)
-                    }
-                    withContext(Dispatchers.Main) {
-                        replaceAiMessageText(
-                        if (consecutiveFailures >= maxAttempts) {
-                            throw IOException("Mistral request failed after $maxAttempts attempts: ${e.message}", e)
-                        )
-                    }
-                }
-                                "Mistral Netzwerkfehler (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...",
-
-            if (stopExecutionFlag.get()) {
-                throw IOException("Mistral request aborted.")
+                client.newCall(buildRequest(selectedKey)).execute()
             }
-
-            val finalResponse = response ?: throw IOException("Mistral request failed after 5 attempts.")
+            val finalResponse = coordinated.response
 
             if (!finalResponse.isSuccessful) {
                 val errBody = finalResponse.body?.string()
                 finalResponse.close()
-            val finalResponse = response ?: throw IOException("Mistral request failed after $maxAttempts attempts.")
+                throw IOException("Mistral Error ${finalResponse.code}: $errBody")
             }
 
             val body = finalResponse.body ?: throw IOException("Empty response body from Mistral")
             val aiResponseText = openAiStreamParser.parse(body) { accText ->
-                selectedKeyForResponse?.let { key ->
-                    lastMistralTokenKey = key
-                    lastMistralTokenTimeMs = System.currentTimeMillis()
-                    markKeyCooldown(key, lastMistralTokenTimeMs)
-                } ?: run {
-                    Log.w(TAG, "selectedKeyForResponse is null during streaming callback")
-                }
                 withContext(Dispatchers.Main) {
                     replaceAiMessageText(accText, isPending = true)
                     processCommandsIncrementally(accText)
                 }
             }
             finalResponse.close()
-            selectedKeyForResponse?.let { key ->
-                val reference = if (lastMistralTokenKey == key && lastMistralTokenTimeMs > 0L) {
-                    lastMistralTokenTimeMs
-                } else {
-                    System.currentTimeMillis()
-                }
-                markKeyCooldown(key, reference)
-            }
 
             withContext(Dispatchers.Main) {
                 _uiState.value = PhotoReasoningUiState.Success(aiResponseText)
@@ -1261,9 +1175,15 @@ private fun reasonWithMistral(
             withContext(Dispatchers.Main) {
                 Log.e(TAG, "Mistral API call failed", e)
                 _uiState.value = PhotoReasoningUiState.Error(e.message ?: "Unknown error")
+                _chatState.replaceLastPendingMessage()
                 appendErrorMessage("Error: ${e.message}")
                 saveChatHistory(context)
             }
+        } finally {
+            withContext(Dispatchers.Main) {
+                releaseAndDrainMistralAutoScreenshotQueue()
+                refreshStopButtonState()
+            }
         }
     }
 }
@@ -2360,16 +2280,22 @@ private fun processCommands(text: String) {
                             _commandExecutionStatus.value = status
                         }
                         
-                        // Create prompt with screen information if available
-                        val genericAnalysisPrompt = createGenericScreenshotPrompt()
-                        
-                        // Re-send the query with only the latest screenshot
-                        reason(
-                            userInput = genericAnalysisPrompt,
-                            selectedImages = listOf(bitmap),
-                            screenInfoForPrompt = screenInfo,
-                            imageUrisForChat = listOf(screenshotUri.toString()) // Add this argument
-                        )
+                        val currentModel = GenerativeAiViewModelFactory.getCurrentModel()
+                        if (currentModel.apiProvider == ApiProvider.MISTRAL) {
+                            enqueueMistralAutoScreenshotRequest(
+                                bitmap = bitmap,
+                                screenshotUri = screenshotUri.toString(),
+                                screenInfo = screenInfo
+                            )
+                        } else {
+                            // Re-send the query with only the latest screenshot
+                            reason(
+                                userInput = createGenericScreenshotPrompt(),
+                                selectedImages = listOf(bitmap),
+                                screenInfoForPrompt = screenInfo,
+                                imageUrisForChat = listOf(screenshotUri.toString())
+                            )
+                        }
                         
                         PhotoReasoningScreenshotUiNotifier.showAddedToConversation(context)
                     } else {
@@ -2392,5 +2318,61 @@ private fun processCommands(text: String) {
             }
         }
     }
+
+    /**
+     * Requests a Mistral analysis of an automatically captured screenshot while keeping at most
+     * one such request running. If one is already running, the new screenshot is parked and
+     * replaces any screenshot parked earlier.
+     * NOTE(review): the in-flight flag is cleared only by releaseAndDrainMistralAutoScreenshotQueue;
+     * confirm every exit path of reasonWithMistral reaches it, otherwise later requests stay parked.
+     */
+    private fun enqueueMistralAutoScreenshotRequest(
+        bitmap: Bitmap,
+        screenshotUri: String,
+        screenInfo: String?
+    ) {
+        val pending = QueuedMistralScreenshotRequest(bitmap, screenshotUri, screenInfo)
+        val startImmediately = synchronized(mistralAutoScreenshotQueueLock) {
+            if (mistralAutoScreenshotInFlight) {
+                queuedMistralScreenshotRequest = pending
+                Log.d(TAG, "Mistral auto screenshot request queued (latest wins).")
+                false
+            } else {
+                mistralAutoScreenshotInFlight = true
+                true
+            }
+        }
+        // Dispatch outside the lock so the request is not started while holding it.
+        if (!startImmediately) return
+        dispatchMistralAutoScreenshotRequest(pending)
+    }
+
+    /** Sends the screenshot in [request] to Mistral with the generic screenshot prompt. */
+    private fun dispatchMistralAutoScreenshotRequest(request: QueuedMistralScreenshotRequest) {
+        val (screenshot, uri, screenDescription) = request
+        reasonWithMistral(
+            userInput = createGenericScreenshotPrompt(),
+            selectedImages = listOf(screenshot),
+            screenInfoForPrompt = screenDescription,
+            imageUrisForChat = listOf(uri)
+        )
+    }
+
+    /**
+     * Called when a Mistral request has finished: starts the parked screenshot request if there
+     * is one, otherwise clears the in-flight flag so the next screenshot starts right away.
+     */
+    private fun releaseAndDrainMistralAutoScreenshotQueue() {
+        val parked = synchronized(mistralAutoScreenshotQueueLock) {
+            val queued = queuedMistralScreenshotRequest
+            queuedMistralScreenshotRequest = null
+            if (queued == null) {
+                mistralAutoScreenshotInFlight = false
+            }
+            queued
+        } ?: return
+        Log.d(TAG, "Draining queued Mistral auto screenshot request.")
+        dispatchMistralAutoScreenshotRequest(parked)
+    }
     
 }
diff --git a/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt b/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt
new file mode 100644
index 00000000..2cdf7bd0
--- /dev/null
+++ b/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt
@@ -0,0 +1,178 @@
+package com.google.ai.sample.network
+
+import kotlinx.coroutines.CancellationException
+import kotlinx.coroutines.currentCoroutineContext
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.ensureActive
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
+import okhttp3.Response
+import kotlin.math.max
+import kotlin.math.roundToLong
+
+/**
+ * Outcome of [MistralRequestCoordinator.execute]: the HTTP [response] that ended the retry loop
+ * and the [apiKey] it was obtained with. The response is returned open; the caller must close it.
+ */
+internal data class MistralCoordinatedResponse(
+    val response: Response,
+    val apiKey: String
+)
+
+/**
+ * Shared gate for Mistral HTTP calls.
+ *
+ * Remembers, per API key, the earliest time the key may be used again and picks the key with the
+ * shortest remaining wait for every attempt. HTTP 429 / 5xx responses and thrown exceptions count
+ * as failed attempts and are retried with a growing per-key cooldown.
+ */
+internal object MistralRequestCoordinator {
+    /** Minimum spacing, in milliseconds, between two requests made with the same key. */
+    private const val MIN_INTERVAL_MS = 1500L
+
+    /** Guards every access to [nextAllowedRequestAtMsByKey]. */
+    private val cooldownMutex = Mutex()
+
+    /** Epoch-millisecond timestamp before which each key must not be used again. */
+    private val nextAllowedRequestAtMsByKey = mutableMapOf<String, Long>()
+
+    /**
+     * Moves the cooldown of [key] to `referenceTimeMs + max(MIN_INTERVAL_MS, extraDelayMs)`.
+     * A cooldown that already ends later is kept, so a key's deadline never moves backwards.
+     */
+    private suspend fun markKeyCooldown(
+        key: String,
+        referenceTimeMs: Long,
+        extraDelayMs: Long = 0L
+    ) {
+        val nextAllowedAt = referenceTimeMs + max(MIN_INTERVAL_MS, extraDelayMs.coerceAtLeast(0L))
+        cooldownMutex.withLock {
+            val existing = nextAllowedRequestAtMsByKey[key] ?: 0L
+            nextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt)
+        }
+    }
+
+    /** Milliseconds [key] still has to wait at [nowMs]; 0 if it is usable or was never used. */
+    private suspend fun remainingWaitForKeyMs(key: String, nowMs: Long): Long {
+        return cooldownMutex.withLock {
+            val nextAllowedAt = nextAllowedRequestAtMsByKey[key] ?: 0L
+            (nextAllowedAt - nowMs).coerceAtLeast(0L)
+        }
+    }
+
+    /**
+     * Converts a `Retry-After` value given in (possibly fractional) seconds to milliseconds.
+     * Returns null for a missing, blank, non-numeric or non-finite value; the HTTP-date form of
+     * the header is therefore ignored.
+     */
+    private fun parseRetryAfterMs(headerValue: String?): Long? {
+        if (headerValue.isNullOrBlank()) return null
+        val seconds = headerValue.trim().toDoubleOrNull() ?: return null
+        // "NaN" parses as a Double, but roundToLong() throws on it; treat it as "no header".
+        if (!seconds.isFinite()) return null
+        return (seconds * 1000.0).roundToLong().coerceAtLeast(0L)
+    }
+
+    /**
+     * Treats `x-ratelimit-reset` as an absolute Unix timestamp in seconds and returns the time
+     * left until then, never negative. Returns null when the header is absent or not an integer.
+     * NOTE(review): a relative value (seconds from now) would always yield 0 here; confirm the
+     * format the server actually sends.
+     */
+    private fun parseRateLimitResetDelayMs(response: Response, nowMs: Long): Long? {
+        val resetHeader = response.header("x-ratelimit-reset") ?: return null
+        val resetEpochSeconds = resetHeader.trim().toLongOrNull() ?: return null
+        val resetMs = resetEpochSeconds * 1000L
+        return (resetMs - nowMs).coerceAtLeast(0L)
+    }
+
+    /** Exponential backoff: 1 s after the first failure, doubling up to a cap of 32 s. */
+    private fun adaptiveRetryDelayMs(failureCount: Int): Long {
+        val cappedExponent = (failureCount - 1).coerceIn(0, 5)
+        return 1000L shl cappedExponent
+    }
+
+    /** Retry only on rate limiting (429) and on status codes of 500 and above. */
+    private fun isRetryableFailure(code: Int): Boolean = code == 429 || code >= 500
+
+    /**
+     * Runs [request] with one of [apiKeys], honouring per-key cooldowns, until it produces a
+     * response that is either successful or not retryable.
+     *
+     * @param apiKeys keys to rotate through; must not be empty.
+     * @param maxAttempts number of failed attempts after which the call gives up.
+     * @param request performs one HTTP call with the given key and returns its response.
+     * @return the final response, still open, together with the key that produced it.
+     * @throws IllegalArgumentException if [apiKeys] is empty.
+     * @throws IllegalStateException if attempts run out on a retryable HTTP status (or [maxAttempts] is not positive).
+     * @throws CancellationException as soon as the calling coroutine is cancelled.
+     * @throws Exception the exception of [request] if it caused the last allowed attempt to fail.
+     */
+    suspend fun execute(
+        apiKeys: List<String>,
+        maxAttempts: Int = apiKeys.size * 4 + 8,
+        request: suspend (apiKey: String) -> Response
+    ): MistralCoordinatedResponse {
+        require(apiKeys.isNotEmpty()) { "No Mistral API keys provided." }
+
+        var consecutiveFailures = 0
+        val blockedKeysThisRound = mutableSetOf<String>()
+
+        while (consecutiveFailures < maxAttempts) {
+            // A blocking HTTP call and an uncontended mutex never observe cancellation, so check
+            // explicitly before spending another attempt.
+            currentCoroutineContext().ensureActive()
+
+            val now = System.currentTimeMillis()
+            // Keys that failed in this round are skipped until every key has failed once.
+            val keyPool = apiKeys.filter { it !in blockedKeysThisRound }.ifEmpty {
+                blockedKeysThisRound.clear()
+                apiKeys
+            }
+
+            // Pick the key that becomes usable first; ties go to the earliest key in the list.
+            var selectedKey = keyPool.first()
+            var waitMs = Long.MAX_VALUE
+            for (candidate in keyPool) {
+                val candidateWait = remainingWaitForKeyMs(candidate, now)
+                if (candidateWait < waitMs) {
+                    waitMs = candidateWait
+                    selectedKey = candidate
+                }
+            }
+            if (waitMs > 0L) {
+                delay(waitMs)
+            }
+
+            try {
+                val response = request(selectedKey)
+                val requestEndMs = System.currentTimeMillis()
+                val retryAfterMs = parseRetryAfterMs(response.header("Retry-After"))
+                val resetDelayMs = parseRateLimitResetDelayMs(response, requestEndMs)
+                val serverRequestedDelayMs = max(retryAfterMs ?: 0L, resetDelayMs ?: 0L)
+                markKeyCooldown(selectedKey, requestEndMs, serverRequestedDelayMs)
+
+                if (response.isSuccessful || !isRetryableFailure(response.code)) {
+                    return MistralCoordinatedResponse(response = response, apiKey = selectedKey)
+                }
+
+                response.close()
+                blockedKeysThisRound.add(selectedKey)
+                consecutiveFailures++
+                val adaptiveDelay = adaptiveRetryDelayMs(consecutiveFailures)
+                markKeyCooldown(selectedKey, requestEndMs, max(serverRequestedDelayMs, adaptiveDelay))
+            } catch (e: CancellationException) {
+                // Cancellation is not a failed attempt; retrying would keep a cancelled job alive.
+                throw e
+            } catch (e: Exception) {
+                val requestEndMs = System.currentTimeMillis()
+                blockedKeysThisRound.add(selectedKey)
+                consecutiveFailures++
+                markKeyCooldown(selectedKey, requestEndMs, adaptiveRetryDelayMs(consecutiveFailures))
+                if (consecutiveFailures >= maxAttempts) throw e
+            }
+        }
+
+        throw IllegalStateException("Mistral request failed after $maxAttempts attempts.")
+    }
+}
diff --git a/scripts/mistral_cooldown_probe.py b/scripts/mistral_cooldown_probe.py
new file mode 100755
index 00000000..d470a62d
--- /dev/null
+++ b/scripts/mistral_cooldown_probe.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""Measure how long a Mistral API key has to rest between two chat-completion requests.
+
+Two reference points are probed over a range of delays: the last streamed token of a baseline
+request, and the moment the baseline request was started.
+
+The API key is taken from the MISTRAL_API_KEY environment variable so that no secret is stored
+in the repository.
+"""
+import json
+import os
+import subprocess
+import sys
+import time
+from typing import Tuple, List
+
+# Read the secret from the environment; an empty value is rejected in the __main__ guard.
+MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "")
+MISTRAL_MODEL = "mistral-large-latest"
+MISTRAL_ENDPOINT = "https://api.mistral.ai/v1/chat/completions"
+
+
+def now_ms() -> int:
+    """Return the current wall-clock time in milliseconds since the Unix epoch."""
+    return int(time.time() * 1000)
+
+
+def build_curl_command(payload: dict, stream: bool) -> List[str]:
+    """Build the curl argument list for one chat-completion POST.
+
+    The ``-w`` format makes curl print ``HTTP_STATUS:<code>`` on a line of its own after the
+    response body. For streaming requests ``-N`` turns off curl's output buffering.
+    """
+    cmd = [
+        "curl",
+        "-sS",
+        "-X",
+        "POST",
+        MISTRAL_ENDPOINT,
+        "-H",
+        "Content-Type: application/json",
+        "-H",
+        f"Authorization: Bearer {MISTRAL_API_KEY}",
+        "--data-binary",
+        json.dumps(payload),
+        "-w",
+        "\nHTTP_STATUS:%{http_code}\n",
+    ]
+    if stream:
+        cmd.insert(1, "-N")
+    return cmd
+
+
+def parse_http_status(line: str) -> int:
+    """Extract the code from an ``HTTP_STATUS:<code>`` line; 0 if it is not a number."""
+    try:
+        return int(line.split(":", 1)[1].strip())
+    except ValueError:
+        return 0
+
+
+def baseline_payload() -> dict:
+    """Streaming request whose completion is the reference point for the measured delay."""
+    return {
+        "model": MISTRAL_MODEL,
+        "messages": [{"role": "user", "content": "Sag nur OK."}],
+        "max_tokens": 32,
+        "stream": True,
+    }
+
+
+def probe_payload() -> dict:
+    """Minimal non-streaming request used to test whether the key is accepted again."""
+    return {
+        "model": MISTRAL_MODEL,
+        "messages": [{"role": "user", "content": "OK?"}],
+        "max_tokens": 1,
+        "stream": False,
+    }
+
+
+def curl_chat(payload: dict, stream: bool) -> Tuple[int, int, int]:
+    """
+    Returns: (http_code, request_started_ms, last_token_ms_or_response_end_ms)
+    For non-stream requests, 3rd value is response-end timestamp.
+    Raises RuntimeError when curl exits with a non-zero status.
+    """
+    request_started = now_ms()
+    proc = subprocess.Popen(
+        build_curl_command(payload, stream),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        bufsize=1,
+    )
+
+    last_token_ms = request_started
+    http_code = 0
+    assert proc.stdout is not None
+    for line in proc.stdout:
+        line = line.rstrip("\n")
+        if line.startswith("data:"):
+            # Server-sent event: every non-empty chunk except the terminator counts as a token.
+            data = line[5:].strip()
+            if data and data != "[DONE]":
+                last_token_ms = now_ms()
+        elif line.startswith("HTTP_STATUS:"):
+            http_code = parse_http_status(line)
+
+    exit_code = proc.wait()
+    if exit_code != 0:
+        raise RuntimeError(f"curl failed with exit code {exit_code}")
+
+    if not stream:
+        last_token_ms = now_ms()
+    return http_code, request_started, last_token_ms
+
+
+def sleep_until(target_ms: int) -> None:
+    """Block until the wall clock reaches target_ms; return at once if it already has."""
+    remaining = target_ms - now_ms()
+    if remaining > 0:
+        time.sleep(remaining / 1000.0)
+
+
+def probe_last_token_mode(delays: List[int]) -> None:
+    """For each delay: finish a streamed request, wait that long after its last token, probe."""
+    print("=== PROBE: ab_letztem_token ===")
+    min_success = None
+    for delay in delays:
+        code, _, last_token = curl_chat(baseline_payload(), stream=True)
+        if code != 200:
+            print(f"baseline_stream_failed http={code}")
+            continue
+
+        sleep_until(last_token + delay)
+        probe_code, _, _ = curl_chat(probe_payload(), stream=False)
+        print(f"delay={delay}ms http={probe_code}")
+        if min_success is None and probe_code == 200:
+            min_success = delay
+    print(f"min_success_delay_ms={min_success}")
+    print()
+
+
+def probe_request_start_mode(delays: List[int]) -> None:
+    """For each delay: start a streamed request, probe that long after its start."""
+    print("=== PROBE: ab_request_start ===")
+    min_success = None
+    for delay in delays:
+        request_started = now_ms()
+        baseline_proc = subprocess.Popen(
+            build_curl_command(baseline_payload(), stream=True),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+        )
+        try:
+            sleep_until(request_started + delay)
+            probe_code, _, _ = curl_chat(probe_payload(), stream=False)
+        finally:
+            # Always collect the baseline process so a failing probe does not leave curl behind.
+            baseline_output, _ = baseline_proc.communicate()
+
+        print(f"delay={delay}ms http={probe_code}")
+        if min_success is None and probe_code == 200:
+            min_success = delay
+
+        baseline_status = 0
+        for line in baseline_output.splitlines():
+            if line.startswith("HTTP_STATUS:"):
+                baseline_status = parse_http_status(line)
+        if baseline_status != 200:
+            print(f"baseline_stream_failed http={baseline_status}")
+    print(f"min_success_delay_ms={min_success}")
+    print()
+
+
+if __name__ == "__main__":
+    if not MISTRAL_API_KEY:
+        sys.exit("MISTRAL_API_KEY is not set; export it before running this probe.")
+    step_delays = list(range(100, 3001, 100))
+    probe_last_token_mode(step_delays)
+    probe_request_start_mode(step_delays)
diff --git a/scripts/mistral_cooldown_probe.sh b/scripts/mistral_cooldown_probe.sh
new file mode 100755
index 00000000..673aa4ce
--- /dev/null
+++ b/scripts/mistral_cooldown_probe.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+exec python3 "$SCRIPT_DIR/mistral_cooldown_probe.py"