diff --git a/YouCut_20250526_184250539 github.mp4 b/YouCut_20250526_184250539 github.mp4 deleted file mode 100644 index efa13f83..00000000 Binary files a/YouCut_20250526_184250539 github.mp4 and /dev/null differ diff --git a/app-release-signed.apk b/app-release-signed.apk deleted file mode 100644 index 4c3ade4f..00000000 Binary files a/app-release-signed.apk and /dev/null differ diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt index 00489d4f..3a858a5f 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt @@ -15,6 +15,7 @@ import kotlinx.serialization.json.JsonClassDiscriminator import kotlinx.serialization.modules.SerializersModule import kotlinx.serialization.modules.polymorphic import kotlinx.serialization.modules.subclass +import com.google.ai.sample.network.MistralRequestCoordinator import okhttp3.MediaType.Companion.toMediaType import okhttp3.OkHttpClient import okhttp3.Request @@ -70,7 +71,7 @@ data class ServiceMistralResponseMessage( val content: String ) -internal suspend fun callMistralApi(modelName: String, apiKey: String, chatHistory: List, inputContent: Content): Pair { +internal suspend fun callMistralApi(modelName: String, apiKeys: List, chatHistory: List, inputContent: Content): Pair { var responseText: String? = null var errorMessage: String? = null @@ -126,10 +127,18 @@ internal suspend fun callMistralApi(modelName: String, apiKey: String, chatHisto .url("https://api.mistral.ai/v1/chat/completions") .post(jsonBody.toRequestBody(mediaType)) .addHeader("Content-Type", "application/json") - .addHeader("Authorization", "Bearer $apiKey") + .addHeader("Authorization", "Bearer ${apiKeys.first()}") .build() - client.newCall(request).execute().use { response -> + val coordinated = MistralRequestCoordinator.execute(apiKeys = apiKeys, maxAttempts = apiKeys.size * 4 + 8) { key -> + client.newCall( + request.newBuilder() + .header("Authorization", "Bearer $key") + .build() + ).execute() + } + + coordinated.response.use { response -> val responseBody = response.body?.string() if (!response.isSuccessful) { Log.e("ScreenCaptureService", "Mistral API Error ($response.code): $responseBody") diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt index 4551070a..c268458e 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt @@ -297,7 +297,11 @@ class ScreenCaptureService : Service() { if (apiProvider == ApiProvider.VERCEL) { responseText = callVercelApi(applicationContext, modelName, apiKey, chatHistoryDtos, inputContentDto) } else if (apiProvider == ApiProvider.MISTRAL) { - val result = callMistralApi(modelName, apiKey, chatHistory, inputContent) + val apiKeyManager = ApiKeyManager.getInstance(applicationContext) + val availableKeys = apiKeyManager.getApiKeys(ApiProvider.MISTRAL) + .filter { it.isNotBlank() } + .distinct() + val result = callMistralApi(modelName, availableKeys, chatHistory, inputContent) responseText = result.first errorMessage = result.second } else if (apiProvider == ApiProvider.PUTER) { diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt index 002cb2a6..a68aa30d 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt @@ -34,6 +34,7 @@ import com.google.ai.sample.feature.multimodal.ModelDownloadManager import com.google.ai.sample.ModelOption import com.google.ai.sample.GenerativeAiViewModelFactory import com.google.ai.sample.InferenceBackend +import com.google.ai.sample.network.MistralRequestCoordinator import com.google.ai.sample.feature.multimodal.dtos.toDto import com.google.ai.sample.feature.multimodal.dtos.TempFilePathCollector import kotlinx.coroutines.Dispatchers @@ -70,7 +71,6 @@ import kotlinx.serialization.modules.subclass import com.google.ai.sample.webrtc.WebRTCSender import com.google.ai.sample.webrtc.SignalingClient import org.webrtc.IceCandidate -import kotlin.math.max class PhotoReasoningViewModel( application: Application, @@ -183,11 +183,14 @@ class PhotoReasoningViewModel( // to avoid re-executing already-executed commands private var incrementalCommandCount = 0 - // Mistral rate limiting per API key (1.1 seconds between requests with same key) - private val mistralNextAllowedRequestAtMsByKey = mutableMapOf() - private var lastMistralTokenTimeMs = 0L - private var lastMistralTokenKey: String? = null - private val MISTRAL_MIN_INTERVAL_MS = 1100L + private data class QueuedMistralScreenshotRequest( + val bitmap: Bitmap, + val screenshotUri: String, + val screenInfo: String? + ) + private val mistralAutoScreenshotQueueLock = Any() + private var mistralAutoScreenshotInFlight = false + private var queuedMistralScreenshotRequest: QueuedMistralScreenshotRequest? = null // Accumulated full text during streaming for incremental command parsing private var streamingAccumulatedText = StringBuilder() @@ -609,6 +612,7 @@ class PhotoReasoningViewModel( val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() clearStaleErrorState() + stopExecutionFlag.set(false) // Check for Human Expert model if (currentModel == ModelOption.HUMAN_EXPERT) { @@ -1024,15 +1028,16 @@ class PhotoReasoningViewModel( ) } -private fun reasonWithMistral( - userInput: String, - selectedImages: List, - screenInfoForPrompt: String? = null, - imageUrisForChat: List? = null -) { - _uiState.value = PhotoReasoningUiState.Loading - val context = appContext - val apiKeyManager = ApiKeyManager.getInstance(context) + private fun reasonWithMistral( + userInput: String, + selectedImages: List, + screenInfoForPrompt: String? = null, + imageUrisForChat: List? = null + ) { + _uiState.value = PhotoReasoningUiState.Loading + _showStopNotificationFlow.value = true + val context = appContext + val apiKeyManager = ApiKeyManager.getInstance(context) val initialApiKey = apiKeyManager.getCurrentApiKey(ApiProvider.MISTRAL) if (initialApiKey.isNullOrEmpty()) { @@ -1054,7 +1059,8 @@ private fun reasonWithMistral( resetStreamingCommandState() - viewModelScope.launch(Dispatchers.IO) { + currentReasoningJob?.cancel() + currentReasoningJob = viewModelScope.launch(Dispatchers.IO) { try { val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() val genSettings = com.google.ai.sample.util.GenerationSettingsPreferences.loadSettings(context, currentModel.modelName) @@ -1132,124 +1138,32 @@ private fun reasonWithMistral( // Validate that we have at least one key before proceeding require(availableKeys.isNotEmpty()) { "No valid Mistral API keys available after filtering" } - - fun markKeyCooldown(key: String, referenceTimeMs: Long) { - val nextAllowedAt = referenceTimeMs + MISTRAL_MIN_INTERVAL_MS - val existing = mistralNextAllowedRequestAtMsByKey[key] ?: 0L - mistralNextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt) - } - - fun remainingWaitForKeyMs(key: String, nowMs: Long): Long { - val nextAllowedAt = mistralNextAllowedRequestAtMsByKey[key] ?: 0L - return (nextAllowedAt - nowMs).coerceAtLeast(0L) - } - - fun isRetryableMistralFailure(code: Int): Boolean { - return code == 429 || code >= 500 - } - - var response: okhttp3.Response? = null - var selectedKeyForResponse: String? = null - var consecutiveFailures = 0 - var blockedKeysThisRound = mutableSetOf() - - val maxAttempts = availableKeys.size * 2 + 3 // Allow cycling through all keys at least twice - while (response == null && consecutiveFailures < maxAttempts) { - if (stopExecutionFlag.get()) break - - val now = System.currentTimeMillis() - val keyPool = availableKeys.filter { it !in blockedKeysThisRound }.ifEmpty { - blockedKeysThisRound.clear() - availableKeys - } - - val keyWithLeastWait = keyPool.minByOrNull { remainingWaitForKeyMs(it, now) } ?: availableKeys.first() - val waitMs = remainingWaitForKeyMs(keyWithLeastWait, now) - if (waitMs > 0L) { - delay(waitMs) + val maxAttempts = availableKeys.size * 4 + 8 + val coordinated = MistralRequestCoordinator.execute( + apiKeys = availableKeys, + maxAttempts = maxAttempts + ) { selectedKey -> + if (stopExecutionFlag.get()) { + throw IOException("Mistral request aborted.") } - - val selectedKey = keyWithLeastWait - selectedKeyForResponse = selectedKey - - try { - val attemptResponse = client.newCall(buildRequest(selectedKey)).execute() - val requestEndMs = System.currentTimeMillis() - markKeyCooldown(selectedKey, requestEndMs) - - if (attemptResponse.isSuccessful) { - response = attemptResponse - break - } - - val isRetryable = isRetryableMistralFailure(attemptResponse.code) - if (!isRetryable) { - val errBody = attemptResponse.body?.string() - attemptResponse.close() - throw IllegalStateException("Mistral Error ${attemptResponse.code}: $errBody") - } - - attemptResponse.close() - blockedKeysThisRound.add(selectedKey) - consecutiveFailures++ - withContext(Dispatchers.Main) { - replaceAiMessageText( - "Mistral temporär nicht verfügbar (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...", - isPending = true - ) - } - } catch (e: IOException) { - val requestEndMs = System.currentTimeMillis() - markKeyCooldown(selectedKey, requestEndMs) - blockedKeysThisRound.add(selectedKey) - consecutiveFailures++ - if (consecutiveFailures >= 5) { - throw IOException("Mistral request failed after 5 attempts: ${e.message}", e) - } - withContext(Dispatchers.Main) { - replaceAiMessageText( - if (consecutiveFailures >= maxAttempts) { - throw IOException("Mistral request failed after $maxAttempts attempts: ${e.message}", e) - ) - } - } - "Mistral Netzwerkfehler (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...", - - if (stopExecutionFlag.get()) { - throw IOException("Mistral request aborted.") + client.newCall(buildRequest(selectedKey)).execute() } - - val finalResponse = response ?: throw IOException("Mistral request failed after 5 attempts.") + val finalResponse = coordinated.response if (!finalResponse.isSuccessful) { val errBody = finalResponse.body?.string() finalResponse.close() - val finalResponse = response ?: throw IOException("Mistral request failed after $maxAttempts attempts.") + throw IOException("Mistral Error ${finalResponse.code}: $errBody") } val body = finalResponse.body ?: throw IOException("Empty response body from Mistral") val aiResponseText = openAiStreamParser.parse(body) { accText -> - selectedKeyForResponse?.let { key -> - lastMistralTokenKey = key - lastMistralTokenTimeMs = System.currentTimeMillis() - markKeyCooldown(key, lastMistralTokenTimeMs) - } ?: run { - Log.w(TAG, "selectedKeyForResponse is null during streaming callback") - } withContext(Dispatchers.Main) { replaceAiMessageText(accText, isPending = true) processCommandsIncrementally(accText) } } finalResponse.close() - selectedKeyForResponse?.let { key -> - val reference = if (lastMistralTokenKey == key && lastMistralTokenTimeMs > 0L) { - lastMistralTokenTimeMs - } else { - System.currentTimeMillis() - } - markKeyCooldown(key, reference) - } withContext(Dispatchers.Main) { _uiState.value = PhotoReasoningUiState.Success(aiResponseText) @@ -1261,9 +1175,15 @@ private fun reasonWithMistral( withContext(Dispatchers.Main) { Log.e(TAG, "Mistral API call failed", e) _uiState.value = PhotoReasoningUiState.Error(e.message ?: "Unknown error") + _chatState.replaceLastPendingMessage() appendErrorMessage("Error: ${e.message}") saveChatHistory(context) } + } finally { + withContext(Dispatchers.Main) { + releaseAndDrainMistralAutoScreenshotQueue() + refreshStopButtonState() + } } } } @@ -2360,16 +2280,22 @@ private fun processCommands(text: String) { _commandExecutionStatus.value = status } - // Create prompt with screen information if available - val genericAnalysisPrompt = createGenericScreenshotPrompt() - - // Re-send the query with only the latest screenshot - reason( - userInput = genericAnalysisPrompt, - selectedImages = listOf(bitmap), - screenInfoForPrompt = screenInfo, - imageUrisForChat = listOf(screenshotUri.toString()) // Add this argument - ) + val currentModel = GenerativeAiViewModelFactory.getCurrentModel() + if (currentModel.apiProvider == ApiProvider.MISTRAL) { + enqueueMistralAutoScreenshotRequest( + bitmap = bitmap, + screenshotUri = screenshotUri.toString(), + screenInfo = screenInfo + ) + } else { + // Re-send the query with only the latest screenshot + reason( + userInput = createGenericScreenshotPrompt(), + selectedImages = listOf(bitmap), + screenInfoForPrompt = screenInfo, + imageUrisForChat = listOf(screenshotUri.toString()) + ) + } PhotoReasoningScreenshotUiNotifier.showAddedToConversation(context) } else { @@ -2392,5 +2318,57 @@ private fun processCommands(text: String) { } } } + + private fun enqueueMistralAutoScreenshotRequest( + bitmap: Bitmap, + screenshotUri: String, + screenInfo: String? + ) { + val request = QueuedMistralScreenshotRequest( + bitmap = bitmap, + screenshotUri = screenshotUri, + screenInfo = screenInfo + ) + var shouldStartNow = false + synchronized(mistralAutoScreenshotQueueLock) { + if (mistralAutoScreenshotInFlight) { + queuedMistralScreenshotRequest = request + Log.d(TAG, "Mistral auto screenshot request queued (latest wins).") + } else { + mistralAutoScreenshotInFlight = true + shouldStartNow = true + } + } + if (shouldStartNow) { + dispatchMistralAutoScreenshotRequest(request) + } + } + + private fun dispatchMistralAutoScreenshotRequest(request: QueuedMistralScreenshotRequest) { + val genericAnalysisPrompt = createGenericScreenshotPrompt() + reasonWithMistral( + userInput = genericAnalysisPrompt, + selectedImages = listOf(request.bitmap), + screenInfoForPrompt = request.screenInfo, + imageUrisForChat = listOf(request.screenshotUri) + ) + } + + private fun releaseAndDrainMistralAutoScreenshotQueue() { + val nextRequest: QueuedMistralScreenshotRequest? = synchronized(mistralAutoScreenshotQueueLock) { + val queued = queuedMistralScreenshotRequest + if (queued == null) { + mistralAutoScreenshotInFlight = false + null + } else { + queuedMistralScreenshotRequest = null + queued + } + } + if (nextRequest != null) { + Log.d(TAG, "Draining queued Mistral auto screenshot request.") + dispatchMistralAutoScreenshotRequest(nextRequest) + } + } } diff --git a/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt b/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt new file mode 100644 index 00000000..2cdf7bd0 --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/network/MistralRequestCoordinator.kt @@ -0,0 +1,117 @@ +package com.google.ai.sample.network + +import kotlinx.coroutines.delay +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock +import okhttp3.Response +import kotlin.math.max +import kotlin.math.roundToLong + +internal data class MistralCoordinatedResponse( + val response: Response, + val apiKey: String +) + +internal object MistralRequestCoordinator { + private const val MIN_INTERVAL_MS = 1500L + private val cooldownMutex = Mutex() + private val nextAllowedRequestAtMsByKey = mutableMapOf() + + private suspend fun markKeyCooldown( + key: String, + referenceTimeMs: Long, + extraDelayMs: Long = 0L + ) { + val nextAllowedAt = referenceTimeMs + max(MIN_INTERVAL_MS, extraDelayMs.coerceAtLeast(0L)) + cooldownMutex.withLock { + val existing = nextAllowedRequestAtMsByKey[key] ?: 0L + nextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt) + } + } + + private suspend fun remainingWaitForKeyMs(key: String, nowMs: Long): Long { + return cooldownMutex.withLock { + val nextAllowedAt = nextAllowedRequestAtMsByKey[key] ?: 0L + (nextAllowedAt - nowMs).coerceAtLeast(0L) + } + } + + private fun parseRetryAfterMs(headerValue: String?): Long? { + if (headerValue.isNullOrBlank()) return null + val seconds = headerValue.trim().toDoubleOrNull() ?: return null + return (seconds * 1000.0).roundToLong().coerceAtLeast(0L) + } + + private fun parseRateLimitResetDelayMs(response: Response, nowMs: Long): Long? { + val resetHeader = response.header("x-ratelimit-reset") ?: return null + val resetEpochSeconds = resetHeader.trim().toLongOrNull() ?: return null + val resetMs = resetEpochSeconds * 1000L + return (resetMs - nowMs).coerceAtLeast(0L) + } + + private fun adaptiveRetryDelayMs(failureCount: Int): Long { + val cappedExponent = (failureCount - 1).coerceIn(0, 5) + return 1000L shl cappedExponent + } + + private fun isRetryableFailure(code: Int): Boolean = code == 429 || code >= 500 + + suspend fun execute( + apiKeys: List, + maxAttempts: Int = apiKeys.size * 4 + 8, + request: suspend (apiKey: String) -> Response + ): MistralCoordinatedResponse { + require(apiKeys.isNotEmpty()) { "No Mistral API keys provided." } + + var consecutiveFailures = 0 + var blockedKeysThisRound = mutableSetOf() + + while (consecutiveFailures < maxAttempts) { + val now = System.currentTimeMillis() + val keyPool = apiKeys.filter { it !in blockedKeysThisRound }.ifEmpty { + blockedKeysThisRound.clear() + apiKeys + } + + var selectedKey = keyPool.first() + var waitMs = Long.MAX_VALUE + for (candidate in keyPool) { + val candidateWait = remainingWaitForKeyMs(candidate, now) + if (candidateWait < waitMs) { + waitMs = candidateWait + selectedKey = candidate + } + } + if (waitMs > 0L) { + delay(waitMs) + } + + try { + val response = request(selectedKey) + val requestEndMs = System.currentTimeMillis() + val retryAfterMs = parseRetryAfterMs(response.header("Retry-After")) + val resetDelayMs = parseRateLimitResetDelayMs(response, requestEndMs) + val serverRequestedDelayMs = max(retryAfterMs ?: 0L, resetDelayMs ?: 0L) + markKeyCooldown(selectedKey, requestEndMs, serverRequestedDelayMs) + + if (response.isSuccessful || !isRetryableFailure(response.code)) { + return MistralCoordinatedResponse(response = response, apiKey = selectedKey) + } + + response.close() + blockedKeysThisRound.add(selectedKey) + consecutiveFailures++ + val adaptiveDelay = adaptiveRetryDelayMs(consecutiveFailures) + markKeyCooldown(selectedKey, requestEndMs, max(serverRequestedDelayMs, adaptiveDelay)) + } catch (e: Exception) { + val requestEndMs = System.currentTimeMillis() + blockedKeysThisRound.add(selectedKey) + consecutiveFailures++ + markKeyCooldown(selectedKey, requestEndMs, adaptiveRetryDelayMs(consecutiveFailures)) + if (consecutiveFailures >= maxAttempts) throw e + } + } + + throw IllegalStateException("Mistral request failed after $maxAttempts attempts.") + } +} diff --git a/scripts/mistral_cooldown_probe.py b/scripts/mistral_cooldown_probe.py new file mode 100755 index 00000000..d470a62d --- /dev/null +++ b/scripts/mistral_cooldown_probe.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +import json +import subprocess +import time +from typing import Tuple, List + +MISTRAL_API_KEY = "zsEegAJFadHH4uooe2lW0HVNmy1rpqGT" +MISTRAL_MODEL = "mistral-large-latest" +MISTRAL_ENDPOINT = "https://api.mistral.ai/v1/chat/completions" + + +def now_ms() -> int: + return int(time.time() * 1000) + + +def curl_chat(payload: dict, stream: bool) -> Tuple[int, int, int]: + """ + Returns: (http_code, request_started_ms, last_token_ms_or_response_end_ms) + For non-stream requests, 3rd value is response-end timestamp. + """ + request_started = now_ms() + cmd = [ + "curl", + "-sS", + "-X", + "POST", + MISTRAL_ENDPOINT, + "-H", + "Content-Type: application/json", + "-H", + f"Authorization: Bearer {MISTRAL_API_KEY}", + "--data-binary", + json.dumps(payload), + "-w", + "\nHTTP_STATUS:%{http_code}\n", + ] + if stream: + cmd.insert(1, "-N") + + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + last_token_ms = request_started + http_code = 0 + assert proc.stdout is not None + for line in proc.stdout: + line = line.rstrip("\n") + if line.startswith("data:"): + data = line[5:].strip() + if data and data != "[DONE]": + last_token_ms = now_ms() + elif line.startswith("HTTP_STATUS:"): + try: + http_code = int(line.split(":", 1)[1].strip()) + except ValueError: + http_code = 0 + + exit_code = proc.wait() + if exit_code != 0: + raise RuntimeError(f"curl failed with exit code {exit_code}") + + if not stream: + last_token_ms = now_ms() + return http_code, request_started, last_token_ms + + +def sleep_until(target_ms: int) -> None: + remaining = target_ms - now_ms() + if remaining > 0: + time.sleep(remaining / 1000.0) + + +def probe_last_token_mode(delays: List[int]) -> None: + print("=== PROBE: ab_letztem_token ===") + min_success = None + for delay in delays: + stream_payload = { + "model": MISTRAL_MODEL, + "messages": [{"role": "user", "content": "Sag nur OK."}], + "max_tokens": 32, + "stream": True, + } + code, _, last_token = curl_chat(stream_payload, stream=True) + if code != 200: + print(f"baseline_stream_failed http={code}") + continue + + sleep_until(last_token + delay) + probe_payload = { + "model": MISTRAL_MODEL, + "messages": [{"role": "user", "content": "OK?"}], + "max_tokens": 1, + "stream": False, + } + probe_code, _, _ = curl_chat(probe_payload, stream=False) + print(f"delay={delay}ms http={probe_code}") + if min_success is None and probe_code == 200: + min_success = delay + print(f"min_success_delay_ms={min_success}") + print() + + +def probe_request_start_mode(delays: List[int]) -> None: + print("=== PROBE: ab_request_start ===") + min_success = None + for delay in delays: + baseline_payload = { + "model": MISTRAL_MODEL, + "messages": [{"role": "user", "content": "Sag nur OK."}], + "max_tokens": 32, + "stream": True, + } + request_started = now_ms() + baseline_cmd = [ + "curl", + "-sS", + "-N", + "-X", + "POST", + MISTRAL_ENDPOINT, + "-H", + "Content-Type: application/json", + "-H", + f"Authorization: Bearer {MISTRAL_API_KEY}", + "--data-binary", + json.dumps(baseline_payload), + "-w", + "\nHTTP_STATUS:%{http_code}\n", + ] + baseline_proc = subprocess.Popen( + baseline_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + sleep_until(request_started + delay) + probe_payload = { + "model": MISTRAL_MODEL, + "messages": [{"role": "user", "content": "OK?"}], + "max_tokens": 1, + "stream": False, + } + probe_code, _, _ = curl_chat(probe_payload, stream=False) + print(f"delay={delay}ms http={probe_code}") + if min_success is None and probe_code == 200: + min_success = delay + + baseline_output, _ = baseline_proc.communicate() + baseline_status = 0 + for line in baseline_output.splitlines(): + if line.startswith("HTTP_STATUS:"): + try: + baseline_status = int(line.split(":", 1)[1].strip()) + except ValueError: + baseline_status = 0 + if baseline_status != 200: + print(f"baseline_stream_failed http={baseline_status}") + print(f"min_success_delay_ms={min_success}") + print() + + +if __name__ == "__main__": + step_delays = list(range(100, 3001, 100)) + probe_last_token_mode(step_delays) + probe_request_start_mode(step_delays) diff --git a/scripts/mistral_cooldown_probe.sh b/scripts/mistral_cooldown_probe.sh new file mode 100755 index 00000000..673aa4ce --- /dev/null +++ b/scripts/mistral_cooldown_probe.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec python3 "$SCRIPT_DIR/mistral_cooldown_probe.py"