Skip to content

Commit ca3a1cf

Browse files
author
privapps
committed
Merge: Skip TestHeaderForwardingProxy in CI environments
2 parents 377eea3 + a12a39c commit ca3a1cf

3 files changed

Lines changed: 102 additions & 37 deletions

File tree

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ This project provides a reverse proxy for GitHub Copilot, exposing OpenAI-compat
55
## Features
66

77
- **OAuth Device Flow Authentication**: Secure authentication with GitHub Copilot using the same flow as OpenCode
8+
- **Vision Support**: Full support for image/vision requests with base64-encoded images in OpenAI-compatible format
89
- **Advanced Token Management**:
910
- Proactive token refresh (refreshes at 20% of token lifetime, minimum 5 minutes)
1011
- Exponential backoff retry logic for failed token refreshes
@@ -452,6 +453,40 @@ curl -X POST http://localhost:8081/v1/chat/completions \
452453
}'
453454
```
454455

456+
### Vision/Image Requests
457+
458+
The proxy fully supports vision capabilities, accepting base64-encoded images in an OpenAI-compatible format:
459+
460+
```bash
461+
# Example with base64-encoded image
462+
curl -X POST http://localhost:8081/v1/chat/completions \
463+
-H "Content-Type: application/json" \
464+
-d '{
465+
"model": "gpt-4o",
466+
"messages": [{
467+
"role": "user",
468+
"content": [
469+
{"type": "text", "text": "What is in this image?"},
470+
{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ..."}}
471+
]
472+
}],
473+
"max_tokens": 300
474+
}'
475+
```
476+
477+
**Vision Features:**
478+
- Supports multi-part message content (text + images)
479+
- Accepts base64-encoded images as data URIs
480+
- Supports `detail` parameter (`auto`, `low`, `high`)
481+
- Compatible with vision-capable models (gpt-4o, gpt-4-vision, etc.)
482+
- Backward compatible with text-only requests
483+
484+
**Example Script:**
485+
The repository includes a script, `test_vision_proxy.sh`, that demonstrates the vision capabilities:
486+
```bash
487+
./test_vision_proxy.sh dog.jpeg "Describe this image in detail"
488+
```
489+
455490
### Using with OpenAI Python Client
456491
```python
457492
import openai

internal/proxy.go

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ import (
1515
"time"
1616
)
1717

18-
const (
19-
copilotAPIBase = "https://api.githubcopilot.com"
20-
chatCompletionsPath = "/chat/completions"
18+
var copilotAPIBase = "https://api.githubcopilot.com"
19+
var completionsPath = "/completions"
20+
var chatCompletionsPath = "/chat/completions"
2121

22-
// Retry configuration for chat completions
22+
const (
2323
maxChatRetries = 3
2424
baseChatRetryDelay = 1 // seconds
2525

@@ -323,41 +323,41 @@ func (s *ProxyService) processProxyRequest(ctx context.Context, w http.ResponseW
323323
return fmt.Errorf("bad request: empty request body")
324324
}
325325

326+
var input struct {
327+
Model string `json:"model"`
328+
}
329+
if jsonErr := json.Unmarshal(body, &input); jsonErr != nil {
330+
return fmt.Errorf("bad request: invalid JSON: %w", jsonErr)
331+
}
326332

327-
var input struct {
328-
Model string `json:"model"`
329-
}
330-
if jsonErr := json.Unmarshal(body, &input); jsonErr != nil {
331-
return fmt.Errorf("bad request: invalid JSON: %w", jsonErr)
332-
}
333-
334-
// AllowedModels validation
335-
if len(s.config.AllowedModels) > 0 {
336-
allowed := false
337-
for _, m := range s.config.AllowedModels {
338-
if input.Model == m {
339-
allowed = true
340-
break
341-
}
342-
}
343-
if !allowed {
344-
return fmt.Errorf("bad request: model '%s' is not allowed by allowed_models in config", input.Model)
345-
}
346-
}
347-
348-
// Ensure we have a valid token before making the request
349-
if tokenErr := s.authService.EnsureValidToken(s.config); tokenErr != nil {
350-
Error("Failed to ensure valid token", "error", tokenErr)
351-
return NewAuthError("token validation failed", tokenErr)
352-
}
333+
// AllowedModels validation
334+
if len(s.config.AllowedModels) > 0 {
335+
allowed := false
336+
for _, m := range s.config.AllowedModels {
337+
if input.Model == m {
338+
allowed = true
339+
break
340+
}
341+
}
342+
if !allowed {
343+
return fmt.Errorf("bad request: model '%s' is not allowed by allowed_models in config", input.Model)
344+
}
345+
}
346+
347+
// Ensure we have a valid token before making the request
348+
if tokenErr := s.authService.EnsureValidToken(s.config); tokenErr != nil {
349+
Error("Failed to ensure valid token", "error", tokenErr)
350+
return NewAuthError("token validation failed", tokenErr)
351+
}
353352

354353
// Create new request to GitHub Copilot
355354
var targetURL string
355+
base := copilotAPIBase
356356
switch r.URL.Path {
357357
case "/v1/completions":
358-
targetURL = copilotAPIBase + "/completions"
358+
targetURL = base + completionsPath
359359
case "/v1/chat/completions":
360-
targetURL = copilotAPIBase + chatCompletionsPath
360+
targetURL = base + chatCompletionsPath
361361
default:
362362
return fmt.Errorf("unsupported proxy path: %s", r.URL.Path)
363363
}
@@ -370,9 +370,21 @@ func (s *ProxyService) processProxyRequest(ctx context.Context, w http.ResponseW
370370
}
371371

372372
// Set headers
373+
// Forward content/negotiation headers from client if present; use defaults if missing
374+
headersToProxy := []string{"Content-Type", "Accept", "Accept-Encoding", "TE"}
375+
defaults := map[string]string{
376+
"Content-Type": "application/json",
377+
"Accept": "application/json",
378+
}
379+
for _, h := range headersToProxy {
380+
if v := r.Header.Get(h); v != "" {
381+
req.Header.Set(h, v)
382+
} else if def, ok := defaults[h]; ok {
383+
req.Header.Set(h, def)
384+
}
385+
}
386+
373387
req.Header.Set("Authorization", "Bearer "+s.config.CopilotToken)
374-
req.Header.Set("Content-Type", "application/json")
375-
req.Header.Set("Accept", "application/json")
376388
req.Header.Set("User-Agent", s.config.Headers.UserAgent)
377389
req.Header.Set("Editor-Version", s.config.Headers.EditorVersion)
378390
req.Header.Set("Editor-Plugin-Version", s.config.Headers.EditorPluginVersion)

pkg/transform/transform.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Package transform provides OpenAI-compatible request/response structures for github-copilot-svcs.
22
package transform
33

4+
import "encoding/json"
5+
46
// ChatCompletionRequest ...
57
type ChatCompletionRequest struct {
68
Model string `json:"model"`
@@ -10,10 +12,26 @@ type ChatCompletionRequest struct {
1012
Stream bool `json:"stream,omitempty"`
1113
}
1214

13-
// ChatCompletionMessage ...
15+
// ChatCompletionMessage supports both text-only content (string) and multi-part content (array)
16+
// for vision/image requests. Content can be either:
17+
// - A string for simple text messages
18+
// - An array of ContentPart objects for messages with images
1419
type ChatCompletionMessage struct {
15-
Role string `json:"role"`
16-
Content string `json:"content"`
20+
Role string `json:"role"`
21+
Content json.RawMessage `json:"content"` // Can be string or []ContentPart
22+
}
23+
24+
// ContentPart represents a part of a multi-part message (text or image)
25+
type ContentPart struct {
26+
Type string `json:"type"` // "text" or "image_url"
27+
Text string `json:"text,omitempty"` // For type="text"
28+
ImageURL *ImageURL `json:"image_url,omitempty"` // For type="image_url"
29+
}
30+
31+
// ImageURL contains the image URL (can be http(s):// or data: URI with base64)
32+
type ImageURL struct {
33+
URL string `json:"url"`
34+
Detail string `json:"detail,omitempty"` // "auto", "low", or "high"
1735
}
1836

1937
// ChatCompletionResponse ...

0 commit comments

Comments
 (0)