Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/dev.lock.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 20 additions & 19 deletions .github/workflows/smoke-pi.lock.yml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions .github/workflows/smoke-pi.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ engine:
id: pi
model: copilot/claude-sonnet-4-20250514
strict: true
runtimes:
node: {}
imports:
- shared/gh.md
- shared/reporting-otlp.md
Expand Down
44 changes: 40 additions & 4 deletions actions/setup/js/awf_reflect.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const AWF_API_PROXY_REFLECT_URL = "http://api-proxy:10000/reflect";
// co-located with other AWF firewall observability data so it is included in the agent artifact.
const AWF_REFLECT_OUTPUT_PATH = "/tmp/gh-aw/sandbox/firewall/awf-reflect.json";
// Milliseconds to wait for the /reflect endpoint before giving up.
const AWF_REFLECT_TIMEOUT_MS = 5000;
const AWF_REFLECT_TIMEOUT_MS = 60000;
// Milliseconds to wait for each models_url fallback fetch (shorter than the main reflect timeout).
const AWF_MODELS_URL_TIMEOUT_MS = 3000;
// Gemini model name prefix stripped from model IDs in the Gemini models API response.
Expand Down Expand Up @@ -165,7 +165,15 @@ async function enrichReflectModels(reflectData, timeoutMs, logger) {
* logger?: (msg: string) => void,
* writeFileSync?: (path: string, data: string, options: object) => void,
* }=} options
* @returns {Promise<void>}
* @returns {Promise<{
* ok: boolean,
* reflectUrl: string,
* outputPath: string,
* bytesWritten?: number,
* reason?: "unexpected_status"|"timeout"|"request_failed",
* status?: number,
* error?: string,
* }>}
*/
async function fetchAWFReflect(options) {
const reflectUrl = (options && options.reflectUrl) || AWF_API_PROXY_REFLECT_URL;
Expand All @@ -178,7 +186,9 @@ async function fetchAWFReflect(options) {
logger(`awf-reflect: fetching ${reflectUrl} (timeout=${timeoutMs}ms)`);

const ac = new AbortController();
let timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
logger(`awf-reflect: request timed out after ${timeoutMs}ms`);
ac.abort();
}, timeoutMs);
Expand All @@ -187,7 +197,13 @@ async function fetchAWFReflect(options) {
const res = await fetch(reflectUrl, { signal: ac.signal });
if (!res.ok) {
logger(`awf-reflect: unexpected status ${res.status}, skipping`);
return;
return {
ok: false,
reflectUrl,
outputPath,
reason: "unexpected_status",
status: res.status,
};
}
const reflectData = await res.json();
// Attempt to fill in null models for configured providers by fetching directly
Expand All @@ -198,15 +214,35 @@ async function fetchAWFReflect(options) {
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
writeFile(outputPath, enrichedBody, { encoding: "utf8" });
logger(`awf-reflect: saved ${enrichedBody.length}B to ${outputPath}`);
return {
ok: true,
reflectUrl,
outputPath,
bytesWritten: enrichedBody.length,
};
} catch (err) {
const e = /** @type {Error} */ err;
if (e.name === "AbortError") {
return; // already logged above
return {
ok: false,
reflectUrl,
outputPath,
reason: "timeout",
error: timedOut ? `request timed out after ${timeoutMs}ms` : e.message,
};
}
logger(`awf-reflect: request failed: ${e.message}`);
return {
ok: false,
reflectUrl,
outputPath,
reason: "request_failed",
error: e.message,
};
} finally {
clearTimeout(timer);
}

}

if (typeof module !== "undefined" && module.exports) {
Expand Down
26 changes: 22 additions & 4 deletions actions/setup/js/awf_reflect.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ describe("awf_reflect.cjs", () => {
it("exports expected default values", () => {
expect(AWF_API_PROXY_REFLECT_URL).toBe("http://api-proxy:10000/reflect");
expect(AWF_REFLECT_OUTPUT_PATH).toBe("/tmp/gh-aw/sandbox/firewall/awf-reflect.json");
expect(AWF_REFLECT_TIMEOUT_MS).toBe(5000);
expect(AWF_REFLECT_TIMEOUT_MS).toBe(60000);
expect(AWF_MODELS_URL_TIMEOUT_MS).toBe(3000);
expect(GEMINI_MODEL_NAME_PREFIX).toBe("models/");
});
Expand Down Expand Up @@ -198,14 +198,20 @@ describe("awf_reflect.cjs", () => {
const logs = [];

try {
await fetchAWFReflect({
const result = await fetchAWFReflect({
reflectUrl: "http://api-proxy:10000/reflect",
outputPath,
timeoutMs: 3000,
modelsTimeoutMs: 1000,
logger: msg => logs.push(msg),
});

expect(result).toEqual({
ok: true,
reflectUrl: "http://api-proxy:10000/reflect",
outputPath,
bytesWritten: expect.any(Number),
});
const saved = JSON.parse(fs.readFileSync(outputPath, "utf8"));
expect(saved.endpoints[0].models).toEqual(["gpt-4o", "gpt-4o-mini"]);
expect(logs.some(l => l.includes("saved "))).toBe(true);
Expand All @@ -224,7 +230,13 @@ describe("awf_reflect.cjs", () => {
timeoutMs: 500,
logger: msg => logs.push(msg),
})
).resolves.toBeUndefined();
).resolves.toEqual({
ok: false,
reflectUrl: "http://api-proxy:10000/reflect",
outputPath: "/tmp/gh-aw-test-noop.json",
reason: "request_failed",
error: "ECONNREFUSED",
});
expect(logs.some(l => l.includes("request failed"))).toBe(true);
});

Expand All @@ -238,7 +250,13 @@ describe("awf_reflect.cjs", () => {
timeoutMs: 500,
logger: msg => logs.push(msg),
})
).resolves.toBeUndefined();
).resolves.toEqual({
ok: false,
reflectUrl: "http://api-proxy:10000/reflect",
outputPath: "/tmp/gh-aw-test-noop.json",
reason: "unexpected_status",
status: 503,
});
expect(logs.some(l => l.includes("unexpected status 503"))).toBe(true);
});

Expand Down
135 changes: 133 additions & 2 deletions actions/setup/js/pi_provider.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,94 @@ function resolveGatewayUrl(provider) {
return `http://api-proxy:${port}`;
}

/**
* Join a base URL and relative API path without duplicating slashes.
*
* @param {string} baseUrl
* @param {string} apiPath
* @returns {string}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice helper — consider also normalizing leading slashes on apiPath to defend against an accidental // in joinApiUrl. (smoke test review)

*/
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] Missing edge case tests for joinApiUrl — what happens with multiple trailing slashes or empty apiPath?

💡 Suggested tests
describe("joinApiUrl", () => {
  it("removes single trailing slash from baseUrl", () => {
    expect(module.joinApiUrl("(api.com/redacted) "/path"))
      .toBe("(api.com/redacted)
  });

  it("removes multiple trailing slashes from baseUrl", () => {
    expect(module.joinApiUrl("(api.com/redacted) "/path"))
      .toBe("(api.com/redacted)
  });

  it("handles baseUrl without trailing slash", () => {
    expect(module.joinApiUrl("(api.com/redacted) "/path"))
      .toBe("(api.com/redacted)
  });

  it("handles empty apiPath", () => {
    expect(module.joinApiUrl("(api.com/redacted) ""))
      .toBe("(api.com/redacted)
  });
});

The current implementation looks correct, but explicit tests prevent future regressions when someone "simplifies" it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit (smoke-test): joinApiUrl cleanly handles trailing slashes — nice defensive coding. Consider an inline unit test alongside the helper.

function joinApiUrl(baseUrl, apiPath) {
return `${baseUrl.replace(/\/+$/, "")}${apiPath}`;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regex pattern may fail on URLs with multiple consecutive slashes: The regex /+$ only strips trailing slashes, but baseUrl could have internal doubled slashes (e.g., "(apiproxy/redacted) resulting in "(apiproxy/redacted)

💡 Suggested fix
function joinApiUrl(baseUrl, apiPath) {
  const cleanBase = baseUrl.replace(/\/+$/, "");
  const cleanPath = apiPath.replace(/^\/+/, "");
  return `${cleanBase}/${cleanPath}`;
}

This ensures exactly one slash between base and path regardless of input formatting.

}
Comment on lines +92 to +94

/**
* Resolve the Pi model's inferred provider request target for logging.
*
* @param {any} model
* @returns {{ api: string, method: string, url: string }}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning "(unknown api)" and "(baseUrl unavailable)" as sentinels is fine for logs, but a structured null could simplify downstream filtering. (smoke test review)

*/
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type annotation could be more precise: model is typed as any, but based on usage it appears to have a known shape ({ api?: string, baseUrl?: string, ... }).

💡 Suggested improvement
/**
 * `@typedef` {Object} PiModel
 * `@property` {string} [api]
 * `@property` {string} [baseUrl]
 * `@property` {string} [provider]
 * `@property` {string} [id]
 */

/**
 * Resolve the Pi model's inferred provider request target for logging.
 *
 * `@param` {PiModel|undefined} model
 * `@returns` {{ api: string, method: string, url: string }}
 */
function resolveProviderRequestTarget(model) {
  // ...
}

Improves IntelliSense and catches type errors at authoring time.

function resolveProviderRequestTarget(model) {
const api = typeof model?.api === "string" && model.api ? model.api : "(unknown api)";
const method = "POST";
const baseUrl = typeof model?.baseUrl === "string" && model.baseUrl ? model.baseUrl : "";

if (!baseUrl) {
return { api, method, url: "(baseUrl unavailable)" };
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good defensive check here - handles missing baseUrl gracefully.

}

switch (api) {
case "openai-completions":
return { api, method, url: joinApiUrl(baseUrl, "/chat/completions") };
case "openai-responses":
case "azure-openai-responses":
case "openai-codex-responses":
return { api, method, url: joinApiUrl(baseUrl, "/responses") };
case "anthropic":
case "anthropic-messages":
return { api, method, url: joinApiUrl(baseUrl, "/messages") };
case "mistral-conversations":
return { api, method, url: joinApiUrl(baseUrl, "/conversations") };
default:
return { api, method, url: baseUrl };
}
}

/**
* Format response header names for logs without printing sensitive values.
*
* @param {Record<string, string>|undefined|null} headers
* @returns {string}
*/
function formatResponseHeaderNames(headers) {
const names = Object.keys(headers || {})
.map(name => String(name).toLowerCase())
.sort();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] Missing test coverage for formatResponseHeaderNames with edge cases.

💡 Suggested tests
describe("formatResponseHeaderNames", () => {
  it("formats multiple headers alphabetically", () => {
    expect(module.formatResponseHeaderNames({
      "X-Request-ID": "123",
      "Content-Type": "application/json",
      "Accept": "*/*"
    })).toBe("accept,content-type,x-request-id");
  });

  it("returns 'none' for empty object", () => {
    expect(module.formatResponseHeaderNames({})).toBe("none");
  });

  it("handles null safely", () => {
    expect(module.formatResponseHeaderNames(null)).toBe("none");
  });

  it("handles undefined safely", () => {
    expect(module.formatResponseHeaderNames(undefined)).toBe("none");
  });
});

This function is exported and reusable — nail down its contract with tests.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit (smoke-test): formatResponseHeaderNames returns sorted lowercase header names — looks good for deterministic logs.

return names.length > 0 ? names.join(",") : "none";
}
Comment on lines +134 to +139
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch on Headers instance! Me agree this important fix.

Warning

Firewall blocked 6 domains

The following domains were blocked by the firewall during workflow execution:

  • accounts.google.com
  • android.clients.google.com
  • clients2.google.com
  • contentautofill.googleapis.com
  • safebrowsingohttpgateway.googleapis.com
  • www.google.com

To allow these domains, add them to the network.allowed list in your workflow frontmatter:

network:
  allowed:
    - defaults
    - "accounts.google.com"
    - "android.clients.google.com"
    - "clients2.google.com"
    - "contentautofill.googleapis.com"
    - "safebrowsingohttpgateway.googleapis.com"
    - "www.google.com"

See Network Configuration for more information.

📰 BREAKING: Report filed by Smoke Copilot · ● 933.3K

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch on the Headers instance handling! 🎯

Warning

Firewall blocked 6 domains

The following domains were blocked by the firewall during workflow execution:

  • accounts.google.com
  • android.clients.google.com
  • clients2.google.com
  • contentautofill.googleapis.com
  • safebrowsingohttpgateway.googleapis.com
  • www.google.com

To allow these domains, add them to the network.allowed list in your workflow frontmatter:

network:
  allowed:
    - defaults
    - "accounts.google.com"
    - "android.clients.google.com"
    - "clients2.google.com"
    - "contentautofill.googleapis.com"
    - "safebrowsingohttpgateway.googleapis.com"
    - "www.google.com"

See Network Configuration for more information.

📰 BREAKING: Report filed by Smoke Copilot · ● 1.3M


/**
* Log extra context when the AWF /reflect call does not produce a snapshot.
*
* @param {{
* phase: string,
* provider: string,
* model: string,
* result: {
* ok: boolean,
* reflectUrl: string,
* outputPath: string,
* reason?: string,
* status?: number,
* error?: string,
* },
* logger: (msg: string) => void,
* }} params
* @returns {void}
*/
function logReflectFailure(params) {
const { phase, provider, model, result, logger } = params;
if (!result || result.ok) {
return;
}

const status = typeof result.status === "number" ? ` status=${result.status}` : "";
const error = result.error ? ` error=${JSON.stringify(result.error)}` : "";
logger(
`reflect_failure phase=${phase} provider=${provider || "(no provider prefix)"} model=${model || "(not set)"} url=${result.reflectUrl} output=${result.outputPath} reason=${result.reason || "unknown"}${status}${error}`
);
}

/**
* Register a Pi provider and any aliases.
*
Expand Down Expand Up @@ -175,8 +263,44 @@ function registerConfiguredProviders(pi, logger) {
*/
function piProviderExtension(pi) {
const log = DEFAULT_LOGGER;
/** @type {{ api: string, method: string, url: string }|null} */
let lastProviderRequest = null;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mutable state in module scope could cause issues across multiple Pi sessions: lastProviderRequest and lastProviderResponse are shared across all invocations of event handlers.

💡 Consideration

If multiple Pi sessions run concurrently (unlikely in current workflows but possible in future parallel agent scenarios), their requests could overwrite each other's lastProviderRequest state, leading to misattributed error logs.

For now this is acceptable given single-agent workflows, but consider encapsulating this state per Pi instance if parallel sessions become a requirement:

function piProviderExtension(pi) {
  const log = DEFAULT_LOGGER;
  const state = {
    lastProviderRequest: null,
    lastProviderResponse: null,
  };
  registerConfiguredProviders(pi, log);

  pi.on("before_provider_request", (_event, ctx) => {
    state.lastProviderRequest = resolveProviderRequestTarget(ctx && ctx.model);
    // ...
  });
}

This keeps state local to each Pi extension instance.

/** @type {{ status: number, responseHeaders: string }|null} */
let lastProviderResponse = null;
registerConfiguredProviders(pi, log);

pi.on("before_provider_request", (_event, ctx) => {
lastProviderRequest = resolveProviderRequestTarget(ctx && ctx.model);
lastProviderResponse = null;
const provider = ctx?.model?.provider || "(unknown provider)";
const model = ctx?.model?.id || getConfiguredModel() || "(unknown model)";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/diagnose] Race condition risk: lastProviderRequest could be overwritten by concurrent requests.

💡 Diagnosis

If Pi makes overlapping inference calls:

  1. Request A fires → lastProviderRequest = A's target
  2. Request B fires → lastProviderRequest = B's target (overwrites A)
  3. Response A returns → logs B's target for A's response

This misattribution could confuse debugging.

Suggested fix: Use a WeakMap keyed by request context or a correlation ID:

const requestMap = new Map(); // or WeakMap if ctx objects are GC-able

pi.on("before_provider_request", (event, ctx) => {
  const target = resolveProviderRequestTarget(ctx?.model);
  const correlationId = event.correlationId || crypto.randomUUID();
  requestMap.set(correlationId, { target, timestamp: Date.now() });
  log(`provider_request correlation_id=${correlationId} ...`);
});

pi.on("after_provider_response", (event, ctx) => {
  const correlationId = event.correlationId;
  const request = requestMap.get(correlationId) || { target: resolveProviderRequestTarget(ctx?.model) };
  requestMap.delete(correlationId);
  log(`provider_response correlation_id=${correlationId} ...`);
});

Only worth fixing if Pi actually makes concurrent requests — verify behavior first.

log(`provider_request provider=${provider} model=${model} api=${lastProviderRequest.api} method=${lastProviderRequest.method} url=${lastProviderRequest.url}`);
});

pi.on("after_provider_response", (event, ctx) => {
const request = lastProviderRequest || resolveProviderRequestTarget(ctx && ctx.model);
lastProviderResponse = {
status: event.status,
responseHeaders: formatResponseHeaderNames(event.headers),
};
const provider = ctx?.model?.provider || "(unknown provider)";
const model = ctx?.model?.id || getConfiguredModel() || "(unknown model)";
log(`provider_response provider=${provider} model=${model} status=${event.status} method=${request.method} url=${request.url} response_headers=${lastProviderResponse.responseHeaders}`);
});

pi.on("message_end", event => {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/diagnose] Fallback logic masks null context: ctx && ctx.model may hide when context is unexpectedly missing.

💡 Instrumentation opportunity

If ctx is null/undefined or ctx.model is missing, the current code silently falls back:

const request = lastProviderRequest || resolveProviderRequestTarget(ctx && ctx.model);

But this hides a potential Pi SDK contract violation. Consider logging when fallback is used:

let request = lastProviderRequest;
if (!request) {
  log("warning: lastProviderRequest unavailable, resolving from context");
  request = resolveProviderRequestTarget(ctx?.model);
  if (request.url === "(baseUrl unavailable)") {
    log("warning: model context incomplete, missing baseUrl");
  }
}

This makes it obvious when event ordering is unexpected or context is incomplete.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this! 🎯

Warning

Firewall blocked 6 domains

The following domains were blocked by the firewall during workflow execution:

  • accounts.google.com
  • android.clients.google.com
  • clients2.google.com
  • contentautofill.googleapis.com
  • safebrowsingohttpgateway.googleapis.com
  • www.google.com

To allow these domains, add them to the network.allowed list in your workflow frontmatter:

network:
  allowed:
    - defaults
    - "accounts.google.com"
    - "android.clients.google.com"
    - "clients2.google.com"
    - "contentautofill.googleapis.com"
    - "safebrowsingohttpgateway.googleapis.com"
    - "www.google.com"

See Network Configuration for more information.

📰 BREAKING: Report filed by Smoke Copilot · ● 1.7M

const message = event && event.message;
if (message?.role !== "assistant" || message?.stopReason !== "error" || !message?.errorMessage) {
return;
}
const request = lastProviderRequest || { api: message.api || "(unknown api)", method: "POST", url: "(request unavailable)" };
const status = lastProviderResponse ? String(lastProviderResponse.status) : "no-response";
const responseHeaders = lastProviderResponse ? lastProviderResponse.responseHeaders : "none";
log(
`provider_error provider=${message.provider || "(unknown provider)"} model=${message.model || "(unknown model)"} api=${request.api} status=${status} method=${request.method} url=${request.url} response_headers=${responseHeaders} error=${JSON.stringify(message.errorMessage)}`
);
});

pi.on("agent_start", async () => {
const model = getConfiguredModel();
const provider = extractProviderFromModel(model);
Expand All @@ -196,13 +320,14 @@ function piProviderExtension(pi) {
// This is best-effort: failures are logged but do not affect the agent session.
// Skip when AWF_REFLECT_ENABLED is not "1" (e.g. sandbox.agent: false — no api-proxy running).
if (process.env.AWF_REFLECT_ENABLED === "1") {
await fetchAWFReflect({
const result = await fetchAWFReflect({
reflectUrl: AWF_API_PROXY_REFLECT_URL,
outputPath: AWF_REFLECT_OUTPUT_PATH,
timeoutMs: AWF_REFLECT_TIMEOUT_MS,
modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS,
logger: log,
});
logReflectFailure({ phase: "agent_start", provider, model, result, logger: log });
}
});

Expand All @@ -211,13 +336,16 @@ function piProviderExtension(pi) {
// This is best-effort: failures are logged but do not affect the agent exit code.
// Skip when AWF_REFLECT_ENABLED is not "1" (e.g. sandbox.agent: false — no api-proxy running).
if (process.env.AWF_REFLECT_ENABLED === "1") {
await fetchAWFReflect({
const model = getConfiguredModel();
const provider = extractProviderFromModel(model);
const result = await fetchAWFReflect({
reflectUrl: AWF_API_PROXY_REFLECT_URL,
outputPath: AWF_REFLECT_OUTPUT_PATH,
timeoutMs: AWF_REFLECT_TIMEOUT_MS,
modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS,
logger: log,
});
logReflectFailure({ phase: "agent_end", provider, model, result, logger: log });
}
});
}
Expand All @@ -227,3 +355,6 @@ module.exports.getConfiguredModel = getConfiguredModel;
module.exports.extractProviderFromModel = extractProviderFromModel;
module.exports.resolveGatewayUrl = resolveGatewayUrl;
module.exports.registerConfiguredProviders = registerConfiguredProviders;
module.exports.resolveProviderRequestTarget = resolveProviderRequestTarget;
module.exports.formatResponseHeaderNames = formatResponseHeaderNames;
module.exports.logReflectFailure = logReflectFailure;
Loading
Loading