From 70a2f76a25fe52059e771992f7b2c48114e11a73 Mon Sep 17 00:00:00 2001
From: claude-code-best <claude-code-best@proton.me>
Date: Sun, 14 Jun 2026 12:26:39 +0800
Subject: [PATCH] =?UTF-8?q?fix(workflow):=20agent=20dead=20=E5=B8=A6=20rea?=
 =?UTF-8?q?son/detail=20+=20prompt=20=E5=8A=A0=E5=8E=8B=20StructuredOutput?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

一次 12 个 agent 的 audit workflow 里有 8 个落到 dead，而 journal 只记 {kind:"dead"}、不带任何死因，
事后无法区分 "agent 没产 StructuredOutput" 与 "runAgent 抛错"。
证据指向主因：sonnet 长 tool chain 后忘记调 StructuredOutput，
extractStructuredOutput 返回 null 即降级 dead。

- types.ts: AgentRunResult.dead 加可选 reason/detail 字段
  （reason 取值：no-structured-output / runagent-threw / worktree-failed / unknown）；
  两个字段均为 optional，兼容旧 journal。
- claudeCodeBackend.ts: 三处 dead 填 reason + detail；
  no-structured-output 把 finalized 文本前 200 字符做 detail，
  让日志/面板能立刻看到 agent 最后说了什么。
- claudeCodeBackend.ts: schema 模式 prompt 首尾各放一次
  StructuredOutput 强制要求，针对 sonnet 长 tool chain 后忘记收尾。
- hooks.ts: retry 日志带 reason 和 detail（detail 截断到前 150 字符）；
  retry 仍 throw 时降级 dead 也填 reason=runagent-threw + detail（错误 message）。
- types.test.ts: 加 reason JSON 往返 + 旧 journal 兼容测试。

Co-Authored-By: glm-5.2 <zai-org@claude-code-best.win>
---
 .../src/__tests__/types.test.ts               | 22 ++++++++
 packages/workflow-engine/src/engine/hooks.ts  | 12 ++++-
 packages/workflow-engine/src/types.ts         | 26 +++++++++-
 src/workflow/backends/claudeCodeBackend.ts    | 51 ++++++++++++++++---
 4 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/packages/workflow-engine/src/__tests__/types.test.ts b/packages/workflow-engine/src/__tests__/types.test.ts
index 22866440b..5ca2b19bf 100644
--- a/packages/workflow-engine/src/__tests__/types.test.ts
+++ b/packages/workflow-engine/src/__tests__/types.test.ts
@@ -19,6 +19,28 @@ test('AgentRunResult skipped/dead 分支可 JSON 往返', () => {
   }
 })
 
+// dead 携带可选 reason/detail：journal 持久化后能保留死因，事后审计/面板展示用。
+test('AgentRunResult dead 带 reason/detail 可 JSON 往返', () => {
+  const dead = {
+    kind: 'dead' as const,
+    reason: 'no-structured-output' as const,
+    detail: 'finalize content has no StructuredOutput tool_use or JSON text',
+  }
+  const round = JSON.parse(JSON.stringify(dead))
+  expect(round).toEqual(dead)
+  expect(round.kind).toBe('dead')
+  expect(round.reason).toBe('no-structured-output')
+})
+
+// 兼容旧 journal：reason/detail 都可选，缺失时仍是合法 dead。
+test('AgentRunResult dead 无 reason 仍合法（兼容旧 journal）', () => {
+  const legacy = { kind: 'dead' as const }
+  const round = JSON.parse(JSON.stringify(legacy))
+  expect(round.kind).toBe('dead')
+  expect(round.reason).toBeUndefined()
+  expect(round.detail).toBeUndefined()
+})
+
 test('JournalEntry 形状稳定', () => {
   const entry = {
     key: 'abc123',
diff --git a/packages/workflow-engine/src/engine/hooks.ts b/packages/workflow-engine/src/engine/hooks.ts
index 923e21aca..02d65cea6 100644
--- a/packages/workflow-engine/src/engine/hooks.ts
+++ b/packages/workflow-engine/src/engine/hooks.ts
@@ -166,12 +166,16 @@ export function makeHooks(
       // 都给一次重试机会；WorkflowAbortedError（kill）不重试——是用户意图。
       // 重试仍失败：dead 保持 dead；throw 降级为 dead（不让一个 agent 击穿 workflow）。
       // budget 不重复扣：dead 不 addOutputTokens；重试 ok 才扣一次（最终 ok 时）。
+      // dead.reason 透传到日志（审计 8/12 dead 都是 no-structured-output 时直接可见）。
       let result: AgentRunResult
       try {
         result = await invokeBackend()
         if (result.kind === 'dead') {
           ctx.ports.logger.warn?.(
-            `agent "${label ?? `#${agentId}`}" returned dead; retrying once`,
+            `agent "${label ?? `#${agentId}`}" returned dead` +
+              (result.reason ? ` (${result.reason})` : '') +
+              (result.detail ? `: ${result.detail.slice(0, 150)}` : '') +
+              '; retrying once',
           )
           result = await invokeBackend()
         }
@@ -185,7 +189,11 @@ export function makeHooks(
         } catch (e2) {
           if (e2 instanceof WorkflowAbortedError) throw e2
           // 重试仍抛：降级 dead（保持 workflow 继续；hooks.agent 返 null）
-          result = { kind: 'dead' }
+          result = {
+            kind: 'dead',
+            reason: 'runagent-threw',
+            detail: (e2 as Error).message,
+          }
         }
       }
       if (result.kind === 'ok') {
diff --git a/packages/workflow-engine/src/types.ts b/packages/workflow-engine/src/types.ts
index 6fab85468..33f95ffe9 100644
--- a/packages/workflow-engine/src/types.ts
+++ b/packages/workflow-engine/src/types.ts
@@ -33,7 +33,13 @@ export type AgentProgressUpdate = {
   toolCount: number
 }
 
-/** AgentRunner 返回。ok 变体携带 model/toolCount 供面板展示（可选，独立后端可不填）。 */
+/**
+ * AgentRunner 返回。ok 变体携带 model/toolCount 供面板展示（可选，独立后端可不填）。
+ *
+ * dead 携带可选 reason/detail：journal 历史只记 `{kind:"dead"}` 无信息，
+ * 调试时无法区分"agent 跑完没产 StructuredOutput"还是"runAgent 抛错"。
+ * reason 让 hooks 重试日志、面板、事后审计能立刻看到死因。
+ */
 export type AgentRunResult =
   | {
       kind: 'ok'
@@ -47,7 +53,23 @@ export type AgentRunResult =
       tokenCount?: number
     }
   | { kind: 'skipped' }
-  | { kind: 'dead' }
+  | {
+      kind: 'dead'
+      /**
+       * 死因分类，方便日志聚合 / 事后审计。可选以兼容旧 journal。
+       * - no-structured-output：agent 完成但 finalize content 无 StructuredOutput（既没调工具也没在文本里产 JSON）
+       * - runagent-threw：runAgent 抛非 abort 错误（API 故障 / context 溢出 / runtime 错误）
+       * - worktree-failed：isolation:'worktree' 创建失败（fail-closed 退化）
+       * - unknown：未分类（兼容旧 backend / 第三方 adapter）
+       */
+      reason?:
+        | 'no-structured-output'
+        | 'runagent-threw'
+        | 'worktree-failed'
+        | 'unknown'
+      /** 详细信息（错误 message / 文本预览），用于日志，不展示给最终用户。 */
+      detail?: string
+    }
 
 /** journal 中单条记录。seq = agent() 调用序号，read() 据此重排以稳定 resume。 */
 export type JournalEntry = {
diff --git a/src/workflow/backends/claudeCodeBackend.ts b/src/workflow/backends/claudeCodeBackend.ts
index d208d1870..aed7025ec 100644
--- a/src/workflow/backends/claudeCodeBackend.ts
+++ b/src/workflow/backends/claudeCodeBackend.ts
@@ -160,10 +160,11 @@ export const claudeCodeBackend: AgentAdapter = {
         )
       } catch (e) {
         // fail-closed：隔离未达成不静默退化为共享 cwd（否则并发写数据竞争）
+        const detail = (e as Error).message
         logForDebugging(
-          `workflow worktree creation failed (${agentDef.agentType}): ${(e as Error).message}`,
+          `workflow worktree creation failed (${agentDef.agentType}): ${detail}`,
         )
-        return { kind: 'dead' }
+        return { kind: 'dead', reason: 'worktree-failed', detail }
       }
     }
     // runWithCwdOverride 让 agent 内的 Bash/Read 等工具看到 worktree 路径
@@ -197,9 +198,28 @@ export const claudeCodeBackend: AgentAdapter = {
       appState.mcp.tools,
     )
 
-    // schema → 通过 prompt 追加 JSON Schema 指令（非交互模式 StructuredOutput 已启用）
+    // schema → prompt 首尾各放一份 StructuredOutput 强制要求（sonnet 长 tool chain 后
+    // 易忘记收尾，是 8/12 dead 的主因）。原版只在尾部追加，sonnet 跑到第 N 个工具时
+    // 早就把"必须调 StructuredOutput"挤出注意力了。新版：头部放任务上下文 + 收尾契约，
+    // 尾部再强制提醒一次，让 agent 任何时刻调头都能看到收尾要求。
     const promptText = params.schema
-      ? `${params.prompt}\n\nYou MUST return your final answer by calling the StructuredOutput tool with a value matching this JSON Schema:\n${JSON.stringify(params.schema)}`
+      ? [
+          '[STRUCTURED OUTPUT MODE — read before starting]',
+          'Your ENTIRE final response MUST be a single call to the `StructuredOutput` tool with a value matching this JSON Schema:',
+          JSON.stringify(params.schema),
+          '',
+          'Rules:',
+          '- Call `StructuredOutput` exactly once as your LAST action.',
+          '- NEVER end your turn with plain text. If you have not called the tool, your entire response is discarded and the workflow sees no result.',
+          '- If you need to investigate first (read files, run tests), do so via other tools, then finish with `StructuredOutput`.',
+          '',
+          '--- task ---',
+          params.prompt,
+          '',
+          '--- end task ---',
+          '',
+          '[FINAL REMINDER] Before stopping: verify you have called `StructuredOutput`. If not, call it now with your conclusion. Plain-text endings are treated as failure.',
+        ].join('\n')
       : params.prompt
 
     const promptMessages = [createUserMessage({ content: promptText })]
@@ -249,11 +269,12 @@ export const claudeCodeBackend: AgentAdapter = {
       if (agentAbort.signal.aborted || (e as Error)?.name === 'AbortError') {
         throw new WorkflowAbortedError()
       }
+      const detail = (e as Error).message
       logForDebugging(
-        `workflow sub-agent error (${agentDef.agentType}): ${(e as Error).message}`,
+        `workflow sub-agent error (${agentDef.agentType}): ${detail}`,
       )
       logEvent('tengu_workflow_agent', { ok: 0 })
-      return { kind: 'dead' }
+      return { kind: 'dead', reason: 'runagent-threw', detail }
     } finally {
       // 清理（幂等）：listener removeEventListener / Map.delete 重复调用安全。
       if (typeof ctx.unregisterAgentAbort === 'function') {
@@ -285,7 +306,23 @@ export const claudeCodeBackend: AgentAdapter = {
 
     if (params.schema) {
       const structured = extractStructuredOutput(finalized.content)
-      if (structured === null) return { kind: 'dead' }
+      if (structured === null) {
+        // agent 跑完所有工具调用但既没调 StructuredOutput 工具、也没在文本里产 JSON。
+        // 把最后文本预览进 detail，让 hooks 重试日志和面板能立刻看到 agent 实际说了什么。
+        // 8/12 dead 在最近一次 audit workflow 都落这里——sonnet 长 tool chain 后忘了收尾。
+        const preview = extractTextContent(finalized.content, '\n').slice(
+          0,
+          200,
+        )
+        logForDebugging(
+          `workflow sub-agent produced no StructuredOutput (${agentDef.agentType}); preview: ${preview}`,
+        )
+        return {
+          kind: 'dead',
+          reason: 'no-structured-output',
+          detail: preview,
+        }
+      }
       return {
         kind: 'ok',
         output: structured as object,