feat(workflow): 默认并发降为 3 并支持 per-run maxConcurrency 注入

- DEFAULT_MAX_CONCURRENCY=3 替代旧的 min(16, cores-2)；MAX_CONCURRENCY_CAP=16 保留为用户输入的绝对上限 - 新增 clampMaxConcurrency() 处理 undefined/<1/>CAP 边界 - WorkflowInput schema 新增 maxConcurrency: number.int().min(1).max(16).optional() - 引擎层 context/runWorkflow 全链路透传：semaphore 容量来自 per-run 入参 - WorkflowTool prompt 增加指引：fan-out 场景先用 AskUserQuestion 与用户确认并发再启动 - 同步 ultracode skill + audit workflow spec 的并发文字（删 cpu-cores 公式） - 同步 docs/features/workflow-scripts.md 旧公式 Co-Authored-By: glm-5.2 <zai-org@claude-code-best.win>
2026-06-15 12:55:51 +00:00 · 2026-06-14 10:16:29 +08:00
parent b5ead59e72
commit 3edc370aa1
15 changed files with 219 additions and 32 deletions
--- a/docs/features/workflow-scripts.md
+++ b/docs/features/workflow-scripts.md
@@ -79,7 +79,7 @@ workflow 脚本内可用的钩子（语义详见引擎包 `engine/hooks.ts`）
 | `log(msg)` | 进度日志（面板展示，无状态变更） |
 | `workflow(name \| { scriptPath }, args?)` | 嵌套一层子 workflow（仅允许一层） |

-**硬限**：单次 `parallel`/`pipeline` ≤ `MAX_ITEMS_PER_CALL`（4096）；单 workflow 总 agent ≤ `MAX_TOTAL_AGENTS`（1000）；并发 cap = `min(16, cores - 2)`。
+**硬限**：单次 `parallel`/`pipeline` ≤ `MAX_ITEMS_PER_CALL`（4096）；单 workflow 总 agent ≤ `MAX_TOTAL_AGENTS`（1000）；并发 cap 默认 = `DEFAULT_MAX_CONCURRENCY`（3），可经 Workflow 工具的 `maxConcurrency` 入参覆盖，绝对上限 `MAX_CONCURRENCY_CAP`（16）。

 ## 四、编写 workflow

@@ -159,7 +159,7 @@ return results.flat().filter(Boolean)

 - **journal**：每次 run 记录到 `.claude/workflow-runs/<runId>/journal.jsonl`。`resumeFromRunId` 重放 journal，已完成 `agent()` 秒回缓存结果。
 - **budget**：`budget.total` 为 token 硬顶（默认 `null` = 无限）；`budget.spent()` / `budget.remaining()` 读实时消耗；耗尽后再发 `agent()` 抛错。
- **并发**：引擎 `Semaphore`（`min(16, cores - 2)`）限制同时运行的 agent 数。
+- **并发**：引擎 `Semaphore` 默认许可 3（`DEFAULT_MAX_CONCURRENCY`），可经 Workflow 工具的 `maxConcurrency` 入参 per-run 覆盖（钳到 `[1, MAX_CONCURRENCY_CAP=16]`）。
 - **错误**：脚本语法/meta 错 → `parseScript` 即时返错（不进后台）；agent 抛错 → `kind:'dead'` → `null`，workflow 继续（`parallel`/`pipeline` 容错）；`WorkflowAbortedError` → `killed`。

 ## 九、文件索引
--- a/packages/workflow-engine/src/tests/WorkflowTool.test.ts
+++ b/packages/workflow-engine/src/tests/WorkflowTool.test.ts
@@ -242,6 +242,15 @@ test('元数据方法：description/prompt/renderToolUseMessage', async () => {
  )
 })

+test('prompt 包含默认并发 3 + AskUserQuestion 指引', async () => {
+  const { ports } = mockPorts('/tmp', new Map())
+  const tool = createWorkflowTool(ports)
+  const p = await tool.prompt()
+  expect(p).toMatch(/default is 3/i)
+  expect(p).toMatch(/maxConcurrency/i)
+  expect(p).toMatch(/AskUserQuestion/i)
+})
+
 test('name 不存在 → 返回错误（不进后台）', async () => {
  const dir = await mkdtemp(join(tmpdir(), 'wf-tool-'))
  try {
--- a/packages/workflow-engine/src/tests/concurrency.test.ts
+++ b/packages/workflow-engine/src/tests/concurrency.test.ts
@@ -1,5 +1,10 @@
 import { expect, test } from 'bun:test'
-import { Semaphore, maxConcurrency } from '../engine/concurrency.js'
+import {
+  clampMaxConcurrency,
+  Semaphore,
+  maxConcurrency,
+} from '../engine/concurrency.js'
+import { DEFAULT_MAX_CONCURRENCY, MAX_CONCURRENCY_CAP } from '../constants.js'

 test('Semaphore 限制并发，permit 转移不泄漏', async () => {
  const sem = new Semaphore(2)
@@ -19,10 +24,24 @@ test('Semaphore 限制并发，permit 转移不泄漏', async () => {
  expect(peak).toBe(2) // 永不超过 permits
 })

-test('maxConcurrency 落在 [1, 16]', () => {
-  const n = maxConcurrency()
-  expect(n).toBeGreaterThanOrEqual(1)
-  expect(n).toBeLessThanOrEqual(16)
+test('maxConcurrency 返回 DEFAULT_MAX_CONCURRENCY (=3)', () => {
+  expect(maxConcurrency()).toBe(DEFAULT_MAX_CONCURRENCY)
+  expect(maxConcurrency()).toBe(3)
+})
+
+test('clampMaxConcurrency：undefined/NaN→DEFAULT；<1→1；>CAP→CAP；正常原值', () => {
+  expect(clampMaxConcurrency(undefined)).toBe(DEFAULT_MAX_CONCURRENCY)
+  expect(clampMaxConcurrency(Number.NaN)).toBe(DEFAULT_MAX_CONCURRENCY)
+  expect(clampMaxConcurrency(0)).toBe(1)
+  expect(clampMaxConcurrency(-3)).toBe(1)
+  expect(clampMaxConcurrency(MAX_CONCURRENCY_CAP + 100)).toBe(
+    MAX_CONCURRENCY_CAP,
+  )
+  expect(clampMaxConcurrency(5)).toBe(5)
+  expect(clampMaxConcurrency(1)).toBe(1)
+  expect(clampMaxConcurrency(MAX_CONCURRENCY_CAP)).toBe(MAX_CONCURRENCY_CAP)
+  // 小数截断（Semaphore 已有 Math.max(1, Math.floor)；clampMaxConcurrency 显式 trunc）
+  expect(clampMaxConcurrency(2.9)).toBe(2)
 })

 test('Semaphore(0) 至少 1 permit，acquire 不阻塞', async () => {
--- a/packages/workflow-engine/src/tests/context.test.ts
+++ b/packages/workflow-engine/src/tests/context.test.ts
@@ -40,6 +40,69 @@ test('createSharedResources 初始化预算与计数', () => {
  expect(r.depth).toBe(0)
 })

+test('createSharedResources：maxConcurrency 控制 semaphore permits', async () => {
+  // 默认 permits = DEFAULT_MAX_CONCURRENCY = 3：4 次 acquire 后第 4 次 pending
+  const r1 = createSharedResources(null)
+  const releases1: Array<() => void> = []
+  for (let i = 0; i < 3; i++) releases1.push(await r1.semaphore.acquire())
+  let fourthResolved = false
+  const pending = r1.semaphore.acquire().then(r => {
+    fourthResolved = true
+    return r
+  })
+  await new Promise(res => {
+    setTimeout(res, 5)
+  })
+  expect(fourthResolved).toBe(false)
+  releases1[0]!() // 释放一个，第四个应被唤醒
+  releases1.push(await pending)
+  for (const rel of releases1) rel()
+
+  // 显式 maxConcurrency=2：第 3 次 acquire pending
+  const r2 = createSharedResources(null, 2)
+  const releases2: Array<() => void> = []
+  releases2.push(await r2.semaphore.acquire())
+  releases2.push(await r2.semaphore.acquire())
+  let thirdResolved = false
+  const pending2 = r2.semaphore.acquire().then(r => {
+    thirdResolved = true
+    return r
+  })
+  await new Promise(res => {
+    setTimeout(res, 5)
+  })
+  expect(thirdResolved).toBe(false)
+  releases2[0]!()
+  releases2.push(await pending2)
+  for (const rel of releases2) rel()
+})
+
+test('createEngineContext 透传 maxConcurrency 到 resources.semaphore', async () => {
+  const ctx = createEngineContext({
+    ports: mockPorts(),
+    host: createHostHandle(null),
+    signal: new AbortController().signal,
+    runId: 'r-mc',
+    workflowName: 'w',
+    cwd: '/tmp',
+    budgetTotal: null,
+    maxConcurrency: 1,
+  })
+  // maxConcurrency=1：第二次 acquire 应 pending
+  const first = await ctx.resources.semaphore.acquire()
+  let secondResolved = false
+  const pending = ctx.resources.semaphore.acquire().then(r => {
+    secondResolved = true
+    return r
+  })
+  await new Promise(res => {
+    setTimeout(res, 5)
+  })
+  expect(secondResolved).toBe(false)
+  first()
+  await pending
+})
+
 test('createEngineContext 复制 journal 并重置游标', () => {
  const journal = [
    {
--- a/packages/workflow-engine/src/tests/index.test.ts
+++ b/packages/workflow-engine/src/tests/index.test.ts
@@ -29,6 +29,7 @@ test('持久化 / 结构化 / 命名 workflow / 进度 API 完整导出', () =>
 test('并发 / 预算 / 错误类完整导出', () => {
  expect(typeof wf.Semaphore).toBe('function')
  expect(typeof wf.maxConcurrency).toBe('function')
+  expect(typeof wf.clampMaxConcurrency).toBe('function')
  expect(typeof wf.Budget).toBe('function')
  expect(typeof wf.BudgetExhaustedError).toBe('function')
  expect(typeof wf.WorkflowError).toBe('function')
@@ -49,7 +50,7 @@ test('引擎常量值稳定', () => {
  expect(wf.MAX_TOTAL_AGENTS).toBe(1000)
  expect(wf.MAX_ITEMS_PER_CALL).toBe(4096)
  expect(wf.MAX_CONCURRENCY_CAP).toBe(16)
-  expect(wf.MAX_CONCURRENCY_OFFSET).toBe(2)
+  expect(wf.DEFAULT_MAX_CONCURRENCY).toBe(3)
  expect(wf.WORKFLOW_SCRIPT_EXTENSIONS).toEqual(['.ts', '.js', '.mjs'])
 })

--- a/packages/workflow-engine/src/tests/runWorkflow.test.ts
+++ b/packages/workflow-engine/src/tests/runWorkflow.test.ts
@@ -380,6 +380,57 @@ test('budgetTotal 耗尽 → failed', async () => {
  }
 })

+test('maxConcurrency 透传：并行 agent 受 run 级并发槽位限制', async () => {
+  const dir = await mkdtemp(join(tmpdir(), 'wf-run-'))
+  try {
+    let active = 0
+    let peak = 0
+    const ports: WorkflowPorts = {
+      agentRunner: {
+        runAgentToResult: async () => {
+          active++
+          peak = Math.max(peak, active)
+          await new Promise(r => {
+            setTimeout(r, 8)
+          })
+          active--
+          return { kind: 'ok', output: 'x', usage: { outputTokens: 1 } }
+        },
+      },
+      progressEmitter: { emit: () => {} },
+      taskRegistrar: {
+        register: () => ({ runId: 'r', signal: new AbortController().signal }),
+        complete: () => {},
+        fail: () => {},
+        kill: () => {},
+        pendingAction: () => null,
+      },
+      journalStore: createFileJournalStore(dir),
+      permissionGate: { isAborted: () => false },
+      logger: { debug: () => {}, event: () => {} },
+      hostFactory: () => ({
+        handle: createHostHandle(null),
+        cwd: dir,
+        budgetTotal: null,
+      }),
+    }
+    const result = await runWorkflow({
+      script: `return parallel(Array.from({length: 8}, () => () => agent('p')))`,
+      runId: 'run-mc',
+      ports,
+      host: createHostHandle(null),
+      signal: new AbortController().signal,
+      cwd: dir,
+      budgetTotal: null,
+      maxConcurrency: 2,
+    })
+    expect(result.status).toBe('completed')
+    expect(peak).toBeLessThanOrEqual(2)
+  } finally {
+    await rm(dir, { recursive: true, force: true })
+  }
+})
+
 test('workflow() 引用语法错的子脚本 → failed', async () => {
  const dir = await mkdtemp(join(tmpdir(), 'wf-run-'))
  try {
--- a/packages/workflow-engine/src/tests/schema.test.ts
+++ b/packages/workflow-engine/src/tests/schema.test.ts
@@ -14,6 +14,7 @@ test('全部已知字段可填', () => {
    resumeFromRunId: 'run-1',
    description: 'do thing',
    title: 'T',
+    maxConcurrency: 3,
  })
  expect(r.success).toBe(true)
 })
@@ -42,3 +43,20 @@ test('未知字段被 strip（zod 默认非 strict，safeParse 成功）', () =>
  const r = workflowInputSchema.safeParse({ script: 'x', extra: 1 })
  expect(r.success).toBe(true)
 })
+
+test('maxConcurrency：1–16 整数合法；0/17/小数/非数字被拒', () => {
+  for (const n of [1, 3, 5, 16]) {
+    expect(workflowInputSchema.safeParse({ maxConcurrency: n }).success).toBe(
+      true,
+    )
+  }
+  for (const bad of [0, -1, 17, 100, 1.5, '3', NaN]) {
+    expect(workflowInputSchema.safeParse({ maxConcurrency: bad }).success).toBe(
+      false,
+    )
+  }
+})
+
+test('maxConcurrency optional（省略时 safeParse 成功）', () => {
+  expect(workflowInputSchema.safeParse({ script: 'x' }).success).toBe(true)
+})
--- a/packages/workflow-engine/src/constants.ts
+++ b/packages/workflow-engine/src/constants.ts
@@ -15,8 +15,14 @@ export const WORKFLOW_RUNS_DIR = '.claude/workflow-runs'
 /** 命名 workflow 支持的脚本扩展名（按优先级）。 */
 export const WORKFLOW_SCRIPT_EXTENSIONS = ['.ts', '.js', '.mjs'] as const

-/** 并发：信号量许可 = min(MAX_CONCURRENCY_CAP, cpuCores - MAX_CONCURRENCY_OFFSET)。 */
-export const MAX_CONCURRENCY_OFFSET = 2
+/**
+ * 并发：每个 workflow run 默认 semaphore 许可数。
+ * 历史：曾用 min(CAP, cpuCores - 2)；改为固定默认 3——避免在多核机器上一次铺开十几个 agent。
+ * 单次 run 可经 Workflow 工具的 maxConcurrency 入参覆盖（仍受 CAP 钳制）。
+ */
+export const DEFAULT_MAX_CONCURRENCY = 3
+
+/** 用户传入 maxConcurrency 的绝对上限（防滥用）。 */
 export const MAX_CONCURRENCY_CAP = 16

 /** 单个 workflow 生命周期内 agent() 总数上限。 */
--- a/packages/workflow-engine/src/engine/concurrency.ts
+++ b/packages/workflow-engine/src/engine/concurrency.ts
@@ -1,5 +1,4 @@
-import * as os from 'node:os'
-import { MAX_CONCURRENCY_CAP, MAX_CONCURRENCY_OFFSET } from '../constants.js'
+import { DEFAULT_MAX_CONCURRENCY, MAX_CONCURRENCY_CAP } from '../constants.js'

 /**
 * 异步信号量。acquire() 返回一个 release 函数；permit 在 release 时直接
@@ -56,22 +55,19 @@ export class Semaphore {
  }
 }

-function cpuCores(): number {
-  const a = (os as { availableParallelism?: () => number }).availableParallelism
-  if (typeof a === 'function') {
-    try {
-      return a()
-    } catch {
-      // fallthrough
-    }
-  }
-  return os.cpus()?.length ?? 4
+/** 当前进程默认并发（向下兼容入口；具体 run 请用 clampMaxConcurrency 处理用户入参）。 */
+export function maxConcurrency(): number {
+  return DEFAULT_MAX_CONCURRENCY
 }

-/** min(MAX_CONCURRENCY_CAP, cpuCores - MAX_CONCURRENCY_OFFSET)，至少 1。 */
-export function maxConcurrency(): number {
-  return Math.max(
-    1,
-    Math.min(MAX_CONCURRENCY_CAP, cpuCores() - MAX_CONCURRENCY_OFFSET),
-  )
+/**
+ * 把"用户传入的 maxConcurrency"归一到合法 permits。
+ * - undefined / NaN → DEFAULT_MAX_CONCURRENCY
+ * - <1 → 1（至少 1 个并发槽，否则 workflow 无法推进）
+ * - >MAX_CONCURRENCY_CAP → MAX_CONCURRENCY_CAP
+ * - 否则取整后原值
+ */
+export function clampMaxConcurrency(n: number | undefined): number {
+  if (n === undefined || Number.isNaN(n)) return DEFAULT_MAX_CONCURRENCY
+  return Math.max(1, Math.min(Math.trunc(n), MAX_CONCURRENCY_CAP))
 }
--- a/packages/workflow-engine/src/engine/context.ts
+++ b/packages/workflow-engine/src/engine/context.ts
@@ -1,7 +1,7 @@
 import type { HostHandle, WorkflowPorts } from '../ports.js'
 import type { JournalEntry } from '../types.js'
 import { Budget } from './budget.js'
-import { Semaphore, maxConcurrency } from './concurrency.js'
+import { Semaphore, clampMaxConcurrency } from './concurrency.js'

 /**
 * 可被子 workflow 共享的资源。嵌套时 semaphore/budget/agentCountBox 按引用共享，
@@ -33,9 +33,10 @@ export type EngineContext = {

 export function createSharedResources(
  budgetTotal: number | null,
+  maxConcurrency?: number,
 ): SharedResources {
  return {
-    semaphore: new Semaphore(maxConcurrency()),
+    semaphore: new Semaphore(clampMaxConcurrency(maxConcurrency)),
    budget: new Budget(budgetTotal),
    agentCountBox: { value: 0 },
    agentIdSeq: { value: 0 },
@@ -51,9 +52,11 @@ export function createEngineContext(opts: {
  workflowName: string
  cwd: string
  budgetTotal: number | null
+  /** 单次 run 的并发槽位；undefined → DEFAULT_MAX_CONCURRENCY。经 clampMaxConcurrency 钳制。 */
+  maxConcurrency?: number
  journal?: JournalEntry[]
 }): EngineContext {
-  const resources = createSharedResources(opts.budgetTotal)
+  const resources = createSharedResources(opts.budgetTotal, opts.maxConcurrency)
  return {
    ports: opts.ports,
    host: opts.host,
--- a/packages/workflow-engine/src/engine/runWorkflow.ts
+++ b/packages/workflow-engine/src/engine/runWorkflow.ts
@@ -20,6 +20,8 @@ export type RunWorkflowOptions = {
  signal: AbortSignal
  cwd: string
  budgetTotal: number | null
+  /** 单次 run 的并发槽位；undefined → DEFAULT_MAX_CONCURRENCY。 */
+  maxConcurrency?: number
  /** resume：true 时载入既有 journal 重放。 */
  resume?: boolean
  /** resume 时脚本源码 hash 是否变化。true 则忽略 journal 全重跑。 */
@@ -65,6 +67,7 @@ export async function runWorkflow(
    workflowName,
    cwd: opts.cwd,
    budgetTotal: opts.budgetTotal,
+    maxConcurrency: opts.maxConcurrency,
    journal,
  })
  if (journalInvalidated) ctx.journalInvalidated = true
--- a/packages/workflow-engine/src/tool/WorkflowTool.ts
+++ b/packages/workflow-engine/src/tool/WorkflowTool.ts
@@ -43,6 +43,8 @@ Provide the script inline via "script", or reference a named workflow via "name"

 Use "resumeFromRunId" to resume a prior run — completed agent() calls replay from the journal instantly.

+Concurrency: default is 3 (hard ceiling 16). Pass "maxConcurrency" to override. If the user hasn't specified a concurrency and the workflow fans out (parallel/pipeline with many items, multi-dimensional audit, etc.), use AskUserQuestion to confirm the desired concurrency before launching — e.g. offer 3 / 6 / 9 as choices for a 9-dimension review.
+
 Script execution model (common pitfalls — getting these wrong is the #1 cause of script errors): the script is the body of \`new AsyncFunction\` — NOT an ESM module, and TypeScript is NOT transpiled. Therefore:
 - Do NOT use \`import\` — \`agent\`, \`parallel\`, \`pipeline\`, \`phase\`, \`log\`, \`workflow\`, \`args\`, and \`budget\` are injected as parameters; reference them directly.
 - Do NOT use TS type annotations, \`interface\`, \`enum\`, \`as\`, or generics — the engine does not transpile, so even a .ts file with type syntax fails to parse.
@@ -144,6 +146,9 @@ export function createWorkflowTool(
        signal,
        cwd: host.cwd,
        budgetTotal: host.budgetTotal,
+        ...(input.maxConcurrency !== undefined
+          ? { maxConcurrency: input.maxConcurrency }
+          : {}),
        ...(input.resumeFromRunId ? { resume: true } : {}),
      })
        .then(result => onFinish(ports, result, runId))
--- a/packages/workflow-engine/src/tool/schema.ts
+++ b/packages/workflow-engine/src/tool/schema.ts
@@ -23,6 +23,15 @@ export const workflowInputSchema = z.object({
    .describe('resume 指定 run，重放 journal'),
  description: z.string().optional().describe('本次调用的简短描述（3-5 词）'),
  title: z.string().optional().describe('进度查看器标题'),
+  maxConcurrency: z
+    .number()
+    .int()
+    .min(1)
+    .max(16)
+    .optional()
+    .describe(
+      '并发 agent() 上限。默认 3（最大 16）。当 workflow 包含大量 parallel/pipeline fan-out 时，可在启动前用 AskUserQuestion 与用户确认期望并发。',
+    ),
 })

 /**
--- a/src/skills/bundled/ultracode.ts
+++ b/src/skills/bundled/ultracode.ts
@@ -74,7 +74,7 @@ Script body hooks:
 - \`budget: {total: number|null, spent(): number, remaining(): number}\` — the turn's token target from the user's "+500k"-style directive. \`budget.total\` is null if no target was set. \`budget.spent()\` returns output tokens spent this turn across the main loop and all workflows — the pool is shared, not per-workflow. \`budget.remaining()\` returns \`max(0, total - spent())\`, or \`Infinity\` if no target. The target is a HARD ceiling, not advisory: once \`spent()\` reaches \`total\`, further \`agent()\` calls throw. Use for dynamic loops: \`while (budget.total && budget.remaining() > 50_000) { ... }\`, or static scaling: \`const FLEET = budget.total ? Math.floor(budget.total / 100_000) : 5\`.
 - \`workflow(nameOrRef: string | {scriptPath: string}, args?: any): Promise<any>\` — run another workflow inline as a sub-step and return whatever it returns. Pass a name to invoke a saved workflow (same registry as {name: "..."}), or {scriptPath} to run a script file you Wrote earlier. The child shares this run's concurrency cap, agent counter, abort signal, and token budget — its agents appear under a "▸ name" group in /workflows and its tokens count toward budget.spent(). The args param becomes the child's \`args\` global. Nesting is one level only: workflow() inside a child throws. Throws on unknown name / unreadable scriptPath / child syntax error; catch to handle gracefully.

-Concurrent agent() calls are capped at min(16, cpu cores - 2) per workflow — excess calls queue and run as slots free up. You can still pass 100 items to parallel()/pipeline() and they all complete; only ~10 run at any moment. Total agent count across a workflow's lifetime is capped at 1000 — a runaway-loop backstop set far above any real workflow. A single parallel()/pipeline() call accepts at most 4096 items; passing more is an explicit error, not a silent truncation.
+Concurrent agent() calls are capped at 3 by default per workflow — excess calls queue and run as slots free up. The Workflow tool accepts an optional \`maxConcurrency\` input (1–16) to override per-run; if the user hasn't specified and the workflow fans out (large parallel/pipeline, multi-dimensional review), ask them via AskUserQuestion before launching — e.g. offer 3 / 6 / 9 as choices. You can still pass 100 items to parallel()/pipeline() and they all complete; only the configured number run at any moment. Total agent count across a workflow's lifetime is capped at 1000 — a runaway-loop backstop set far above any real workflow. A single parallel()/pipeline() call accepts at most 4096 items; passing more is an explicit error, not a silent truncation.

 Subagents are told their final text IS the return value (not a human-facing message), so they return raw data. For structured output, use the schema option — validation happens at the tool-call layer so the model retries on mismatch.

--- a/src/workflow/service.ts
+++ b/src/workflow/service.ts
@@ -53,6 +53,7 @@ export type WorkflowService = {
      | 'description'
      | 'resumeFromRunId'
      | 'title'
+      | 'maxConcurrency'
    >,
    toolUseContext: ToolUseContext,
    canUseTool: CanUseToolFn,
@@ -216,6 +217,9 @@ export function makeService(
        signal,
        cwd: host.cwd,
        budgetTotal: host.budgetTotal,
+        ...(input.maxConcurrency !== undefined
+          ? { maxConcurrency: input.maxConcurrency }
+          : {}),
        ...(input.resumeFromRunId ? { resume: true } : {}),
      })
        .then(result => {