mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 16:25:51 +00:00
refactor: 统一自建 Tool Search — 移除 tool_reference/defer_loading 依赖,全 provider 通用
- 重命名 ExecuteTool → ExecuteExtraTool,作为一等工具始终可用
- ToolSearchTool 输出改为纯文本(区分 core/deferred),移除 tool_reference blocks
- 移除 modelSupportsToolReference() 及相关 GrowthBook 配置
- 移除 API 侧 defer_loading 字段和 tool search beta header 注入
- 简化 system prompt(工具使用指南从 ~120 行压缩到 ~10 行)
- extractDiscoveredToolNames 支持文本格式解析(向后兼容旧 session 的 tool_reference)
- 更新 promptEngineeringAudit 测试以匹配简化后的 prompt 结构

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -238,30 +238,29 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
// TXT 来源: {request_evaluation_checklist} — Step 0→1→2→3
|
||||
// ------------------------------------------------------------------
|
||||
describe('#1 Decision tree for tool selection', () => {
|
||||
test('prompt contains step-based tool selection guidance', async () => {
|
||||
test('prompt contains tool selection guidance via dedicated tools', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Step 0')
|
||||
expect(prompt).toContain('Step 1')
|
||||
expect(prompt).toContain('Step 2')
|
||||
expect(prompt).toContain('Step 3')
|
||||
expect(prompt).toContain('Prefer dedicated tools')
|
||||
expect(prompt).toContain('Reserve')
|
||||
expect(prompt).toContain('shell operations')
|
||||
})
|
||||
|
||||
test('decision tree has "stop at the first match" semantics', async () => {
|
||||
test('guidance distinguishes dedicated tools from Bash', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('stop at the first match')
|
||||
})
|
||||
|
||||
test('Step 0 teaches when NOT to use tools', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Step 0')
|
||||
expect(prompt).toContain('answer directly, no tool call')
|
||||
})
|
||||
|
||||
test('Step 1 prioritizes dedicated tools over Bash', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Step 1')
|
||||
expect(prompt).toContain('dedicated tool')
|
||||
})
|
||||
|
||||
test('lists core tools as directly callable', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Core tools')
|
||||
expect(prompt).toContain('can be called directly')
|
||||
})
|
||||
|
||||
test('provides concrete tool preference examples', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('over cat')
|
||||
expect(prompt).toContain('over sed')
|
||||
})
|
||||
})
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
@@ -271,24 +270,26 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
describe('#2 Anti-pattern guidance (when NOT to use tools)', () => {
|
||||
test('prompt says when NOT to use tools', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Do NOT use')
|
||||
const hasAntiPattern =
|
||||
prompt.includes('Do NOT use') ||
|
||||
prompt.includes('Reserve') ||
|
||||
prompt.includes('do not re-attempt')
|
||||
expect(hasAntiPattern).toBe(true)
|
||||
})
|
||||
|
||||
test('includes explicit "Do not use tools when" section', async () => {
|
||||
test('guidance covers Bash misuse', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Do not use tools when')
|
||||
const hasBashGuidance =
|
||||
prompt.includes('Reserve') && prompt.includes('shell operations')
|
||||
expect(hasBashGuidance).toBe(true)
|
||||
})
|
||||
|
||||
test('anti-pattern covers knowledge questions', async () => {
|
||||
test('anti-pattern covers file creation', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain(
|
||||
'programming concepts, syntax, or design patterns',
|
||||
)
|
||||
})
|
||||
|
||||
test('anti-pattern covers content already in context', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('already visible in context')
|
||||
const hasFileAntiPattern =
|
||||
prompt.includes('Do not create files unless') ||
|
||||
prompt.includes('prefer editing an existing file')
|
||||
expect(hasFileAntiPattern).toBe(true)
|
||||
})
|
||||
|
||||
test('includes file creation anti-pattern', async () => {
|
||||
@@ -305,24 +306,25 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
// TXT 来源: {core_search_behaviors}, {past_chats_tools}
|
||||
// ------------------------------------------------------------------
|
||||
describe('#6 Progressive fallback chain', () => {
|
||||
test('Grep/Glob fallback chain exists', async () => {
|
||||
test('prompt encourages searching before asking user', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('fallback chain')
|
||||
expect(prompt).toContain('search with')
|
||||
})
|
||||
|
||||
test('fallback includes broader pattern as first retry', async () => {
|
||||
test('search tools are available for discovery', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Broader pattern')
|
||||
expect(prompt).toContain('Grep')
|
||||
expect(prompt).toContain('Glob')
|
||||
})
|
||||
|
||||
test('fallback includes alternate naming conventions', async () => {
|
||||
test('fallback includes escalating to user via AskUserQuestion', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('camelCase vs snake_case')
|
||||
expect(prompt).toContain('AskUserQuestion')
|
||||
})
|
||||
|
||||
test('fallback ends with asking user after exhaustion', async () => {
|
||||
test('search before saying unknown is present', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('ask for guidance')
|
||||
expect(prompt).toContain('Search before saying unknown')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -331,30 +333,33 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
// TXT 来源: {examples}, {visualizer_examples}, {past_chats_tools}
|
||||
// ------------------------------------------------------------------
|
||||
describe('#3 Few-shot examples', () => {
|
||||
test('contains tool selection examples with arrow notation', async () => {
|
||||
test('contains concrete tool preference examples', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('→')
|
||||
expect(prompt).toContain('Tool selection examples')
|
||||
})
|
||||
|
||||
test('has multiple concrete Request→Action pairs (>=5)', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
const arrowCount = (prompt.match(/[""].+?[""] → /g) || []).length
|
||||
expect(arrowCount).toBeGreaterThanOrEqual(5)
|
||||
const hasExamples =
|
||||
prompt.includes('over cat') || prompt.includes('over sed')
|
||||
expect(hasExamples).toBe(true)
|
||||
})
|
||||
|
||||
test('examples cover different tool types', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Glob("**/*.tsx")')
|
||||
expect(prompt).toContain('Bash("bun test")')
|
||||
expect(prompt).toContain('Grep("TODO")')
|
||||
expect(prompt).toContain('answer directly')
|
||||
expect(prompt).toContain('Read')
|
||||
expect(prompt).toContain('Edit')
|
||||
expect(prompt).toContain('Grep')
|
||||
})
|
||||
|
||||
test('examples include negative cases (what NOT to use)', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('not Bash find')
|
||||
expect(prompt).toContain('not Bash sed')
|
||||
const hasNegative =
|
||||
prompt.includes('over cat') ||
|
||||
prompt.includes('over sed') ||
|
||||
prompt.includes('over find') ||
|
||||
prompt.includes('over grep')
|
||||
expect(hasNegative).toBe(true)
|
||||
})
|
||||
|
||||
test('core tools are enumerated', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Core tools')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -392,16 +397,18 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
expect(prompt).toContain('cost of pausing to confirm is low')
|
||||
})
|
||||
|
||||
test('frames search tools as cheap', async () => {
|
||||
test('guidance encourages searching over guessing', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('cheap operations')
|
||||
const hasSearchGuidance =
|
||||
prompt.includes('Search before saying unknown') ||
|
||||
prompt.includes('search with')
|
||||
expect(hasSearchGuidance).toBe(true)
|
||||
})
|
||||
|
||||
test('expanded cost asymmetry with multiple scenarios', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Cost asymmetry principle')
|
||||
expect(prompt).toContain('costs user trust')
|
||||
expect(prompt).toContain('breaks their flow')
|
||||
// Simplified prompt conveys cost via "search before saying unknown"
|
||||
expect(prompt).toContain('search with')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -432,32 +439,24 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
// TXT 来源: {search_usage_guidelines}, {past_chats_tools}
|
||||
// ------------------------------------------------------------------
|
||||
describe('#8 Query construction guidance', () => {
|
||||
test('includes Grep query construction advice', async () => {
|
||||
test('Grep is mentioned as a search tool', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('query construction')
|
||||
expect(prompt).toContain('content words')
|
||||
expect(prompt).toContain('Grep')
|
||||
})
|
||||
|
||||
test('Grep guidance teaches content words vs meta-descriptions', async () => {
|
||||
test('Glob is mentioned as a search tool', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('authenticate|login|signIn')
|
||||
expect(prompt).toContain('not "auth handling code"')
|
||||
expect(prompt).toContain('Glob')
|
||||
})
|
||||
|
||||
test('Grep guidance teaches pipe alternation for naming variants', async () => {
|
||||
test('search tools are referenced in "Search before saying unknown"', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('userId|user_id|userID')
|
||||
expect(prompt).toContain('Search before saying unknown')
|
||||
})
|
||||
|
||||
test('includes Glob query construction advice', async () => {
|
||||
test('dedicated tools are preferred over Bash equivalents', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Glob query construction')
|
||||
expect(prompt).toContain('**/*Auth*.ts')
|
||||
})
|
||||
|
||||
test('Glob guidance teaches narrowing by extension', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('**/*.test.ts')
|
||||
expect(prompt).toContain('Prefer dedicated tools')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -491,16 +490,15 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
// TXT 来源: {tool_discovery}, {core_search_behaviors}
|
||||
// ------------------------------------------------------------------
|
||||
describe('#10 Multi-step search strategy', () => {
|
||||
test('scales search effort to task complexity', async () => {
|
||||
test('encourages searching before concluding', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Scale search effort to task complexity')
|
||||
expect(prompt).toContain('Search before saying unknown')
|
||||
})
|
||||
|
||||
test('gives concrete complexity tiers', async () => {
|
||||
test('provides multiple search tools for different scopes', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Single file fix')
|
||||
expect(prompt).toContain('Cross-cutting change')
|
||||
expect(prompt).toContain('Architecture investigation')
|
||||
expect(prompt).toContain('Grep')
|
||||
expect(prompt).toContain('Glob')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -530,12 +528,12 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
describe('#22 Search before saying unknown', () => {
|
||||
test('instructs to search before claiming something does not exist', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('Search first, report results second')
|
||||
expect(prompt).toContain('Search before saying unknown')
|
||||
})
|
||||
|
||||
test('explicitly says do not say "I don\'t see that file"', async () => {
|
||||
test('core tools are listed as always available', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain("don't see that file")
|
||||
expect(prompt).toContain('call them directly')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -663,9 +661,9 @@ describe('Opus 4.7 Prompt Engineering Audit', () => {
|
||||
|
||||
test('tool_discovery: search before saying unavailable', async () => {
|
||||
const prompt = await getFullPrompt()
|
||||
expect(prompt).toContain('visible tool list is partial by design')
|
||||
expect(prompt).toContain('search for it')
|
||||
expect(prompt).toContain(
|
||||
'Only state something is unavailable after the search returns no match',
|
||||
'Only state something is unavailable after ToolSearch returns no match',
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user