mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-23 00:35:51 +00:00
feat: 工具层及 mcp 大重构 (#252)
* feat: 第一版大重构 * fix: 修复类型问题 * chore: 更新版本到 1.3.2 * Add brave as alternative WebSearchTool * fix: 修正顺序 * fix: 修复对穷鬼模式的 auto dream 和 session memory 越过 * feat: 穷鬼模式去除 session-summary * feat: 创建 builtin-tools 包,搬运所有工具实现 将 src/tools/ 下的全部 60 个工具目录迁移至 packages/builtin-tools/src/tools/, 内部导入路径已更新为 src/ alias 模式。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: 更新 src/ 中所有工具引用至 builtin-tools 包,删除 src/tools/ - src/tools.ts 及 178 个 src/ 文件的 import 路径从 ./tools/ 改为 builtin-tools/tools/ - 删除 src/tools/ 整个目录(已迁移至 packages/builtin-tools/) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: 添加 builtin-tools 路径别名至 tsconfig,更新 bun.lock - tsconfig.json 新增 builtin-tools/* 和 builtin-tools 路径映射 - 新增 packages/builtin-tools/src 至 include Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: 为 builtin-tools、mcp-client、agent-tools 添加 @claude-code-best 作用域前缀 所有包名及 import 路径统一添加 @claude-code-best/ 前缀: - builtin-tools → @claude-code-best/builtin-tools - mcp-client → @claude-code-best/mcp-client - agent-tools → @claude-code-best/agent-tools Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 修复 node 环境没有 bun 的问题 --------- Co-authored-by: Eric-Guo <eric.guocz@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
127
packages/builtin-tools/src/tools/WebSearchTool/UI.tsx
Normal file
127
packages/builtin-tools/src/tools/WebSearchTool/UI.tsx
Normal file
@@ -0,0 +1,127 @@
|
||||
import React from 'react'
|
||||
import { MessageResponse } from 'src/components/MessageResponse.js'
|
||||
import { TOOL_SUMMARY_MAX_LENGTH } from 'src/constants/toolLimits.js'
|
||||
import { Box, Text } from '@anthropic/ink'
|
||||
import type { ProgressMessage } from 'src/types/message.js'
|
||||
import { truncate } from 'src/utils/format.js'
|
||||
import type {
|
||||
Output,
|
||||
SearchResult,
|
||||
WebSearchProgress,
|
||||
} from './WebSearchTool.js'
|
||||
|
||||
/**
 * Tallies a mixed list of tool results.
 *
 * Only structured search-result entries are counted; plain-text commentary
 * strings and null/undefined placeholders are ignored.
 *
 * @param results - Entries as found on the tool output's `results` array.
 * @returns `searchCount`: number of structured entries; `totalResultCount`:
 *   sum of their `content` lengths (a missing `content` counts as 0).
 */
function getSearchSummary(
  results: (SearchResult | string | null | undefined)[],
): {
  searchCount: number
  totalResultCount: number
} {
  return results.reduce(
    (tally, entry) => {
      // Skip commentary text and empty slots.
      if (entry == null || typeof entry === 'string') {
        return tally
      }
      return {
        searchCount: tally.searchCount + 1,
        totalResultCount: tally.totalResultCount + (entry.content?.length ?? 0),
      }
    },
    { searchCount: 0, totalResultCount: 0 },
  )
}
|
||||
|
||||
/**
 * Builds the one-line description shown when the tool is invoked.
 *
 * @param input - Possibly partial tool input; nothing is rendered until a
 *   non-empty `query` is present.
 * @param options - `verbose` controls whether domain filters are appended.
 * @returns The quoted query, optionally followed by the allowed/blocked
 *   domain lists, or `null` when there is no query yet.
 */
export function renderToolUseMessage(
  {
    query,
    allowed_domains,
    blocked_domains,
  }: Partial<{
    query: string
    allowed_domains?: string[]
    blocked_domains?: string[]
  }>,
  { verbose }: { verbose: boolean },
): React.ReactNode {
  if (!query) {
    return null
  }

  // Segments are joined with ", " so the quoted query always comes first.
  const segments = [`"${query}"`]

  if (verbose) {
    if (allowed_domains && allowed_domains.length > 0) {
      segments.push(`only allowing domains: ${allowed_domains.join(', ')}`)
    }
    if (blocked_domains && blocked_domains.length > 0) {
      segments.push(`blocking domains: ${blocked_domains.join(', ')}`)
    }
  }

  return segments.join(', ')
}
|
||||
|
||||
/**
 * Renders the most recent progress event of a running web search.
 *
 * Only the last entry of `progressMessages` is considered. Returns `null`
 * when there are no messages, the last one carries no `data`, or its `type`
 * is not one handled here.
 */
export function renderToolUseProgressMessage(
  progressMessages: ProgressMessage<WebSearchProgress>[],
): React.ReactNode {
  // Indexing an empty array yields undefined, which the guard below covers.
  const latest = progressMessages[progressMessages.length - 1]
  const payload = latest?.data
  if (!payload) {
    return null
  }

  if (payload.type === 'query_update') {
    return (
      <MessageResponse>
        <Text dimColor>Searching: {payload.query}</Text>
      </MessageResponse>
    )
  }

  if (payload.type === 'search_results_received') {
    return (
      <MessageResponse>
        <Text dimColor>
          Found {payload.resultCount} results for "{payload.query}"
        </Text>
      </MessageResponse>
    )
  }

  return null
}
|
||||
|
||||
/**
 * Renders the one-line summary shown after a search completes, e.g.
 * "Did 1 search in 2s" or "Did 2 searches in 350ms".
 *
 * Durations of one second or more are rounded to whole seconds; shorter
 * durations are shown in rounded milliseconds.
 */
export function renderToolResultMessage(output: Output): React.ReactNode {
  const { searchCount } = getSearchSummary(output.results ?? [])

  const seconds = output.durationSeconds
  let timeDisplay: string
  if (seconds >= 1) {
    timeDisplay = `${Math.round(seconds)}s`
  } else {
    timeDisplay = `${Math.round(seconds * 1000)}ms`
  }

  // "search" -> "searches" for every count except exactly one.
  const pluralSuffix = searchCount !== 1 ? 'es' : ''

  return (
    <Box justifyContent="space-between" width="100%">
      <MessageResponse height={1}>
        <Text>
          Did {searchCount} search
          {pluralSuffix} in {timeDisplay}
        </Text>
      </MessageResponse>
    </Box>
  )
}
|
||||
|
||||
/**
 * Produces the short summary text for a tool invocation.
 *
 * @param input - Possibly partial or missing tool input.
 * @returns The query passed through `truncate` with
 *   `TOOL_SUMMARY_MAX_LENGTH`, or `null` when no non-empty query is available.
 */
export function getToolUseSummary(
  input: Partial<{ query: string }> | undefined,
): string | null {
  const query = input?.query
  return query ? truncate(query, TOOL_SUMMARY_MAX_LENGTH) : null
}
|
||||
221
packages/builtin-tools/src/tools/WebSearchTool/WebSearchTool.ts
Normal file
221
packages/builtin-tools/src/tools/WebSearchTool/WebSearchTool.ts
Normal file
@@ -0,0 +1,221 @@
|
||||
import type { PermissionResult } from 'src/utils/permissions/PermissionResult.js'
|
||||
import { z } from 'zod/v4'
|
||||
import { buildTool, type ToolDef } from 'src/Tool.js'
|
||||
import { lazySchema } from 'src/utils/lazySchema.js'
|
||||
import { jsonStringify } from 'src/utils/slowOperations.js'
|
||||
import { createAdapter } from './adapters/index.js'
|
||||
import { getWebSearchPrompt, WEB_SEARCH_TOOL_NAME } from './prompt.js'
|
||||
import {
|
||||
getToolUseSummary,
|
||||
renderToolResultMessage,
|
||||
renderToolUseMessage,
|
||||
renderToolUseProgressMessage,
|
||||
} from './UI.js'
|
||||
|
||||
/**
 * Lazily built schema for the tool input: a strict object with a required
 * `query` (minimum two characters) and two optional domain filter lists.
 */
const inputSchema = lazySchema(() => {
  const query = z.string().min(2).describe('The search query to use')

  const allowedDomains = z
    .array(z.string())
    .optional()
    .describe('Only include search results from these domains')

  const blockedDomains = z
    .array(z.string())
    .optional()
    .describe('Never include search results from these domains')

  return z.strictObject({
    query,
    allowed_domains: allowedDomains,
    blocked_domains: blockedDomains,
  })
})
type InputSchema = ReturnType<typeof inputSchema>
|
||||
|
||||
/**
 * Lazily built schema for one structured search result: the originating
 * tool-use ID plus the list of hits (title, URL, optional snippet).
 */
const searchResultSchema = lazySchema(() =>
  z.object({
    tool_use_id: z.string().describe('ID of the tool use'),
    content: z
      .array(
        z.object({
          title: z.string().describe('The title of the search result'),
          url: z.string().describe('The URL of the search result'),
          snippet: z
            .string()
            .optional()
            .describe('A short description of the search result'),
        }),
      )
      .describe('Array of search hits'),
  }),
)

/** A structured search result as described by `searchResultSchema`. */
export type SearchResult = z.infer<ReturnType<typeof searchResultSchema>>
|
||||
|
||||
/**
 * Lazily built schema for the tool output: the executed query, a list whose
 * entries are either structured search results or plain text, and the
 * elapsed time in seconds.
 */
const outputSchema = lazySchema(() => {
  const query = z.string().describe('The search query that was executed')

  const results = z
    .array(z.union([searchResultSchema(), z.string()]))
    .describe('Search results and/or text commentary from the model')

  const durationSeconds = z
    .number()
    .describe('Time taken to complete the search operation')

  return z.object({ query, results, durationSeconds })
})
type OutputSchema = ReturnType<typeof outputSchema>

/** The tool's output value as described by `outputSchema`. */
export type Output = z.infer<OutputSchema>
|
||||
|
||||
// Re-export WebSearchProgress from centralized types to break import cycles
|
||||
export type { WebSearchProgress } from 'src/types/tools.js'
|
||||
|
||||
import type { WebSearchProgress } from 'src/types/tools.js'
|
||||
|
||||
/**
 * Web search tool definition.
 *
 * The search itself is delegated to whatever adapter `createAdapter()`
 * returns; this object supplies the schemas, permission/validation hooks,
 * UI renderers, and the conversion of adapter hits into the `Output` shape
 * and into the text block sent back as the tool result.
 */
export const WebSearchTool = buildTool({
  name: WEB_SEARCH_TOOL_NAME,
  searchHint: 'search the web for current information',
  maxResultSizeChars: 100_000,
  shouldDefer: true,

  async description(input) {
    return `Claude wants to search the web for: ${input.query}`
  },

  userFacingName() {
    return 'Web Search'
  },

  getToolUseSummary,

  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    if (summary) {
      return `Searching for ${summary}`
    }
    return 'Searching the web'
  },

  isEnabled() {
    // Unconditionally enabled: backend selection happens in the adapter
    // factory (createAdapter), not here.
    return true
  },

  get inputSchema(): InputSchema {
    return inputSchema()
  },

  get outputSchema(): OutputSchema {
    return outputSchema()
  },

  isConcurrencySafe() {
    return true
  },

  isReadOnly() {
    return true
  },

  toAutoClassifierInput(input) {
    return input.query
  },

  // Always defers the decision ("passthrough") and suggests an allow rule
  // for this tool in local settings.
  async checkPermissions(_input): Promise<PermissionResult> {
    return {
      behavior: 'passthrough',
      message: 'WebSearchTool requires permission.',
      suggestions: [
        {
          type: 'addRules',
          rules: [{ toolName: WEB_SEARCH_TOOL_NAME }],
          behavior: 'allow',
          destination: 'localSettings',
        },
      ],
    }
  },

  async prompt() {
    return getWebSearchPrompt()
  },

  renderToolUseMessage,
  renderToolUseProgressMessage,
  renderToolResultMessage,

  extractSearchText() {
    return ''
  },

  // Rejects an empty query (errorCode 1) and requests that combine an
  // allow-list with a block-list (errorCode 2).
  async validateInput({ query, allowed_domains, blocked_domains }) {
    if (query.length === 0) {
      return {
        result: false,
        message: 'Error: Missing query',
        errorCode: 1,
      }
    }

    const hasAllowList = Boolean(allowed_domains?.length)
    const hasBlockList = Boolean(blocked_domains?.length)
    if (hasAllowList && hasBlockList) {
      return {
        result: false,
        message:
          'Error: Cannot specify both allowed_domains and blocked_domains in the same request',
        errorCode: 2,
      }
    }

    return { result: true }
  },

  // Runs the search through the selected adapter, forwards adapter progress
  // events to the caller, and wraps the hits in the Output shape.
  async call(input, context, _canUseTool, _parentMessage, onProgress) {
    const startedAt = performance.now()
    const { query } = input

    const hits = await createAdapter().search(query, {
      allowedDomains: input.allowed_domains,
      blockedDomains: input.blocked_domains,
      signal: context.abortController.signal,
      onProgress(progress) {
        // The progress ID is derived from the current timestamp.
        onProgress?.({
          toolUseID: `search-progress-${Date.now()}`,
          data: progress,
        })
      },
    })

    const durationSeconds = (performance.now() - startedAt) / 1000

    // Convert adapter hits to the legacy Output format: one structured entry
    // holding every hit, or a single text entry when nothing was found.
    const results: (SearchResult | string)[] =
      hits.length > 0
        ? [
            {
              tool_use_id: 'adapter-search-1',
              content: hits.map(hit => ({
                title: hit.title,
                url: hit.url,
                snippet: hit.snippet,
              })),
            },
          ]
        : ['No search results found.']

    const data: Output = { query, results, durationSeconds }
    return { data }
  },

  // Flattens the Output into a single text block: a header line, each text
  // entry verbatim, each structured entry as a markdown link list, and a
  // trailing reminder to cite sources.
  mapToolResultToToolResultBlockParam(output, toolUseID) {
    const { query, results } = output
    const chunks: string[] = [`Web search results for query: "${query}"\n\n`]

    for (const entry of results ?? []) {
      if (entry == null) {
        continue
      }
      if (typeof entry === 'string') {
        chunks.push(`${entry}\n\n`)
        continue
      }
      if (!((entry.content?.length ?? 0) > 0)) {
        chunks.push('No links found.\n\n')
        continue
      }

      chunks.push('Links:\n')
      for (const hit of entry.content) {
        const snippetSuffix = hit.snippet ? `: ${hit.snippet}` : ''
        chunks.push(` - [${hit.title}](${hit.url})${snippetSuffix}\n`)
      }
      chunks.push('\n')
    }

    chunks.push(
      '\nREMINDER: You MUST include the sources above in your response to the user using markdown hyperlinks.',
    )

    return {
      tool_use_id: toolUseID,
      type: 'tool_result',
      content: chunks.join('').trim(),
    }
  },
} satisfies ToolDef<InputSchema, Output, WebSearchProgress>)
|
||||
@@ -0,0 +1,59 @@
|
||||
import { afterEach, describe, expect, mock, test } from 'bun:test'
|
||||
|
||||
// Toggled by individual tests; read lazily by the mocked provider check below.
let isFirstPartyBaseUrl = true

// Only mock the external dependency that controls adapter selection
mock.module('src/utils/model/providers.js', () => {
  return {
    isFirstPartyAnthropicBaseUrl: () => isFirstPartyBaseUrl,
  }
})

// Imported after mock.module() so the factory sees the mocked provider module.
const { createAdapter } = await import('../adapters/index')

// Snapshot of the environment so each test leaves it as it found it.
const originalWebSearchAdapter = process.env.WEB_SEARCH_ADAPTER

/** Sets or clears WEB_SEARCH_ADAPTER; `undefined` removes the variable. */
const setAdapterEnv = (value: string | undefined) => {
  if (value !== undefined) {
    process.env.WEB_SEARCH_ADAPTER = value
  } else {
    delete process.env.WEB_SEARCH_ADAPTER
  }
}

/** Class name of an adapter instance, used to identify the selected backend. */
const classNameOf = (adapter: object) => adapter.constructor.name

afterEach(() => {
  isFirstPartyBaseUrl = true
  setAdapterEnv(originalWebSearchAdapter)
})

describe('createAdapter', () => {
  test('reuses the same instance when the selected backend does not change', () => {
    setAdapterEnv('brave')

    const firstAdapter = createAdapter()
    const secondAdapter = createAdapter()

    expect(firstAdapter).toBe(secondAdapter)
    expect(classNameOf(firstAdapter)).toBe('BraveSearchAdapter')
  })

  test('rebuilds the adapter when WEB_SEARCH_ADAPTER changes', () => {
    setAdapterEnv('brave')
    const braveAdapter = createAdapter()

    setAdapterEnv('bing')
    const bingAdapter = createAdapter()

    expect(bingAdapter).not.toBe(braveAdapter)
    expect(classNameOf(bingAdapter)).toBe('BingSearchAdapter')
  })

  test('selects the API adapter for first-party Anthropic URLs', () => {
    setAdapterEnv(undefined)
    isFirstPartyBaseUrl = true

    expect(classNameOf(createAdapter())).toBe('ApiSearchAdapter')
  })

  test('selects the Bing adapter for third-party Anthropic base URLs', () => {
    setAdapterEnv(undefined)
    isFirstPartyBaseUrl = false

    expect(classNameOf(createAdapter())).toBe('BingSearchAdapter')
  })
})
|
||||
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* Integration test for BingSearchAdapter — hits the real Bing search.
|
||||
*
|
||||
* Usage:
|
||||
* bun run packages/builtin-tools/src/tools/WebSearchTool/__tests__/bingAdapter.integration.ts
|
||||
*
|
||||
* Optional env vars:
|
||||
* BING_QUERY — search query (default: "Claude AI Anthropic")
|
||||
*/
|
||||
|
||||
// Provide MACRO globals needed by the codebase when running outside dev mode.
// Assigned only when no truthy MACRO global already exists.
globalThis.MACRO ||= { VERSION: '0.0.0-test', BUILD_TIME: '0' } as any
|
||||
|
||||
import { BingSearchAdapter, extractBingResults } from '../adapters/bingAdapter'
|
||||
|
||||
// Search query; falls back to the default when BING_QUERY is unset or empty.
const query = process.env.BING_QUERY || 'Claude AI Anthropic'

/**
 * Runs one live Bing search, prints the hits, and validates their structure.
 *
 * Exits the process with code 1 when no results come back or when any result
 * lacks a non-empty string title or a URL starting with "http".
 */
async function main() {
  console.log(`\n🔍 Searching Bing for: "${query}"\n`)

  const adapter = new BingSearchAdapter()
  const startTime = Date.now()

  const results = await adapter.search(query, {
    onProgress: (p) => {
      switch (p.type) {
        case 'query_update':
          console.log(` → Query sent: ${p.query}`)
          break
        case 'search_results_received':
          console.log(` → Received ${p.resultCount} results`)
          break
      }
    },
  })

  const elapsed = Date.now() - startTime
  console.log(`\n✅ Done in ${elapsed}ms — ${results.length} result(s)\n`)

  if (results.length === 0) {
    const diagnostics = [
      '⚠️ No results returned. Possible causes:',
      ' - Bing returned a CAPTCHA or rate-limited the request',
      ' - Network/firewall issue',
      ' - Bing HTML structure changed',
      ' - Anti-bot detection triggered\n',
    ]
    for (const line of diagnostics) {
      console.log(line)
    }
    process.exit(1)
  }

  // Human-readable listing; snippets are flattened to one line and capped
  // at 150 characters.
  results.forEach((r, i) => {
    console.log(` ${i + 1}. ${r.title}`)
    console.log(` ${r.url}`)
    if (r.snippet) {
      const snippet = r.snippet.replace(/\n/g, ' ')
      const ellipsis = snippet.length > 150 ? '…' : ''
      console.log(` ${snippet.slice(0, 150)}${ellipsis}`)
    }
    console.log()
  })

  // Validate result structure
  let failures = 0
  results.forEach((r, i) => {
    const position = i + 1
    if (typeof r.title !== 'string' || !r.title) {
      console.error(`❌ Result ${position}: missing or non-string title`, r)
      failures++
    }
    if (!(r.url && r.url.startsWith('http'))) {
      console.error(`❌ Result ${position}: missing or non-http url`, r)
      failures++
    }
  })

  if (failures > 0) {
    process.exit(1)
  }
  console.log('✅ All results have valid structure.\n')
}

main().catch((err) => {
  console.error('❌ Fatal error:', err)
  process.exit(1)
})
|
||||
@@ -0,0 +1,499 @@
|
||||
import { describe, expect, mock, test } from 'bun:test'
|
||||
import { extractBingResults, decodeHtmlEntities } from '../adapters/bingAdapter'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// decodeHtmlEntities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Unit tests for decodeHtmlEntities.
//
// The inputs below are written as HTML character references (&amp;, &lt;,
// &quot;, &#39;, &#x27;, &nbsp;) so that each test actually exercises
// decoding; the expected values are the corresponding literal characters.
describe('decodeHtmlEntities', () => {
  test('decodes common named entities', () => {
    expect(decodeHtmlEntities('&amp; &lt; &gt;')).toBe('& < >')
  })

  test('decodes quote entities', () => {
    expect(decodeHtmlEntities('&quot;hello&quot;')).toBe('"hello"')
  })

  test('decodes numeric and hex apostrophe entities', () => {
    // &#39; is the decimal form and &#x27; the hexadecimal form of "'".
    expect(decodeHtmlEntities('&#39;it&#x27;s')).toBe("'it's")
  })

  test('decodes &nbsp; to non-breaking space (\\u00A0)', () => {
    expect(decodeHtmlEntities('a&nbsp;b')).toBe('a\u00A0b')
  })

  test('returns plain text unchanged', () => {
    expect(decodeHtmlEntities('hello world')).toBe('hello world')
  })

  test('handles empty string', () => {
    expect(decodeHtmlEntities('')).toBe('')
  })

  test('decodes multiple occurrences of the same entity', () => {
    expect(decodeHtmlEntities('a&amp;b&amp;c')).toBe('a&b&c')
  })

  test('handles mixed entities in one string', () => {
    expect(decodeHtmlEntities('&lt;a&nbsp;href=&quot;x&quot;&gt;')).toBe(
      '<a\u00A0href="x">',
    )
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// extractBingResults
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Unit tests for extractBingResults, driven by hand-written Bing result-page
// fragments. Fixtures that test entity decoding use encoded character
// references (&amp;, &lt;, &gt;) in the HTML so the decoding path is
// exercised and the markup stays well-formed.
describe('extractBingResults', () => {
  test('extracts results from standard Bing HTML', () => {
    const html = `
      <ol id="b_results">
        <li class="b_algo">
          <h2><a href="https://example.com/page1" h="ID=SERP,1">Example Title 1</a></h2>
          <div class="b_caption">
            <p class="b_lineclamp">First result description</p>
          </div>
        </li>
        <li class="b_algo">
          <h2><a href="https://example.com/page2" h="ID=SERP,2">Example Title 2</a></h2>
          <div class="b_caption">
            <p class="b_lineclamp">Second result description</p>
          </div>
        </li>
      </ol>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(2)
    expect(results[0]).toEqual({
      title: 'Example Title 1',
      url: 'https://example.com/page1',
      snippet: 'First result description',
    })
    expect(results[1]).toEqual({
      title: 'Example Title 2',
      url: 'https://example.com/page2',
      snippet: 'Second result description',
    })
  })

  test('returns empty array when no b_algo blocks exist', () => {
    const html = `
      <ol id="b_results">
        <li class="b_ad">Ad result</li>
        <li class="b_ans">Answer card</li>
      </ol>
    `
    expect(extractBingResults(html)).toEqual([])
  })

  test('returns empty array for empty HTML', () => {
    expect(extractBingResults('')).toEqual([])
  })

  test('returns empty array for unrelated HTML', () => {
    expect(extractBingResults('<html><body>Hello</body></html>')).toEqual([])
  })

  test('skips Bing-internal links', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="/search?q=more">More results</a></h2>
      </li>
      <li class="b_algo">
        <h2><a href="https://www.bing.com/videos">Bing Videos</a></h2>
      </li>
      <li class="b_algo">
        <h2><a href="#anchor">Jump link</a></h2>
      </li>
    `
    expect(extractBingResults(html)).toEqual([])
  })

  test('strips HTML tags from titles', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Result with <strong>bold</strong> and <em>italic</em></a></h2>
      </li>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(1)
    expect(results[0].title).toBe('Result with bold and italic')
  })

  test('decodes HTML entities in titles', () => {
    // The title is entity-encoded in the markup; a raw "<cartoon>" would be
    // an HTML tag rather than text.
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Tom &amp; Jerry &lt;cartoon&gt;</a></h2>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].title).toBe('Tom & Jerry <cartoon>')
  })

  test('extracts snippet from b_lineclamp class', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Title</a></h2>
        <p class="b_lineclamp3 b_algo_slug">Lineclamp snippet text here</p>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].snippet).toBe('Lineclamp snippet text here')
  })

  test('extracts snippet from b_caption paragraph fallback', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Title</a></h2>
        <div class="b_caption">
          <p>Caption paragraph text</p>
        </div>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].snippet).toBe('Caption paragraph text')
  })

  test('extracts snippet from b_caption div fallback', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Title</a></h2>
        <div class="b_caption">Direct caption text without p tag</div>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].snippet).toBe('Direct caption text without p tag')
  })

  test('returns undefined snippet when no caption exists', () => {
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Title Only</a></h2>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].snippet).toBeUndefined()
  })

  test('handles mixed result types and only extracts b_algo', () => {
    const html = `
      <ol id="b_results">
        <li class="b_ad"><h2><a href="https://ad.com">Ad Title</a></h2></li>
        <li class="b_algo">
          <h2><a href="https://real-result.com">Real Result</a></h2>
          <p class="b_lineclamp">A real snippet</p>
        </li>
        <li class="b_ans"><div>People also ask</div></li>
        <li class="b_algo">
          <h2><a href="https://another.com">Another Result</a></h2>
        </li>
      </ol>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(2)
    expect(results[0].title).toBe('Real Result')
    expect(results[1].title).toBe('Another Result')
  })

  test('skips b_algo blocks without h2 > a structure', () => {
    const html = `
      <li class="b_algo">
        <div>No link here</div>
      </li>
      <li class="b_algo">
        <h2><a href="https://example.com">Valid Result</a></h2>
      </li>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(1)
    expect(results[0].title).toBe('Valid Result')
  })

  test('handles extra whitespace in h2 > a structure', () => {
    const html = `
      <li class="b_algo">
        <h2>
          <a href="https://example.com" h="ID=SERP,1" >
            Whitespace Title
          </a>
        </h2>
      </li>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(1)
    expect(results[0].title).toBe('Whitespace Title')
  })

  test('handles snippet with HTML entities', () => {
    // Encoded comparison operators and ampersand; decoded text is asserted.
    const html = `
      <li class="b_algo">
        <h2><a href="https://example.com">Title</a></h2>
        <p class="b_lineclamp">5 &lt; 10 &amp; 10 &gt; 5</p>
      </li>
    `
    const results = extractBingResults(html)
    expect(results[0].snippet).toBe('5 < 10 & 10 > 5')
  })

  test('handles real-world Bing HTML structure', () => {
    const html = `
      <ol id="b_results" role="main">
        <li class="b_algo" data-id="">
          <div class="b_title">
            <h2>
              <a href="https://docs.python.org/3/tutorial/index.html" target="_blank" h="ID=SERP,5125.1">
                Python Tutorial
              </a>
            </h2>
          </div>
          <div class="b_caption">
            <div class="b_attribution" u="0|5125|4976674477245">
              <cite>https://docs.python.org</cite>
            </div>
            <p class="b_lineclamp3">
              Welcome to the Python Tutorial. This tutorial introduces you to the basic concepts and features...
            </p>
          </div>
        </li>
        <li class="b_algo">
          <h2>
            <a href="https://realpython.com/python-guide/" h="ID=SERP,5125.2">
              Real Python Guide
            </a>
          </h2>
          <div class="b_caption">
            <div class="b_attribution">
              <cite>https://realpython.com</cite>
            </div>
            <p>
              The ultimate Python guide for beginners and experts alike.
            </p>
          </div>
        </li>
      </ol>
    `
    const results = extractBingResults(html)
    expect(results).toHaveLength(2)
    expect(results[0].title).toBe('Python Tutorial')
    expect(results[0].url).toBe('https://docs.python.org/3/tutorial/index.html')
    expect(results[0].snippet).toContain('Welcome to the Python Tutorial')
    expect(results[1].title).toBe('Real Python Guide')
    expect(results[1].snippet).toContain('ultimate Python guide')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// BingSearchAdapter.search (integration with mocked axios)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('BingSearchAdapter.search', () => {
|
||||
// Dynamic import so mock.module() takes effect
|
||||
const createAdapter = async () => {
|
||||
const { BingSearchAdapter } = await import('../adapters/bingAdapter')
|
||||
return new BingSearchAdapter()
|
||||
}
|
||||
|
||||
const SAMPLE_HTML = `
|
||||
<ol id="b_results">
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://example.com/result1">Result One</a></h2>
|
||||
<p class="b_lineclamp">Snippet one</p>
|
||||
</li>
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://example.com/result2">Result Two</a></h2>
|
||||
<p class="b_lineclamp">Snippet two</p>
|
||||
</li>
|
||||
</ol>
|
||||
`
|
||||
|
||||
test('returns parsed results from fetched HTML', async () => {
|
||||
mock.module('axios', () => ({
|
||||
default: {
|
||||
get: mock(() => Promise.resolve({ data: SAMPLE_HTML })),
|
||||
isCancel: () => false,
|
||||
},
|
||||
}))
|
||||
mock.module('src/utils/http', () => ({
|
||||
getWebFetchUserAgent: () => 'TestAgent/1.0',
|
||||
}))
|
||||
|
||||
const adapter = await createAdapter()
|
||||
const results = await adapter.search('test query', {})
|
||||
expect(results).toHaveLength(2)
|
||||
expect(results[0].title).toBe('Result One')
|
||||
expect(results[1].title).toBe('Result Two')
|
||||
})
|
||||
|
||||
test('calls onProgress with query_update and search_results_received', async () => {
|
||||
mock.module('axios', () => ({
|
||||
default: {
|
||||
get: mock(() => Promise.resolve({ data: SAMPLE_HTML })),
|
||||
isCancel: () => false,
|
||||
},
|
||||
}))
|
||||
mock.module('src/utils/http', () => ({
|
||||
getWebFetchUserAgent: () => 'TestAgent/1.0',
|
||||
}))
|
||||
|
||||
const progressCalls: any[] = []
|
||||
const onProgress = (p: any) => progressCalls.push(p)
|
||||
|
||||
const adapter = await createAdapter()
|
||||
await adapter.search('test', { onProgress })
|
||||
|
||||
expect(progressCalls).toHaveLength(2)
|
||||
expect(progressCalls[0].type).toBe('query_update')
|
||||
expect(progressCalls[0].query).toBe('test')
|
||||
expect(progressCalls[1].type).toBe('search_results_received')
|
||||
expect(progressCalls[1].resultCount).toBe(2)
|
||||
})
|
||||
|
||||
test('filters results by allowedDomains', async () => {
|
||||
const mixedHtml = `
|
||||
<ol id="b_results">
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://allowed.com/a">Allowed Result</a></h2>
|
||||
</li>
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://blocked.com/b">Blocked Result</a></h2>
|
||||
</li>
|
||||
</ol>
|
||||
`
|
||||
mock.module('axios', () => ({
|
||||
default: {
|
||||
get: mock(() => Promise.resolve({ data: mixedHtml })),
|
||||
isCancel: () => false,
|
||||
},
|
||||
}))
|
||||
mock.module('src/utils/http', () => ({
|
||||
getWebFetchUserAgent: () => 'TestAgent/1.0',
|
||||
}))
|
||||
|
||||
const adapter = await createAdapter()
|
||||
const results = await adapter.search('test', {
|
||||
allowedDomains: ['allowed.com'],
|
||||
})
|
||||
expect(results).toHaveLength(1)
|
||||
expect(results[0].url).toBe('https://allowed.com/a')
|
||||
})
|
||||
|
||||
test('filters results by blockedDomains', async () => {
|
||||
const mixedHtml = `
|
||||
<ol id="b_results">
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://good.com/a">Good Result</a></h2>
|
||||
</li>
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://spam.com/b">Spam Result</a></h2>
|
||||
</li>
|
||||
</ol>
|
||||
`
|
||||
mock.module('axios', () => ({
|
||||
default: {
|
||||
get: mock(() => Promise.resolve({ data: mixedHtml })),
|
||||
isCancel: () => false,
|
||||
},
|
||||
}))
|
||||
mock.module('src/utils/http', () => ({
|
||||
getWebFetchUserAgent: () => 'TestAgent/1.0',
|
||||
}))
|
||||
|
||||
const adapter = await createAdapter()
|
||||
const results = await adapter.search('test', {
|
||||
blockedDomains: ['spam.com'],
|
||||
})
|
||||
expect(results).toHaveLength(1)
|
||||
expect(results[0].url).toBe('https://good.com/a')
|
||||
})
|
||||
|
||||
// Allowing `example.com` must also admit `docs.example.com`, but not unrelated hosts.
test('filters subdomains with allowedDomains', async () => {
  const html = `
<ol id="b_results">
<li class="b_algo">
<h2><a href="https://docs.example.com/page">Subdomain Result</a></h2>
</li>
<li class="b_algo">
<h2><a href="https://other.com/page">Other Result</a></h2>
</li>
</ol>
`
  const axiosStub = {
    get: mock(() => Promise.resolve({ data: html })),
    isCancel: () => false,
  }
  mock.module('axios', () => ({ default: axiosStub }))
  mock.module('src/utils/http', () => ({
    getWebFetchUserAgent: () => 'TestAgent/1.0',
  }))

  const bing = await createAdapter()
  const hits = await bing.search('test', { allowedDomains: ['example.com'] })

  expect(hits).toHaveLength(1)
  expect(hits[0].url).toBe('https://docs.example.com/page')
})
|
||||
|
||||
// An already-aborted signal must surface as the project's AbortError.
test('throws AbortError when signal is already aborted', async () => {
  // Mimics axios: reject with a cancel-tagged error when handed an aborted signal.
  const getStub = (_url: string, config: any) => {
    if (!config?.signal?.aborted) {
      return Promise.resolve({ data: SAMPLE_HTML })
    }
    const cancelError = new Error('canceled')
    ;(cancelError as any).__CANCEL__ = true
    return Promise.reject(cancelError)
  }

  mock.module('axios', () => ({
    default: {
      get: mock(getStub),
      isCancel: (e: any) => e?.__CANCEL__ === true,
    },
  }))
  mock.module('src/utils/http', () => ({
    getWebFetchUserAgent: () => 'TestAgent/1.0',
  }))

  const bing = await createAdapter()
  const aborter = new AbortController()
  aborter.abort()

  const { AbortError } = await import('src/utils/errors')
  const pendingSearch = bing.search('test', { signal: aborter.signal })
  await expect(pendingSearch).rejects.toThrow(AbortError)
})
|
||||
|
||||
// Failures that are not cancellations must propagate to the caller unchanged.
test('re-throws non-abort axios errors', async () => {
  const networkError = new Error('Network error')
  const axiosStub = {
    get: mock(() => Promise.reject(networkError)),
    isCancel: () => false,
  }
  mock.module('axios', () => ({ default: axiosStub }))
  mock.module('src/utils/http', () => ({
    getWebFetchUserAgent: () => 'TestAgent/1.0',
  }))

  const bing = await createAdapter()
  const pendingSearch = bing.search('test', {})

  await expect(pendingSearch).rejects.toThrow('Network error')
})
|
||||
|
||||
// Spaces, `&` and `=` in the query must be percent-encoded in the requested URL.
test('encodes query parameter in URL', async () => {
  const axiosGet = mock(() => Promise.resolve({ data: SAMPLE_HTML }))
  mock.module('axios', () => ({
    default: { get: axiosGet, isCancel: () => false },
  }))
  mock.module('src/utils/http', () => ({
    getWebFetchUserAgent: () => 'TestAgent/1.0',
  }))

  const bing = await createAdapter()
  await bing.search('hello world & special=chars', {})

  // First positional argument of the first recorded call is the request URL.
  const [[requestedUrl]] = axiosGet.mock.calls as string[][]
  expect(requestedUrl).toContain('q=hello%20world%20%26%20special%3Dchars')
})
|
||||
})
|
||||
@@ -0,0 +1,106 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
import { extractBraveResults } from '../adapters/braveAdapter'
|
||||
|
||||
describe('extractBraveResults', () => {
  /** Runs the extractor over a payload whose `grounding` is the given object. */
  const extract = (
    grounding: Parameters<typeof extractBraveResults>[0]['grounding'],
  ) => extractBraveResults({ grounding })

  /** Expected shape for an input entry that carried no snippets. */
  const withoutSnippet = (title: string, url: string) => ({
    title,
    url,
    snippet: undefined,
  })

  test('extracts generic grounding results', () => {
    const generic = [
      {
        title: 'Example Title 1',
        url: 'https://example.com/page1',
        snippets: ['First result description'],
      },
      {
        title: 'Example Title 2',
        url: 'https://example.com/page2',
        snippets: ['Second result description'],
      },
    ]

    expect(extract({ generic })).toEqual([
      {
        title: 'Example Title 1',
        url: 'https://example.com/page1',
        snippet: 'First result description',
      },
      {
        title: 'Example Title 2',
        url: 'https://example.com/page2',
        snippet: 'Second result description',
      },
    ])
  })

  test('combines generic, poi, and map grounding results', () => {
    const extracted = extract({
      generic: [{ title: 'Generic', url: 'https://example.com/generic' }],
      poi: { title: 'POI', url: 'https://maps.example.com/poi' },
      map: [{ title: 'Map', url: 'https://maps.example.com/map' }],
    })

    // Expected order: generic entries, then the single poi, then map entries.
    expect(extracted).toEqual([
      withoutSnippet('Generic', 'https://example.com/generic'),
      withoutSnippet('POI', 'https://maps.example.com/poi'),
      withoutSnippet('Map', 'https://maps.example.com/map'),
    ])
  })

  test('joins multiple snippets into one summary string', () => {
    const [first] = extract({
      generic: [
        {
          title: 'Joined Snippets',
          url: 'https://example.com/joined',
          snippets: ['First snippet.', 'Second snippet.'],
        },
      ],
    })

    expect(first.snippet).toBe('First snippet. Second snippet.')
  })

  test('skips entries without a title or URL', () => {
    const extracted = extract({
      generic: [
        { title: 'Missing URL' },
        { url: 'https://example.com/missing-title' },
        { title: 'Valid', url: 'https://example.com/valid' },
      ],
    })

    expect(extracted).toEqual([
      withoutSnippet('Valid', 'https://example.com/valid'),
    ])
  })

  test('deduplicates repeated URLs across grounding buckets', () => {
    const sharedUrl = 'https://example.com/dup'
    const extracted = extract({
      generic: [{ title: 'First', url: sharedUrl }],
      poi: { title: 'Second', url: sharedUrl },
      map: [{ title: 'Third', url: sharedUrl }],
    })

    // Only the first occurrence of the URL is expected to survive.
    expect(extracted).toEqual([withoutSnippet('First', sharedUrl)])
  })

  test('returns empty array when grounding is missing', () => {
    expect(extractBraveResults({})).toEqual([])
  })

  test('returns empty array when grounding arrays are absent', () => {
    expect(extractBraveResults({ grounding: {} })).toEqual([])
  })
})
|
||||
@@ -0,0 +1,91 @@
|
||||
/**
|
||||
* Integration test for BraveSearchAdapter — hits Brave's LLM context API.
|
||||
*
|
||||
* Usage:
|
||||
* BRAVE_SEARCH_API_KEY=... bun run packages/builtin-tools/src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts
|
||||
*
|
||||
* Optional env vars:
|
||||
* BRAVE_QUERY — search query (default: "Claude AI Anthropic")
|
||||
* BRAVE_API_KEY — fallback key env var
|
||||
*/
|
||||
|
||||
if (!globalThis.MACRO) {
|
||||
globalThis.MACRO = { VERSION: '0.0.0-test', BUILD_TIME: '0' } as any
|
||||
}
|
||||
|
||||
import { BraveSearchAdapter } from '../adapters/braveAdapter'
|
||||
|
||||
const query = process.env.BRAVE_QUERY || 'Claude AI Anthropic'
|
||||
|
||||
/**
 * Runs one live search through BraveSearchAdapter, prints the results, and
 * checks that each result has a string title and an http(s) URL.
 *
 * Exits the process with status 1 when no API key is configured, when the
 * search returns nothing, or when any result fails the structural check.
 */
async function main() {
  const hasApiKey =
    Boolean(process.env.BRAVE_SEARCH_API_KEY) ||
    Boolean(process.env.BRAVE_API_KEY)
  if (!hasApiKey) {
    console.error(
      '❌ Missing Brave API key. Set BRAVE_SEARCH_API_KEY or BRAVE_API_KEY.',
    )
    process.exit(1)
  }

  console.log(`\n🔍 Searching Brave for: "${query}"\n`)

  const adapter = new BraveSearchAdapter()
  const startedAt = Date.now()

  const results = await adapter.search(query, {
    onProgress: progress => {
      switch (progress.type) {
        case 'query_update':
          console.log(` → Query sent: ${progress.query}`)
          break
        case 'search_results_received':
          console.log(` → Received ${progress.resultCount} results`)
          break
      }
    },
  })

  const elapsed = Date.now() - startedAt
  console.log(`\n✅ Done in ${elapsed}ms — ${results.length} result(s)\n`)

  if (results.length === 0) {
    console.log('⚠️ No results returned. Possible causes:')
    console.log(' - Brave returned no grounding data for the query')
    console.log(' - Network/firewall issue')
    console.log(' - Invalid or rate-limited Brave API key\n')
    process.exit(1)
  }

  // Pass 1: human-readable listing (snippets are flattened and capped at 150 chars).
  results.forEach((result, index) => {
    console.log(` ${index + 1}. ${result.title}`)
    console.log(` ${result.url}`)
    if (result.snippet) {
      const snippet = result.snippet.replace(/\n/g, ' ')
      const ellipsis = snippet.length > 150 ? '…' : ''
      console.log(` ${snippet.slice(0, 150)}${ellipsis}`)
    }
    console.log()
  })

  // Pass 2: structural validation, reported after the full listing.
  let failures = 0
  results.forEach((result, index) => {
    const position = index + 1
    if (!result.title || typeof result.title !== 'string') {
      console.error(`❌ Result ${position}: missing or non-string title`, result)
      failures++
    }
    if (!result.url || !result.url.startsWith('http')) {
      console.error(`❌ Result ${position}: missing or non-http url`, result)
      failures++
    }
  })

  if (failures > 0) {
    process.exit(1)
  }
  console.log('✅ All results have valid structure.\n')
}
|
||||
|
||||
if (import.meta.main) {
|
||||
main().catch(e => {
|
||||
console.error('❌ Fatal error:', e)
|
||||
process.exit(1)
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
|
||||
|
||||
const originalBraveSearchApiKey = process.env.BRAVE_SEARCH_API_KEY
|
||||
const originalBraveApiKey = process.env.BRAVE_API_KEY
|
||||
|
||||
describe('BraveSearchAdapter.search', () => {
  const SAMPLE_RESPONSE = {
    grounding: {
      generic: [
        {
          title: 'Result One',
          url: 'https://example.com/result1',
          snippets: ['Snippet one'],
        },
        {
          title: 'Result Two',
          url: 'https://example.com/result2',
          snippets: ['Snippet two'],
        },
      ],
    },
  }

  // The adapter is imported dynamically, after the test has installed its axios mock.
  const createAdapter = async () => {
    const braveModule = await import('../adapters/braveAdapter')
    return new braveModule.BraveSearchAdapter()
  }

  /** Replaces the `axios` module with a stub exposing the given `get` / `isCancel`. */
  const installAxios = (
    get: (...args: any[]) => unknown,
    isCancel: (e: any) => boolean = () => false,
  ) => {
    mock.module('axios', () => ({ default: { get, isCancel } }))
  }

  /** Installs an axios stub whose `get` resolves with `data`; returns the `get` mock. */
  const respondWith = (data: unknown) => {
    const get = mock(() => Promise.resolve({ data }))
    installAxios(get)
    return get
  }

  /** Puts an environment variable back to the value captured at module load. */
  const restoreEnv = (name: string, original: string | undefined) => {
    if (original === undefined) {
      delete process.env[name]
    } else {
      process.env[name] = original
    }
  }

  beforeEach(() => {
    process.env.BRAVE_SEARCH_API_KEY = 'test-brave-key'
    delete process.env.BRAVE_API_KEY
  })

  afterEach(() => {
    mock.restore()
    restoreEnv('BRAVE_SEARCH_API_KEY', originalBraveSearchApiKey)
    restoreEnv('BRAVE_API_KEY', originalBraveApiKey)
  })

  test('returns parsed results from Brave LLM context payload', async () => {
    respondWith(SAMPLE_RESPONSE)

    const adapter = await createAdapter()
    const hits = await adapter.search('test query', {})

    expect(hits).toHaveLength(2)
    expect(hits[0]).toEqual({
      title: 'Result One',
      url: 'https://example.com/result1',
      snippet: 'Snippet one',
    })
    expect(hits[1].title).toBe('Result Two')
  })

  test('calls onProgress with query_update and search_results_received', async () => {
    respondWith(SAMPLE_RESPONSE)

    const progressCalls: any[] = []
    const adapter = await createAdapter()
    await adapter.search('test', {
      onProgress: (p: any) => progressCalls.push(p),
    })

    expect(progressCalls).toHaveLength(2)
    const [queryUpdate, resultsReceived] = progressCalls
    expect(queryUpdate).toEqual({
      type: 'query_update',
      query: 'test',
    })
    expect(resultsReceived).toEqual({
      type: 'search_results_received',
      resultCount: 2,
      query: 'test',
    })
  })

  test('filters results by allowedDomains', async () => {
    respondWith({
      grounding: {
        generic: [
          { title: 'Allowed', url: 'https://allowed.com/a' },
          { title: 'Blocked', url: 'https://blocked.com/b' },
        ],
      },
    })

    const adapter = await createAdapter()
    const hits = await adapter.search('test', {
      allowedDomains: ['allowed.com'],
    })

    expect(hits).toHaveLength(1)
    expect(hits[0].url).toBe('https://allowed.com/a')
  })

  test('filters results by blockedDomains', async () => {
    respondWith({
      grounding: {
        generic: [
          { title: 'Good', url: 'https://good.com/a' },
          { title: 'Spam', url: 'https://spam.com/b' },
        ],
      },
    })

    const adapter = await createAdapter()
    const hits = await adapter.search('test', {
      blockedDomains: ['spam.com'],
    })

    expect(hits).toHaveLength(1)
    expect(hits[0].url).toBe('https://good.com/a')
  })

  test('filters subdomains with allowedDomains', async () => {
    respondWith({
      grounding: {
        generic: [
          { title: 'Subdomain', url: 'https://docs.example.com/page' },
          { title: 'Other', url: 'https://other.com/page' },
        ],
      },
    })

    const adapter = await createAdapter()
    const hits = await adapter.search('test', {
      allowedDomains: ['example.com'],
    })

    expect(hits).toHaveLength(1)
    expect(hits[0].url).toBe('https://docs.example.com/page')
  })

  test('throws AbortError when signal is already aborted', async () => {
    // Mimics axios: reject with a cancel-tagged error when handed an aborted signal.
    const get = mock((_url: string, config: any) => {
      if (!config?.signal?.aborted) {
        return Promise.resolve({ data: SAMPLE_RESPONSE })
      }
      const cancelError = new Error('canceled')
      ;(cancelError as any).__CANCEL__ = true
      return Promise.reject(cancelError)
    })
    installAxios(get, (e: any) => e?.__CANCEL__ === true)

    const adapter = await createAdapter()
    const aborter = new AbortController()
    aborter.abort()

    const { AbortError } = await import('src/utils/errors')
    await expect(
      adapter.search('test', { signal: aborter.signal }),
    ).rejects.toThrow(AbortError)
  })

  test('re-throws non-abort axios errors', async () => {
    const networkError = new Error('Network error')
    installAxios(mock(() => Promise.reject(networkError)))

    const adapter = await createAdapter()
    await expect(adapter.search('test', {})).rejects.toThrow('Network error')
  })

  test('sends the documented HTTPS endpoint with query params and auth header', async () => {
    const axiosGet = respondWith(SAMPLE_RESPONSE)

    const adapter = await createAdapter()
    await adapter.search('hello world & special=chars', {})

    expect(axiosGet.mock.calls).toHaveLength(1)
    const [endpoint, requestConfig] = (axiosGet.mock.calls as any[][])[0]
    expect(endpoint).toBe('https://api.search.brave.com/res/v1/llm/context')
    expect(requestConfig).toMatchObject({
      params: { q: 'hello world & special=chars' },
      headers: {
        Accept: 'application/json',
        'X-Subscription-Token': 'test-brave-key',
      },
    })
  })

  test('accepts BRAVE_API_KEY as a fallback env var', async () => {
    delete process.env.BRAVE_SEARCH_API_KEY
    process.env.BRAVE_API_KEY = 'fallback-key'

    const axiosGet = respondWith(SAMPLE_RESPONSE)

    const adapter = await createAdapter()
    await adapter.search('test', {})

    const requestConfig = (axiosGet.mock.calls as any[][])[0][1]
    expect(requestConfig.headers).toMatchObject({
      'X-Subscription-Token': 'fallback-key',
    })
  })

  test('throws when no Brave API key is configured', async () => {
    delete process.env.BRAVE_SEARCH_API_KEY
    delete process.env.BRAVE_API_KEY

    respondWith(SAMPLE_RESPONSE)

    const adapter = await createAdapter()
    await expect(adapter.search('test', {})).rejects.toThrow(
      'BraveSearchAdapter requires BRAVE_SEARCH_API_KEY or BRAVE_API_KEY',
    )
  })
})
|
||||
@@ -0,0 +1,173 @@
|
||||
/**
|
||||
* API-based search adapter — delegates to Anthropic's server-side
|
||||
* web_search_20250305 tool via a secondary API call.
|
||||
*/
|
||||
|
||||
import type {
|
||||
BetaContentBlock,
|
||||
BetaWebSearchTool20250305,
|
||||
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
|
||||
import { queryModelWithStreaming } from 'src/services/api/claude.js'
|
||||
import { createUserMessage } from 'src/utils/messages.js'
|
||||
import { getMainLoopModel, getSmallFastModel } from 'src/utils/model/model.js'
|
||||
import { jsonParse } from 'src/utils/slowOperations.js'
|
||||
import { asSystemPrompt } from 'src/utils/systemPromptType.js'
|
||||
import type { SearchResult, SearchOptions, WebSearchAdapter } from './types.js'
|
||||
|
||||
/**
 * Builds the `web_search_20250305` server-tool schema, mapping the adapter's
 * camelCase domain lists onto the snake_case fields and capping uses at 8.
 */
function makeToolSchema(input: { allowedDomains?: string[]; blockedDomains?: string[] }): BetaWebSearchTool20250305 {
  const { allowedDomains: allowed_domains, blockedDomains: blocked_domains } = input
  return {
    type: 'web_search_20250305',
    name: 'web_search',
    allowed_domains,
    blocked_domains,
    max_uses: 8,
  }
}
|
||||
|
||||
export class ApiSearchAdapter implements WebSearchAdapter {
  /**
   * Runs a web search by issuing a secondary model query with the
   * `web_search_20250305` server tool attached via `extraToolSchemas`.
   *
   * While the response streams, `onProgress` receives a `query_update` each
   * time a (new) query string can be read from a server tool-use block's
   * partial JSON input, and a `search_results_received` when a
   * `web_search_tool_result` block starts.
   *
   * @param query - Search text embedded in the user message sent to the model.
   * @param options - Abort signal, progress callback, and domain allow/block
   *   lists (the lists are forwarded in the tool schema, not applied locally).
   * @returns Title/URL pairs collected from every `web_search_tool_result`
   *   block found in the assistant messages.
   */
  async search(
    query: string,
    options: SearchOptions,
  ): Promise<SearchResult[]> {
    const { signal, onProgress, allowedDomains, blockedDomains } = options

    const userMessage = createUserMessage({
      content: 'Perform a web search for the query: ' + query,
    })
    const toolSchema = makeToolSchema({ allowedDomains, blockedDomains })

    // Feature flag: when on, use the small/fast model with thinking disabled
    // and force the model to call the web_search tool.
    const useHaiku = getFeatureValue_CACHED_MAY_BE_STALE('tengu_plum_vx3', false)

    const queryStream = queryModelWithStreaming({
      messages: [userMessage],
      systemPrompt: asSystemPrompt([
        'You are an assistant for performing a web search tool use',
      ]),
      thinkingConfig: useHaiku
        ? { type: 'disabled' as const }
        : { type: 'enabled' as const, budgetTokens: 10000 },
      tools: [],
      signal: signal ?? new AbortController().signal,
      options: {
        getToolPermissionContext: async () => ({
          mode: 'default' as const,
          additionalWorkingDirectories: new Map(),
          alwaysAllowRules: {},
          alwaysDenyRules: {},
          alwaysAskRules: {},
          isBypassPermissionsModeAvailable: false,
        }),
        model: useHaiku ? getSmallFastModel() : getMainLoopModel(),
        toolChoice: useHaiku ? { type: 'tool' as const, name: 'web_search' } : undefined,
        isNonInteractiveSession: false,
        hasAppendSystemPrompt: false,
        extraToolSchemas: [toolSchema],
        querySource: 'web_search_tool' as const,
        agents: [],
        mcpTools: [],
        agentId: undefined,
        effortValue: undefined,
      },
    })

    const allContentBlocks: BetaContentBlock[] = []
    // Id and accumulated partial-JSON input of the most recently started server tool use.
    let currentToolUseId: string | null = null
    let currentToolUseJson = ''
    // Last query string reported per tool-use id, used to suppress duplicate updates.
    const toolUseQueries = new Map<string, string>()

    for await (const event of queryStream) {
      if (event.type === 'assistant') {
        const msg = event as { message: { content: BetaContentBlock[] } }
        allContentBlocks.push(...msg.message.content)
        continue
      }

      if (event.type !== 'stream_event') {
        continue
      }

      const streamEvt = event as {
        event?: {
          type: string
          content_block?: { type: string; id?: string; tool_use_id?: string; content?: unknown; [key: string]: unknown }
          delta?: { type: string; partial_json?: string; [key: string]: unknown }
          [key: string]: unknown
        }
      }
      const inner = streamEvt.event

      if (inner?.type === 'content_block_start') {
        const contentBlock = inner.content_block
        if (contentBlock?.type === 'server_tool_use') {
          // A new tool call begins: start accumulating its JSON input from scratch.
          currentToolUseId = contentBlock.id as string
          currentToolUseJson = ''
        } else if (contentBlock?.type === 'web_search_tool_result') {
          const toolUseId = contentBlock.tool_use_id as string
          // Fall back to the caller's query if no query was captured for this tool use.
          const actualQuery = toolUseQueries.get(toolUseId) || query
          const content = contentBlock.content
          onProgress?.({
            type: 'search_results_received',
            resultCount: Array.isArray(content) ? content.length : 0,
            query: actualQuery,
          })
        }
        continue
      }

      if (currentToolUseId && inner?.type === 'content_block_delta') {
        const delta = inner.delta
        if (delta?.type === 'input_json_delta' && delta.partial_json) {
          currentToolUseJson += delta.partial_json
          try {
            // The input JSON is incomplete while streaming, so pull out the
            // "query" string literal with a regex instead of parsing the whole input.
            const queryMatch = currentToolUseJson.match(
              /"query"\s*:\s*"((?:[^"\\]|\\.)*)"/,
            )
            if (queryMatch && queryMatch[1]) {
              // Re-wrap in quotes so JSON escape sequences are decoded.
              const parsedQuery = jsonParse('"' + queryMatch[1] + '"')
              // An unseen id yields `undefined` here, which never equals a parsed string.
              if (toolUseQueries.get(currentToolUseId) !== parsedQuery) {
                toolUseQueries.set(currentToolUseId, parsedQuery)
                onProgress?.({
                  type: 'query_update',
                  query: parsedQuery,
                })
              }
            }
          } catch {
            // Ignore parsing errors for partial JSON
          }
        }
      }
    }

    // Extract SearchResult[] from content blocks
    return extractSearchResults(allContentBlocks)
  }
}
|
||||
|
||||
/**
 * Flattens every `web_search_tool_result` block whose content is an array into
 * `{ title, url }` pairs, preserving block and entry order. Blocks of other
 * types, or result blocks whose content is not an array, contribute nothing.
 */
function extractSearchResults(
  blocks: BetaContentBlock[],
): SearchResult[] {
  type RawHit = { title: string; url: string; page_age?: string; type?: string }

  return blocks.flatMap(block => {
    const isResultList =
      block.type === 'web_search_tool_result' && Array.isArray(block.content)
    if (!isResultList) {
      return []
    }
    return (block.content as RawHit[]).map(hit => ({
      title: hit.title,
      url: hit.url,
    }))
  })
}
|
||||
@@ -0,0 +1,204 @@
|
||||
/**
|
||||
* Bing-based search adapter — fetches Bing search pages and extracts
|
||||
* search results using regex pattern matching on raw HTML.
|
||||
*/
|
||||
|
||||
import axios from 'axios'
|
||||
import he from 'he'
|
||||
import { AbortError } from 'src/utils/errors.js'
|
||||
import type { SearchResult, SearchOptions, WebSearchAdapter } from './types.js'
|
||||
|
||||
const FETCH_TIMEOUT_MS = 30_000
|
||||
|
||||
/**
|
||||
* Browser-like headers to avoid Bing's anti-bot JS-rendered response.
|
||||
* These mimic Microsoft Edge on macOS to get full HTML search results.
|
||||
*/
|
||||
const BROWSER_HEADERS = {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
||||
Accept:
|
||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'no-cache',
|
||||
Pragma: 'no-cache',
|
||||
'Sec-Ch-Ua': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||
'Sec-Ch-Ua-Mobile': '?0',
|
||||
'Sec-Ch-Ua-Platform': '"macOS"',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'none',
|
||||
'Sec-Fetch-User': '?1',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
} as const
|
||||
|
||||
export class BingSearchAdapter implements WebSearchAdapter {
  /**
   * Fetches a Bing results page for `query`, extracts the organic results from
   * the HTML, and applies the allow/block domain lists on the client side.
   *
   * `onProgress` receives `query_update` before the request and
   * `search_results_received` (with the post-filter count) afterwards.
   *
   * @param query - Search text; URL-encoded into the `q` parameter.
   * @param options - Abort signal, progress callback, and domain filters. A
   *   domain entry matches the host itself and any of its subdomains.
   * @returns The extracted results that pass the domain filters.
   * @throws AbortError when `options.signal` is or becomes aborted, or when
   *   axios reports the request as cancelled. Other request errors are re-thrown.
   */
  async search(
    query: string,
    options: SearchOptions,
  ): Promise<SearchResult[]> {
    const { signal, onProgress, allowedDomains, blockedDomains } = options

    if (signal?.aborted) {
      throw new AbortError()
    }

    onProgress?.({ type: 'query_update', query })

    const url = `https://www.bing.com/search?q=${encodeURIComponent(query)}&setmkt=en-US`

    // Forward the caller's abort to a request-scoped controller. The listener is
    // kept in a variable so it can be detached once the request settles;
    // otherwise each search would leave a listener behind on a long-lived signal.
    const abortController = new AbortController()
    const forwardAbort = () => abortController.abort()
    signal?.addEventListener('abort', forwardAbort, { once: true })

    let html: string
    try {
      const response = await axios.get(url, {
        signal: abortController.signal,
        timeout: FETCH_TIMEOUT_MS,
        responseType: 'text',
        headers: BROWSER_HEADERS,
      })
      html = response.data
    } catch (e) {
      if (axios.isCancel(e) || abortController.signal.aborted) {
        throw new AbortError()
      }
      throw e
    } finally {
      signal?.removeEventListener('abort', forwardAbort)
    }

    if (abortController.signal.aborted) {
      throw new AbortError()
    }

    const rawResults = extractBingResults(html)

    // URL hostnames are lowercased by the parser, so lowercase the configured
    // domains as well before comparing.
    const hostMatchesAny = (hostname: string, domains: readonly string[]) =>
      domains.some(d => {
        const domain = d.toLowerCase()
        return hostname === domain || hostname.endsWith('.' + domain)
      })

    // Client-side domain filtering
    const results = rawResults.filter(r => {
      if (!r.url) return false

      let hostname: string
      try {
        hostname = new URL(r.url).hostname
      } catch {
        // Unparseable URLs cannot be checked against the lists, so drop them.
        return false
      }

      if (allowedDomains?.length && !hostMatchesAny(hostname, allowedDomains)) {
        return false
      }
      if (blockedDomains?.length && hostMatchesAny(hostname, blockedDomains)) {
        return false
      }
      return true
    })

    onProgress?.({
      type: 'search_results_received',
      resultCount: results.length,
      query,
    })

    return results
  }
}
|
||||
|
||||
/**
 * Pulls organic search results out of a Bing results page.
 *
 * Each `<li class="b_algo">` block yields at most one result, taken from the
 * first `<h2><a href="...">` link inside it. Blocks without such a link, or
 * whose link `resolveBingUrl` rejects, are skipped.
 */
export function extractBingResults(html: string): SearchResult[] {
  const algoBlockRegex = /<li\s+class="b_algo"[^>]*>([\s\S]*?)<\/li>/gi
  // Primary link: <h2><a href="...">...</a></h2>
  const h2LinkRegex = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i

  const collected: SearchResult[] = []

  for (const [, block] of html.matchAll(algoBlockRegex)) {
    const link = h2LinkRegex.exec(block)
    if (link === null) {
      continue
    }
    const [, href, titleHtml] = link

    // Bing redirect URLs (bing.com/ck/a?...&u=a1aHR0cHM6Ly9...) are resolved to
    // their target; Bing-internal and relative links come back undefined.
    const url = resolveBingUrl(decodeHtmlEntities(href))
    if (!url) {
      continue
    }

    const titleText = titleHtml.replace(/<[^>]+>/g, '').trim()

    collected.push({
      title: decodeHtmlEntities(titleText),
      url,
      // Snippet lookup order: b_lineclamp → b_caption <p> → b_caption fallback
      snippet: extractSnippet(block),
    })
  }

  return collected
}
|
||||
|
||||
/**
 * Finds the snippet text inside one `b_algo` block.
 *
 * Tries, in order: a `<p class="b_lineclamp...">`, then the first `<p>` inside
 * a `b_caption` div, then the whole `b_caption` div. The first two return
 * whatever text they contain (possibly empty); the div fallback is used only
 * when it has non-empty text. Tags are stripped and entities decoded.
 */
function extractSnippet(block: string): string | undefined {
  const stripTags = (fragment: string) => fragment.replace(/<[^>]+>/g, '').trim()

  const paragraphPatterns = [
    // 1. <p class="b_lineclamp...">
    /<p[^>]*class="b_lineclamp[^"]*"[^>]*>([\s\S]*?)<\/p>/i,
    // 2. <p> inside b_caption
    /<div[^>]*class="b_caption[^"]*"[^>]*>[\s\S]*?<p[^>]*>([\s\S]*?)<\/p>/i,
  ]
  for (const pattern of paragraphPatterns) {
    const hit = pattern.exec(block)
    if (hit) {
      return decodeHtmlEntities(stripTags(hit[1]))
    }
  }

  // 3. Fallback: any text inside b_caption <div>
  const caption = /<div[^>]*class="b_caption[^"]*"[^>]*>([\s\S]*?)<\/div>/i.exec(block)
  const captionText = caption ? stripTags(caption[1]) : ''
  return captionText ? decodeHtmlEntities(captionText) : undefined
}
|
||||
|
||||
/** Decodes HTML entities in a string; a direct alias for `he.decode` from the `he` package. */
export const decodeHtmlEntities = he.decode
|
||||
|
||||
/**
 * Resolve a link taken from a Bing results page to the actual target URL.
 *
 * Bing wraps outbound links as redirects such as
 * `https://www.bing.com/ck/a?...&u=a1aHR0cHM6Ly9leGFtcGxlLmNvbQ...`, where the
 * `u` query parameter is a two-character prefix (a1 = https, a0 = http)
 * followed by the base64url-encoded target.
 *
 * @param rawUrl - The href value, with HTML entities already decoded.
 * @returns The decoded redirect target, or `rawUrl` itself when it is a direct
 *   external link. Returns `undefined` for relative/anchor links and for links
 *   hosted on bing.com (or a subdomain) that carry no decodable target.
 */
export function resolveBingUrl(rawUrl: string): string | undefined {
  // Skip relative / anchor links
  if (rawUrl.startsWith('/') || rawUrl.startsWith('#')) return undefined

  // Try to extract the `u` parameter from Bing redirect URLs
  const uMatch = rawUrl.match(/[?&]u=([a-zA-Z0-9+/_=-]+)/)
  if (uMatch) {
    const target = decodeBingRedirectTarget(uMatch[1])
    if (target) return target
  }

  // Direct external URL (not a Bing-internal page)
  return isBingHosted(rawUrl) ? undefined : rawUrl
}

/** Decodes the value of a Bing `u` parameter; undefined when it does not yield an http(s) URL. */
function decodeBingRedirectTarget(encoded: string): string | undefined {
  // Two prefix characters plus at least one payload character.
  if (encoded.length < 3) return undefined

  try {
    // Map the base64url alphabet onto standard base64. Padding is not restored
    // here; the decoder is given the unpadded string as-is.
    const b64 = encoded.slice(2).replace(/-/g, '+').replace(/_/g, '/')
    const decoded = Buffer.from(b64, 'base64').toString('utf-8')
    return decoded.startsWith('http') ? decoded : undefined
  } catch {
    // Not a valid base64 redirect
    return undefined
  }
}

/** True when the URL's host is bing.com or one of its subdomains. */
function isBingHosted(rawUrl: string): boolean {
  let hostname: string
  try {
    hostname = new URL(rawUrl).hostname.toLowerCase()
  } catch {
    // Not an absolute URL, so the host cannot be isolated; fall back to the
    // coarse substring check.
    return rawUrl.includes('bing.com')
  }
  // Compare the host, not the whole string: an external URL that merely
  // mentions "bing.com" in its path or inside a longer host name must be kept.
  return hostname === 'bing.com' || hostname.endsWith('.bing.com')
}
|
||||
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Brave-based search adapter — fetches Brave's LLM context API and maps the
|
||||
* grounding payload into SearchResult objects.
|
||||
*/
|
||||
|
||||
import axios from 'axios'
|
||||
import { AbortError } from 'src/utils/errors.js'
|
||||
import type { SearchResult, SearchOptions, WebSearchAdapter } from './types.js'
|
||||
|
||||
const FETCH_TIMEOUT_MS = 30_000
|
||||
const BRAVE_LLM_CONTEXT_URL = 'https://api.search.brave.com/res/v1/llm/context'
|
||||
const BRAVE_API_KEY_ENV_VARS = ['BRAVE_SEARCH_API_KEY', 'BRAVE_API_KEY'] as const
|
||||
|
||||
/** One grounding entry as read from Brave's response; every field may be absent. */
interface BraveGroundingResult {
  url?: string
  title?: string
  snippets?: string[]
}

/** The subset of the Brave LLM-context response body that this adapter reads. */
interface BraveSearchResponse {
  grounding?: {
    generic?: BraveGroundingResult[]
    /** A single entry rather than a list; may be null. */
    poi?: BraveGroundingResult | null
    map?: BraveGroundingResult[]
  }
}
|
||||
|
||||
export class BraveSearchAdapter implements WebSearchAdapter {
  /**
   * Queries Brave's LLM context endpoint for `query`, maps the grounding
   * payload to search results, and applies the allow/block domain lists on the
   * client side.
   *
   * `onProgress` receives `query_update` before the request and
   * `search_results_received` (with the post-filter count) afterwards.
   *
   * @param query - Search text, sent as the `q` request parameter.
   * @param options - Abort signal, progress callback, and domain filters. A
   *   domain entry matches the host itself and any of its subdomains.
   * @returns The extracted results that pass the domain filters.
   * @throws AbortError when `options.signal` is or becomes aborted, or when
   *   axios reports the request as cancelled. Other errors raised while
   *   preparing or sending the request (including from `getBraveApiKey()`)
   *   are re-thrown.
   */
  async search(
    query: string,
    options: SearchOptions,
  ): Promise<SearchResult[]> {
    const { signal, onProgress, allowedDomains, blockedDomains } = options

    if (signal?.aborted) {
      throw new AbortError()
    }

    onProgress?.({ type: 'query_update', query })

    // Forward the caller's abort to a request-scoped controller. The listener is
    // kept in a variable so it can be detached once the request settles;
    // otherwise each search would leave a listener behind on a long-lived signal.
    const abortController = new AbortController()
    const forwardAbort = () => abortController.abort()
    signal?.addEventListener('abort', forwardAbort, { once: true })

    let payload: BraveSearchResponse
    try {
      const response = await axios.get<BraveSearchResponse>(
        BRAVE_LLM_CONTEXT_URL,
        {
          signal: abortController.signal,
          timeout: FETCH_TIMEOUT_MS,
          responseType: 'json',
          headers: {
            Accept: 'application/json',
            'X-Subscription-Token': getBraveApiKey(),
          },
          params: { q: query },
        },
      )
      payload = response.data
    } catch (e) {
      if (axios.isCancel(e) || abortController.signal.aborted) {
        throw new AbortError()
      }
      throw e
    } finally {
      signal?.removeEventListener('abort', forwardAbort)
    }

    if (abortController.signal.aborted) {
      throw new AbortError()
    }

    const rawResults = extractBraveResults(payload)

    // URL hostnames are lowercased by the parser, so lowercase the configured
    // domains as well before comparing.
    const hostMatchesAny = (hostname: string, domains: readonly string[]) =>
      domains.some(d => {
        const domain = d.toLowerCase()
        return hostname === domain || hostname.endsWith('.' + domain)
      })

    const results = rawResults.filter(r => {
      let hostname: string
      try {
        hostname = new URL(r.url).hostname
      } catch {
        // Unparseable URLs cannot be checked against the lists, so drop them.
        return false
      }

      if (allowedDomains?.length && !hostMatchesAny(hostname, allowedDomains)) {
        return false
      }
      if (blockedDomains?.length && hostMatchesAny(hostname, blockedDomains)) {
        return false
      }
      return true
    })

    onProgress?.({
      type: 'search_results_received',
      resultCount: results.length,
      query,
    })

    return results
  }
}
|
||||
|
||||
/**
 * Flattens the grounding section of a Brave response into SearchResult objects.
 *
 * Entries are taken in the order `generic`, `poi`, `map`. An entry is skipped
 * when it has no `url`, no `title`, or a `url` that was already emitted.
 *
 * @param payload - Response body returned by the Brave request.
 * @returns De-duplicated results in first-seen order; empty when the payload
 *   has no `grounding` section.
 */
export function extractBraveResults(
  payload: BraveSearchResponse,
): SearchResult[] {
  const { grounding } = payload
  if (!grounding) {
    return []
  }

  const genericEntries = Array.isArray(grounding.generic)
    ? grounding.generic
    : []
  const poiEntries = grounding.poi ? [grounding.poi] : []
  const mapEntries = Array.isArray(grounding.map) ? grounding.map : []

  // A Map keeps insertion order, so it serves as both the "seen" set and the
  // ordered output list.
  const byUrl = new Map<string, SearchResult>()
  for (const candidate of [...genericEntries, ...poiEntries, ...mapEntries]) {
    const url = candidate?.url
    const title = candidate?.title
    if (!url || !title || byUrl.has(url)) {
      continue
    }
    byUrl.set(url, {
      title,
      url,
      snippet: normalizeSnippet(candidate.snippets),
    })
  }

  return [...byUrl.values()]
}
|
||||
|
||||
/**
 * Collapses a list of snippet fragments into one space-separated string.
 *
 * @param snippets - Raw fragments; anything that is not an array yields `undefined`.
 * @returns The trimmed, non-empty fragments joined by a single space, or
 *   `undefined` when no fragment has content.
 */
function normalizeSnippet(snippets: string[] | undefined): string | undefined {
  if (!Array.isArray(snippets)) return undefined

  const parts: string[] = []
  for (const fragment of snippets) {
    const text = fragment.trim()
    if (text !== '') {
      parts.push(text)
    }
  }

  return parts.length > 0 ? parts.join(' ') : undefined
}
|
||||
|
||||
/**
 * Looks up the Brave API key from the environment.
 *
 * @returns The first non-empty, trimmed value among the variables listed in
 *   BRAVE_API_KEY_ENV_VARS, checked in list order.
 * @throws Error when none of those variables holds a non-empty value.
 */
function getBraveApiKey(): string {
  const apiKey = BRAVE_API_KEY_ENV_VARS.map(name =>
    process.env[name]?.trim(),
  ).find(candidate => Boolean(candidate))

  if (!apiKey) {
    throw new Error(
      'BraveSearchAdapter requires BRAVE_SEARCH_API_KEY or BRAVE_API_KEY',
    )
  }

  return apiKey
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* Search adapter factory — selects the appropriate backend by checking
|
||||
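 * whether the API base URL points to Anthropic's official endpoint, unless the WEB_SEARCH_ADAPTER environment variable ('api' | 'bing' | 'brave') overrides the choice.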
|
||||
*/
|
||||
|
||||
import { isFirstPartyAnthropicBaseUrl } from 'src/utils/model/providers.js'
|
||||
import { ApiSearchAdapter } from './apiAdapter.js'
|
||||
import { BingSearchAdapter } from './bingAdapter.js'
|
||||
import { BraveSearchAdapter } from './braveAdapter.js'
|
||||
import type { WebSearchAdapter } from './types.js'
|
||||
|
||||
export type {
|
||||
SearchResult,
|
||||
SearchOptions,
|
||||
SearchProgress,
|
||||
WebSearchAdapter,
|
||||
} from './types.js'
|
||||
|
||||
// Most recently created adapter and the backend key it was created for.
let cachedAdapter: WebSearchAdapter | null = null
let cachedAdapterKey: 'api' | 'bing' | 'brave' | null = null

/**
 * Returns the search adapter for the currently selected backend.
 *
 * Selection: a WEB_SEARCH_ADAPTER value of 'api', 'bing' or 'brave' wins;
 * otherwise 'api' is used when isFirstPartyAnthropicBaseUrl() is true and
 * 'bing' when it is not. The previous adapter is reused while the selected
 * key stays the same, and replaced when it changes.
 */
export function createAdapter(): WebSearchAdapter {
  const requested = process.env.WEB_SEARCH_ADAPTER

  let adapterKey: 'api' | 'bing' | 'brave'
  if (requested === 'api' || requested === 'bing' || requested === 'brave') {
    adapterKey = requested
  } else if (isFirstPartyAnthropicBaseUrl()) {
    adapterKey = 'api'
  } else {
    adapterKey = 'bing'
  }

  if (cachedAdapter !== null && cachedAdapterKey === adapterKey) {
    return cachedAdapter
  }

  let adapter: WebSearchAdapter
  switch (adapterKey) {
    case 'api':
      adapter = new ApiSearchAdapter()
      break
    case 'brave':
      adapter = new BraveSearchAdapter()
      break
    default:
      adapter = new BingSearchAdapter()
  }

  cachedAdapter = adapter
  cachedAdapterKey = adapterKey
  return adapter
}
|
||||
@@ -0,0 +1,22 @@
|
||||
/** A single search hit returned by a WebSearchAdapter. */
export interface SearchResult {
  /** Address of the hit. */
  url: string
  /** Title of the hit. */
  title: string
  /** Optional text excerpt for the hit. */
  snippet?: string
}
|
||||
|
||||
/** Per-call options accepted by WebSearchAdapter.search; every field is optional. */
export interface SearchOptions {
  /** Signal the caller can use to cancel the search. */
  signal?: AbortSignal
  /** Callback that receives progress events from the adapter. */
  onProgress?: (progress: SearchProgress) => void
  /** Domains to restrict results to. */
  allowedDomains?: Array<string>
  /** Domains to exclude from results. */
  blockedDomains?: Array<string>
}
|
||||
|
||||
/** Progress event passed to SearchOptions.onProgress. */
export interface SearchProgress {
  /** Kind of event being reported. */
  type: 'query_update' | 'search_results_received'
  /** Number of results, when the event carries one. */
  resultCount?: number
  /** Query text the event refers to, when provided. */
  query?: string
}
|
||||
|
||||
/** Contract implemented by every search backend. */
export interface WebSearchAdapter {
  /**
   * Runs one search.
   *
   * @param query - Search text.
   * @param options - Per-call options (domain lists, abort signal, progress callback).
   * @returns A promise for the list of results.
   */
  search(
    query: string,
    options: SearchOptions,
  ): Promise<Array<SearchResult>>
}
|
||||
34
packages/builtin-tools/src/tools/WebSearchTool/prompt.ts
Normal file
34
packages/builtin-tools/src/tools/WebSearchTool/prompt.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import { getLocalMonthYear } from 'src/constants/common.js'
|
||||
|
||||
// Exported name string for this tool ('WebSearch'); presumably used to register/identify the tool — confirm against callers.
export const WEB_SEARCH_TOOL_NAME = 'WebSearch'
|
||||
|
||||
/**
 * Builds the prompt text for the web search tool.
 *
 * The text is a single template literal; the only dynamic part is the value
 * returned by getLocalMonthYear(), which is inserted into the
 * "The current month is ..." sentence.
 *
 * @returns The prompt string.
 */
export function getWebSearchPrompt(): string {
|
||||
// Evaluated on every call, so the prompt reflects the value at call time.
const currentMonthYear = getLocalMonthYear()
|
||||
return `
|
||||
- Allows Claude to search the web and use the results to inform responses
|
||||
- Provides up-to-date information for current events and recent data
|
||||
- Returns search result information formatted as search result blocks, including links as markdown hyperlinks
|
||||
- Use this tool for accessing information beyond Claude's knowledge cutoff
|
||||
- Searches are performed automatically within a single API call
|
||||
|
||||
CRITICAL REQUIREMENT - You MUST follow this:
|
||||
- After answering the user's question, you MUST include a "Sources:" section at the end of your response
|
||||
- In the Sources section, list all relevant URLs from the search results as markdown hyperlinks: [Title](URL)
|
||||
- This is MANDATORY - never skip including sources in your response
|
||||
- Example format:
|
||||
|
||||
[Your answer here]
|
||||
|
||||
Sources:
|
||||
- [Source Title 1](https://example.com/1)
|
||||
- [Source Title 2](https://example.com/2)
|
||||
|
||||
Usage notes:
|
||||
- Domain filtering is supported to include or block specific websites
|
||||
- Web search is only available in the US
|
||||
|
||||
IMPORTANT - Use the correct year in search queries:
|
||||
- The current month is ${currentMonthYear}. You MUST use this year when searching for recent information, documentation, or current events.
|
||||
- Example: If the user asks for "latest React docs", search for "React documentation" with the current year, NOT last year
|
||||
`
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
// Auto-generated type stub — replace with real implementation
// NOTE(review): this exports only a type alias to `any`, not a runtime value.
// The prompt module in this same change imports `getLocalMonthYear` and calls
// it as a function — confirm that import does not resolve to this stub.
|
||||
export type getLocalMonthYear = any;
|
||||
@@ -0,0 +1,2 @@
|
||||
// Auto-generated type stub — replace with real implementation
// NOTE(review): type-only alias to `any`, so uses of this name are not
// type-checked and no runtime value is exported — confirm no caller needs one.
|
||||
export type getAPIProvider = any;
|
||||
@@ -0,0 +1,2 @@
|
||||
// Auto-generated type stub — replace with real implementation
// NOTE(review): aliased to `any`, so values typed as PermissionResult are not
// type-checked until the real definition replaces this stub.
|
||||
export type PermissionResult = any;
|
||||
Reference in New Issue
Block a user