Files
claude-code/packages/builtin-tools/src/tools/WebSearchTool/adapters/bingAdapter.ts
claude-code-best 2fb1c9dcd8 feat: 工具层及 mcp 大重构 (#252)
* feat: 第一版大重构

* fix: 修复类型问题

* chore: 更新版本到 1.3.2

* Add brave as alternative WebSearchTool

* fix: 修正顺序

* fix: 修复对穷鬼模式的 auto dream 和 session memory 越过

* feat: 穷鬼模式去除 session-summary

* feat: 创建 builtin-tools 包,搬运所有工具实现

将 src/tools/ 下的全部 60 个工具目录迁移至 packages/builtin-tools/src/tools/,
内部导入路径已更新为 src/ alias 模式。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* refactor: 更新 src/ 中所有工具引用至 builtin-tools 包,删除 src/tools/

- src/tools.ts 及 178 个 src/ 文件的 import 路径从 ./tools/ 改为 builtin-tools/tools/
- 删除 src/tools/ 整个目录(已迁移至 packages/builtin-tools/)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: 添加 builtin-tools 路径别名至 tsconfig,更新 bun.lock

- tsconfig.json 新增 builtin-tools/* 和 builtin-tools 路径映射
- 新增 packages/builtin-tools/src 至 include

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* refactor: 为 builtin-tools、mcp-client、agent-tools 添加 @claude-code-best 作用域前缀

所有包名及 import 路径统一添加 @claude-code-best/ 前缀:
- builtin-tools → @claude-code-best/builtin-tools
- mcp-client → @claude-code-best/mcp-client
- agent-tools → @claude-code-best/agent-tools

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 修复 node 环境没有 bun 的问题

---------

Co-authored-by: Eric-Guo <eric.guocz@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-13 09:52:05 +08:00

205 lines
6.3 KiB
TypeScript

/**
* Bing-based search adapter — fetches Bing search pages and extracts
* search results using regex pattern matching on raw HTML.
*/
import axios from 'axios'
import he from 'he'
import { AbortError } from 'src/utils/errors.js'
import type { SearchResult, SearchOptions, WebSearchAdapter } from './types.js'
/** Timeout, in milliseconds, passed to axios for the Bing results-page request. */
const FETCH_TIMEOUT_MS = 30_000
/**
 * Browser-like headers to avoid Bing's anti-bot JS-rendered response.
 * These mimic Microsoft Edge on macOS to get full HTML search results.
 *
 * The `User-Agent` and `Sec-Ch-Ua*` values all describe the same browser
 * (Edge/Chromium 131 on macOS); if one is bumped, bump the others with it so
 * the request stays self-consistent.
 */
const BROWSER_HEADERS = {
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
Accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
// Ask for a fresh page rather than a cached one.
'Cache-Control': 'no-cache',
Pragma: 'no-cache',
// Client hints; must agree with the User-Agent string above.
'Sec-Ch-Ua': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"macOS"',
// Fetch-metadata values a browser sends for a user-initiated, top-level
// document navigation.
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
} as const
/**
 * Web search adapter that scrapes Bing's HTML results page.
 *
 * The adapter issues a plain GET against `https://www.bing.com/search` with
 * browser-like headers and parses the returned markup with
 * {@link extractBingResults}; domain filtering is applied client-side.
 */
export class BingSearchAdapter implements WebSearchAdapter {
  /**
   * Run a single Bing query and return the organic results.
   *
   * @param query - Free-text search query; URL-encoded before being sent.
   * @param options - `signal` cancels the request, `onProgress` receives
   *   `query_update` and `search_results_received` events, and
   *   `allowedDomains` / `blockedDomains` filter results by hostname. A
   *   domain entry matches itself and any of its subdomains, ignoring case.
   * @returns The results that survived domain filtering, in page order.
   * @throws AbortError if `signal` is already aborted or aborts during the
   *   request.
   * @throws Any other error raised by axios (network failure, timeout, ...)
   *   is rethrown unchanged.
   */
  async search(
    query: string,
    options: SearchOptions,
  ): Promise<SearchResult[]> {
    const { signal, onProgress, allowedDomains, blockedDomains } = options

    if (signal?.aborted) {
      throw new AbortError()
    }

    onProgress?.({ type: 'query_update', query })

    const url = `https://www.bing.com/search?q=${encodeURIComponent(query)}&setmkt=en-US`

    // The caller's signal is forwarded through a private controller. The
    // forwarding listener is kept in a variable so it can be detached again:
    // `{ once: true }` only removes it if the signal actually fires, which
    // would otherwise leak one listener per search on a long-lived signal.
    const abortController = new AbortController()
    const forwardAbort = (): void => abortController.abort()
    signal?.addEventListener('abort', forwardAbort, { once: true })

    let html: string
    try {
      const response = await axios.get(url, {
        signal: abortController.signal,
        timeout: FETCH_TIMEOUT_MS,
        responseType: 'text',
        headers: BROWSER_HEADERS,
      })
      html = response.data
    } catch (e) {
      // Normalise every cancellation flavour to the project's AbortError.
      if (axios.isCancel(e) || abortController.signal.aborted) {
        throw new AbortError()
      }
      throw e
    } finally {
      signal?.removeEventListener('abort', forwardAbort)
    }

    if (abortController.signal.aborted) {
      throw new AbortError()
    }

    const rawResults = extractBingResults(html)

    // Client-side domain filtering. `URL#hostname` is always lower-case, so
    // the filters are lower-cased once up front; otherwise an entry such as
    // "Example.com" could never match.
    const allowed = allowedDomains?.map((d: string) => d.toLowerCase()) ?? []
    const blocked = blockedDomains?.map((d: string) => d.toLowerCase()) ?? []
    const matchesAny = (hostname: string, domains: string[]): boolean =>
      domains.some((d: string) => hostname === d || hostname.endsWith('.' + d))

    const results = rawResults.filter(r => {
      if (!r.url) return false
      try {
        const hostname = new URL(r.url).hostname
        if (allowed.length > 0 && !matchesAny(hostname, allowed)) return false
        if (blocked.length > 0 && matchesAny(hostname, blocked)) return false
      } catch {
        // Unparseable URL: drop the result rather than return a bad link.
        return false
      }
      return true
    })

    onProgress?.({
      type: 'search_results_received',
      resultCount: results.length,
      query,
    })

    return results
  }
}
/**
 * Pull the organic results out of a Bing results page.
 *
 * Bing renders each organic hit as an `<li class="b_algo">` element (inside
 * `<ol id="b_results">`); only the `<li>` itself is matched here. Within each
 * element the first `<h2><a href="...">` pair supplies the URL and title.
 * Elements without such a link, or whose link does not resolve to an external
 * URL, are skipped.
 *
 * @param html - Raw HTML of a Bing search results page.
 * @returns One entry per usable result element, in document order.
 */
export function extractBingResults(html: string): SearchResult[] {
  const collected: SearchResult[] = []
  const algoItems = html.matchAll(/<li\s+class="b_algo"[^>]*>([\s\S]*?)<\/li>/gi)

  for (const [, inner] of algoItems) {
    // Primary link: <h2><a href="...">title markup</a></h2>
    const heading = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i.exec(inner)
    if (heading === null) continue
    const [, href, anchorHtml] = heading

    // Unwrap Bing redirects (bing.com/ck/a?...&u=a1aHR0cHM6Ly9...); links
    // that are relative or stay on Bing come back as undefined.
    const target = resolveBingUrl(decodeHtmlEntities(href))
    if (!target) continue

    // Strip inline markup first, then decode entities in the remaining text.
    const plainTitle = anchorHtml.replace(/<[^>]+>/g, '').trim()

    collected.push({
      title: decodeHtmlEntities(plainTitle),
      url: target,
      // Snippet lookup order: b_lineclamp, then b_caption <p>, then b_caption text
      snippet: extractSnippet(inner),
    })
  }

  return collected
}
/**
 * Find the descriptive snippet for one result block.
 *
 * Three locations are probed in order and the first one that matches wins:
 *   1. a `<p>` whose class attribute starts with `b_lineclamp`;
 *   2. the first `<p>` after the opening tag of a `b_caption` `<div>`;
 *   3. the whole content of the `b_caption` `<div>`, used only when it still
 *      contains text once tags are stripped.
 *
 * Locations 1 and 2 return whatever text they contain, even an empty string.
 *
 * @param block - HTML of a single result block.
 * @returns The snippet as plain, entity-decoded text, or `undefined` when no
 *   location yields one.
 */
function extractSnippet(block: string): string | undefined {
  // Remove every tag, then trim; entity decoding happens afterwards.
  const stripTags = (fragment: string): string =>
    fragment.replace(/<[^>]+>/g, '').trim()

  const clamped = /<p[^>]*class="b_lineclamp[^"]*"[^>]*>([\s\S]*?)<\/p>/i.exec(block)
  if (clamped !== null) {
    return decodeHtmlEntities(stripTags(clamped[1]))
  }

  const captionParagraph =
    /<div[^>]*class="b_caption[^"]*"[^>]*>[\s\S]*?<p[^>]*>([\s\S]*?)<\/p>/i.exec(block)
  if (captionParagraph !== null) {
    return decodeHtmlEntities(stripTags(captionParagraph[1]))
  }

  const captionBody = /<div[^>]*class="b_caption[^"]*"[^>]*>([\s\S]*?)<\/div>/i.exec(block)
  const remaining = captionBody === null ? '' : stripTags(captionBody[1])
  return remaining ? decodeHtmlEntities(remaining) : undefined
}
/**
 * HTML entity decoder used by the extraction helpers in this module.
 * This is a direct alias of `he.decode`, exported under a local name.
 */
export const decodeHtmlEntities = he.decode
/**
 * Resolve a link taken from a Bing result block to the URL it points at.
 *
 * Bing wraps outbound links in redirects such as
 * `https://www.bing.com/ck/a?...&u=a1aHR0cHM6Ly9leGFtcGxlLmNvbQ...`, where the
 * `u` query parameter is a two-character prefix (`a1` for https, `a0` for
 * http) followed by the base64url-encoded target URL.
 *
 * @param rawUrl - The link's `href`, with HTML entities already decoded.
 * @returns The decoded redirect target when `rawUrl` carries one; otherwise
 *   `rawUrl` itself when it is a direct external link. Returns `undefined`
 *   for relative or anchor links and for links hosted on bing.com.
 */
export function resolveBingUrl(rawUrl: string): string | undefined {
  // Relative and in-page links never lead to an external result.
  if (rawUrl.startsWith('/') || rawUrl.startsWith('#')) return undefined

  // Try to unwrap a redirect target from the `u` parameter.
  const encoded = /[?&]u=([a-zA-Z0-9+/_=-]+)/.exec(rawUrl)?.[1]
  if (encoded !== undefined && encoded.length >= 3) {
    // Drop the 2-char scheme marker and map the base64url alphabet back to
    // plain base64. Missing padding is tolerated by the decoder.
    const b64 = encoded.slice(2).replace(/-/g, '+').replace(/_/g, '/')
    try {
      const decoded = Buffer.from(b64, 'base64').toString('utf-8')
      // Accept only a real http(s) URL, not any text that merely starts
      // with "http".
      if (/^https?:\/\//i.test(decoded)) return decoded
    } catch {
      // Fall through: not a valid base64 redirect.
    }
  }

  // Decide "is this a Bing page?" from the hostname, not from a substring
  // of the whole URL: external links such as https://notbing.com/ or
  // https://example.com/post-about-bing.com must not be discarded.
  let hostname: string | undefined
  try {
    hostname = new URL(rawUrl).hostname
  } catch {
    hostname = undefined
  }

  if (hostname === undefined) {
    // Not an absolute URL, so there is no hostname to inspect; keep the
    // conservative substring heuristic for this case.
    return rawUrl.includes('bing.com') ? undefined : rawUrl
  }

  const isBingHost = hostname === 'bing.com' || hostname.endsWith('.bing.com')
  return isBingHost ? undefined : rawUrl
}