feat: 重构 WebSearch/WebFetch,新增 Tavily 适配器及 /web-tools 面板

- WebSearch: 默认 Tavily,适配器优先级 WEB_SEARCH_ADAPTER > settings.webSearchAdapter > tavily
- WebFetch: 支持 Tavily /extract 返回 Markdown,移除 domain blacklist 远程检查
- 新增 /web-tools 命令面板(Search/Fetch 双 Tab + 二级配置菜单)
- 新增 settings 字段: webSearchAdapter, webFetchAdapter, tavilyEndpointUrl, braveApiKey, exaApiKey, exaEndpointUrl, webFetchHttpTimeoutMs
- 适配器联动: Tavily/Exa 从 settings 读取 endpoint 和 API key

Co-Authored-By: deepseek-v4-pro <deepseek-ai@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-06-15 15:54:02 +08:00
parent 2714bbf812
commit 9d845d77b9
10 changed files with 1005 additions and 160 deletions

View File

@@ -17,23 +17,9 @@ import { asSystemPrompt } from 'src/utils/systemPromptType.js'
import { isPreapprovedHost } from './preapproved.js'
import { makeSecondaryModelPrompt } from './prompt.js'
// Custom error classes for domain blocking
class DomainBlockedError extends Error {
constructor(domain: string) {
super(`Claude Code is unable to fetch from ${domain}`)
this.name = 'DomainBlockedError'
}
}
class DomainCheckFailedError extends Error {
constructor(domain: string) {
super(
`Unable to verify if domain ${domain} is safe to fetch. This may be due to network restrictions or enterprise security policies blocking claude.ai.`,
)
this.name = 'DomainCheckFailedError'
}
}
const DEFAULT_TAVILY_EXTRACT_URL = 'https://tavily.claude-code-best.win/extract'
// Custom error class for egress proxy blocks
class EgressBlockedError extends Error {
constructor(public readonly domain: string) {
super(
@@ -68,18 +54,8 @@ const URL_CACHE = new LRUCache<string, CacheEntry>({
ttl: CACHE_TTL_MS,
})
// Separate cache for preflight domain checks. URL_CACHE is URL-keyed, so
// fetching two paths on the same domain triggers two identical preflight
// HTTP round-trips to api.anthropic.com. This hostname-keyed cache avoids
// that. Only 'allowed' is cached — blocked/failed re-check on next attempt.
const DOMAIN_CHECK_CACHE = new LRUCache<string, true>({
max: 128,
ttl: 5 * 60 * 1000, // 5 minutes — shorter than URL_CACHE TTL
})
export function clearWebFetchCache(): void {
URL_CACHE.clear()
DOMAIN_CHECK_CACHE.clear()
}
function responseHeaderToString(value: unknown): string | undefined {
@@ -143,9 +119,6 @@ const MAX_HTTP_CONTENT_LENGTH = 10 * 1024 * 1024
// Prevents hanging indefinitely on slow/unresponsive servers.
const FETCH_TIMEOUT_MS = 60_000
// Timeout for the domain blocklist preflight check (10 seconds).
const DOMAIN_CHECK_TIMEOUT_MS = 10_000
// Cap same-host redirect hops. Without this a malicious server can return
// a redirect loop (/a → /b → /a …) and the per-request FETCH_TIMEOUT_MS
// resets on every hop, hanging the tool until user interrupt. 10 matches
@@ -196,40 +169,6 @@ export function validateURL(url: string): boolean {
return true
}
type DomainCheckResult =
| { status: 'allowed' }
| { status: 'blocked' }
| { status: 'check_failed'; error: Error }
export async function checkDomainBlocklist(
domain: string,
): Promise<DomainCheckResult> {
if (DOMAIN_CHECK_CACHE.has(domain)) {
return { status: 'allowed' }
}
try {
const response = await axios.get(
`https://api.anthropic.com/api/web/domain_info?domain=${encodeURIComponent(domain)}`,
{ timeout: DOMAIN_CHECK_TIMEOUT_MS },
)
if (response.status === 200) {
if (response.data.can_fetch === true) {
DOMAIN_CHECK_CACHE.set(domain, true)
return { status: 'allowed' }
}
return { status: 'blocked' }
}
// Non-200 status but didn't throw
return {
status: 'check_failed',
error: new Error(`Domain check returned status ${response.status}`),
}
} catch (e) {
logError(e)
return { status: 'check_failed', error: e as Error }
}
}
/**
* Check if a redirect is safe to follow
* Allows redirects that:
@@ -412,23 +351,6 @@ export async function getURLMarkdownContent(
const hostname = parsedUrl.hostname
// Check if the user has opted to skip the blocklist check
// This is for enterprise customers with restrictive security policies
// that prevent outbound connections to claude.ai
const settings = getSettings_DEPRECATED()
if (settings.skipWebFetchPreflight === false) {
const checkResult = await checkDomainBlocklist(hostname)
switch (checkResult.status) {
case 'allowed':
// Continue with the fetch
break
case 'blocked':
throw new DomainBlockedError(hostname)
case 'check_failed':
throw new DomainCheckFailedError(hostname)
}
}
if (process.env.USER_TYPE === 'ant') {
logEvent('tengu_web_fetch_host', {
hostname:
@@ -436,13 +358,6 @@ export async function getURLMarkdownContent(
})
}
} catch (e) {
if (
e instanceof DomainBlockedError ||
e instanceof DomainCheckFailedError
) {
// Expected user-facing failures - re-throw without logging as internal error
throw e
}
logError(e)
}
@@ -513,6 +428,109 @@ export async function getURLMarkdownContent(
return entry
}
/**
* Fetch URL content via Tavily Extract API, which directly returns Markdown.
* This skips the HTML→Markdown conversion (turndown) and the secondary
* model call (queryHaiku) — Tavily already delivers clean Markdown.
*/
export async function fetchContentWithTavily(
url: string,
abortController: AbortController,
): Promise<FetchedContent | RedirectInfo> {
if (!validateURL(url)) {
throw new Error('Invalid URL')
}
// Check cache (LRUCache handles TTL automatically)
const cachedEntry = URL_CACHE.get(url)
if (cachedEntry) {
return {
bytes: cachedEntry.bytes,
code: cachedEntry.code,
codeText: cachedEntry.codeText,
content: cachedEntry.content,
contentType: cachedEntry.contentType,
persistedPath: cachedEntry.persistedPath,
persistedSize: cachedEntry.persistedSize,
}
}
let parsedUrl: URL
try {
parsedUrl = new URL(url)
} catch {
throw new Error('Invalid URL')
}
// Upgrade http to https if needed
if (parsedUrl.protocol === 'http:') {
parsedUrl.protocol = 'https:'
url = parsedUrl.toString()
}
const abortSignal = abortController.signal
const settings = getSettings_DEPRECATED() as Record<string, unknown> & {
tavilyEndpointUrl?: string
}
const baseUrl = settings.tavilyEndpointUrl || DEFAULT_TAVILY_EXTRACT_URL
// Derive extract URL from the base Tavily endpoint
const extractUrl = baseUrl.endsWith('/search')
? baseUrl.replace(/\/search$/, '/extract')
: baseUrl.endsWith('/extract')
? baseUrl
: `${baseUrl.replace(/\/$/, '')}/extract`
const response = await axios.post<{ url: string; raw_content: string }>(
extractUrl,
{
urls: [url],
},
{
signal: abortSignal,
timeout: FETCH_TIMEOUT_MS,
headers: { 'Content-Type': 'application/json' },
},
)
if (abortSignal.aborted) {
throw new AbortError()
}
const rawContent = response.data?.raw_content ?? ''
// If raw_content is a JSON string (extract may return {url:..., raw_content:...}
// per URL), unwrap it.
let markdownContent = rawContent
if (!markdownContent.trim()) {
// Try to extract from results array
const resp = response.data as unknown as {
results?: Array<{ raw_content?: string }>
}
const results = resp.results ?? []
if (results.length > 0 && results[0].raw_content) {
markdownContent = results[0].raw_content
}
}
if (!markdownContent.trim()) {
throw new Error(
`Tavily Extract returned empty content for ${url}. The page may require authentication or JavaScript rendering.`,
)
}
const contentBytes = Buffer.byteLength(markdownContent)
const entry: CacheEntry = {
bytes: contentBytes,
code: 200,
codeText: 'OK',
content: markdownContent,
contentType: 'text/markdown',
}
URL_CACHE.set(url, entry, { size: Math.max(1, contentBytes) })
return entry
}
export async function applyPromptToMarkdown(
prompt: string,
markdownContent: string,