mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-19 06:45:50 +00:00
feat: 重构 WebSearch/WebFetch,新增 Tavily 适配器及 /web-tools 面板
- WebSearch: 默认 Tavily,适配器优先级 WEB_SEARCH_ADAPTER > settings.webSearchAdapter > tavily - WebFetch: 支持 Tavily /extract 返回 Markdown,移除 domain blacklist 远程检查 - 新增 /web-tools 命令面板(Search/Fetch 双 Tab + 二级配置菜单) - 新增 settings 字段: webSearchAdapter, webFetchAdapter, tavilyEndpointUrl, braveApiKey, exaApiKey, exaEndpointUrl, webFetchHttpTimeoutMs - 适配器联动: Tavily/Exa 从 settings 读取 endpoint 和 API key Co-Authored-By: deepseek-v4-pro <deepseek-ai@claude-code-best.win>
This commit is contained in:
@@ -17,23 +17,9 @@ import { asSystemPrompt } from 'src/utils/systemPromptType.js'
|
||||
import { isPreapprovedHost } from './preapproved.js'
|
||||
import { makeSecondaryModelPrompt } from './prompt.js'
|
||||
|
||||
// Custom error classes for domain blocking
|
||||
class DomainBlockedError extends Error {
|
||||
constructor(domain: string) {
|
||||
super(`Claude Code is unable to fetch from ${domain}`)
|
||||
this.name = 'DomainBlockedError'
|
||||
}
|
||||
}
|
||||
|
||||
class DomainCheckFailedError extends Error {
|
||||
constructor(domain: string) {
|
||||
super(
|
||||
`Unable to verify if domain ${domain} is safe to fetch. This may be due to network restrictions or enterprise security policies blocking claude.ai.`,
|
||||
)
|
||||
this.name = 'DomainCheckFailedError'
|
||||
}
|
||||
}
|
||||
const DEFAULT_TAVILY_EXTRACT_URL = 'https://tavily.claude-code-best.win/extract'
|
||||
|
||||
// Custom error class for egress proxy blocks
|
||||
class EgressBlockedError extends Error {
|
||||
constructor(public readonly domain: string) {
|
||||
super(
|
||||
@@ -68,18 +54,8 @@ const URL_CACHE = new LRUCache<string, CacheEntry>({
|
||||
ttl: CACHE_TTL_MS,
|
||||
})
|
||||
|
||||
// Separate cache for preflight domain checks. URL_CACHE is URL-keyed, so
|
||||
// fetching two paths on the same domain triggers two identical preflight
|
||||
// HTTP round-trips to api.anthropic.com. This hostname-keyed cache avoids
|
||||
// that. Only 'allowed' is cached — blocked/failed re-check on next attempt.
|
||||
const DOMAIN_CHECK_CACHE = new LRUCache<string, true>({
|
||||
max: 128,
|
||||
ttl: 5 * 60 * 1000, // 5 minutes — shorter than URL_CACHE TTL
|
||||
})
|
||||
|
||||
export function clearWebFetchCache(): void {
|
||||
URL_CACHE.clear()
|
||||
DOMAIN_CHECK_CACHE.clear()
|
||||
}
|
||||
|
||||
function responseHeaderToString(value: unknown): string | undefined {
|
||||
@@ -143,9 +119,6 @@ const MAX_HTTP_CONTENT_LENGTH = 10 * 1024 * 1024
|
||||
// Prevents hanging indefinitely on slow/unresponsive servers.
|
||||
const FETCH_TIMEOUT_MS = 60_000
|
||||
|
||||
// Timeout for the domain blocklist preflight check (10 seconds).
|
||||
const DOMAIN_CHECK_TIMEOUT_MS = 10_000
|
||||
|
||||
// Cap same-host redirect hops. Without this a malicious server can return
|
||||
// a redirect loop (/a → /b → /a …) and the per-request FETCH_TIMEOUT_MS
|
||||
// resets on every hop, hanging the tool until user interrupt. 10 matches
|
||||
@@ -196,40 +169,6 @@ export function validateURL(url: string): boolean {
|
||||
return true
|
||||
}
|
||||
|
||||
type DomainCheckResult =
|
||||
| { status: 'allowed' }
|
||||
| { status: 'blocked' }
|
||||
| { status: 'check_failed'; error: Error }
|
||||
|
||||
export async function checkDomainBlocklist(
|
||||
domain: string,
|
||||
): Promise<DomainCheckResult> {
|
||||
if (DOMAIN_CHECK_CACHE.has(domain)) {
|
||||
return { status: 'allowed' }
|
||||
}
|
||||
try {
|
||||
const response = await axios.get(
|
||||
`https://api.anthropic.com/api/web/domain_info?domain=${encodeURIComponent(domain)}`,
|
||||
{ timeout: DOMAIN_CHECK_TIMEOUT_MS },
|
||||
)
|
||||
if (response.status === 200) {
|
||||
if (response.data.can_fetch === true) {
|
||||
DOMAIN_CHECK_CACHE.set(domain, true)
|
||||
return { status: 'allowed' }
|
||||
}
|
||||
return { status: 'blocked' }
|
||||
}
|
||||
// Non-200 status but didn't throw
|
||||
return {
|
||||
status: 'check_failed',
|
||||
error: new Error(`Domain check returned status ${response.status}`),
|
||||
}
|
||||
} catch (e) {
|
||||
logError(e)
|
||||
return { status: 'check_failed', error: e as Error }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a redirect is safe to follow
|
||||
* Allows redirects that:
|
||||
@@ -412,23 +351,6 @@ export async function getURLMarkdownContent(
|
||||
|
||||
const hostname = parsedUrl.hostname
|
||||
|
||||
// Check if the user has opted to skip the blocklist check
|
||||
// This is for enterprise customers with restrictive security policies
|
||||
// that prevent outbound connections to claude.ai
|
||||
const settings = getSettings_DEPRECATED()
|
||||
if (settings.skipWebFetchPreflight === false) {
|
||||
const checkResult = await checkDomainBlocklist(hostname)
|
||||
switch (checkResult.status) {
|
||||
case 'allowed':
|
||||
// Continue with the fetch
|
||||
break
|
||||
case 'blocked':
|
||||
throw new DomainBlockedError(hostname)
|
||||
case 'check_failed':
|
||||
throw new DomainCheckFailedError(hostname)
|
||||
}
|
||||
}
|
||||
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logEvent('tengu_web_fetch_host', {
|
||||
hostname:
|
||||
@@ -436,13 +358,6 @@ export async function getURLMarkdownContent(
|
||||
})
|
||||
}
|
||||
} catch (e) {
|
||||
if (
|
||||
e instanceof DomainBlockedError ||
|
||||
e instanceof DomainCheckFailedError
|
||||
) {
|
||||
// Expected user-facing failures - re-throw without logging as internal error
|
||||
throw e
|
||||
}
|
||||
logError(e)
|
||||
}
|
||||
|
||||
@@ -513,6 +428,109 @@ export async function getURLMarkdownContent(
|
||||
return entry
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch URL content via Tavily Extract API, which directly returns Markdown.
|
||||
* This skips the HTML→Markdown conversion (turndown) and the secondary
|
||||
* model call (queryHaiku) — Tavily already delivers clean Markdown.
|
||||
*/
|
||||
export async function fetchContentWithTavily(
|
||||
url: string,
|
||||
abortController: AbortController,
|
||||
): Promise<FetchedContent | RedirectInfo> {
|
||||
if (!validateURL(url)) {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
// Check cache (LRUCache handles TTL automatically)
|
||||
const cachedEntry = URL_CACHE.get(url)
|
||||
if (cachedEntry) {
|
||||
return {
|
||||
bytes: cachedEntry.bytes,
|
||||
code: cachedEntry.code,
|
||||
codeText: cachedEntry.codeText,
|
||||
content: cachedEntry.content,
|
||||
contentType: cachedEntry.contentType,
|
||||
persistedPath: cachedEntry.persistedPath,
|
||||
persistedSize: cachedEntry.persistedSize,
|
||||
}
|
||||
}
|
||||
|
||||
let parsedUrl: URL
|
||||
try {
|
||||
parsedUrl = new URL(url)
|
||||
} catch {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
// Upgrade http to https if needed
|
||||
if (parsedUrl.protocol === 'http:') {
|
||||
parsedUrl.protocol = 'https:'
|
||||
url = parsedUrl.toString()
|
||||
}
|
||||
|
||||
const abortSignal = abortController.signal
|
||||
|
||||
const settings = getSettings_DEPRECATED() as Record<string, unknown> & {
|
||||
tavilyEndpointUrl?: string
|
||||
}
|
||||
const baseUrl = settings.tavilyEndpointUrl || DEFAULT_TAVILY_EXTRACT_URL
|
||||
// Derive extract URL from the base Tavily endpoint
|
||||
const extractUrl = baseUrl.endsWith('/search')
|
||||
? baseUrl.replace(/\/search$/, '/extract')
|
||||
: baseUrl.endsWith('/extract')
|
||||
? baseUrl
|
||||
: `${baseUrl.replace(/\/$/, '')}/extract`
|
||||
|
||||
const response = await axios.post<{ url: string; raw_content: string }>(
|
||||
extractUrl,
|
||||
{
|
||||
urls: [url],
|
||||
},
|
||||
{
|
||||
signal: abortSignal,
|
||||
timeout: FETCH_TIMEOUT_MS,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
)
|
||||
|
||||
if (abortSignal.aborted) {
|
||||
throw new AbortError()
|
||||
}
|
||||
|
||||
const rawContent = response.data?.raw_content ?? ''
|
||||
// If raw_content is a JSON string (extract may return {url:..., raw_content:...}
|
||||
// per URL), unwrap it.
|
||||
let markdownContent = rawContent
|
||||
if (!markdownContent.trim()) {
|
||||
// Try to extract from results array
|
||||
const resp = response.data as unknown as {
|
||||
results?: Array<{ raw_content?: string }>
|
||||
}
|
||||
const results = resp.results ?? []
|
||||
if (results.length > 0 && results[0].raw_content) {
|
||||
markdownContent = results[0].raw_content
|
||||
}
|
||||
}
|
||||
|
||||
if (!markdownContent.trim()) {
|
||||
throw new Error(
|
||||
`Tavily Extract returned empty content for ${url}. The page may require authentication or JavaScript rendering.`,
|
||||
)
|
||||
}
|
||||
|
||||
const contentBytes = Buffer.byteLength(markdownContent)
|
||||
|
||||
const entry: CacheEntry = {
|
||||
bytes: contentBytes,
|
||||
code: 200,
|
||||
codeText: 'OK',
|
||||
content: markdownContent,
|
||||
contentType: 'text/markdown',
|
||||
}
|
||||
URL_CACHE.set(url, entry, { size: Math.max(1, contentBytes) })
|
||||
return entry
|
||||
}
|
||||
|
||||
export async function applyPromptToMarkdown(
|
||||
prompt: string,
|
||||
markdownContent: string,
|
||||
|
||||
Reference in New Issue
Block a user