feat: 添加 builtin-tools 增强与测试覆盖

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-22 16:25:51 +00:00 · 2026-04-22 22:38:10 +08:00
parent 7881cc617c
commit d208855f07
12 changed files with 1449 additions and 985 deletions
--- a/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts
+++ b/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts
@@ -9,19 +9,11 @@ const inputSchema = lazySchema(() =>
  z.strictObject({
    url: z
      .string()
-      .describe('URL to navigate to in the browser.'),
+      .describe('URL to fetch and extract content from.'),
    action: z
-      .enum(['navigate', 'screenshot', 'click', 'type', 'scroll'])
+      .enum(['navigate', 'screenshot'])
      .optional()
-      .describe('Browser action to perform. Defaults to "navigate".'),
-    selector: z
-      .string()
-      .optional()
-      .describe('CSS selector for click/type actions.'),
-    text: z
-      .string()
-      .optional()
-      .describe('Text to type when action is "type".'),
+      .describe('Action to perform. "navigate" fetches page content (default). "screenshot" returns a text snapshot of the page.'),
  }),
 )
 type InputSchema = ReturnType<typeof inputSchema>
@@ -45,16 +37,24 @@ export const WebBrowserTool = buildTool({
  },

  async description() {
-    return 'Browse the web using an embedded browser'
+    return 'Fetch and read web page content via HTTP'
  },
  async prompt() {
-    return `Open and interact with web pages in an embedded browser. Supports navigation, screenshots, clicking, typing, and scrolling.
+    return `Fetch web pages via HTTP and extract their text content. This is a lightweight browser tool (HTTP fetch, not a full browser engine).
+
+Supported actions:
+- navigate: Fetch a URL and extract page title + text content
+- screenshot: Same as navigate (returns text snapshot, not a visual screenshot)
+
+Limitations:
+- No JavaScript execution — only sees server-rendered HTML
+- click/type/scroll require a full browser runtime (not available)
+- For full browser interaction, use the Claude-in-Chrome MCP tools instead

 Use this for:
- Viewing web pages and their content
- Taking screenshots of UI
- Interacting with web applications
- Testing web endpoints with full browser rendering`
+- Reading web page content and documentation
+- Checking API endpoints that return HTML
+- Quick page title/content extraction`
  },

  isConcurrencySafe() {
@@ -85,12 +85,84 @@ Use this for:
  },

  async call(input: BrowserInput) {
-    // Browser integration requires the WEB_BROWSER_TOOL runtime (Bun WebView).
+    const action = input.action ?? 'navigate'
+
+    if (action === 'navigate' || action === 'screenshot') {
+      // Fetch the page content via HTTP
+      try {
+        const response = await fetch(input.url, {
+          headers: {
+            'User-Agent':
+              'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            Accept:
+              'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+          },
+          redirect: 'follow',
+        })
+
+        if (!response.ok) {
+          return {
+            data: {
+              title: `HTTP ${response.status}`,
+              url: input.url,
+              content: `Error: ${response.status} ${response.statusText}`,
+            },
+          }
+        }
+
+        const html = await response.text()
+
+        // Extract title
+        const titleMatch = html.match(/<title[^>]*>([^<]*)<\/title>/i)
+        const title = titleMatch?.[1]?.trim() ?? ''
+
+        // Extract text content (strip HTML tags, scripts, styles)
+        let textContent = html
+          .replace(/<script[\s\S]*?<\/script>/gi, '')
+          .replace(/<style[\s\S]*?<\/style>/gi, '')
+          .replace(/<[^>]+>/g, ' ')
+          .replace(/\s+/g, ' ')
+          .trim()
+
+        // Truncate to reasonable size
+        if (textContent.length > 50_000) {
+          textContent = textContent.slice(0, 50_000) + '\n[truncated]'
+        }
+
+        if (action === 'screenshot') {
+          return {
+            data: {
+              title,
+              url: response.url,
+              content: `[Text snapshot — visual screenshots require Chrome browser tools]\n\n${textContent}`,
+            },
+          }
+        }
+
+        return {
+          data: {
+            title,
+            url: response.url,
+            content: textContent,
+          },
+        }
+      } catch (err) {
+        return {
+          data: {
+            title: 'Error',
+            url: input.url,
+            content: `Failed to fetch: ${err instanceof Error ? err.message : String(err)}`,
+          },
+        }
+      }
+    }
+
+    // Unreachable — schema only allows navigate/screenshot
    return {
      data: {
        title: '',
        url: input.url,
-        content: 'Web browser requires the WEB_BROWSER_TOOL runtime.',
+        content: `Unknown action "${action}".`,
      },
    }
  },