fix: Edit 工具增加 Tab/空格规范化匹配，修复中文和缩进文件编辑失败

Read 工具输出将 Tab 渲染为空格，用户复制后 Edit 工具无法匹配。在 findActualString 中增加 Tab→空格规范化回退匹配，并精确映射回原始文件位置。 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-23 00:35:51 +00:00 · 2026-04-27 16:52:11 +08:00
parent c6338917e5
commit 4cbef9667d
2 changed files with 179 additions and 1 deletions
--- a/packages/builtin-tools/src/tools/FileEditTool/tests/utils.test.ts
+++ b/packages/builtin-tools/src/tools/FileEditTool/tests/utils.test.ts
@@ -106,6 +106,84 @@ describe("findActualString", () => {
    const result = findActualString("hello", "");
    expect(result).toBe("");
  });
  // ── Tab/space normalization (Bug #2 reproduction) ──
  test("finds match when search uses spaces but file uses tabs", () => {
    // File content uses Tab indentation
    const fileContent = "\tif (x) {\n\t\treturn 1;\n\t}";
    // User copies from Read output which renders tabs as spaces
    const searchWithSpaces = "    if (x) {\n        return 1;\n    }";
    const result = findActualString(fileContent, searchWithSpaces);
    expect(result).not.toBeNull();
    expect(result).toBe(fileContent);
  });
  test("finds match when search mixes tabs and spaces inconsistently", () => {
    const fileContent = "\tconst x = 1; // comment";
    const searchMixed = "    const x = 1; // comment";
    const result = findActualString(fileContent, searchMixed);
    expect(result).not.toBeNull();
  });
  test("finds match for single-line tab-to-space mismatch", () => {
    const fileContent = "\t\torder_price = NormalizeDouble(ask, digits);";
    const searchSpaces = "        order_price = NormalizeDouble(ask, digits);";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
  });
  // ── CJK / UTF-8 characters (Bug #1 reproduction) ──
  test("finds match with CJK characters in content", () => {
    const fileContent = "input int x = 620; // 止盈点数(点) — 32个pip=320点";
    const result = findActualString(fileContent, fileContent);
    expect(result).toBe(fileContent);
  });
  test("finds match with CJK characters when tab/space differs", () => {
    const fileContent = "\t// 向上突破 → Sell Limit (逆方向做空)";
    const searchSpaces = "    // 向上突破 → Sell Limit (逆方向做空)";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
    expect(result).toBe(fileContent);
  });
  // ── Multiline with tabs + CJK (combined Bug #1 + #2) ──
  test("finds multiline match with tabs and CJK characters", () => {
    const fileContent = "\tif(effective_dir == BREAKOUT_UP)\n\t\t{\n\t\t\t// 向上突破\n\t\t}";
    const searchSpaces = "    if(effective_dir == BREAKOUT_UP)\n        {\n            // 向上突破\n        }";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
    expect(result).toBe(fileContent);
  });
  // ── Returned string must be a valid substring of fileContent ──
  test("returned string from tab match is a real substring of fileContent", () => {
    const fileContent = "prefix\n\t\tindented code\nsuffix";
    const searchSpaces = "prefix\n        indented code\nsuffix";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
    expect(fileContent.includes(result!)).toBe(true);
  });
  test("returned string from partial tab match is a real substring", () => {
    const fileContent = "line1\n\tif (x) {\n\t\tdoStuff();\n\t}\nline5";
    const searchSpaces = "    if (x) {\n        doStuff();\n    }";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
    expect(fileContent.includes(result!)).toBe(true);
  });
  test("tab match with mixed indentation levels", () => {
    const fileContent = "class Foo {\n\t\tmethod1() {\n\t\t\treturn 42;\n\t\t}\n}";
    const searchSpaces = "class Foo {\n        method1() {\n            return 42;\n        }\n}";
    const result = findActualString(fileContent, searchSpaces);
    expect(result).not.toBeNull();
    expect(fileContent.includes(result!)).toBe(true);
  });
 });
 // ─── preserveQuoteStyle ─────────────────────────────────────────────────
--- a/packages/builtin-tools/src/tools/FileEditTool/utils.ts
+++ b/packages/builtin-tools/src/tools/FileEditTool/utils.ts
@@ -63,9 +63,26 @@ export function stripTrailingWhitespace(str: string): string {
  return result
 }
 /**
 * Produces the whitespace-normalized form used for fuzzy matching: every
 * tab character in `str`, wherever it appears, is replaced by exactly four
 * spaces. All other characters (including existing spaces and newlines) are
 * left untouched.
 *
 * This lets a search string whose tabs were rendered as spaces be compared
 * against file content that still contains real tabs.
 *
 * @param str The text to normalize
 * @returns `str` with each tab expanded to four spaces
 */
 function normalizeWhitespace(str: string): string {
  const tabSeparatedPieces = str.split('\t')
  return tabSeparatedPieces.join('    ')
 }
 /**
 * Finds the actual string in the file content that matches the search string,
- * accounting for quote normalization
+ * accounting for quote normalization and tab/space differences.
 *
 * Matching cascade:
 * 1. Exact match
 * 2. Quote normalization (curly → straight quotes)
 * 3. Tab/space normalization (every tab, not only leading ones, is compared as four spaces)
 * 4. Quote + tab/space normalization combined
 *
 * @param fileContent The file content to search in
 * @param searchString The string to search for
 * @returns The actual string found in the file, or null if not found
@@ -89,9 +106,92 @@ export function findActualString(
    return fileContent.substring(searchIndex, searchIndex + searchString.length)
  }
  // Try with tab/space normalization — handles the case where Read output
  // renders tabs as spaces and the user copies the rendered version
  const wsNormalizedFile = normalizeWhitespace(fileContent)
  const wsNormalizedSearch = normalizeWhitespace(searchString)
  const wsSearchIndex = wsNormalizedFile.indexOf(wsNormalizedSearch)
  if (wsSearchIndex !== -1) {
    // Map the match position back to the original file content.
    // We need to find the corresponding range in the original string.
    return mapNormalizedMatchBackToFile(fileContent, wsNormalizedFile, wsSearchIndex, wsNormalizedSearch.length)
  }
  // Try combined: quote normalization + tab/space normalization
  const combinedFile = normalizeWhitespace(normalizedFile)
  const combinedSearch = normalizeWhitespace(normalizedSearch)
  const combinedIndex = combinedFile.indexOf(combinedSearch)
  if (combinedIndex !== -1) {
    return mapNormalizedMatchBackToFile(fileContent, combinedFile, combinedIndex, combinedSearch.length)
  }
  return null
 }
 /**
 * Given a match found in a tab-expanded version of fileContent, map the match
 * range back to the original fileContent and return the corresponding
 * substring.
 *
 * Strategy: walk the original string once, tracking the offset each character
 * occupies in the normalized string. A tab advances the normalized offset by 4
 * while the original offset advances by 1; every other character advances
 * both by 1.
 *
 * Boundary handling:
 * - If the match starts inside an expanded tab, the start snaps back to
 *   include that whole tab.
 * - If the match ends inside an expanded tab, the end snaps forward to
 *   include that whole tab.
 * - If the match runs to the end of the normalized text, the result runs to
 *   the end of fileContent (resolved exactly, not estimated).
 *
 * @param fileContent The original, un-normalized file content
 * @param normalizedFile The tab-expanded text the match was found in; only
 *   its length is used, to clamp the match end
 * @param normalizedStart Offset of the match within normalizedFile
 * @param normalizedLength Length of the match within normalizedFile
 * @returns The substring of fileContent covering the matched range
 */
 function mapNormalizedMatchBackToFile(
  fileContent: string,
  normalizedFile: string,
  normalizedStart: number,
  normalizedLength: number,
 ): string {
  // Number of spaces one tab becomes in the normalized text. This must stay
  // in sync with the expansion used to build normalizedFile.
  const TAB_WIDTH = 4
  // Never look for an end offset beyond the normalized text itself.
  const normalizedEnd = Math.min(normalizedStart + normalizedLength, normalizedFile.length)

  let normPos = 0
  let origStart = -1
  let origEnd = -1

  for (let origPos = 0; origPos < fileContent.length; origPos++) {
    if (origStart === -1 && normPos === normalizedStart) {
      origStart = origPos
    }
    if (normPos === normalizedEnd) {
      origEnd = origPos
      break
    }

    const width = fileContent[origPos] === '\t' ? TAB_WIDTH : 1
    const nextNormPos = normPos + width

    // Only a tab (width > 1) can straddle a boundary. Snap outward so the
    // returned range always covers whole original characters.
    if (origStart === -1 && normPos < normalizedStart && nextNormPos > normalizedStart) {
      origStart = origPos
    }
    if (normPos < normalizedEnd && nextNormPos > normalizedEnd) {
      origEnd = origPos + 1
      break
    }

    normPos = nextNormPos
  }

  // The loop ran off the end of fileContent without recording an end offset:
  // the match extends to the end of the file.
  if (origEnd === -1) origEnd = fileContent.length
  // The start offset was never reached (e.g. an empty match at end of file):
  // return an empty range rather than guessing.
  if (origStart === -1) origStart = origEnd

  return fileContent.substring(origStart, origEnd)
 }
 /**
 * When old_string matched via quote normalization (curly quotes in file,
 * straight quotes from model), apply the same curly quote style to new_string