diff --git a/packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts b/packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts index eca237141..3745c38bc 100644 --- a/packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts +++ b/packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts @@ -106,6 +106,84 @@ describe("findActualString", () => { const result = findActualString("hello", ""); expect(result).toBe(""); }); + + // ── Tab/space normalization (Bug #2 reproduction) ── + + test("finds match when search uses spaces but file uses tabs", () => { + // File content uses Tab indentation + const fileContent = "\tif (x) {\n\t\treturn 1;\n\t}"; + // User copies from Read output which renders tabs as spaces + const searchWithSpaces = " if (x) {\n return 1;\n }"; + const result = findActualString(fileContent, searchWithSpaces); + expect(result).not.toBeNull(); + expect(result).toBe(fileContent); + }); + + test("finds match when search mixes tabs and spaces inconsistently", () => { + const fileContent = "\tconst x = 1; // comment"; + const searchMixed = " const x = 1; // comment"; + const result = findActualString(fileContent, searchMixed); + expect(result).not.toBeNull(); + }); + + test("finds match for single-line tab-to-space mismatch", () => { + const fileContent = "\t\torder_price = NormalizeDouble(ask, digits);"; + const searchSpaces = " order_price = NormalizeDouble(ask, digits);"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + }); + + // ── CJK / UTF-8 characters (Bug #1 reproduction) ── + + test("finds match with CJK characters in content", () => { + const fileContent = "input int x = 620; // 止盈点数(点) — 32个pip=320点"; + const result = findActualString(fileContent, fileContent); + expect(result).toBe(fileContent); + }); + + test("finds match with CJK characters when tab/space differs", () => { + const fileContent = "\t// 向上突破 → Sell Limit (逆方向做空)"; + const 
searchSpaces = " // 向上突破 → Sell Limit (逆方向做空)"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + expect(result).toBe(fileContent); + }); + + // ── Multiline with tabs + CJK (combined Bug #1 + #2) ── + + test("finds multiline match with tabs and CJK characters", () => { + const fileContent = "\tif(effective_dir == BREAKOUT_UP)\n\t\t{\n\t\t\t// 向上突破\n\t\t}"; + const searchSpaces = " if(effective_dir == BREAKOUT_UP)\n {\n // 向上突破\n }"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + expect(result).toBe(fileContent); + }); + + // ── Returned string must be a valid substring of fileContent ── + + test("returned string from tab match is a real substring of fileContent", () => { + const fileContent = "prefix\n\t\tindented code\nsuffix"; + const searchSpaces = "prefix\n indented code\nsuffix"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + expect(fileContent.includes(result!)).toBe(true); + }); + + test("returned string from partial tab match is a real substring", () => { + const fileContent = "line1\n\tif (x) {\n\t\tdoStuff();\n\t}\nline5"; + const searchSpaces = " if (x) {\n doStuff();\n }"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + expect(fileContent.includes(result!)).toBe(true); + }); + + test("tab match with mixed indentation levels", () => { + const fileContent = "class Foo {\n\t\tmethod1() {\n\t\t\treturn 42;\n\t\t}\n}"; + const searchSpaces = "class Foo {\n method1() {\n return 42;\n }\n}"; + const result = findActualString(fileContent, searchSpaces); + expect(result).not.toBeNull(); + expect(fileContent.includes(result!)).toBe(true); + }); }); // ─── preserveQuoteStyle ───────────────────────────────────────────────── diff --git a/packages/builtin-tools/src/tools/FileEditTool/utils.ts b/packages/builtin-tools/src/tools/FileEditTool/utils.ts index 
/**
 * Normalizes whitespace for fuzzy matching by expanding every tab character
 * into a fixed run of four spaces.
 *
 * This covers the case where tab-indented file content is rendered with
 * spaces and the rendered text is then used as a search string.
 *
 * All tabs are expanded, not only those in leading indentation, and no other
 * whitespace is modified. Offsets found in the result are mapped back to the
 * original text by mapNormalizedMatchBackToFile, which relies on every tab
 * growing by the same fixed amount.
 *
 * @param str The text to normalize
 * @returns `str` with each tab replaced by four spaces
 */
function normalizeWhitespace(str: string): string {
  // Built with repeat() so the width is explicit and survives any
  // whitespace-collapsing of this source file.
  const TAB_WIDTH = 4
  return str.replace(/\t/g, ' '.repeat(TAB_WIDTH))
}
/**
 * Given a match found in a tab-expanded ("normalized") version of
 * fileContent, maps the match back to the original fileContent and returns
 * the corresponding original substring.
 *
 * Strategy: walk the original text once, tracking the normalized offset each
 * original character occupies. A tab occupies `tabWidth` normalized columns;
 * every other character occupies one. The result is the run of original
 * characters whose normalized span overlaps the matched range, so a match
 * that begins or ends inside an expanded tab includes that whole tab.
 *
 * The tab width is derived from the two input lengths rather than hard-coded,
 * so this stays correct whatever width the normalizer used. This assumes
 * normalizedFile differs in length from fileContent only through tab
 * expansion; if the lengths do not yield a whole width, 4 is assumed.
 *
 * @param fileContent The original, un-normalized file content
 * @param normalizedFile The normalized content the match was found in
 * @param normalizedStart Offset of the match within normalizedFile
 * @param normalizedLength Length of the match within normalizedFile
 * @returns The substring of fileContent covering the match, or an empty
 *   string when the range is empty or lies past the end of the content
 */
function mapNormalizedMatchBackToFile(
  fileContent: string,
  normalizedFile: string,
  normalizedStart: number,
  normalizedLength: number,
): string {
  if (normalizedLength <= 0) {
    return ''
  }

  const FALLBACK_TAB_WIDTH = 4

  let tabCount = 0
  for (let i = 0; i < fileContent.length; i++) {
    if (fileContent[i] === '\t') tabCount++
  }

  // Each tab contributes (width - 1) extra characters to the normalized text.
  const derivedWidth =
    tabCount === 0
      ? 1
      : 1 + (normalizedFile.length - fileContent.length) / tabCount
  const tabWidth =
    Number.isInteger(derivedWidth) && derivedWidth >= 1
      ? derivedWidth
      : FALLBACK_TAB_WIDTH

  const normalizedEnd = normalizedStart + normalizedLength
  let normPos = 0
  let origStart = -1
  let origEnd = -1

  // Stop once the next character would begin at or after the match end.
  // Characters are only ever visited while their span starts before it, so a
  // match that runs to the very end of the file is handled like any other.
  for (
    let origPos = 0;
    origPos < fileContent.length && normPos < normalizedEnd;
    origPos++
  ) {
    const nextNormPos =
      normPos + (fileContent[origPos] === '\t' ? tabWidth : 1)

    // First character whose span reaches past the match start begins the range.
    if (origStart === -1 && nextNormPos > normalizedStart) {
      origStart = origPos
    }
    // Every visited character from there on overlaps the match.
    if (origStart !== -1) {
      origEnd = origPos + 1
    }
    normPos = nextNormPos
  }

  if (origStart === -1) {
    // The requested range starts beyond the end of the content.
    return ''
  }
  return fileContent.substring(origStart, origEnd)
}