feat: 添加 GBK 编码自动检测支持,文件读写工具透明处理非 UTF-8 文件

新增 encoding.ts 核心模块实现三层编码检测(BOM → UTF-8 fatal → GBK 回退),
改造同步/异步读取路径和写入路径,使 FileReadTool/FileEditTool/FileWriteTool
能正确处理 GBK 编码文件。包含完整单元测试和 spec 文档。

Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-05-10 20:50:12 +08:00
parent 6e1d3d8f47
commit 0ce8f7a1cb
22 changed files with 1728 additions and 121 deletions

View File

@@ -22,6 +22,7 @@ import {
detectLineEndingsForString,
type LineEndingType,
} from './fileRead.js'
import { type FileEncoding, decodeBuffer, encodeString } from './encoding.js'
import { fileReadCache } from './fileReadCache.js'
import { getFsImplementation, safeResolvePath } from './fsOperations.js'
import { logError } from './log.js'
@@ -84,7 +85,7 @@ export async function getFileModificationTimeAsync(
export function writeTextContent(
filePath: string,
content: string,
encoding: BufferEncoding,
encoding: FileEncoding,
endings: LineEndingType,
): void {
let toWrite = content
@@ -94,10 +95,38 @@ export function writeTextContent(
toWrite = content.replaceAll('\r\n', '\n').split('\n').join('\r\n')
}
writeFileSyncAndFlush_DEPRECATED(filePath, toWrite, { encoding })
// Check if encoding is directly supported by Node.js fs
const BUFFER_ENCODINGS = new Set<string>([
'utf8',
'utf-8',
'utf16le',
'ucs2',
'ucs-2',
'ascii',
'latin1',
'binary',
'base64',
'hex',
])
if (BUFFER_ENCODINGS.has(encoding)) {
writeFileSyncAndFlush_DEPRECATED(filePath, toWrite, {
encoding: encoding as BufferEncoding,
})
} else {
// Non-BufferEncoding (e.g. gbk): use encodeString to get Buffer
const { buffer, converted } = encodeString(toWrite, encoding)
writeFileSyncAndFlush_DEPRECATED(filePath, buffer, { buffer })
if (converted) {
logForDebugging(
`writeTextContent: encoding '${encoding}' unsupported for write, fell back to UTF-8 for ${filePath}`,
{ level: 'warn' },
)
}
}
}
export function detectFileEncoding(filePath: string): BufferEncoding {
export function detectFileEncoding(filePath: string): FileEncoding {
try {
const fs = getFsImplementation()
const { resolvedPath } = safeResolvePath(fs, filePath)
@@ -119,14 +148,14 @@ export function detectFileEncoding(filePath: string): BufferEncoding {
export function detectLineEndings(
filePath: string,
encoding: BufferEncoding = 'utf8',
encoding: FileEncoding = 'utf8',
): LineEndingType {
try {
const fs = getFsImplementation()
const { resolvedPath } = safeResolvePath(fs, filePath)
const { buffer, bytesRead } = fs.readSync(resolvedPath, { length: 4096 })
const content = buffer.toString(encoding, 0, bytesRead)
const content = decodeBuffer(buffer.subarray(0, bytesRead), encoding)
return detectLineEndingsForString(content)
} catch (error) {
logError(error)
@@ -361,8 +390,10 @@ export function readFileSyncCached(filePath: string): string {
*/
export function writeFileSyncAndFlush_DEPRECATED(
filePath: string,
content: string,
options: { encoding: BufferEncoding; mode?: number } = { encoding: 'utf-8' },
content: string | Buffer,
options: { encoding?: BufferEncoding; mode?: number; buffer?: Buffer } = {
encoding: 'utf-8',
},
): void {
const fs = getFsImplementation()
@@ -403,26 +434,30 @@ export function writeFileSyncAndFlush_DEPRECATED(
}
}
// Determine write mode before try/catch so both paths can use it
const isBufferWrite = Buffer.isBuffer(content) || options.buffer !== undefined
const writeData = options.buffer ?? content
try {
logForDebugging(`Writing to temp file: ${tempPath}`)
// Write to temp file with flush and mode (if specified for new file)
const writeOptions: {
encoding: BufferEncoding
encoding?: BufferEncoding
flush: boolean
mode?: number
} = {
encoding: options.encoding,
flush: true,
...(isBufferWrite ? {} : { encoding: options.encoding ?? 'utf-8' }),
}
// Only set mode in writeFileSync for new files to ensure atomic permission setting
if (!targetExists && options.mode !== undefined) {
writeOptions.mode = options.mode
}
fsWriteFileSync(tempPath, content, writeOptions)
fsWriteFileSync(tempPath, writeData, writeOptions)
logForDebugging(
`Temp file written successfully, size: ${content.length} bytes`,
`Temp file written successfully, size: ${typeof writeData === 'string' ? writeData.length : writeData.byteLength} bytes`,
)
// For existing files or if mode was not set atomically, apply permissions
@@ -454,19 +489,19 @@ export function writeFileSyncAndFlush_DEPRECATED(
logForDebugging(`Falling back to non-atomic write for ${targetPath}`)
try {
const fallbackOptions: {
encoding: BufferEncoding
encoding?: BufferEncoding
flush: boolean
mode?: number
} = {
encoding: options.encoding,
flush: true,
...(isBufferWrite ? {} : { encoding: options.encoding ?? 'utf-8' }),
}
// Only set mode for new files
if (!targetExists && options.mode !== undefined) {
fallbackOptions.mode = options.mode
}
fsWriteFileSync(targetPath, content, fallbackOptions)
fsWriteFileSync(targetPath, writeData, fallbackOptions)
logForDebugging(
`File ${targetPath} written successfully with non-atomic fallback`,
)