mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-18 14:25:51 +00:00
feat: 添加 GBK 编码自动检测支持,文件读写工具透明处理非 UTF-8 文件
新增 encoding.ts 核心模块实现三层编码检测(BOM → UTF-8 fatal → GBK 回退), 改造同步/异步读取路径和写入路径,使 FileReadTool/FileEditTool/FileWriteTool 能正确处理 GBK 编码文件。包含完整单元测试和 spec 文档。 Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
This commit is contained in:
@@ -13,39 +13,24 @@
|
||||
*/
|
||||
|
||||
import { logForDebugging } from './debug.js'
|
||||
import { type FileEncoding, decodeBuffer, detectEncoding } from './encoding.js'
|
||||
import { getFsImplementation, safeResolvePath } from './fsOperations.js'
|
||||
|
||||
export type LineEndingType = 'CRLF' | 'LF'
|
||||
|
||||
export function detectEncodingForResolvedPath(
|
||||
resolvedPath: string,
|
||||
): BufferEncoding {
|
||||
): FileEncoding {
|
||||
const { buffer, bytesRead } = getFsImplementation().readSync(resolvedPath, {
|
||||
length: 4096,
|
||||
})
|
||||
|
||||
// Empty files should default to utf8, not ascii
|
||||
// This fixes a bug where writing emojis/CJK to empty files caused corruption
|
||||
// Empty files default to utf8 — nothing to detect
|
||||
if (bytesRead === 0) {
|
||||
return 'utf8'
|
||||
}
|
||||
|
||||
if (bytesRead >= 2) {
|
||||
if (buffer[0] === 0xff && buffer[1] === 0xfe) return 'utf16le'
|
||||
}
|
||||
|
||||
if (
|
||||
bytesRead >= 3 &&
|
||||
buffer[0] === 0xef &&
|
||||
buffer[1] === 0xbb &&
|
||||
buffer[2] === 0xbf
|
||||
) {
|
||||
return 'utf8'
|
||||
}
|
||||
|
||||
// For non-empty files, default to utf8 since it's a superset of ascii
|
||||
// and handles all Unicode characters properly
|
||||
return 'utf8'
|
||||
return detectEncoding(buffer.subarray(0, bytesRead))
|
||||
}
|
||||
|
||||
export function detectLineEndingsForString(content: string): LineEndingType {
|
||||
@@ -74,7 +59,7 @@ export function detectLineEndingsForString(content: string): LineEndingType {
|
||||
*/
|
||||
export function readFileSyncWithMetadata(filePath: string): {
|
||||
content: string
|
||||
encoding: BufferEncoding
|
||||
encoding: FileEncoding
|
||||
lineEndings: LineEndingType
|
||||
} {
|
||||
const fs = getFsImplementation()
|
||||
@@ -85,10 +70,10 @@ export function readFileSyncWithMetadata(filePath: string): {
|
||||
}
|
||||
|
||||
const encoding = detectEncodingForResolvedPath(resolvedPath)
|
||||
const raw = fs.readFileSync(resolvedPath, { encoding })
|
||||
// Detect line endings from the raw head before CRLF normalization erases
|
||||
// the distinction. 4096 code units is ≥ detectLineEndings's 4096-byte
|
||||
// readSync sample (line endings are ASCII, so the unit mismatch is moot).
|
||||
// Read raw Buffer first — readFileSync encoding option only accepts
|
||||
// BufferEncoding, not gbk etc.
|
||||
const rawBuffer = fs.readFileBytesSync(resolvedPath)
|
||||
const raw = decodeBuffer(rawBuffer, encoding)
|
||||
const lineEndings = detectLineEndingsForString(raw.slice(0, 4096))
|
||||
return {
|
||||
content: raw.replaceAll('\r\n', '\n'),
|
||||
|
||||
Reference in New Issue
Block a user