Revert "feat: 添加 GBK 编码自动检测支持,文件读写工具透明处理非 UTF-8 文件"

This reverts commit 0ce8f7a1cb.
This commit is contained in:
claude-code-best
2026-05-10 22:57:30 +08:00
parent 43c20a43c2
commit aaabf0c168
22 changed files with 120 additions and 1727 deletions

View File

@@ -13,24 +13,39 @@
*/
import { logForDebugging } from './debug.js'
import { type FileEncoding, decodeBuffer, detectEncoding } from './encoding.js'
import { getFsImplementation, safeResolvePath } from './fsOperations.js'
export type LineEndingType = 'CRLF' | 'LF'
export function detectEncodingForResolvedPath(
resolvedPath: string,
): FileEncoding {
): BufferEncoding {
const { buffer, bytesRead } = getFsImplementation().readSync(resolvedPath, {
length: 4096,
})
// Empty files default to utf8 nothing to detect
// Empty files should default to utf8, not ascii
// This fixes a bug where writing emojis/CJK to empty files caused corruption
if (bytesRead === 0) {
return 'utf8'
}
return detectEncoding(buffer.subarray(0, bytesRead))
if (bytesRead >= 2) {
if (buffer[0] === 0xff && buffer[1] === 0xfe) return 'utf16le'
}
if (
bytesRead >= 3 &&
buffer[0] === 0xef &&
buffer[1] === 0xbb &&
buffer[2] === 0xbf
) {
return 'utf8'
}
// For non-empty files, default to utf8 since it's a superset of ascii
// and handles all Unicode characters properly
return 'utf8'
}
export function detectLineEndingsForString(content: string): LineEndingType {
@@ -59,7 +74,7 @@ export function detectLineEndingsForString(content: string): LineEndingType {
*/
export function readFileSyncWithMetadata(filePath: string): {
content: string
encoding: FileEncoding
encoding: BufferEncoding
lineEndings: LineEndingType
} {
const fs = getFsImplementation()
@@ -70,10 +85,10 @@ export function readFileSyncWithMetadata(filePath: string): {
}
const encoding = detectEncodingForResolvedPath(resolvedPath)
// Read raw Buffer first — readFileSync encoding option only accepts
// BufferEncoding, not gbk etc.
const rawBuffer = fs.readFileBytesSync(resolvedPath)
const raw = decodeBuffer(rawBuffer, encoding)
const raw = fs.readFileSync(resolvedPath, { encoding })
// Detect line endings from the raw head before CRLF normalization erases
// the distinction. 4096 code units is ≥ detectLineEndings's 4096-byte
// readSync sample (line endings are ASCII, so the unit mismatch is moot).
const lineEndings = detectLineEndingsForString(raw.slice(0, 4096))
return {
content: raw.replaceAll('\r\n', '\n'),