mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-18 06:15:51 +00:00
fix: 修复非 UTF-8 编码文件读写 round-trip 字节损坏
GBK 文件编辑后被错误写入为 UTF-8(Buffer.from 不支持 gbk 编码,
encodeString 静默 fallback),latin1/ANSI 文件 0x80-0x9F 范围字节因
TextDecoder('latin1')(按 WHATWG 编码规范实际以 windows-1252 解码)与 Buffer.from('latin1')(严格 ISO-8859-1)编解码不对称而被篡改。
修复:latin1 解码改用严格 ISO-8859-1 映射保证与 Buffer.from 对称;
GBK 编码通过 TextDecoder 反向构建查找表实现零依赖编码器。
Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
This commit is contained in:
@@ -70,6 +70,17 @@ describe('decodeBuffer', () => {
|
||||
const buf = Buffer.alloc(0)
|
||||
expect(decodeBuffer(buf, 'utf-8')).toBe('')
|
||||
})
|
||||
|
||||
test('decodes latin1 using strict ISO-8859-1 mapping', () => {
|
||||
// 0x80 should decode to U+0080 (control char), NOT € (U+20AC)
|
||||
const buf = Buffer.from([0x80, 0x85, 0x9c, 0xa0, 0xff])
|
||||
const decoded = decodeBuffer(buf, 'latin1')
|
||||
expect(decoded.charCodeAt(0)).toBe(0x80)
|
||||
expect(decoded.charCodeAt(1)).toBe(0x85)
|
||||
expect(decoded.charCodeAt(2)).toBe(0x9c)
|
||||
expect(decoded.charCodeAt(3)).toBe(0xa0)
|
||||
expect(decoded.charCodeAt(4)).toBe(0xff)
|
||||
})
|
||||
})
|
||||
|
||||
describe('encodeString', () => {
|
||||
@@ -91,12 +102,71 @@ describe('encodeString', () => {
|
||||
expect(decodeBuffer(buffer, 'utf-16le')).toBe('Hello')
|
||||
})
|
||||
|
||||
test('handles GBK encoding (may convert)', () => {
|
||||
test('encodes GBK string correctly', () => {
|
||||
const { buffer, converted } = encodeString('你好', 'gbk')
|
||||
expect(buffer).toBeInstanceOf(Buffer)
|
||||
expect(typeof converted).toBe('boolean')
|
||||
if (!converted) {
|
||||
expect(decodeBuffer(buffer, 'gbk')).toBe('你好')
|
||||
}
|
||||
expect(converted).toBe(false)
|
||||
expect(buffer.toString('hex')).toBe('c4e3bac3')
|
||||
})
|
||||
|
||||
test('GBK round-trip preserves bytes', () => {
|
||||
// "测试文件" in GBK
|
||||
const original = Buffer.from([
|
||||
0xb2, 0xe2, 0xca, 0xd4, 0xce, 0xc4, 0xbc, 0xfe,
|
||||
])
|
||||
const decoded = decodeBuffer(original, 'gbk')
|
||||
const { buffer } = encodeString(decoded, 'gbk')
|
||||
expect(buffer.equals(original)).toBe(true)
|
||||
})
|
||||
|
||||
test('GBK encoding handles mixed ASCII and CJK', () => {
|
||||
// "Hello你好" in GBK: 48 65 6c 6c 6f c4 e3 ba c3
|
||||
const { buffer, converted } = encodeString('Hello你好', 'gbk')
|
||||
expect(converted).toBe(false)
|
||||
expect(buffer.toString('hex')).toBe('48656c6c6fc4e3bac3')
|
||||
})
|
||||
|
||||
test('latin1 round-trip preserves all byte values', () => {
|
||||
// Sample bytes across the 0x80-0xFF range (including 0x80-0x9F, which previously broke)
|
||||
const bytes = Buffer.from([
|
||||
0x80, 0x81, 0x85, 0x8c, 0x9c, 0xa0, 0xc0, 0xe9, 0xf6, 0xfc, 0xff,
|
||||
])
|
||||
const decoded = decodeBuffer(bytes, 'latin1')
|
||||
const { buffer } = encodeString(decoded, 'latin1')
|
||||
expect(buffer.equals(bytes)).toBe(true)
|
||||
})
|
||||
|
||||
test('latin1 encoding does not set converted flag', () => {
|
||||
const { buffer, converted } = encodeString('test\x80\x90', 'latin1')
|
||||
expect(converted).toBe(false)
|
||||
expect(buffer.toString('hex')).toBe('746573748090')
|
||||
})
|
||||
})
|
||||
|
||||
describe('round-trip consistency', () => {
|
||||
test('GBK file survives full read-decode-encode cycle', () => {
|
||||
const original = Buffer.from([0xc4, 0xe3, 0xba, 0xc3, 0x0d, 0x0a])
|
||||
const enc = detectEncoding(original)
|
||||
expect(enc).toBe('gbk')
|
||||
const decoded = decodeBuffer(original, enc)
|
||||
const { buffer } = encodeString(decoded, enc)
|
||||
expect(buffer.equals(original)).toBe(true)
|
||||
})
|
||||
|
||||
test('latin1 file survives full read-decode-encode cycle', () => {
|
||||
const original = Buffer.from([0x80, 0x90, 0xa0, 0xff, 0x41, 0x42])
|
||||
const enc = detectEncoding(original)
|
||||
expect(enc).toBe('latin1')
|
||||
const decoded = decodeBuffer(original, enc)
|
||||
const { buffer } = encodeString(decoded, enc)
|
||||
expect(buffer.equals(original)).toBe(true)
|
||||
})
|
||||
|
||||
test('UTF-8 file survives full read-decode-encode cycle', () => {
|
||||
const original = Buffer.from('Hello 世界', 'utf-8')
|
||||
const enc = detectEncoding(original)
|
||||
expect(enc).toBe('utf-8')
|
||||
const decoded = decodeBuffer(original, enc)
|
||||
const { buffer } = encodeString(decoded, enc)
|
||||
expect(buffer.equals(original)).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user