Revert "feat: 添加 GBK 编码自动检测支持，文件读写工具透明处理非 UTF-8 文件"

This reverts commit 0ce8f7a1cb.
2026-06-18 14:25:51 +00:00 · 2026-05-10 22:57:30 +08:00
parent 43c20a43c2
commit aaabf0c168
22 changed files with 120 additions and 1727 deletions
--- a/src/utils/tests/encoding.test.ts
+++ b/src/utils/tests/encoding.test.ts
@@ -1,102 +0,0 @@
-import { describe, test, expect } from 'bun:test'
-import {
-  detectEncoding,
-  decodeBuffer,
-  encodeString,
-  type FileEncoding,
-  type DetectedEncoding,
-} from '../encoding'
-
-describe('detectEncoding', () => {
-  test('detects UTF-16LE BOM', () => {
-    const buf = Buffer.from([0xff, 0xfe, 0x48, 0x00])
-    expect(detectEncoding(buf)).toBe('utf-16le')
-  })
-
-  test('detects UTF-8 BOM', () => {
-    const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x48, 0x65])
-    expect(detectEncoding(buf)).toBe('utf-8')
-  })
-
-  test('detects valid UTF-8 without BOM', () => {
-    const buf = Buffer.from('Hello, 世界', 'utf-8')
-    expect(detectEncoding(buf)).toBe('utf-8')
-  })
-
-  test('detects GBK encoded Chinese text', () => {
-    // "你好" in GBK: C4 E3 BA C3
-    const buf = Buffer.from([0xc4, 0xe3, 0xba, 0xc3])
-    expect(detectEncoding(buf)).toBe('gbk')
-  })
-
-  test('returns utf-8 for empty buffer', () => {
-    const buf = Buffer.alloc(0)
-    expect(detectEncoding(buf)).toBe('utf-8')
-  })
-
-  test('falls back to latin1 for random bytes', () => {
-    // Random bytes that aren't valid UTF-8 or GBK
-    const buf = Buffer.from([0x80, 0x81, 0x82, 0x83, 0x84, 0x85])
-    expect(detectEncoding(buf)).toBe('latin1')
-  })
-
-  test('prioritizes BOM over content analysis', () => {
-    // UTF-8 BOM followed by bytes that could be confused
-    const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0x6f])
-    expect(detectEncoding(buf)).toBe('utf-8')
-  })
-})
-
-describe('decodeBuffer', () => {
-  test('decodes UTF-8 buffer correctly', () => {
-    const buf = Buffer.from('Hello, 世界', 'utf-8')
-    expect(decodeBuffer(buf, 'utf-8')).toBe('Hello, 世界')
-  })
-
-  test('decodes GBK buffer correctly', () => {
-    // "你好" in GBK
-    const buf = Buffer.from([0xc4, 0xe3, 0xba, 0xc3])
-    expect(decodeBuffer(buf, 'gbk')).toBe('你好')
-  })
-
-  test('decodes UTF-16LE buffer correctly', () => {
-    const buf = Buffer.from([
-      0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00,
-    ])
-    expect(decodeBuffer(buf, 'utf-16le')).toBe('Hello')
-  })
-
-  test('decodes empty buffer', () => {
-    const buf = Buffer.alloc(0)
-    expect(decodeBuffer(buf, 'utf-8')).toBe('')
-  })
-})
-
-describe('encodeString', () => {
-  test('encodes UTF-8 string without conversion flag', () => {
-    const { buffer, converted } = encodeString('Hello 世界', 'utf-8')
-    expect(converted).toBe(false)
-    expect(buffer.toString('utf-8')).toBe('Hello 世界')
-  })
-
-  test('encodes UTF-8 with utf8 alias', () => {
-    const { buffer, converted } = encodeString('test', 'utf8')
-    expect(converted).toBe(false)
-    expect(buffer.toString('utf-8')).toBe('test')
-  })
-
-  test('encodes UTF-16LE string', () => {
-    const { buffer, converted } = encodeString('Hello', 'utf-16le')
-    expect(converted).toBe(false)
-    expect(decodeBuffer(buffer, 'utf-16le')).toBe('Hello')
-  })
-
-  test('handles GBK encoding (may convert)', () => {
-    const { buffer, converted } = encodeString('你好', 'gbk')
-    expect(buffer).toBeInstanceOf(Buffer)
-    expect(typeof converted).toBe('boolean')
-    if (!converted) {
-      expect(decodeBuffer(buffer, 'gbk')).toBe('你好')
-    }
-  })
-})
--- a/src/utils/tests/file.test.ts
+++ b/src/utils/tests/file.test.ts
@@ -1,19 +1,10 @@
-import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
-import * as fs from 'fs'
-import * as path from 'path'
-import { logMock } from '../../../tests/mocks/log'
-import { debugMock } from '../../../tests/mocks/debug'
-
-mock.module('src/utils/log.ts', logMock)
-mock.module('src/utils/debug.ts', debugMock)
-
+import { describe, expect, test } from 'bun:test'
 import {
  convertLeadingTabsToSpaces,
  addLineNumbers,
  stripLineNumberPrefix,
  pathsEqual,
  normalizePathForComparison,
-  writeTextContent,
 } from '../file'

 describe('convertLeadingTabsToSpaces', () => {
@@ -99,50 +90,3 @@ describe('pathsEqual', () => {
    expect(pathsEqual('/a/b', '/a/c')).toBe(false)
  })
 })
-
-describe('writeTextContent with multi-encoding', () => {
-  let tmpDir: string
-
-  beforeEach(() => {
-    tmpDir = fs.mkdtempSync(path.join('/tmp', 'writeTextContent-test-'))
-  })
-
-  afterEach(() => {
-    fs.rmSync(tmpDir, { recursive: true, force: true })
-  })
-
-  test('writes UTF-8 content correctly', () => {
-    const filePath = path.join(tmpDir, 'utf8.txt')
-    writeTextContent(filePath, 'Hello 世界', 'utf-8', 'LF')
-    const content = fs.readFileSync(filePath, 'utf-8')
-    expect(content).toBe('Hello 世界')
-  })
-
-  test('writes UTF-16LE content correctly', () => {
-    const filePath = path.join(tmpDir, 'utf16le.txt')
-    writeTextContent(filePath, 'Hello', 'utf-16le', 'LF')
-    const buf = fs.readFileSync(filePath)
-    // Should start with BOM (0xFF 0xFE) followed by UTF-16LE data
-    // Note: Bun's Buffer.from('Hello', 'utf-16le') doesn't add BOM
-    const text = buf.toString('utf-16le')
-    expect(text).toBe('Hello')
-  })
-
-  test('GBK write falls back to UTF-8', () => {
-    const filePath = path.join(tmpDir, 'gbk.txt')
-    writeTextContent(filePath, '测试写入', 'gbk', 'LF')
-    const content = fs.readFileSync(filePath, 'utf-8')
-    // Content should be readable (either GBK or UTF-8 fallback)
-    expect(content.length).toBeGreaterThan(0)
-  })
-
-  test('CRLF line endings with GBK encoding', () => {
-    const filePath = path.join(tmpDir, 'gbk-crlf.txt')
-    writeTextContent(filePath, 'line1\nline2', 'gbk', 'CRLF')
-    const buf = fs.readFileSync(filePath)
-    const content = buf.toString('utf-8')
-    // Should have CRLF line endings
-    expect(content).toContain('\r\n')
-    expect(content).not.toContain('\n\r')
-  })
-})
--- a/src/utils/tests/fileRead.test.ts
+++ b/src/utils/tests/fileRead.test.ts
@@ -1,107 +0,0 @@
-import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
-import * as fs from 'fs'
-import * as path from 'path'
-import { logMock } from '../../../tests/mocks/log'
-import { debugMock } from '../../../tests/mocks/debug'
-
-mock.module('src/utils/log.ts', logMock)
-mock.module('src/utils/debug.ts', debugMock)
-
-import {
-  readFileSyncWithMetadata,
-  detectEncodingForResolvedPath,
-} from '../fileRead'
-
-describe('readFileSyncWithMetadata', () => {
-  let tmpDir: string
-
-  beforeEach(() => {
-    tmpDir = fs.mkdtempSync(path.join('/tmp', 'fileRead-test-'))
-  })
-
-  afterEach(() => {
-    fs.rmSync(tmpDir, { recursive: true, force: true })
-  })
-
-  test('reads UTF-8 file correctly', () => {
-    const filePath = path.join(tmpDir, 'utf8.txt')
-    fs.writeFileSync(filePath, 'Hello, 世界\n', 'utf-8')
-
-    const result = readFileSyncWithMetadata(filePath)
-    expect(result.encoding).toBe('utf-8')
-    expect(result.content).toBe('Hello, 世界\n')
-    expect(result.lineEndings).toBe('LF')
-  })
-
-  test('reads GBK encoded file correctly', () => {
-    const filePath = path.join(tmpDir, 'gbk.txt')
-    // "你好世界" in GBK encoding
-    const gbkBytes = Buffer.from([
-      0xc4, 0xe3, 0xba, 0xc3, 0xca, 0xc0, 0xbd, 0xe7,
-    ])
-    fs.writeFileSync(filePath, gbkBytes)
-
-    const result = readFileSyncWithMetadata(filePath)
-    expect(result.encoding).toBe('gbk')
-    expect(result.content).toBe('你好世界')
-  })
-
-  test('reads empty file with utf8 encoding', () => {
-    const filePath = path.join(tmpDir, 'empty.txt')
-    fs.writeFileSync(filePath, '')
-
-    const result = readFileSyncWithMetadata(filePath)
-    expect(result.encoding).toBe('utf8')
-    expect(result.content).toBe('')
-  })
-
-  test('reads UTF-16LE BOM file correctly', () => {
-    const filePath = path.join(tmpDir, 'utf16le.txt')
-    // BOM + "Hello" in UTF-16LE
-    const bom = Buffer.from([0xff, 0xfe])
-    const content = Buffer.from('Hello', 'utf-16le')
-    fs.writeFileSync(filePath, Buffer.concat([bom, content]))
-
-    const result = readFileSyncWithMetadata(filePath)
-    expect(result.encoding).toBe('utf-16le')
-    expect(result.content).toBe('Hello')
-  })
-
-  test('normalizes CRLF to LF', () => {
-    const filePath = path.join(tmpDir, 'crlf.txt')
-    fs.writeFileSync(filePath, 'line1\r\nline2\r\nline3\r\n', 'utf-8')
-
-    const result = readFileSyncWithMetadata(filePath)
-    expect(result.content).toBe('line1\nline2\nline3\n')
-    expect(result.lineEndings).toBe('CRLF')
-  })
-})
-
-describe('detectEncodingForResolvedPath', () => {
-  let tmpDir: string
-
-  beforeEach(() => {
-    tmpDir = fs.mkdtempSync(path.join('/tmp', 'fileRead-detect-test-'))
-  })
-
-  afterEach(() => {
-    fs.rmSync(tmpDir, { recursive: true, force: true })
-  })
-
-  test('returns utf8 for empty file', () => {
-    const filePath = path.join(tmpDir, 'empty.txt')
-    fs.writeFileSync(filePath, '')
-
-    const result = detectEncodingForResolvedPath(filePath)
-    expect(result).toBe('utf8')
-  })
-
-  test('detects GBK encoding from file', () => {
-    const filePath = path.join(tmpDir, 'gbk.txt')
-    const gbkBytes = Buffer.from([0xc4, 0xe3, 0xba, 0xc3])
-    fs.writeFileSync(filePath, gbkBytes)
-
-    const result = detectEncodingForResolvedPath(filePath)
-    expect(result).toBe('gbk')
-  })
-})
--- a/src/utils/tests/readFileInRange.test.ts
+++ b/src/utils/tests/readFileInRange.test.ts
@@ -1,87 +0,0 @@
-import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
-import * as fs from 'fs'
-import * as path from 'path'
-import { readFileInRange } from '../readFileInRange'
-
-describe('readFileInRange', () => {
-  let tmpDir: string
-
-  beforeEach(() => {
-    tmpDir = fs.mkdtempSync(path.join('/tmp', 'readFileInRange-test-'))
-  })
-
-  afterEach(() => {
-    fs.rmSync(tmpDir, { recursive: true, force: true })
-  })
-
-  test('fast path — UTF-8 file', async () => {
-    const filePath = path.join(tmpDir, 'utf8.txt')
-    fs.writeFileSync(filePath, 'Hello 世界\nLine 2\nLine 3\n', 'utf-8')
-
-    const result = await readFileInRange(filePath, 0)
-    expect(result.content).toBe('Hello 世界\nLine 2\nLine 3\n')
-    expect(result.lineCount).toBe(4)
-    expect(result.totalLines).toBe(4)
-  })
-
-  test('fast path — GBK file', async () => {
-    const filePath = path.join(tmpDir, 'gbk.txt')
-    // "你好世界" in GBK + newline
-    const gbkBytes = Buffer.from([
-      0xc4, 0xe3, 0xba, 0xc3, 0xca, 0xc0, 0xbd, 0xe7, 0x0a,
-    ])
-    fs.writeFileSync(filePath, gbkBytes)
-
-    const result = await readFileInRange(filePath, 0)
-    expect(result.content).toBe('你好世界\n')
-    expect(result.totalBytes).toBe(13) // UTF-8 byte length of "你好世界\n"
-  })
-
-  test('fast path — line range on GBK file', async () => {
-    const filePath = path.join(tmpDir, 'gbk-lines.txt')
-    // Three lines in GBK: "第一行\n第二行\n第三行\n"
-    const line1 = Buffer.from([0xb5, 0xda, 0xd2, 0xbb, 0xd0, 0xd0]) // 第一行
-    const line2 = Buffer.from([0xb5, 0xda, 0xb6, 0xfe, 0xd0, 0xd0]) // 第二行
-    const line3 = Buffer.from([0xb5, 0xda, 0xc8, 0xfd, 0xd0, 0xd0]) // 第三行
-    const content = Buffer.concat([
-      line1,
-      Buffer.from([0x0a]),
-      line2,
-      Buffer.from([0x0a]),
-      line3,
-      Buffer.from([0x0a]),
-    ])
-    fs.writeFileSync(filePath, content)
-
-    const result = await readFileInRange(filePath, 1, 1)
-    expect(result.content).toBe('第二行')
-  })
-
-  test('BOM stripping', async () => {
-    const filePath = path.join(tmpDir, 'bom.txt')
-    const bom = Buffer.from([0xef, 0xbb, 0xbf])
-    fs.writeFileSync(filePath, Buffer.concat([bom, Buffer.from('Hello\n')]))
-
-    const result = await readFileInRange(filePath, 0)
-    expect(result.content).toBe('Hello\n')
-  })
-
-  test('empty file', async () => {
-    const filePath = path.join(tmpDir, 'empty.txt')
-    fs.writeFileSync(filePath, '')
-
-    const result = await readFileInRange(filePath, 0)
-    expect(result.content).toBe('')
-    expect(result.totalLines).toBe(1)
-    expect(result.totalBytes).toBe(0)
-  })
-
-  test('fast path — offset and maxLines', async () => {
-    const filePath = path.join(tmpDir, 'lines.txt')
-    fs.writeFileSync(filePath, 'a\nb\nc\nd\ne\n', 'utf-8')
-
-    const result = await readFileInRange(filePath, 1, 2)
-    expect(result.content).toBe('b\nc')
-    expect(result.lineCount).toBe(2)
-  })
-})
--- a/src/utils/encoding.ts
+++ b/src/utils/encoding.ts
@@ -1,90 +0,0 @@
-/**
- * Encoding detection and conversion utilities for file I/O.
- *
- * Provides three-layer encoding detection (BOM → UTF-8 fatal → GBK fallback)
- * and Buffer/string conversion functions. Zero external dependencies — uses only
- * TextDecoder/TextEncoder APIs available in Bun/Node.js.
- */
-
-/** Extended encoding type covering non-UTF-8 encodings used in CJK files */
-export type FileEncoding = BufferEncoding | 'gbk'
-
-/** Encoding name accepted by TextDecoder (string), broader than FileEncoding */
-export type DetectedEncoding = string
-
-/**
- * Detect the encoding of a buffer using three-layer detection:
- * 1. BOM (Byte Order Mark) detection
- * 2. UTF-8 fatal validation
- * 3. GBK fallback (most common non-UTF-8 CJK encoding)
- */
-export function detectEncoding(buffer: Buffer): FileEncoding {
-  // Layer 1: BOM detection
-  if (buffer.length >= 2 && buffer[0] === 0xff && buffer[1] === 0xfe) {
-    return 'utf-16le'
-  }
-  if (
-    buffer.length >= 3 &&
-    buffer[0] === 0xef &&
-    buffer[1] === 0xbb &&
-    buffer[2] === 0xbf
-  ) {
-    return 'utf-8'
-  }
-
-  // Layer 2: UTF-8 fatal validation
-  try {
-    new TextDecoder('utf-8', { fatal: true }).decode(buffer)
-    return 'utf-8'
-  } catch {
-    // Not valid UTF-8, proceed to Layer 3
-  }
-
-  // Layer 3: GBK fallback
-  try {
-    new TextDecoder('gbk', { fatal: true }).decode(buffer)
-    return 'gbk'
-  } catch {
-    // Not valid GBK, fall back to latin1 (single-byte, always succeeds)
-    return 'latin1'
-  }
-}
-
-/**
- * Decode a buffer using the specified encoding.
- * Unified decoding entry point for all file read paths.
- */
-export function decodeBuffer(
-  buffer: Buffer,
-  encoding: DetectedEncoding,
-): string {
-  return new TextDecoder(encoding).decode(buffer)
-}
-
-/**
- * Encode a string to a Buffer using the specified encoding.
- * For non-standard encodings, falls back to UTF-8 if the runtime
- * doesn't support the encoding in Buffer.from.
- *
- * @returns buffer - the encoded bytes, converted - true if encoding was
- *   fallbacked to UTF-8 (caller should warn the user)
- */
-export function encodeString(
-  content: string,
-  encoding: DetectedEncoding,
-): { buffer: Buffer; converted: boolean } {
-  if (encoding === 'utf-8' || encoding === 'utf8') {
-    return { buffer: Buffer.from(content, 'utf-8'), converted: false }
-  }
-  if (encoding === 'utf-16le') {
-    return { buffer: Buffer.from(content, 'utf-16le'), converted: false }
-  }
-
-  // Other encodings (e.g. gbk): try Buffer.from, fall back to UTF-8
-  try {
-    const buf = Buffer.from(content, encoding as BufferEncoding)
-    return { buffer: buf, converted: false }
-  } catch {
-    return { buffer: Buffer.from(content, 'utf-8'), converted: true }
-  }
-}
--- a/src/utils/file.ts
+++ b/src/utils/file.ts
@@ -22,7 +22,6 @@ import {
  detectLineEndingsForString,
  type LineEndingType,
 } from './fileRead.js'
-import { type FileEncoding, decodeBuffer, encodeString } from './encoding.js'
 import { fileReadCache } from './fileReadCache.js'
 import { getFsImplementation, safeResolvePath } from './fsOperations.js'
 import { logError } from './log.js'
@@ -85,7 +84,7 @@ export async function getFileModificationTimeAsync(
 export function writeTextContent(
  filePath: string,
  content: string,
-  encoding: FileEncoding,
+  encoding: BufferEncoding,
  endings: LineEndingType,
 ): void {
  let toWrite = content
@@ -95,38 +94,10 @@ export function writeTextContent(
    toWrite = content.replaceAll('\r\n', '\n').split('\n').join('\r\n')
  }

-  // Check if encoding is directly supported by Node.js fs
-  const BUFFER_ENCODINGS = new Set<string>([
-    'utf8',
-    'utf-8',
-    'utf16le',
-    'ucs2',
-    'ucs-2',
-    'ascii',
-    'latin1',
-    'binary',
-    'base64',
-    'hex',
-  ])
-
-  if (BUFFER_ENCODINGS.has(encoding)) {
-    writeFileSyncAndFlush_DEPRECATED(filePath, toWrite, {
-      encoding: encoding as BufferEncoding,
-    })
-  } else {
-    // Non-BufferEncoding (e.g. gbk): use encodeString to get Buffer
-    const { buffer, converted } = encodeString(toWrite, encoding)
-    writeFileSyncAndFlush_DEPRECATED(filePath, buffer, { buffer })
-    if (converted) {
-      logForDebugging(
-        `writeTextContent: encoding '${encoding}' unsupported for write, fell back to UTF-8 for ${filePath}`,
-        { level: 'warn' },
-      )
-    }
-  }
+  writeFileSyncAndFlush_DEPRECATED(filePath, toWrite, { encoding })
 }

-export function detectFileEncoding(filePath: string): FileEncoding {
+export function detectFileEncoding(filePath: string): BufferEncoding {
  try {
    const fs = getFsImplementation()
    const { resolvedPath } = safeResolvePath(fs, filePath)
@@ -148,14 +119,14 @@ export function detectFileEncoding(filePath: string): FileEncoding {

 export function detectLineEndings(
  filePath: string,
-  encoding: FileEncoding = 'utf8',
+  encoding: BufferEncoding = 'utf8',
 ): LineEndingType {
  try {
    const fs = getFsImplementation()
    const { resolvedPath } = safeResolvePath(fs, filePath)
    const { buffer, bytesRead } = fs.readSync(resolvedPath, { length: 4096 })

-    const content = decodeBuffer(buffer.subarray(0, bytesRead), encoding)
+    const content = buffer.toString(encoding, 0, bytesRead)
    return detectLineEndingsForString(content)
  } catch (error) {
    logError(error)
@@ -390,10 +361,8 @@ export function readFileSyncCached(filePath: string): string {
 */
 export function writeFileSyncAndFlush_DEPRECATED(
  filePath: string,
-  content: string | Buffer,
-  options: { encoding?: BufferEncoding; mode?: number; buffer?: Buffer } = {
-    encoding: 'utf-8',
-  },
+  content: string,
+  options: { encoding: BufferEncoding; mode?: number } = { encoding: 'utf-8' },
 ): void {
  const fs = getFsImplementation()

@@ -434,30 +403,26 @@ export function writeFileSyncAndFlush_DEPRECATED(
    }
  }

-  // Determine write mode before try/catch so both paths can use it
-  const isBufferWrite = Buffer.isBuffer(content) || options.buffer !== undefined
-  const writeData = options.buffer ?? content
-
  try {
    logForDebugging(`Writing to temp file: ${tempPath}`)

    // Write to temp file with flush and mode (if specified for new file)
    const writeOptions: {
-      encoding?: BufferEncoding
+      encoding: BufferEncoding
      flush: boolean
      mode?: number
    } = {
+      encoding: options.encoding,
      flush: true,
-      ...(isBufferWrite ? {} : { encoding: options.encoding ?? 'utf-8' }),
    }
    // Only set mode in writeFileSync for new files to ensure atomic permission setting
    if (!targetExists && options.mode !== undefined) {
      writeOptions.mode = options.mode
    }

-    fsWriteFileSync(tempPath, writeData, writeOptions)
+    fsWriteFileSync(tempPath, content, writeOptions)
    logForDebugging(
-      `Temp file written successfully, size: ${typeof writeData === 'string' ? writeData.length : writeData.byteLength} bytes`,
+      `Temp file written successfully, size: ${content.length} bytes`,
    )

    // For existing files or if mode was not set atomically, apply permissions
@@ -489,19 +454,19 @@ export function writeFileSyncAndFlush_DEPRECATED(
    logForDebugging(`Falling back to non-atomic write for ${targetPath}`)
    try {
      const fallbackOptions: {
-        encoding?: BufferEncoding
+        encoding: BufferEncoding
        flush: boolean
        mode?: number
      } = {
+        encoding: options.encoding,
        flush: true,
-        ...(isBufferWrite ? {} : { encoding: options.encoding ?? 'utf-8' }),
      }
      // Only set mode for new files
      if (!targetExists && options.mode !== undefined) {
        fallbackOptions.mode = options.mode
      }

-      fsWriteFileSync(targetPath, writeData, fallbackOptions)
+      fsWriteFileSync(targetPath, content, fallbackOptions)
      logForDebugging(
        `File ${targetPath} written successfully with non-atomic fallback`,
      )
--- a/src/utils/fileRead.ts
+++ b/src/utils/fileRead.ts
@@ -13,24 +13,39 @@
 */

 import { logForDebugging } from './debug.js'
-import { type FileEncoding, decodeBuffer, detectEncoding } from './encoding.js'
 import { getFsImplementation, safeResolvePath } from './fsOperations.js'

 export type LineEndingType = 'CRLF' | 'LF'

 export function detectEncodingForResolvedPath(
  resolvedPath: string,
-): FileEncoding {
+): BufferEncoding {
  const { buffer, bytesRead } = getFsImplementation().readSync(resolvedPath, {
    length: 4096,
  })

-  // Empty files default to utf8 — nothing to detect
+  // Empty files default to utf8 rather than ascii: an ascii default
+  // corrupted emoji/CJK content when it was written to an empty file.
  if (bytesRead === 0) {
    return 'utf8'
  }

-  return detectEncoding(buffer.subarray(0, bytesRead))
+  if (bytesRead >= 2) {
+    if (buffer[0] === 0xff && buffer[1] === 0xfe) return 'utf16le'
+  }
+
+  if (
+    bytesRead >= 3 &&
+    buffer[0] === 0xef &&
+    buffer[1] === 0xbb &&
+    buffer[2] === 0xbf
+  ) {
+    return 'utf8'
+  }
+
+  // For non-empty files, default to utf8 since it's a superset of ascii
+  // and handles all Unicode characters properly
+  return 'utf8'
 }

 export function detectLineEndingsForString(content: string): LineEndingType {
@@ -59,7 +74,7 @@ export function detectLineEndingsForString(content: string): LineEndingType {
 */
 export function readFileSyncWithMetadata(filePath: string): {
  content: string
-  encoding: FileEncoding
+  encoding: BufferEncoding
  lineEndings: LineEndingType
 } {
  const fs = getFsImplementation()
@@ -70,10 +85,10 @@ export function readFileSyncWithMetadata(filePath: string): {
  }

  const encoding = detectEncodingForResolvedPath(resolvedPath)
-  // Read raw Buffer first — readFileSync encoding option only accepts
-  // BufferEncoding, not gbk etc.
-  const rawBuffer = fs.readFileBytesSync(resolvedPath)
-  const raw = decodeBuffer(rawBuffer, encoding)
+  const raw = fs.readFileSync(resolvedPath, { encoding })
+  // Detect line endings from the raw head before CRLF normalization erases
+  // the distinction. 4096 code units is ≥ detectLineEndings's 4096-byte
+  // readSync sample (line endings are ASCII, so the unit mismatch is moot).
  const lineEndings = detectLineEndingsForString(raw.slice(0, 4096))
  return {
    content: raw.replaceAll('\r\n', '\n'),
--- a/src/utils/fileReadCache.ts
+++ b/src/utils/fileReadCache.ts
@@ -1,10 +1,9 @@
 import { detectFileEncoding } from './file.js'
-import { type FileEncoding, decodeBuffer } from './encoding.js'
 import { getFsImplementation } from './fsOperations.js'

 type CachedFileData = {
  content: string
-  encoding: FileEncoding
+  encoding: BufferEncoding
  mtime: number
 }

@@ -20,7 +19,7 @@ class FileReadCache {
   * Reads a file with caching. Returns both content and encoding.
   * Cache key includes file path and modification time for automatic invalidation.
   */
-  readFile(filePath: string): { content: string; encoding: FileEncoding } {
+  readFile(filePath: string): { content: string; encoding: BufferEncoding } {
    const fs = getFsImplementation()

    // Get file stats for cache invalidation
@@ -46,8 +45,9 @@ class FileReadCache {

    // Cache miss or stale data - read the file
    const encoding = detectFileEncoding(filePath)
-    const rawBuffer = fs.readFileBytesSync(filePath)
-    const content = decodeBuffer(rawBuffer, encoding).replaceAll('\r\n', '\n')
+    const content = fs
+      .readFileSync(filePath, { encoding })
+      .replaceAll('\r\n', '\n')

    // Update cache
    this.cache.set(cacheKey, {
--- a/src/utils/readFileInRange.ts
+++ b/src/utils/readFileInRange.ts
@@ -26,8 +26,7 @@
 //   On error (including maxBytes exceeded), stream.destroy(err) emits
 //   'error' → reject (passed directly to .once('error')).
 //
-// Both paths auto-detect encoding via encoding.ts (BOM → UTF-8 fatal → fallback chain),
-// decode with TextDecoder, and strip BOM and \r (CRLF → LF).
+// Both paths strip UTF-8 BOM and \r (CRLF → LF).
 //
 // mtime comes from fstat/stat on the already-open fd — no extra open().
 //
@@ -40,7 +39,6 @@

 import { createReadStream, fstat } from 'fs'
 import { stat as fsStat, readFile } from 'fs/promises'
-import { detectEncoding, decodeBuffer } from './encoding.js'
 import { formatFileSize } from './format.js'

 const FAST_PATH_MAX_SIZE = 10 * 1024 * 1024 // 10 MB
@@ -117,9 +115,7 @@ export async function readFileInRange(
      )
    }

-    const rawBuffer = await readFile(filePath, { signal })
-    const encoding = detectEncoding(rawBuffer)
-    const text = decodeBuffer(rawBuffer, encoding)
+    const text = await readFile(filePath, { encoding: 'utf8', signal })
    return readFileInRangeFast(
      text,
      stats.mtimeMs,
@@ -231,12 +227,6 @@ type StreamState = {
  isFirstChunk: boolean
  resolveMtime: (ms: number) => void
  mtimeReady: Promise<number>
-  /** Encoding detection state: null = not yet detected, string = detected */
-  encoding: string | null
-  /** TextDecoder instance: created after detection, used for streaming decode */
-  decoder: TextDecoder | null
-  /** Detection phase buffer: collects raw bytes until 4KB or stream end */
-  detectionBuffer: number[]
 }

 function streamOnOpen(this: StreamState, fd: number): void {
@@ -245,71 +235,15 @@ function streamOnOpen(this: StreamState, fd: number): void {
  })
 }

-function processTextChunk(state: StreamState, text: string): void {
-  // BOM stripping (first chunk only)
-  if (state.isFirstChunk) {
-    state.isFirstChunk = false
-    if (text.charCodeAt(0) === 0xfeff) {
-      text = text.slice(1)
+function streamOnData(this: StreamState, chunk: string): void {
+  if (this.isFirstChunk) {
+    this.isFirstChunk = false
+    if (chunk.charCodeAt(0) === 0xfeff) {
+      chunk = chunk.slice(1)
    }
  }

-  const data = state.partial.length > 0 ? state.partial + text : text
-  state.partial = ''
-
-  let startPos = 0
-  let newlinePos: number
-  while ((newlinePos = data.indexOf('\n', startPos)) !== -1) {
-    if (
-      state.currentLineIndex >= state.offset &&
-      state.currentLineIndex < state.endLine
-    ) {
-      let line = data.slice(startPos, newlinePos)
-      if (line.endsWith('\r')) {
-        line = line.slice(0, -1)
-      }
-      if (state.truncateOnByteLimit && state.maxBytes !== undefined) {
-        const sep = state.selectedLines.length > 0 ? 1 : 0
-        const nextBytes = state.selectedBytes + sep + Buffer.byteLength(line)
-        if (nextBytes > state.maxBytes) {
-          state.truncatedByBytes = true
-          state.endLine = state.currentLineIndex
-        } else {
-          state.selectedBytes = nextBytes
-          state.selectedLines.push(line)
-        }
-      } else {
-        state.selectedLines.push(line)
-      }
-    }
-    state.currentLineIndex++
-    startPos = newlinePos + 1
-  }
-
-  if (startPos < data.length) {
-    if (
-      state.currentLineIndex >= state.offset &&
-      state.currentLineIndex < state.endLine
-    ) {
-      const fragment = data.slice(startPos)
-      if (state.truncateOnByteLimit && state.maxBytes !== undefined) {
-        const sep = state.selectedLines.length > 0 ? 1 : 0
-        const fragBytes =
-          state.selectedBytes + sep + Buffer.byteLength(fragment)
-        if (fragBytes > state.maxBytes) {
-          state.truncatedByBytes = true
-          state.endLine = state.currentLineIndex
-          return
-        }
-      }
-      state.partial = fragment
-    }
-  }
-}
-
-function streamOnData(this: StreamState, chunk: Buffer): void {
-  this.totalBytesRead += chunk.length
-
+  this.totalBytesRead += Buffer.byteLength(chunk)
  if (
    !this.truncateOnByteLimit &&
    this.maxBytes !== undefined &&
@@ -321,47 +255,69 @@ function streamOnData(this: StreamState, chunk: Buffer): void {
    return
  }

-  // Phase 1: Encoding detection
-  if (this.encoding === null) {
-    for (let i = 0; i < chunk.length; i++) {
-      this.detectionBuffer.push(chunk[i])
-    }
+  const data = this.partial.length > 0 ? this.partial + chunk : chunk
+  this.partial = ''

-    // Collected at least 4KB, perform encoding detection
-    if (this.detectionBuffer.length >= 4096) {
-      this.encoding = detectEncoding(Buffer.from(this.detectionBuffer))
-      this.decoder = new TextDecoder(this.encoding, {
-        stream: true,
-      } as TextDecoderOptions)
-
-      // Decode the detection buffer and feed to line scanning
-      const decoded = this.decoder.decode(Buffer.from(this.detectionBuffer))
-      this.detectionBuffer = []
-      processTextChunk(this, decoded)
+  let startPos = 0
+  let newlinePos: number
+  while ((newlinePos = data.indexOf('\n', startPos)) !== -1) {
+    if (
+      this.currentLineIndex >= this.offset &&
+      this.currentLineIndex < this.endLine
+    ) {
+      let line = data.slice(startPos, newlinePos)
+      if (line.endsWith('\r')) {
+        line = line.slice(0, -1)
+      }
+      if (this.truncateOnByteLimit && this.maxBytes !== undefined) {
+        const sep = this.selectedLines.length > 0 ? 1 : 0
+        const nextBytes = this.selectedBytes + sep + Buffer.byteLength(line)
+        if (nextBytes > this.maxBytes) {
+          // Cap hit — collapse the selection range so nothing more is
+          // accumulated. The stream keeps running only to count totalLines.
+          this.truncatedByBytes = true
+          this.endLine = this.currentLineIndex
+        } else {
+          this.selectedBytes = nextBytes
+          this.selectedLines.push(line)
+        }
+      } else {
+        this.selectedLines.push(line)
+      }
    }
-    return
+    this.currentLineIndex++
+    startPos = newlinePos + 1
  }

-  // Phase 2: Decoding
-  const decoded = this.decoder!.decode(chunk, {
-    stream: true,
-  } as unknown as TextDecodeOptions)
-  processTextChunk(this, decoded)
+  // Only keep the trailing fragment when inside the selected range.
+  // Outside the range we just count newlines — discarding prevents
+  // unbounded memory growth on huge single-line files.
+  if (startPos < data.length) {
+    if (
+      this.currentLineIndex >= this.offset &&
+      this.currentLineIndex < this.endLine
+    ) {
+      const fragment = data.slice(startPos)
+      // In truncate mode, `partial` can grow unboundedly if the selected
+      // range contains a huge single line (no newline across many chunks).
+      // Once the fragment alone would overflow the remaining budget, we know
+      // the completed line can never fit — set truncated, collapse the
+      // selection range, and discard the fragment to stop accumulation.
+      if (this.truncateOnByteLimit && this.maxBytes !== undefined) {
+        const sep = this.selectedLines.length > 0 ? 1 : 0
+        const fragBytes = this.selectedBytes + sep + Buffer.byteLength(fragment)
+        if (fragBytes > this.maxBytes) {
+          this.truncatedByBytes = true
+          this.endLine = this.currentLineIndex
+          return
+        }
+      }
+      this.partial = fragment
+    }
+  }
 }

 function streamOnEnd(this: StreamState): void {
-  // If stream ended before detection completed (< 4KB file), detect now
-  if (this.encoding === null) {
-    this.encoding = detectEncoding(Buffer.from(this.detectionBuffer))
-    this.decoder = new TextDecoder(this.encoding, {
-      stream: true,
-    } as TextDecoderOptions)
-    const decoded = this.decoder.decode(Buffer.from(this.detectionBuffer))
-    this.detectionBuffer = []
-    processTextChunk(this, decoded)
-  }
-
-  // Handle final fragment
  let line = this.partial
  if (line.endsWith('\r')) {
    line = line.slice(0, -1)
@@ -410,6 +366,7 @@ function readFileInRangeStreaming(
  return new Promise((resolve, reject) => {
    const state: StreamState = {
      stream: createReadStream(filePath, {
+        encoding: 'utf8',
        highWaterMark: 512 * 1024,
        ...(signal ? { signal } : undefined),
      }),
@@ -427,9 +384,6 @@ function readFileInRangeStreaming(
      isFirstChunk: true,
      resolveMtime: () => {},
      mtimeReady: null as unknown as Promise<number>,
-      encoding: null,
-      decoder: null,
-      detectionBuffer: [],
    }
    state.mtimeReady = new Promise<number>(r => {
      state.resolveMtime = r