我的 Emacs 文件夹中有一批文本文件是 gb2312 编码格式，我希望将它们都改成 utf-8 格式，使用 lisp 如何编程

czqhurricane · 2024 年10 月 27 日 08:13

(require 'dired)

(defun convert-gb2312-to-utf8 ()
  "Convert all files in the current Dired buffer from GB2312 to UTF-8."
  (interactive)
  (dired-mark-files-regexp "\\.c$")  ; 标记所有 .txt 文件
  (dired-map-over-marks
   (lambda (file mark)
     (let ((coding-system-for-read 'gb2312)
           (coding-system-for-write 'utf-8))
       (with-temp-buffer
                (insert-file-contents file)
                (set-buffer-file-coding-system coding-system-for-write)
                (write-file file)))
     nil)
   nil))

;; 使用方法： ;; 1. 打开 Dired 模式，进入包含这些文件的目录。 ;; 2. 运行 =M-x convert-gb2312-to-utf8= 。

可是 C 文件可以被正确标记文件，但是dired-map-over-marks没有被调用，不知道为啥？

LdBeth · 2024 年10 月 27 日 20:21

用法错了，看看 dired.el 里的用例

wcq062821 · 2024 年10 月 28 日 02:00

如果文件多的话，用elisp太慢了，我比较喜欢调用python来处理这个事情

(defun my/recursively-convert-source-files-from-GBK-to-UTF-8 ()
  "递归的把当前目录的源文件的编码从GBK转成UTF-8。"
  (interactive)
  (let* ((home-dir (getenv "HOME"))
         (python-file (expand-file-name "~/.doom.d/python-tools/gbk2utf-8.py" home-dir))
         (local-root (vc-git-root buffer-file-name))
         (result nil))
    (when (yes-or-no-p (format "Do you want to execute the command in the directory: %s?" local-root))
      (setq default-directory local-root)
      (let* ((command (list "python3" python-file))
             (output-buffer (generate-new-buffer "*gbk2utf-8-output*"))
             (exit-code (apply 'call-process (car command) nil output-buffer t (cdr command))))
        (if (eq exit-code 0)
            (progn
              (setq result (with-current-buffer output-buffer
                             (buffer-string)))
              (message "result: %s" result)
              (kill-buffer output-buffer))
          (message "Error running command. Check *gbk2utf-8-output* buffer for details.")
          (switch-to-buffer output-buffer))))
    result))

gbk2utf-8.py

import os

def getDirFiles(dir):
    fileList = []
    for ff in os.listdir(dir):
        # 过滤隐藏文件夹
        if ff.startswith('.'):
            continue
        filePath = os.path.join(dir, ff)
        if os.path.isdir(filePath):
            fileList.extend(getDirFiles(filePath))
        else:
            if ff.lower().endswith('.h') or ff.lower().endswith('.c') or ff.lower().endswith('.cpp'):
                fileList.append(os.path.join(dir, ff))
    return fileList

originEncodeList = ['gbk', 'Big5', 'iso-latin-1-dos']
if __name__ == '__main__':
    objFiles = getDirFiles('.')
    failList = []
    for f in objFiles:
        try:
            with open(f, 'rb')  as fpr:
                buf = fpr.read().decode('utf-8')
                # print('utf-8 encode  ok')
                #
                # 繁体转换成简体
                # from langconv import *
                # if f.find('main.c') != -1:
                #     simple = Converter('zh-hans').convert(buf)
                #     with open(f+'_simple.c', 'w', encoding='utf-8') as fpw:
                #         fpw.write(simple)
                #
                # 本身是UTF-8 格式 不需要转换
                continue
        except:
            pass

        print(f)
        for ec in originEncodeList:
            try:
                with open(f, 'r', encoding=ec)  as fpr:
                    buf = fpr.read()
                    with open(f+'.utf8', 'w', encoding='utf-8') as fpw:
                        fpw.write(buf)
                os.remove(f)
                os.rename(f+'.utf8', f)
                break
            except:
                if f not in failList:
                    failList.append(f)
                # print('err f : %s encoding : %s'%(f, ec))
                # if os.path.exists(f+'.utf8'):
                #     os.remove(f+'.utf8')
    if failList:
        print('====================================')
        print('err files :')
        for f in failList:
            print(f)
        print('====================================')
    else:
        print('All Done!')

netjune · 2024 年10 月 28 日 04:27

这个功能最快的当然是vim, argdo命令太强大了