(require 'dired)
(defun convert-gb2312-to-utf8 ()
  "Convert the .c files in the current Dired buffer from GB2312 to UTF-8.
Marks every file whose name matches the .c regexp, then rewrites each
marked .c file in place, decoding it as GB2312 and encoding it as UTF-8."
  (interactive)
  (let ((c-file-regexp "\\.c$"))
    (dired-mark-files-regexp c-file-regexp) ; mark all .c files
    ;; `dired-map-over-marks' is a macro whose arguments are (BODY ARG ...):
    ;; BODY is a form evaluated with point on each marked line.  Passing a
    ;; lambda as BODY only evaluates the lambda expression (yielding a
    ;; function object) and never calls it, so no file was ever converted.
    ;; Iterate over the marked file names instead.  The filter keeps the
    ;; "file at point when nothing is marked" fallback of
    ;; `dired-get-marked-files' from converting a non-.c file.
    (dolist (file (dired-get-marked-files
                   nil nil
                   (lambda (name) (string-match-p c-file-regexp name))))
      (let ((coding-system-for-read 'gb2312)
            (coding-system-for-write 'utf-8))
        (with-temp-buffer
          (insert-file-contents file)
          ;; `write-region' honors `coding-system-for-write' and, unlike
          ;; `write-file', does not make the temp buffer visit FILE.
          (write-region nil nil file))))))
;; 使用方法:
;; 1. 打开 Dired 模式,进入包含这些文件的目录。
;; 2. 运行 =M-x convert-gb2312-to-utf8= 。
可是 C 文件可以被正确标记,但是 dired-map-over-marks 没有被调用,不知道为啥?
如果文件多的话, 用elisp太慢了, 我比较喜欢调用python来处理这个事情
(defun my/recursively-convert-source-files-from-GBK-to-UTF-8 ()
  "Recursively convert the source files of the current Git repository from GBK to UTF-8.
Asks for confirmation, then runs the gbk2utf-8.py script with python3 in
the repository root.  Returns the script output as a string when the
script exits with status 0; returns nil when the user declines or the
script fails (the output buffer is then displayed).  Signals a
`user-error' when no Git root can be found."
  (interactive)
  (require 'vc-git)
  (let* ((python-file (expand-file-name "~/.doom.d/python-tools/gbk2utf-8.py"))
         ;; `buffer-file-name' is nil in Dired and other non-file buffers;
         ;; fall back to the buffer's directory so the command works there.
         (local-root (vc-git-root (or buffer-file-name default-directory)))
         (result nil))
    (unless local-root
      (user-error "Not inside a Git repository: %s" default-directory))
    (when (yes-or-no-p (format "Do you want to execute the command in the directory: %s?" local-root))
      ;; Bind `default-directory' only around the process call.  A `setq'
      ;; here would permanently change the directory of the calling buffer.
      (let* ((default-directory (file-name-as-directory
                                 (expand-file-name local-root)))
             (output-buffer (generate-new-buffer "*gbk2utf-8-output*"))
             (exit-code (call-process "python3" nil output-buffer t python-file)))
        (if (eq exit-code 0)
            (progn
              (setq result (with-current-buffer output-buffer
                             (buffer-string)))
              (message "result: %s" result)
              (kill-buffer output-buffer))
          ;; `generate-new-buffer' may have uniquified the name, so report
          ;; the actual buffer name rather than the requested one.
          (message "Error running command. Check %s buffer for details."
                   (buffer-name output-buffer))
          (switch-to-buffer output-buffer))))
    result))
gbk2utf-8.py
import os
def getDirFiles(dir):
    """Recursively collect the C/C++ source files below *dir*.

    Entries whose name starts with '.' (hidden files and directories such
    as ``.git``) are skipped and never descended into. A file is collected
    when its name ends, case-insensitively, with ``.h``, ``.c`` or ``.cpp``.

    Args:
        dir: Directory to scan. The name shadows the builtin but is kept
            so existing keyword callers keep working.

    Returns:
        A list of file paths, each built by joining onto *dir*, in the
        order reported by ``os.listdir``.
    """
    fileList = []
    for ff in os.listdir(dir):
        # Skip hidden entries.
        if ff.startswith('.'):
            continue
        filePath = os.path.join(dir, ff)
        if os.path.isdir(filePath):
            fileList.extend(getDirFiles(filePath))
        elif ff.lower().endswith(('.h', '.c', '.cpp')):
            fileList.append(filePath)
    return fileList
# Legacy encodings tried, in order, for files that are not valid UTF-8.
# 'latin-1' replaces the original 'iso-latin-1-dos': that is an Emacs
# coding-system name which Python rejects with LookupError, so the fallback
# could never succeed. latin-1 accepts every byte sequence, so it must stay
# last in the list.
originEncodeList = ['gbk', 'Big5', 'latin-1']


def convertFileToUtf8(path, encodings=None):
    """Rewrite the file at *path* as UTF-8, in place.

    The file is handled as bytes, so line endings are preserved exactly;
    only the character encoding changes.

    Args:
        path: File to convert.
        encodings: Candidate source encodings, tried in order. Defaults to
            ``originEncodeList``.

    Returns:
        ``None`` when the file already is valid UTF-8 (nothing is written),
        otherwise the name of the encoding that was used to decode it.

    Raises:
        ValueError: No candidate encoding could decode the file; the file
            is left untouched.
        OSError: The file could not be read or rewritten.
    """
    if encodings is None:
        encodings = originEncodeList

    with open(path, 'rb') as fpr:
        raw = fpr.read()

    # Already UTF-8: no conversion needed.
    try:
        raw.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        return None

    for ec in encodings:
        try:
            text = raw.decode(ec)
        except (UnicodeDecodeError, LookupError):
            # Wrong guess (or unknown codec name): try the next candidate.
            continue

        tmpPath = path + '.utf8'
        try:
            with open(tmpPath, 'wb') as fpw:
                fpw.write(text.encode('utf-8'))
            # os.replace swaps the file in one step, so there is no moment
            # at which the original has been removed but not yet replaced.
            os.replace(tmpPath, path)
        except OSError:
            # Do not leave a stale temporary file behind.
            if os.path.exists(tmpPath):
                os.remove(tmpPath)
            raise
        return ec

    raise ValueError('no candidate encoding could decode %s' % path)


def main():
    """Convert every source file below the current directory to UTF-8.

    Prints the path of each file that needed conversion, then either a
    list of the files that could not be converted or 'All Done!'.
    """
    objFiles = getDirFiles('.')
    failList = []
    for f in objFiles:
        try:
            usedEncoding = convertFileToUtf8(f)
        except (ValueError, OSError):
            # A file is recorded as failed only when no encoding worked
            # at all, never because an earlier candidate was rejected.
            print(f)
            failList.append(f)
        else:
            if usedEncoding is not None:
                print(f)

    if failList:
        print('====================================')
        print('err files :')
        for f in failList:
            print(f)
        print('====================================')
    else:
        print('All Done!')


if __name__ == '__main__':
    main()
这个功能最快的当然是vim, argdo命令太强大了