利用 gptel tools 完成了 meow-mark-word 的中文分词

我利用 https://www.youtube.com/watch?v=2VoOoS4cEV0 中提到的 tools(属于订阅付费内容,因此 tools 的具体实现不在这里分享,主要分享思路),有效地读取所需的 Emacs 内置包/第三方包的代码,只把相关的代码喂给 AI,在节省 token 的同时,还可以将范围扩展到整个包。

我自己尝试过阅读 emt 和 meow 的代码,希望能够利用 emt 扩写 meow-mark-word 函数,使其兼容中文:在中文上按 w 时,按照 emt 的分词规则选中光标下的中文词语。但自己能力有限,一直未能完成。

借助 tools 我很快完成了两个包相关函数的对比,AI 帮我提取有用的部分后测试,完全可以达到我的预期。meow/EXPLANATION.org at master · meow-edit/meow · GitHub 这里提到的 case 也可以在扩展后的函数下操作。

(defun meow-mark-word-or-chinese (n)
  "Mark the word under the cursor, handling both English and Chinese text.

The bounds of the text run around point are obtained from
`emt--get-bounds-at-point'.  When that run is empty, this falls back
to `meow-mark-thing' with `meow-word-thing'.  Otherwise the run is
segmented with `emt-split' and the segment containing point is
selected.  If point is not inside any segment (for example when it
sits at the very end of the run), the closest segment that starts at
or before point is used instead of jumping to the first segment.

The selection is created with type (expand . word); the selected text
is regexp-quoted, pushed with `meow--push-search' and highlighted with
`meow--highlight-regexp-in-buffer'.

Use a negative argument N to create a backward selection."
  (interactive "p")
  ;; Make sure EMT is ready before calling any of its functions.
  (emt-ensure)
  (let* ((backward (< n 0))
         (direction (if backward 'backward 'forward))
         (bounds (emt--get-bounds-at-point
                  (emt--move-by-word-decide-bounds-direction direction)))
         (beg (car bounds))
         (end (cdr bounds)))
    ;; `eql' rather than `eq': positions are integers, and `eql' also
    ;; treats a nil/nil pair as "empty run" like the original did.
    (if (eql beg end)
        ;; Empty run: use the default Meow word selection.
        (meow-mark-thing meow-word-thing 'word backward "\\<%s\\>")
      ;; Non-empty run: pick one EMT segment.
      (let* ((text (buffer-substring-no-properties beg end))
             ;; `emt-split' yields a sequence of (START . END) offsets
             ;; relative to TEXT; `append' turns it into a list.
             (segments (append (emt-split text) nil))
             (pos (- (point) beg))
             (hit nil)
             (nearest nil))
        (dolist (bound segments)
          (cond
           ;; Segment that actually contains point.
           ((and (>= pos (car bound)) (< pos (cdr bound)))
            (setq hit bound))
           ;; Otherwise remember the last segment starting at or before
           ;; point, so a point at the end of the run maps to the
           ;; adjacent segment rather than to the first one.
           ((<= (car bound) pos)
            (setq nearest bound))))
        (let ((segment-bounds (or hit nearest (car segments))))
          (when segment-bounds
            (let* ((seg-beg (+ beg (car segment-bounds)))
                   (seg-end (+ beg (cdr segment-bounds)))
                   (regexp (regexp-quote
                            (buffer-substring-no-properties seg-beg seg-end))))
              (meow--select
               (meow--make-selection (cons 'expand 'word) seg-beg seg-end)
               backward)
              (meow--push-search regexp)
              (meow--highlight-regexp-in-buffer regexp))))))))
8 个赞
(defun meow-mark-thing (thing type &optional backward regexp-format)
  "Make expandable selection of THING, with TYPE and forward/BACKWARD direction.

THING is a symbol usable by `forward-thing', which see.

TYPE is a symbol. Usual values are `word' or `line'.

The selection will be made in the \\='forward\\=' direction unless BACKWARD is
non-nil.

When REGEXP-FORMAT is non-nil and a string, the content of the selection will be
quoted to regexp, then pushed into `regexp-search-ring' which will be read by
`meow-search' and other commands. In this case, REGEXP-FORMAT is used as a
format-string to format the regexp-quoted selection content (which is passed as
a string to `format'). Further matches of this formatted search will be
highlighted in the buffer.

When the EMT package can be loaded and `emt--get-bounds-at-point'
reports a non-empty run around point, the run is segmented with
`emt-split' and only the segment at point is selected.  In that case
the search pushed is the plain regexp-quoted segment (REGEXP-FORMAT is
only used as the on/off switch).  When EMT is not available this
behaves like the default thing-at-point based selection."
  (let* ((emt-bounds
          ;; Only consult EMT when it is (or can be) loaded, so this
          ;; core helper keeps working without EMT installed.
          (when (or (fboundp 'emt--get-bounds-at-point)
                    (require 'emt nil t))
            (emt-ensure)
            (emt--get-bounds-at-point
             (emt--move-by-word-decide-bounds-direction
              (if backward 'backward 'forward)))))
         (emt-beg (car emt-bounds))
         (emt-end (cdr emt-bounds)))
    ;; nil/nil (EMT unavailable) and equal positions (empty run) both
    ;; select the default path.
    (if (eql emt-beg emt-end)
        ;; Default behavior: select THING at point.
        (let* ((bounds (bounds-of-thing-at-point thing))
               (beg (car bounds))
               (end (cdr bounds)))
          (when beg
            (thread-first
              (meow--make-selection (cons 'expand type) beg end)
              (meow--select backward))
            (when (stringp regexp-format)
              (let ((search (format regexp-format
                                    (regexp-quote
                                     (buffer-substring-no-properties beg end)))))
                (meow--push-search search)
                (meow--highlight-regexp-in-buffer search)))))
      ;; EMT path: select the segment at point.
      (let* ((text (buffer-substring-no-properties emt-beg emt-end))
             ;; `emt-split' yields (START . END) offsets relative to TEXT.
             (segments (append (emt-split text) nil))
             (pos (- (point) emt-beg))
             (hit nil)
             (nearest nil))
        (dolist (bound segments)
          (cond
           ;; Segment containing point.
           ((and (>= pos (car bound)) (< pos (cdr bound)))
            (setq hit bound))
           ;; Last segment starting at or before point; used when point
           ;; is at the end of the run and no segment contains it.
           ((<= (car bound) pos)
            (setq nearest bound))))
        (let ((segment-bounds (or hit nearest (car segments))))
          (when segment-bounds
            (let ((seg-beg (+ emt-beg (car segment-bounds)))
                  (seg-end (+ emt-beg (cdr segment-bounds))))
              ;; Use the caller's TYPE so follow-up commands of the same
              ;; type can expand this selection.
              (meow--select
               (meow--make-selection (cons 'expand type) seg-beg seg-end)
               backward)
              ;; Honor the documented contract: only touch the search
              ;; ring when REGEXP-FORMAT is a string.
              (when (stringp regexp-format)
                (let ((regexp (regexp-quote
                               (buffer-substring-no-properties seg-beg seg-end))))
                  (meow--push-search regexp)
                  (meow--highlight-regexp-in-buffer regexp))))))))))

(defun meow-next-thing (thing type n &optional include-syntax)
  "Create non-expandable selection of TYPE to the end of the next Nth THING.

If N is negative, select to the beginning of the previous Nth thing instead.

If the current selection already has type (expand . TYPE), the
selection is extended instead and stays expandable.

When `emt--move-by-word' is available and the character in the
direction of travel matches the \"\\\\cc\" category, movement is done
with EMT one word at a time, repeated (abs N) times; each step falls
back to `forward-thing' when the next character is not in that
category.  Otherwise `forward-thing' is called with THING and N.

INCLUDE-SYNTAX defaults to the entry for THING in
`meow-next-thing-include-syntax'."
  (unless (equal type (cdr (meow--selection-type)))
    (meow--cancel-selection))
  (unless include-syntax
    (setq include-syntax
          (let ((thing-include-syntax
                 (or (alist-get thing meow-next-thing-include-syntax)
                     '("" ""))))
            (if (> n 0)
                (car thing-include-syntax)
              (cadr thing-include-syntax)))))
  (let* ((expand (equal (cons 'expand type) (meow--selection-type)))
         ;; Side effect only: orient the existing selection so that it
         ;; grows in the direction of travel.
         (_ (when expand
              (if (< n 0) (meow--direction-backward)
                (meow--direction-forward))))
         (new-type (if expand (cons 'expand type) (cons 'select type)))
         (m (point))
         (forward-p (> n 0))
         ;; Test the character the motion will cross first: the one
         ;; after point when going forward, the one before point when
         ;; going backward.
         (cjk-ahead-p
          (lambda ()
            (if forward-p
                (looking-at-p "\\cc")
              (and (not (bobp))
                   (save-excursion
                     (forward-char -1)
                     (looking-at-p "\\cc"))))))
         (p (save-mark-and-excursion
              (if (and (fboundp 'emt--move-by-word)
                       (funcall cjk-ahead-p))
                  ;; EMT moves a single word per call, so repeat it to
                  ;; honor N, re-checking the text before every step.
                  (dotimes (_ (abs n))
                    (if (funcall cjk-ahead-p)
                        (emt--move-by-word (if forward-p 'forward 'backward))
                      (forward-thing thing (if forward-p 1 -1))))
                ;; Fallback to original behavior
                (forward-thing thing n))
              ;; nil when point did not move, so no selection is made.
              (unless (= (point) m)
                (point)))))
    (when p
      (thread-first
        (meow--make-selection
         new-type
         (meow--fix-thing-selection-mark thing p m include-syntax)
         p
         expand)
        (meow--select))
      (meow--maybe-highlight-num-positions
       (cons (apply-partially #'meow--backward-thing-1 thing)
             (apply-partially #'meow--forward-thing-1 thing))))))

(defun meow--forward-thing-1 (thing)
  "Move point forward by one THING.

When `emt--move-by-word' is defined and the character after point
matches the \"\\\\cc\" category, move with EMT; otherwise call
`forward-thing' with THING and 1.

If point moved, return the result of `meow--hack-cursor-pos' applied
to the new point; otherwise return nil."
  (let ((origin (point)))
    (cond
     ((and (fboundp 'emt--move-by-word)
           (looking-at-p "\\cc"))
      (emt--move-by-word 'forward))
     (t
      (forward-thing thing 1)))
    (unless (= origin (point))
      (meow--hack-cursor-pos (point)))))

(defun meow--backward-thing-1 (thing)
  "Move point backward by one THING and return the new position.

When `emt--move-by-word' is defined and the character before point
matches the \"\\\\cc\" category, move with EMT; otherwise call
`forward-thing' with THING and -1.

The character BEFORE point is tested because that is the text a
backward motion crosses; testing the character after point would
skip EMT at the end of a run and apply it wrongly at its start.

Return nil when point did not move."
  (let ((pos (point)))
    (if (and (fboundp 'emt--move-by-word)
             (not (bobp))
             (save-excursion
               (forward-char -1)
               (looking-at-p "\\cc")))
        (emt--move-by-word 'backward)
      (forward-thing thing -1))
    (unless (= pos (point))
      (point))))

meow-mark-word 背后是 meow-mark-thing,meow-next-word 背后是 meow-next-thing,加上修改了 nav-functions,现在 meow-next-word 和 meow-back-word 也完美支持中文分词了(其实日文也可以)。

2 个赞