【求助】org-protocol 能否传递整个HTML页面？

noalias · 2024 年5 月 8 日 00:57

我在 bookmarklet 中，把 document.body.outerHTML 作为 body 参数传给了 org-protocol：

// Bookmarklet: navigates to an org-protocol://capture-book-list URL (template "bw") whose
// query string carries the page URL (`url`), the page title (`title`), the entire
// document.body.outerHTML (`body`) and the current selection (`note`), each passed through
// encodeURIComponent.
// NOTE(review): the %27 sequences look like percent-encoded single quotes that only act as
// string delimiters after the browser decodes the javascript: URL — confirm before running
// this text anywhere other than a bookmark.
javascript:location.href='org-protocol://capture-book-list?template=bw&url=%27+encodeURIComponent(location.href)+%27&title=%27+encodeURIComponent(document.title)+%27&body=%27+encodeURIComponent(document.body.outerHTML)+%27&note=%27+encodeURIComponent(window.getSelection())

但是Emacs端没反应。去掉encodeURIComponent(document.body.outerHTML)是可以正常解析出url、title等。

lld2001 · 2024 年5 月 8 日 01:28

找了下曾经用的一段脚本，是可以解析网页上选中的文本的。用了 ShortcutManager 插件（看了 Sacha Chua 一篇文章后网上搜集的，GitHub 中没找到）。

// ==UserScript==
// @ShortcutManager
// @name Capture link
// @namespace XPrUJhE4wRsC
// @key Alt+c
// @include *
// ==/UserScript==
/**
 * Capture the current selection as HTML and hand it to Emacs via org-protocol.
 *
 * Builds an `org-protocol:///capture-html` URL (template "w") carrying the page
 * URL, the page title (or "[untitled page]" when the title is empty) and the
 * selected HTML with its link/image URLs made absolute, then navigates to it.
 *
 * @returns {string} The org-protocol URI that was assigned to window.location.
 */
var captureLink = function () {
  // Serialize the current selection to an HTML string ("" when nothing is selected).
  var getSelectionHtml = function () {
    if (typeof window.getSelection !== "undefined") {
      var sel = window.getSelection();
      if (!sel.rangeCount) {
        return "";
      }
      var container = document.createElement("div");
      for (var i = 0, len = sel.rangeCount; i < len; ++i) {
        container.appendChild(sel.getRangeAt(i).cloneContents());
      }
      return container.innerHTML;
    }
    // Fallback for browsers that only expose the legacy document.selection API.
    if (typeof document.selection !== "undefined" && document.selection.type === "Text") {
      return document.selection.createRange().htmlText;
    }
    return "";
  };

  // Resolve a possibly relative URL against the current document.
  var relToAbs = function (href) {
    var a = document.createElement("a");
    a.href = href;
    // Read the resolved URL directly: rebuilding it as protocol + "//" + host + ...
    // corrupts non-hierarchical URLs (mailto:x@y would become mailto://x@y).
    return a.href;
  };

  // Rewrite every <a href> and <img src> inside the HTML fragment to an absolute URL.
  var absolutizeUrls = function (html) {
    var elementTypes = [['a', 'href'], ['img', 'src']];
    var div = document.createElement('div');
    div.innerHTML = html;
    elementTypes.forEach(function (elementType) {
      var tagName = elementType[0];
      var attrName = elementType[1];
      var elements = div.getElementsByTagName(tagName);
      for (var i = 0; i < elements.length; i++) {
        var value = elements[i].getAttribute(attrName);
        // Elements without the attribute (e.g. <a name="...">) are left alone;
        // resolving a missing value would give them a bogus ".../null" URL.
        if (value === null) {
          continue;
        }
        elements[i].setAttribute(attrName, relToAbs(value));
      }
    });
    return div.innerHTML;
  };

  var uri = 'org-protocol:///capture-html?template=w&url=' + encodeURIComponent(location.href) +
    '&title=' + encodeURIComponent(document.title || "[untitled page]") +
    '&body=' + encodeURIComponent(absolutizeUrls(getSelectionHtml()));
  window.location = uri;
  return uri;
};
captureLink();

还用了 org-protocol-capture-html。

Voleking · 2024 年5 月 8 日 02:57

感觉可以参考下 GitHub 上的 alphapapa/org-protocol-capture-html（Capture HTML from the browser selection into Emacs as org-mode content）最后一部分的 js：

// Evaluates to the URI-encoded HTML of the current selection, with every
// <a href> and <img src> in it rewritten to an absolute URL.
encodeURIComponent(function() {
    // Step 1: serialize the current selection to an HTML string.
    var html = "";
    if (typeof document.getSelection !== "undefined") {
        var sel = document.getSelection();
        if (sel.rangeCount) {
            var container = document.createElement("div");
            for (var i = 0, len = sel.rangeCount; i < len; ++i) {
                container.appendChild(sel.getRangeAt(i).cloneContents());
            }
            html = container.innerHTML;
        }
    } else if (typeof document.selection !== "undefined") {
        // Fallback for browsers that only expose the legacy document.selection API.
        if (document.selection.type === "Text") {
            html = document.selection.createRange().htmlText;
        }
    }

    // Step 2: resolve a possibly relative URL against the current document.
    var relToAbs = function(href) {
        var a = document.createElement("a");
        a.href = href;
        // Read the resolved URL directly: rebuilding it as protocol + "//" + host + ...
        // corrupts non-hierarchical URLs (mailto:x@y would become mailto://x@y).
        return a.href;
    };

    // Step 3: rewrite link and image URLs inside a detached copy of the fragment.
    var elementTypes = [
        ['a', 'href'],
        ['img', 'src']
    ];
    var div = document.createElement('div');
    div.innerHTML = html;
    elementTypes.forEach(function(elementType) {
        var tagName = elementType[0];
        var attrName = elementType[1];
        var elements = div.getElementsByTagName(tagName);
        for (var i = 0; i < elements.length; i++) {
            var value = elements[i].getAttribute(attrName);
            // Elements without the attribute (e.g. <a name="...">) are left alone;
            // resolving a missing value would give them a bogus ".../null" URL.
            if (value === null) {
                continue;
            }
            elements[i].setAttribute(attrName, relToAbs(value));
        }
    });
    return div.innerHTML;
}());

noalias · 2024 年5 月 8 日 03:18

我现在只传递url，通过url-retrieve-synchronously获取HTML页面，来迂回解决😢。