Paper Clip (Save HTML)

Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S

目前為 2023-05-11 提交的版本,檢視 最新版本

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript== 
// @name        Paper Clip (Save HTML)
// @description Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S
// @author      Schimon Jehudah, Adv.
// @namespace   i2p.schimon.paperclip
// @homepageURL https://greasyfork.org/en/scripts/465960-paper-clip-save-html
// @supportURL  https://greasyfork.org/en/scripts/465960-paper-clip-save-html/feedback
// @copyright   2023, Schimon Jehudah (http://schimon.i2p)
// @license     MIT; https://opensource.org/licenses/MIT
// @exclude     devtools://*
// @include     *
// @version     23.05.09
// @run-at      document-end
// @icon        
// ==/UserScript==

/* TODO

1) Bookmarklet

2) jsPDF /parallax/jsPDF

*/

// Check whether HTML; otherwise, exit.
//if (!document.contentType == 'text/html')
if (document.doctype == null) return;

var
  originalBackground, originalColor,
  originalDisplay, originalOutline;

const time = new Date();
const namespace = 'i2p.schimon.paperclip';

// FIXME set hotkey
document.onkeyup = function(e) {
  //if (e.ctrlKey && e.shiftKey && e.which == 49) { // Ctrl + Shift + 1
  if (e.metaKey && e.shiftKey && e.which == 83) { // Command + Shift + S
    console.info('Saving selection to HTML.')
    createPage();
  }
};

// Click handler: shows the paper-clip button next to the pointer when text
// is selected, and removes it when the selection is empty.
// Notes on the choice of event:
//   "click" and "mouseup" are the most sensible, albeit not accurate;
//   "mousemove" is the most manipulative (per user), yet (almost) the most accurate;
//   "select" seems to work only inside input elements.
window.addEventListener('click', (event) => {
  const selectedLength = document.getSelection().toString().length;
  const existing = document.getElementById(namespace);

  // No button yet and something is selected: create one at the click position.
  if (!existing && selectedLength) {
    document.body.append(createButton(event.pageX, event.pageY));
    return;
  }

  // Button present but nothing selected any more: take it away.
  if (existing && !selectedLength) {
    existing.remove();
  }
}, {passive: true});

// TODO declare variables once
// NOTE consider "mousedown"
// NOTE consider moving this functionality into function createButton()
// Mouse-move handler: removes the button as soon as the selection is empty.
window.addEventListener('mousemove', () => {
  const button = document.getElementById(namespace);
  if (!button) {
    return;
  }
  const selectedText = document.getSelection().toString();
  if (!selectedText.length) {
    button.remove();
  }
});

/**
 * Build the floating paper-clip button.
 *
 * The element is only created and configured here; the caller inserts it
 * into the page.
 *
 * @param {number} x - Horizontal page coordinate; the button is placed 5px to its right.
 * @param {number} y - Vertical page coordinate used as the button's top edge.
 * @returns {HTMLElement} The configured button element.
 */
function createButton(x, y) {
  const button = document.createElement(namespace);
  button.id = namespace;
  button.textContent = '📎'; // 🖇️ 💾

  const idleOpacity = 0.7;

  Object.assign(button.style, {
    // position
    position: 'absolute',
    left: `${x + 5}px`,
    top: `${y}px`,
    // appearance
    fontFamily: 'none', // emoji
    background: 'repeating-linear-gradient(45deg, black, transparent 100px)', // black, cornflowerblue, grey, rosybrown
    border: 'ridge',
    borderColor: 'rosybrown',
    borderRadius: '50%',
    padding: '3px',
    minWidth: '30px',
    minHeight: '30px',
    fontSize: '20px',
    zIndex: 10000,
    opacity: idleOpacity,
    // center the character
    justifyContent: 'center',
    alignItems: 'center',
    display: 'flex',
    // disable selection marks
    outline: 'white', // none
    userSelect: 'none',
    cursor: 'default',
  });

  // Hovering previews what will be saved by highlighting the selected element.
  button.onmouseover = () => {
    drawBorder();
    button.style.opacity = 1;
  };
  button.onmouseleave = () => { // onmouseout
    resetStyle();
    button.style.opacity = idleOpacity;
  };

  // Clicking removes the preview highlight first, then saves the selection.
  button.onclick = () => {
    resetStyle();
    createPage();
  };

  return button;
}

// Element that currently carries the highlight applied by drawBorder(),
// or null when nothing is highlighted.
let highlightedElement = null;

/**
 * Highlight the element returned by getSelectedText() with an outline and
 * a light background, remembering its previous inline color, outline and
 * background so that resetStyle() can restore them.
 *
 * Calling it again for the element that is already highlighted is a no-op;
 * otherwise the highlight values would be saved as the "original" styles.
 * If a different element is highlighted, that one is restored first.
 */
function drawBorder() {
  const sel = getSelectedText();
  if (sel === highlightedElement) {
    return;
  }
  resetStyle();

  originalColor = sel.style.color;
  originalOutline = sel.style.outline;
  originalBackground = sel.style.background;

  // Draw border around input without affecting style, layout or spacing
  // https://overflow.adminforge.de/questions/29990319/draw-border-around-input-without-affecting-style-layout-or-spacing
  sel.style.outline = '2px double rosybrown';
  sel.style.background = 'rgb(250 250 210)';
  sel.style.color = 'black'; // DarkRed

  highlightedElement = sel;
}

// TODO remove attribute 'style' of first element after 'body'
// FIXME
// http://gothicrichard.synthasite.com/what-i-fond-on-the-net.php
// https://darknetdiaries.com/episode/65/
/**
 * Restore the inline styles saved by drawBorder().
 *
 * The styles are written back to the element that was actually
 * highlighted, not to whatever the selection resolves to now, so a
 * selection that changed in the meantime cannot leave the highlight behind
 * or overwrite the styles of an unrelated element. Does nothing when no
 * element is highlighted.
 */
function resetStyle() {
  if (highlightedElement === null) {
    return;
  }
  highlightedElement.style.color = originalColor;
  highlightedElement.style.outline = originalOutline;
  highlightedElement.style.background = originalBackground;
  highlightedElement = null;
}

/**
 * Build a standalone HTML document from the element enclosing the current
 * selection and hand it to savePage().
 *
 * The new document receives the page title, a <base> element, meta tags
 * describing the capture, and copies of selected meta tags of the source
 * page. The selected markup is then passed through the cleaning steps
 * below, serialized, whitespace-collapsed and saved.
 *
 * Does nothing (apart from a console warning) when there is no selection.
 */
function createPage() {

  // getSelectedText() dereferences the selection's anchor and focus nodes;
  // without a selection (e.g. hotkey pressed on a fresh page) they are null.
  const selection = document.getSelection();
  if (!selection || !selection.anchorNode || !selection.focusNode) {
    console.warn('No selection to save.');
    return;
  }

  const template = '<!DOCTYPE html>';
  const domParser = new DOMParser();
  let data = domParser.parseFromString(template, 'text/html');

  // set title
  if (document.title.length > 0) {
    data.title = document.title;
  }

  // set base
  const base = data.createElement('base');
  base.href = data.head.baseURI; // location.href;
  data.head.append(base);

  // Meta tags describing the capture itself: [name, content].
  const captureMeta = [
    ['url', location.href],
    ['date', time],
    ['creator', namespace],
    ['user-agent', navigator.userAgent],
    //['connection-type', navigator.connection.effectiveType],
    ['content-type-sourced', document.contentType],
    ['charset-sourced', document.charset],
  ];

  for (const [name, content] of captureMeta) {
    const meta = data.createElement('meta');
    meta.name = name;
    meta.content = content;
    data.head.append(meta);
  }

  // Meta tags copied from the source page, stored as "<name>-imported".
  const importedMeta = [
    //'content-type',
    'viewport',
    'description',
    'keywords',
    'generator',
  ];

  for (const name of importedMeta) {
    const source = document.querySelector('meta[name="' + name + '" i]');
    if (!source) {
      console.warn(name + ': Not found.');
      continue;
    }
    const meta = data.createElement('meta');
    meta.name = name + '-imported';
    meta.content = source.content;
    data.head.append(meta);
  }

  data.body.innerHTML = getSelectedText().outerHTML;

  // Media sources are recorded as meta tags before the attributes and the
  // media elements themselves are stripped.
  data = listMediaElements(data);
  data = removeAttributes(data);
  data = removeMediaElements(data);
  // Disabled step: data = replaceMediaByLinks(data);
  data = correctLinks(data);
  data = removeEmptyElements(data);
  data = removeCommentNodes(data);
  data = new XMLSerializer().serializeToString(data);
  // Disabled steps: formatPage(data), minify(data), removeComments(data)
  data = removeMultipleWhiteSpace(data);
  savePage(data, createFilename());

}

/**
 * Replace every <img> in the document by an <a> that links to the image.
 *
 * The link receives the image's attributes, an href pointing at the image
 * source, and the source URL as its text.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function replaceMediaByLinks(data) {
  for (const imgElement of data.querySelectorAll('img')) {
    // Create a new <a> element
    const aElement = data.createElement('a');

    // Copy the attributes of the <img> element to the new <a> element
    for (const { name, value } of Array.from(imgElement.attributes)) {
      aElement.setAttribute(name, value);
    }

    // Set the link target after copying, so that no copied attribute can
    // override it. (The attribute must be set through setAttribute();
    // assigning to `setAttribute.href` only decorates the function object.)
    aElement.setAttribute('href', imgElement.src);
    aElement.textContent = imgElement.src;

    // Replace the <img> element with the new <a> element
    imgElement.parentNode.replaceChild(aElement, imgElement);
  }
  return data;
}

/**
 * Record the sources of media elements as meta tags in the document head.
 *
 * For every audio, embed, img, video, frame, frameset and iframe element,
 * each non-empty `src` or `data-img-url` attribute is appended to the head
 * as <meta name="extracted-media-TAG" content="VALUE">.
 *
 * @param {Document} data - Document to scan and annotate in place.
 * @returns {Document} The same document.
 */
function listMediaElements(data) {

  const elements = [
    'audio', 'embed', 'img', 'video',
    'frame', 'frameset', 'iframe',
  ];
  const attributes = ['src', 'data-img-url'];

  for (const tagName of elements) {
    for (const element of data.querySelectorAll(tagName)) {
      for (const attribute of attributes) {
        const value = element.getAttribute(attribute);
        if (!value) {
          continue;
        }
        // Declared locally: an undeclared assignment would create a global
        // (and throws in strict mode).
        const meta = data.createElement('meta');
        meta.name = `extracted-media-${tagName}`;
        meta.content = value;
        data.head.append(meta);
      }
    }
  }
  return data;
}

/**
 * Strip graphics, media, form controls, scripts and styles from the document.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function removeMediaElements(data) {
  // TODO Remove span and preserve its contents
  // Move span content to its parent element/node
  // https://overflow.lunar.icu/questions/9848465/js-remove-a-tag-without-deleting-content

  // TODO Replace "iframe" by "a href"

  const unwantedTags = [
    'audio', 'embed', 'img', 'video', 'button',
    'form', 'frame', 'frameset', 'iframe', 'textarea',
    'svg', 'input', 'path',
    'script', 'style',
    'select',
  ];

  // One query per tag name, in list order; every match is detached.
  unwantedTags.forEach((tagName) => {
    data.querySelectorAll(tagName).forEach((node) => node.remove());
  });

  return data;
}

/**
 * Remove all attributes except `href`, `name` and `id` from every element
 * inside <body>.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function removeAttributes(data) {
  // https://stackoverflow.com/questions/1870441/remove-all-attributes
  const preserved = new Set(['href', 'name', 'id']);

  for (const element of data.querySelectorAll('body *')) {
    // `attributes` is a live collection: removing an entry shifts the
    // following ones down, so walking it by index while removing skips
    // every attribute that follows a removed one. Iterate over a snapshot.
    for (const { name } of Array.from(element.attributes)) {
      if (!preserved.has(name)) {
        element.removeAttribute(name);
      }
    }
  }

  return data;
}

/**
 * Correct links for offline usage: a link whose target is the element's
 * base URI followed by a fragment is rewritten to the bare fragment.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function correctLinks(data) {
  data.querySelectorAll('a').forEach((anchor) => {
    const samePagePrefix = anchor.baseURI + '#';
    if (!anchor.href.startsWith(samePagePrefix)) {
      return;
    }
    anchor.href = anchor.hash;
  });
  return data;
}

/**
 * Remove every element under <body> whose `outerText` is empty or
 * consists only of whitespace.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function removeEmptyElements (data) {
  const blankOnly = /^\s*$/;
  data.body.querySelectorAll('*').forEach((node) => {
    if (!blankOnly.test(node.outerText)) {
      return;
    }
    node.remove();
  });
  return data;
}

/**
 * Remove all comment nodes from the document.
 *
 * @param {Document} data - Document to modify in place.
 * @returns {Document} The same document.
 */
function removeCommentNodes(data) {
  // Let the iterator surface comment nodes only, instead of visiting every
  // node and comparing node names.
  const commentIterator = data.createNodeIterator(
    data,                    // Starting node: the whole document
    NodeFilter.SHOW_COMMENT  // Only comment nodes are returned
  );

  let comment;
  while ((comment = commentIterator.nextNode())) {
    comment.remove();
  }
  return data;
}

/**
 * Delete HTML comments (`<!-- ... -->`, possibly spanning lines) from a string.
 *
 * @param {string} str - Markup to clean.
 * @returns {string} The markup without comments.
 */
function removeComments(str) {
  const htmlComment = /<!--[\s\S]*?-->/g;
  const withoutComments = str.replace(htmlComment, '');
  return withoutComments;
}

/**
 * Collapse every run of whitespace into a single space, leaving the
 * contents of <code>, <pre> and <code-*> elements untouched.
 *
 * @param {string} str - Serialized markup.
 * @returns {string} The markup with whitespace collapsed.
 */
function removeMultipleWhiteSpace(str) {
  // Alternative 1 (group 1) swallows a whole code block so that the
  // whitespace inside it is never seen by alternative 2.
  const codeBlockOrWhitespace = /(<(code|pre|code-[^\s]+)[^>]*>.*?<\/\2>)|(\s+)/gs;
  return str.replace(
    codeBlockOrWhitespace,
    (match, codeBlock) => (codeBlock ? codeBlock : " ")
  );
}

// Get parent element of beginning (and end) of selected text
// https://stackoverflow.com/questions/32515175/get-parent-element-of-beginning-and-end-of-selected-text
/**
 * Resolve the current selection to a single node: the first common
 * ancestor of the parents of the selection's anchor and focus nodes.
 *
 * @returns {Node} The node returned by findFirstCommonAncestor().
 */
function getSelectedText() {
  const { anchorNode, focusNode } = document.getSelection();
  return findFirstCommonAncestor(anchorNode.parentNode, focusNode.parentNode);
}

// find common parent
// https://stackoverflow.com/questions/2453742/whats-the-best-way-to-find-the-first-common-parent-of-two-dom-nodes-in-javascri
/**
 * Find the closest node that contains both given nodes, by spanning a
 * Range between them and reading its common ancestor container.
 *
 * @param {Node} nodeA - One node.
 * @param {Node} nodeB - The other node.
 * @returns {Node} The range's common ancestor container.
 */
function findFirstCommonAncestor(nodeA, nodeB) {
  const span = new Range();
  const stretch = (from, to) => {
    span.setStart(from, 0);
    span.setEnd(to, 0);
  };

  stretch(nodeA, nodeB);
  // There is a complication: if nodeA is positioned after nodeB in the
  // document, the range ends up collapsed (start and end at the same
  // position), and its common ancestor container would likely just be
  // `nodeB.parentNode`. Spanning the range the other way round fixes that.
  if (span.collapsed) {
    stretch(nodeB, nodeA);
  }
  return span.commonAncestorContainer;
}

// minify html
// /questions/23284784/javascript-minify-html-regex
// TODO Don't apply on code/pre
/**
 * Minify an HTML string: drop line breaks and spaces between adjacent
 * tags, collapse remaining whitespace runs outside tags to one space, and
 * trim the result.
 *
 * @param {string} s - Markup to minify; a falsy value yields "".
 * @returns {string} The minified markup.
 */
function minify( s ){
  if (!s) {
    return "";
  }
  // Removes new lines and irrelevant spaces which might affect layout, and are better gone
  const tagsJoined = s.replace(/\>[\r\n ]+\</g, "><");
  // Tags are passed through unchanged; any other whitespace run becomes one space.
  const squeezed = tagsJoined.replace(/(<.*?>)|\s+/g, (m, $1) => $1 ? $1 : ' ');
  return squeezed.trim();
}

// format html
// /questions/3913355/how-to-format-tidy-beautify-in-javascript
// TODO Don't inset span in code/pre
function formatPage(html) {
  var tab = '\t';
  var result = '';
  var indent= '';

  html.split(/>\s*</).forEach(function(element) {

    if (element.match( /^\/\w/ )) {
        indent = indent.substring(tab.length);
    }

    result += indent + '<' + element + '>\r\n';

    if (element.match( /^<?\w[^>]*[^\/]$/ ) && !element.startsWith("input")  ) { 
      indent += tab;              
    }

  });

  return result.substring(1, result.length-3);

}

/**
 * Build the download file name: "<title>_<YYYY-MM-DD>_<HH-MM-SS>.html",
 * lower-cased, with spaces in the title replaced by underscores.
 *
 * The title is the document title, or the last segment of the URL path
 * when the document has no title. Date and time both come from the
 * script-level `time` and are both expressed in local time; taking the
 * date from toISOString() (UTC) while reading the time with local getters
 * would yield the wrong day around midnight.
 *
 * @returns {string} The file name.
 */
function createFilename() {

  const pad = (value) => String(value).padStart(2, '0');

  const day = [
    time.getFullYear(),
    pad(time.getMonth() + 1), // getMonth() is zero-based
    pad(time.getDate()),
  ].join('-');

  const now = [
    time.getHours(),
    time.getMinutes(),
    time.getSeconds(),
  ].map(pad).join('-');

  let title = document.title;
  if (!title) {
    const segments = location.pathname.split('/');
    title = segments[segments.length - 1];
  }

  title = title.replace(/ /g, '_');

  return `${title}_${day}_${now}.html`.toLowerCase();

}

// export file
// https://stackoverflow.com/questions/4545311/download-a-file-by-jquery-ajax
// https://stackoverflow.com/questions/43135852/javascript-export-to-text-file
/**
 * Offer a string for download as an HTML file.
 *
 * A single anchor element is created once and reused for every download;
 * it is not inserted into the page.
 *
 * @param {string} data - File contents.
 * @param {string} fileName - Name suggested for the downloaded file.
 */
var savePage = (() => {
  const anchor = document.createElement("a");

  return (data, fileName) => {
    const blob = new Blob([data], {type: "text/html"});
    const objectUrl = window.URL.createObjectURL(blob);

    anchor.href = objectUrl;
    anchor.download = fileName;
    anchor.click();

    window.URL.revokeObjectURL(objectUrl);
  };
})();