Paper Clip (Save HTML)

Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S

当前为 2023-05-10 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Paper Clip (Save HTML)
  3. // @description Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S
  4. // @author Schimon Jehudah, Adv.
  5. // @namespace i2p.schimon.paperclip
  6. // @homepageURL https://greasyfork.org/en/scripts/465960-paper-clip-save-html
  7. // @supportURL https://greasyfork.org/en/scripts/465960-paper-clip-save-html/feedback
  8. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  9. // @license MIT; https://opensource.org/licenses/MIT
  10. // @exclude devtools://*
  11. // @include *
  12. // @version 23.05.09
  13. // @run-at document-end
  14. // @icon 
  15. // ==/UserScript==
  16.  
  17. /* TODO
  18.  
  19. 1) Bookmarklet
  20.  
  21. 2) jsPDF /parallax/jsPDF
  22.  
  23. */
  24.  
  25. // Check whether HTML; otherwise, exit.
  26. //if (!document.contentType == 'text/html')
  27. if (document.doctype == null) return;
  28.  
  29. var
  30. originalBackground, originalColor,
  31. originalDisplay, originalOutline;
  32.  
  33. const time = new Date();
  34. const namespace = 'org.openuserjs.sjehuda.paperclip';
  35.  
  // Hotkey: Command (Meta) + Shift + S saves the current selection.
  // The handler is registered with addEventListener rather than assigned to
  // `document.onkeyup`, so it neither replaces a handler installed by the
  // page nor gets replaced by one.
  document.addEventListener('keyup', (e) => {
    // `which` is deprecated; accept it for compatibility and also accept `key`.
    const isKeyS = e.which === 83 || (e.key || '').toLowerCase() === 's';
    if (!(e.metaKey && e.shiftKey && isKeyS)) return;

    // createPage() reads the selection's anchor/focus nodes; without any
    // range there is nothing to save, so ignore the hotkey.
    const selection = document.getSelection();
    if (!selection || selection.rangeCount === 0) return;

    console.info('Saving selection to HTML.');
    createPage();
  });
  44.  
  // Show or hide the floating button after every click.
  // Notes on the choice of event:
  // - "click" and "mouseup" are the most sensible, albeit not accurate
  // - "mousemove" is the most manipulative (per user), yet (almost) the most accurate
  // - "select" seems to work only inside input elements
  window.addEventListener('click', (event) => {
    const hasSelectedText = document.getSelection().toString().length > 0;
    const existing = document.getElementById(namespace);

    if (existing) {
      // Button is already on the page: drop it once nothing is selected.
      if (!hasSelectedText) {
        existing.remove();
      }
      return;
    }

    if (hasSelectedText) {
      // No button yet: place one next to the pointer position.
      document.body.append(createButton(event.pageX, event.pageY));
    }
  }, {passive: true});
  61.  
  // Remove the floating button as soon as the pointer moves while no text
  // is selected anymore.
  // TODO declare variables once
  // NOTE consider "mousedown"
  // NOTE consider moving this functionality into function createButton()
  window.addEventListener('mousemove', () => {
    const btn = document.getElementById(namespace);
    if (!btn) return;
    if (document.getSelection().toString().length === 0) {
      btn.remove();
    }
  });
  72.  
  /**
   * Build the floating paper-clip button.
   *
   * The element uses `namespace` both as its tag name and as its id. Hovering
   * it highlights the selected content (drawBorder / resetStyle); clicking it
   * resets the highlight and saves the page (createPage).
   *
   * @param {number} x - Horizontal position in pixels; the button is placed 5px to the right.
   * @param {number} y - Vertical position in pixels.
   * @returns {HTMLElement} The styled button; the caller attaches it to the page.
   */
  function createButton(x, y) {
    const btn = document.createElement(namespace);
    btn.id = namespace;
    btn.textContent = '📎'; // 🖇️ 💾

    Object.assign(btn.style, {
      // position
      position: 'absolute',
      left: x + 5 + 'px',
      top: y + 'px',
      // appearance
      fontFamily: 'none', // emoji
      background: 'repeating-linear-gradient(45deg, black, transparent 100px)', // black, cornflowerblue, grey, rosybrown
      border: 'ridge',
      borderColor: 'rosybrown',
      borderRadius: '50%',
      padding: '3px',
      minWidth: '30px',
      minHeight: '30px',
      fontSize: '20px',
      zIndex: 10000,
      opacity: 0.7,
      // center the character
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      outline: 'white', // none
      userSelect: 'none',
      cursor: 'default',
    });

    const setOpacity = (value) => {
      btn.style.opacity = value;
    };

    // Hover feedback: highlight the selection and make the button opaque.
    btn.onmouseover = () => {
      drawBorder();
      setOpacity(1);
    };
    btn.onmouseleave = () => { // onmouseout
      resetStyle();
      setOpacity(0.7);
    };

    // Clicking saves the selection; the highlight is removed first.
    btn.onclick = () => {
      resetStyle();
      createPage();
    };

    return btn;
  }
  122.  
  // Element currently highlighted by drawBorder(), and the value its `style`
  // attribute had before highlighting (null when it had no such attribute).
  let highlightedElement = null;
  let highlightedStyleAttribute = null;

  /**
   * Highlight the element that contains the current selection.
   *
   * The element and its original `style` attribute are remembered so that
   * resetStyle() can restore exactly that element, even if the selection has
   * changed in the meantime. Does nothing when there is no selection.
   */
  function drawBorder() {
    // Never stack highlights: undo a previous one before applying a new one.
    resetStyle();

    const selection = document.getSelection();
    if (!selection || selection.rangeCount === 0) return;

    const sel = getSelectedText();
    if (!sel || !sel.style) return;

    highlightedElement = sel;
    highlightedStyleAttribute = sel.getAttribute('style');

    // Draw border around input without affecting style, layout or spacing
    // https://overflow.adminforge.de/questions/29990319/draw-border-around-input-without-affecting-style-layout-or-spacing
    sel.style.outline = '2px double rosybrown';
    sel.style.background = 'rgb(250 250 210)';
    sel.style.color = 'black'; // DarkRed
  }

  /**
   * Undo the highlight applied by drawBorder().
   *
   * The whole `style` attribute is restored (or removed when the element had
   * none), so no empty `style` attribute is left on the element that is later
   * copied into the saved page. Safe to call when nothing is highlighted.
   *
   * Pages the original author attached to a FIXME on this routine (the
   * observed problem was not recorded):
   * http://gothicrichard.synthasite.com/what-i-fond-on-the-net.php
   * https://darknetdiaries.com/episode/65/
   */
  function resetStyle() {
    const element = highlightedElement;
    if (!element) return;

    if (highlightedStyleAttribute === null) {
      element.removeAttribute('style');
    } else {
      element.setAttribute('style', highlightedStyleAttribute);
    }

    highlightedElement = null;
    highlightedStyleAttribute = null;
  }
  152.  
  /**
   * Build a stand-alone HTML document from the current selection and offer
   * it as a download.
   *
   * Steps: create an empty HTML document, copy the title, add a <base> and
   * provenance <meta> tags, import selected <meta> tags from the page, insert
   * the selected content, run the clean-up passes, serialize and save.
   * Does nothing (apart from a console warning) when there is no selection.
   */
  function createPage() {
    const selection = document.getSelection();
    if (!selection || selection.rangeCount === 0) {
      console.warn('Nothing is selected; no page was saved.');
      return;
    }

    let data = new DOMParser().parseFromString('<!DOCTYPE html>', 'text/html');

    // set title
    if (document.title.length > 0) {
      data.title = document.title;
    }

    // set base, so relative links keep resolving against the source address
    const base = data.createElement('base');
    base.href = data.head.baseURI; // location.href;
    data.head.append(base);

    // Provenance metadata generated by this script.
    const generatedMeta = [
      ['url', location.href],
      ['date', String(time)],
      ['creator', namespace],
      ['user-agent', navigator.userAgent],
      ['content-type-sourced', document.contentType],
      ['charset-sourced', document.charset],
    ];

    for (const [name, content] of generatedMeta) {
      const meta = data.createElement('meta');
      meta.name = name;
      meta.content = content;
      data.head.append(meta);
    }

    // Metadata copied from the source page, stored under "<name>-imported".
    const importedMeta = ['viewport', 'description', 'keywords', 'generator'];

    for (const name of importedMeta) {
      const source = document.querySelector('meta[name="' + name + '" i]');
      if (!source) {
        console.warn(name + ': Not found.');
        continue;
      }
      const meta = data.createElement('meta');
      meta.name = name + '-imported';
      meta.content = source.content;
      data.head.append(meta);
    }

    data.body.innerHTML = getSelectedText().outerHTML;

    // DOM clean-up passes; media is listed in <head> before it is removed.
    // replaceMediaByLinks() exists in this file but is not part of the pipeline.
    data = listMediaElements(data);
    data = removeAttributes(data);
    data = removeMediaElements(data);
    data = correctLinks(data);
    data = removeEmptyElements(data);
    data = removeCommentNodes(data);

    // String passes; formatPage(), minify() and removeComments() exist in
    // this file but are not part of the pipeline.
    data = new XMLSerializer().serializeToString(data);
    data = removeMultipleWhiteSpace(data);

    savePage(data, createFilename());
  }
  241.  
  /**
   * Replace every <img> in `data` by an <a> element pointing at the image.
   *
   * The link receives a copy of the image's attributes, its `href` is set to
   * the image address and the address is also used as the link text.
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function replaceMediaByLinks(data) {
    for (const imgElement of data.querySelectorAll('img')) {
      const aElement = data.createElement('a');

      // Copy the attributes of the <img> element to the new <a> element
      for (const { name, value } of Array.from(imgElement.attributes)) {
        aElement.setAttribute(name, value);
      }

      // Set the target last so a copied attribute can never override it.
      aElement.setAttribute('href', imgElement.src);
      aElement.textContent = imgElement.src;

      // Replace the <img> element with the new <a> element
      imgElement.parentNode.replaceChild(aElement, imgElement);
    }
    return data;
  }
  261.  
  /**
   * Record the addresses of media elements as <meta> tags in the head of
   * `data`, so they remain known after the elements themselves are removed.
   *
   * For each audio/embed/img/video/frame/frameset/iframe element, every
   * non-empty `src` and `data-img-url` attribute yields one
   * <meta name="extracted-media-TAG" content="ADDRESS">.
   *
   * @param {Document} data - Document to inspect and annotate in place.
   * @returns {Document} The same document.
   */
  function listMediaElements(data) {
    const elements = [
      'audio', 'embed', 'img', 'video',
      'frame', 'frameset', 'iframe',
    ];
    const attributes = ['src', 'data-img-url'];

    for (const tagName of elements) {
      for (const element of data.querySelectorAll(tagName)) {
        for (const attribute of attributes) {
          const address = element.getAttribute(attribute);
          if (!address) continue;
          // Declared locally: an undeclared `meta` throws in strict mode.
          const meta = data.createElement('meta');
          meta.name = `extracted-media-${tagName}`;
          meta.content = address;
          data.head.append(meta);
        }
      }
    }
    return data;
  }
  284.  
  /**
   * Strip graphics, media, form controls, scripts and style sheets from `data`.
   *
   * TODO Remove span and preserve its contents
   *      (move span content to its parent element/node)
   *      https://overflow.lunar.icu/questions/9848465/js-remove-a-tag-without-deleting-content
   * TODO Replace "iframe" by "a href"
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function removeMediaElements(data) {
    const unwantedTags = [
      'audio', 'embed', 'img', 'video', 'button',
      'form', 'frame', 'frameset', 'iframe', 'textarea',
      'svg', 'input', 'path',
      'script', 'style',
      'select',
    ];

    unwantedTags.forEach((tagName) => {
      data.querySelectorAll(tagName).forEach((node) => node.remove());
    });

    return data;
  }
  309.  
  /**
   * Remove all attributes except `href`, `name` and `id` from every element
   * inside the body of `data`.
   *
   * https://stackoverflow.com/questions/1870441/remove-all-attributes
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function removeAttributes(data) {
    const keptAttributes = new Set(['href', 'name', 'id']);

    for (const element of data.querySelectorAll('body *')) {
      // `element.attributes` is live: removing an entry while walking it by
      // index skips the entry that follows. Iterate over a snapshot instead.
      for (const { name } of Array.from(element.attributes)) {
        if (!keptAttributes.has(name)) {
          element.removeAttribute(name);
        }
      }
    }

    return data;
  }
  329.  
  /**
   * Correct links for offline usage: links that point to a fragment of the
   * page itself are rewritten to fragment-only links ("#section").
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function correctLinks(data) {
    for (const element of data.querySelectorAll('a')) {
      // Links without a fragment have nothing to shorten; rewriting them
      // would replace the address by an empty string.
      if (!element.hash) continue;

      // Compare against the base address without its own fragment, otherwise
      // a base such as "page#top" never matches "page#section".
      const pageAddress = element.baseURI.split('#')[0];
      if (element.href.startsWith(pageAddress + '#')) {
        element.href = element.hash;
      }
    }
    return data;
  }
  341.  
  /**
   * Remove every element in the body of `data` whose `outerText` consists of
   * whitespace only (or is empty).
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function removeEmptyElements (data) {
    data.body.querySelectorAll('*').forEach((node) => {
      // "Blank" means: no non-whitespace character anywhere in the text.
      const hasVisibleText = /\S/.test(node.outerText);
      if (!hasVisibleText) {
        node.remove();
      }
    });
    return data;
  }
  350.  
  /**
   * Remove all comment nodes from `data`.
   *
   * @param {Document} data - Document to modify in place.
   * @returns {Document} The same document.
   */
  function removeCommentNodes(data) {
    // Let the iterator do the filtering: only comment nodes are visited.
    const nodeIterator = data.createNodeIterator(data, NodeFilter.SHOW_COMMENT);

    // Collect first, remove afterwards, so the traversal never walks a tree
    // that is being modified.
    const comments = [];
    let currentNode;
    while ((currentNode = nodeIterator.nextNode())) {
      comments.push(currentNode);
    }

    for (const comment of comments) {
      comment.remove();
    }
    return data;
  }
  369.  
  /**
   * Strip HTML comments ("<!-- ... -->", possibly spanning lines) from a string.
   *
   * @param {string} str - Markup to clean.
   * @returns {string} The markup without comments.
   */
  function removeComments(str) {
    const htmlComment = /<!--[\s\S]*?-->/g;
    return str.replace(htmlComment, '');
  }
  373.  
  /**
   * Collapse every run of whitespace into a single space, except inside
   * <code>, <pre> and <code-...> blocks, which are returned untouched.
   *
   * @param {string} str - Serialized markup.
   * @returns {string} The markup with whitespace collapsed.
   */
  function removeMultipleWhiteSpace(str) {
    // Alternative 1 captures a whole code block; alternative 2 a whitespace run.
    const codeBlockOrSpace = /(<(code|pre|code-[^\s]+)[^>]*>.*?<\/\2>)|(\s+)/gs;
    return str.replace(codeBlockOrSpace, (match, codeBlock) => codeBlock || " ");
  }
  385.  
  /**
   * Return the closest node that contains both ends of the current selection.
   *
   * The parents of the selection's anchor node and focus node are passed to
   * findFirstCommonAncestor().
   * https://stackoverflow.com/questions/32515175/get-parent-element-of-beginning-and-end-of-selected-text
   *
   * @returns {Node} Common ancestor of both selection ends.
   */
  function getSelectedText() {
    const { anchorNode, focusNode } = document.getSelection();
    const startParent = anchorNode.parentNode;
    const endParent = focusNode.parentNode;
    return findFirstCommonAncestor(startParent, endParent);
  }
  400.  
  /**
   * Find the first common ancestor of two nodes by spanning a Range between
   * them and reading its `commonAncestorContainer`.
   * https://stackoverflow.com/questions/2453742/whats-the-best-way-to-find-the-first-common-parent-of-two-dom-nodes-in-javascri
   *
   * @param {Node} nodeA - One node.
   * @param {Node} nodeB - The other node.
   * @returns {Node} The range's common ancestor container.
   */
  function findFirstCommonAncestor(nodeA, nodeB) {
    const span = new Range();
    const stretch = (from, to) => {
      span.setStart(from, 0);
      span.setEnd(to, 0);
    };

    stretch(nodeA, nodeB);

    // There is a complication: if nodeA is positioned after nodeB in the
    // document, the range ends up collapsed (start and end at the same
    // position), and `commonAncestorContainer` would likely just be
    // `nodeB.parentNode`. Spanning the range the other way round fixes that.
    if (span.collapsed) {
      stretch(nodeB, nodeA);
    }

    return span.commonAncestorContainer;
  }
  419.  
  /**
   * Minify an HTML string: drop line breaks/spaces between adjacent tags,
   * collapse other whitespace runs to one space, and trim the result.
   * Falsy input yields an empty string.
   * /questions/23284784/javascript-minify-html-regex
   * TODO Don't apply on code/pre
   *
   * @param {string} s - Markup to minify.
   * @returns {string} The minified markup.
   */
  function minify( s ){
    if (!s) {
      return "";
    }
    // Removes new lines and irrelevant spaces which might affect layout, and are better gone
    const withoutGaps = s.replace(/\>[\r\n ]+\</g, "><");
    // Tags are kept verbatim; any whitespace run becomes a single space.
    const collapsed = withoutGaps.replace(/(<.*?>)|\s+/g, (m, tag) => tag || ' ');
    return collapsed.trim();
  }
  430.  
  /**
   * Re-indent an HTML string: one tag per line (CRLF line ends), nested
   * content indented with tabs.
   * /questions/3913355/how-to-format-tidy-beautify-in-javascript
   * TODO Don't inset span in code/pre
   *
   * @param {string} html - Markup to format.
   * @returns {string} The formatted markup.
   */
  function formatPage(html) {
    const tab = '\t';
    const closingTag = /^\/\w/;
    const openingTag = /^<?\w[^>]*[^\/]$/;

    let indent = '';
    let output = '';

    // Splitting on "><" strips those characters from every piece; they are
    // added back when each piece is written out.
    for (const piece of html.split(/>\s*</)) {
      if (closingTag.test(piece)) {
        indent = indent.slice(tab.length);
      }

      output += `${indent}<${piece}>\r\n`;

      if (openingTag.test(piece) && !piece.startsWith("input")) {
        indent += tab;
      }
    }

    // Drop the surplus leading "<" and the trailing ">\r\n" added above.
    return output.slice(1, -3);
  }
  456.  
  /**
   * Build the download file name: "<title>_<YYYY-MM-DD>_<HH-MM-SS>.html",
   * lower-cased.
   *
   * The title is the document title with spaces replaced by underscores, or,
   * when the document has no title, the last segment of the location path.
   * Date and time are both taken from the script-wide `time` value in local
   * time, so the two parts of the stamp always agree (an ISO string would
   * report the date in UTC).
   *
   * @returns {string} File name for the saved page.
   */
  function createFilename() {
    const pad = (value, width = 2) => String(value).padStart(width, '0');

    const day = [
      pad(time.getFullYear(), 4),
      pad(time.getMonth() + 1), // getMonth() is zero-based
      pad(time.getDate()),
    ].join('-');

    const now = [
      pad(time.getHours()),
      pad(time.getMinutes()),
      pad(time.getSeconds()),
    ].join('-');

    let title = document.title;
    if (!title) {
      const segments = location.pathname.split('/');
      title = segments[segments.length - 1];
    }
    title = title.replace(/ /g, '_');

    return `${title}_${day}_${now}.html`.toLowerCase();
  }
  511.  
  /**
   * Offer `data` as an HTML file download named `fileName`.
   *
   * A temporary object URL is created for the data and a detached <a
   * download> element is clicked programmatically.
   * https://stackoverflow.com/questions/4545311/download-a-file-by-jquery-ajax
   * https://stackoverflow.com/questions/43135852/javascript-export-to-text-file
   *
   * @param {string} data - Serialized HTML to save.
   * @param {string} fileName - Suggested file name.
   */
  function savePage(data, fileName) {
    const blob = new Blob([data], {type: "text/html"});
    const url = window.URL.createObjectURL(blob);

    // The link is created on demand, so nothing touches the DOM at load time.
    const a = document.createElement("a");
    a.href = url;
    a.download = fileName;
    a.click();

    // Release the object URL only after a delay: the download triggered by
    // click() may not have read the blob yet when click() returns.
    setTimeout(() => {
      window.URL.revokeObjectURL(url);
    }, 10000);
  }