Paper Clip (Save pages as XHTML/Markdown/Text)

Edit and save selection as clean XHTML, Markdown or Text file optimized for printing. Hotkey: Command + Shift + S.

  1. // ==UserScript==
  2. // @name Paper Clip (Save pages as XHTML/Markdown/Text)
  3. // @description Edit and save selection as clean XHTML, Markdown or Text file optimized for printing. Hotkey: Command + Shift + S.
  4. // @author Schimon Jehudah, Adv.
  5. // @namespace i2p.schimon.paperclip
  6. // @homepageURL https://greasyfork.org/scripts/465960-paper-clip
  7. // @supportURL https://greasyfork.org/scripts/465960-paper-clip/feedback
  8. // @copyright 2023 - 2025, Schimon Jehudah (http://schimon.i2p)
  9. // @license MIT; https://opensource.org/licenses/MIT
  10. // @require https://unpkg.com/turndown/dist/turndown.js
  11. // @exclude devtools://*
  12. // @match file:///*
  13. // @match *://*/*
  14. // @version 25.01.24
  15. // @run-at document-end
  16. // @icon data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj48dGV4dCB5PSIuOWVtIiBmb250LXNpemU9IjkwIj7wn5OOPC90ZXh0Pjwvc3ZnPgo=
  17. // ==/UserScript==
  18.  
  19. /*
  20.  
  21. TODO
  22.  
  23. 0) ePUB and HTMLZ
  24.  
  25. 1) Display preview
  26.  
  27. 2) Bookmarklet
  28.  
  29. 3) jsPDF /parallax/jsPDF
  30.  
  31. 4) Remove style="background-image: url('https://torrentfreak.com/images/canada-featured.jpg');"
  32. https://torrentfreak.com/movie-companies-cannot-use-piracy-notice-scheme-to-facilitate-class-action-230629/
  33. 5) Save description too for MD and TXT. example: restoreprivacy.com
  34.  
  35. 6) Footnotes for TXT
  36. * Collect all hyperlinks (array)
  37. * Scan for text
  38. NO
  39. AGAIN
  40. * For each hyperlink detected, insert a pointer of some sort
  41. NO
  42. AGAIN
  43. * Detect hyperlink
  44. * Place links into an array
  45. * Replace each hyperlink by text and a number (i.e. [1])
  46. * Replace hyperlink
  47. * Append link to array
  48. * Extract all links from array as footnotes
  49.  
  50. FIXME
  51.  
  52. 1) https://vision.gel.ulaval.ca/~klein/duke3d/
  53.  
  54. 2) Replace element-img enclosed in element-a by href of element-a
  55. NO
  56. Better to place these as footnotes
  57.  
  58. 3) Rense.com & epubbooks.com
  59.  
  60. 4) Replace element img with attribute alt to element span
  61. https://datawrapper.dwcdn.net/qBzPR/6/
  62.  
  63. 5) Code (pre) is realized as one line questions/72294299/multiple-top-level-packages-discovered-in-a-flat-layout
  64.  
  65. */
  66.  
  67. // Check whether HTML; otherwise, exit.
  68. //if (!document.contentType == 'text/html')
  69. //if (document.doctype == null) return;
  70.  
  // Inline style values stored by drawBorder() and written back by resetStyle().
  var originalBackground;
  var originalColor;
  var originalDisplay;
  var originalOutline;

  // Timestamp taken once, when the script starts running on the page.
  const time = new Date();
  // Used as the toolbar's tag name / id and as the value of the "creator" meta tag.
  const namespace = 'i2p.schimon.paperclip';
  77.  
  78. // FIXME set hotkey
  // Hotkey: Command + Shift + S saves the current selection as XHTML.
  // Registered with addEventListener instead of assigning document.onkeyup,
  // so the page's own onkeyup handler is not overwritten by this script and
  // a page that assigns document.onkeyup later cannot silently disable it.
  document.addEventListener('keyup', function (e) {
    // 83 is the legacy key code for "S" (e.which is deprecated but kept so
    // the hotkey matches exactly the same key as before).
    if (e.metaKey && e.shiftKey && e.which === 83) { // Command + Shift + S
      console.info('Saving selection to HTML.');
      generateXHTML();
    }
  });
  86.  
  87. // TODO https://community.arm.com/support-forums/f/architectures-and-processors-forum/5814/what-is-difference-between-arm7-and-arm-cortex-m-series/13573#13573
  88. // event listener
  89. // event "click" and "mouseup" are the most sensible, albeit not accurate
  90. // event "mousemove" is the most manipulative (per user), yet (almost) the most accurate
  91. // event "select" seem to work only inside element input
  // After a click: show the toolbar next to the pointer when text is selected
  // and no toolbar exists yet; remove an existing toolbar when nothing is
  // selected any more.
  window.addEventListener('click', (event) => {
    const hasSelection = document.getSelection().toString().length;
    const toolbar = document.getElementById(namespace);
    if (toolbar) {
      if (!hasSelection) {
        toolbar.remove();
      }
      return;
    }
    if (!hasSelection) {
      return;
    }
    const created = createButton(event.pageX, event.pageY);
    // TODO Move "append"s to a function
    for (const type of ['close', 'save', 'highlight', 'edit', 'send']) {
      created.append(actionButton(type));
    }
    document.body.append(created);
  }, {passive: true});
  110.  
  111. // TODO declare variables once
  112. // NOTE consider "mousedown"
  113. // NOTE consider moving this functionality into function createButton()
  // While the pointer moves, drop the toolbar as soon as the selection is empty.
  window.addEventListener('mousemove', () => {
    const toolbar = document.getElementById(namespace);
    if (!toolbar) {
      return;
    }
    if (!document.getSelection().toString().length) {
      toolbar.remove();
    }
  });
  121.  
  /**
   * Create the (still empty) floating toolbar element.
   *
   * @param {number} x - Horizontal page coordinate; the toolbar is placed 5px to its right.
   * @param {number} y - Vertical page coordinate used as the toolbar's top.
   * @returns {HTMLElement} The styled toolbar; the caller adds it to the page.
   */
  function createButton(x, y) {
    const toolbar = document.createElement(namespace);
    toolbar.id = namespace;
    // Object.assign writes the properties in the listed order, so the
    // "all: unset" reset is applied before every other declaration.
    Object.assign(toolbar.style, {
      all: 'unset',
      // position
      position: 'absolute',
      left: x + 5 + 'px',
      top: y + 'px',
      // appearance
      direction: 'ltr',
      fontFamily: 'system-ui',
      background: 'black',
      border: 'thin solid white',
      borderRadius: '3px',
      padding: '1%',
      minWidth: '30px',
      minHeight: '30px',
      zIndex: 10000,
      opacity: 0.7,
      // center the entries
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      outline: 'white',
      userSelect: 'none',
      cursor: 'default',
    });
    // Fade out when the pointer leaves, become fully opaque on hover.
    toolbar.onmouseleave = () => {
      toolbar.style.opacity = 0.27;
    };
    toolbar.onmouseover = () => {
      toolbar.style.opacity = 1;
    };
    return toolbar;
  }
  164.  
  /**
   * Build one toolbar entry (a <span>) for the given action.
   *
   * Menu entries ('save', 'edit', 'send', 'back') swap the toolbar's children
   * for another set of entries; the remaining types perform the action itself.
   * An unknown type yields a styled span with no text and no action.
   *
   * @param {string} type - One of: back, close, delete, edit, editable, email,
   *   highlight, irc, markdown, text, xhtml, xmpp, save, send.
   * @returns {HTMLSpanElement} The configured entry.
   */
  function actionButton(type) {
    const item = document.createElement('span');
    item.id = `${namespace}-${type}`;
    item.style.outline = 'none';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.style.fontWeight = 'bold';
    item.style.color = 'white';
    item.onmouseleave = () => {resetStyle();};

    // Replace the toolbar's entries with a new set, created left to right.
    const showMenu = (...types) => {
      item.parentElement.replaceChildren(...types.map((t) => actionButton(t)));
    };

    // Build a "<scheme>:?subject=...&body=..." link for the current selection.
    // Subject and body are percent-encoded so that characters such as "&",
    // "#" or line breaks in the title or the selected text cannot truncate
    // or corrupt the message. The text is read when the link is used, not
    // when the entry is created.
    const shareLink = (scheme) => {
      const content = removeMultipleWhiteSpace(getSelectedText().outerText);
      const subject = encodeURIComponent(`Content on ${location.hostname}`);
      const body = encodeURIComponent(
        `${document.title}\r\n\r\n${content}\r\n\r\n${location.hostname}${location.pathname}`
      );
      return `${scheme}:?subject=${subject}&body=${body}`;
    };

    switch (type) {
      case 'back':
        item.textContent = '<';
        item.onclick = () => {showMenu('close', 'save', 'highlight', 'edit', 'send');};
        break;
      case 'close':
        item.textContent = 'x';
        item.title = 'Double-click to close';
        item.ondblclick = () => {item.parentElement.remove();};
        break;
      case 'delete':
        item.textContent = 'Delete';
        item.title = 'Double-click to delete content';
        item.ondblclick = () => {getSelectedText().remove();};
        item.onmouseenter = () => {drawBorder('darkred', 'rgb(255 182 182)', '2px dashed hotpink');};
        break;
      case 'edit':
        item.textContent = 'Edit';
        item.onclick = () => {showMenu('back', 'delete', 'editable');};
        break;
      case 'editable':
        item.onmouseenter = () => {drawBorder('darkblue', 'rgb(200 182 255)', '2px solid blue');};
        if (getSelectedText().contentEditable == 'true') {
          item.textContent = 'Stop Editing';
          item.title = 'Turn off edit mode';
        } else {
          item.textContent = 'Start Editing';
          item.title = 'Turn on edit mode';
        }
        item.onclick = () => {
          const texts = toggleEditeMode();
          item.textContent = texts[0];
          item.title = texts[1];
        };
        break;
      case 'email':
        item.textContent = 'Email';
        item.title = 'Send via Email as reference';
        item.onclick = () => {window.location = shareLink('mailto');};
        break;
      case 'highlight':
        item.textContent = 'Mark';
        item.title = 'Highlight text';
        item.onclick = () => {drawBorder('black', 'khaki', 'unset');};
        break;
      case 'irc':
        item.textContent = 'IRC';
        item.title = 'Send via IRC as reference';
        item.onclick = () => {alert('This button will be supported in next update');};
        break;
      case 'markdown':
        item.textContent = 'Markdown';
        item.title = 'Save to Markdown';
        item.onclick = () => {generateMD();}; //TODO URL reference to source URL
        break;
      case 'text':
        item.textContent = 'Text';
        item.title = 'Save to Plain Text';
        item.onclick = () => {generateTXT();};
        break;
      case 'xhtml':
        item.textContent = 'HTML';
        item.title = 'Save to HTML (valid XHTML)';
        item.onclick = () => {generateXHTML();};
        break;
      case 'xmpp':
        item.textContent = 'XMPP';
        item.title = 'Send via Jabber as reference';
        item.onclick = () => {window.location = shareLink('xmpp');};
        break;
      case 'save':
        item.textContent = 'Save';
        item.onmouseenter = () => {drawBorder('black', 'rgb(250 250 210)', '2px double rosybrown');};
        item.onclick = () => {showMenu('back', 'xhtml', 'markdown', 'text');};
        break;
      case 'send':
        item.textContent = 'Send';
        item.onclick = () => {showMenu('back', 'email', 'irc', 'xmpp');};
        break;
    }
    return item;
  }
  293.  
  /**
   * Flip contentEditable on the element returned by getSelectedText().
   *
   * @returns {string[]} [label, tooltip] for the toolbar entry after the switch.
   */
  function toggleEditeMode() {
    if (getSelectedText().contentEditable == 'true') {
      getSelectedText().contentEditable = 'false';
      return ['Continue Editing', 'Edit content'];
    }
    getSelectedText().contentEditable = 'true';
    return ['Stop Editing', 'Turn off edit mode'];
  }
  306.  
  307. // FIXME
  /**
   * Colour the element that contains the current selection.
   *
   * A Selection object has no `style` property, so the colours are applied
   * to the element enclosing the selection's first range. The previous
   * inline colour and background are stored in originalColor and
   * originalBackground. Does nothing when there is no selection.
   *
   * @param {string} color - CSS text colour to apply.
   * @param {string} background - CSS background to apply.
   */
  function highlightSelection(color, background) {
    const selection = document.getSelection();
    if (!selection || selection.rangeCount === 0) {
      return;
    }
    let target = selection.getRangeAt(0).commonAncestorContainer;
    // Text and other non-element nodes carry no style; use their parent element.
    if (target.nodeType !== 1) { // 1 = ELEMENT_NODE
      target = target.parentElement;
    }
    if (!target) {
      return;
    }
    originalColor = target.style.color;
    originalBackground = target.style.background;
    target.style.background = background;
    target.style.color = color;
  }
  317.  
  /**
   * Outline and tint the element returned by getSelectedText().
   *
   * The element's current inline colour, outline and background are stored in
   * originalColor / originalOutline / originalBackground before being changed.
   * An outline is used because it does not affect layout or spacing.
   *
   * @param {string} color - CSS text colour.
   * @param {string} background - CSS background.
   * @param {string} outline - CSS outline shorthand.
   */
  function drawBorder(color, background, outline) {
    const target = getSelectedText();

    // Remember the current inline values.
    originalColor = target.style.color;
    originalOutline = target.style.outline;
    originalBackground = target.style.background;

    // Each property first gets a fixed default, then the caller's value.
    target.style.outline = '2px double rosybrown';
    target.style.outline = outline;

    target.style.background = 'rgb(250 250 210)';
    target.style.background = background;

    target.style.color = 'black';
    target.style.color = color;
  }
  345.  
  346. // TODO remove attribute 'style' of first element after 'body'
  347. // FIXME
  348. // http://gothicrichard.synthasite.com/what-i-fond-on-the-net.php
  349. // https://darknetdiaries.com/episode/65/
  /**
   * Write the values stored in originalColor, originalOutline and
   * originalBackground back onto the element returned by getSelectedText().
   */
  function resetStyle() {
    const target = getSelectedText();
    Object.assign(target.style, {
      color: originalColor,
      outline: originalOutline,
      background: originalBackground,
    });
  }
  356.  
  /**
   * Save the text of the selected element as a plain-text file, followed by
   * a footer with the creation time, the source URL and the page title.
   */
  function generateTXT() {
    const lines = [
      getSelectedText().outerText,
      '',
      `Created: ${time.toDateString()} ${time.toLocaleTimeString()}`,
      `Source: ${location.href}`,
      `Title: ${document.title}`,
      '',
      'Document generated using Paper Clip',
      'https://greasyfork.org/en/scripts/465960-paper-clip',
      'Save selected content into clean HTML, Markdown or Text',
      '', // the file ends with a line break
    ];
    savePage(lines.join('\n'), createFilename('txt'), "text/plain");
  }
  375.  
  /**
   * Convert the selected element to Markdown with Turndown and save it.
   *
   * The file starts with a linked title heading and the page description
   * (see getDescription) and ends with a footer naming the source and tools.
   */
  function generateMD() {
    // Work on a copy inside a fresh document so the page itself is untouched.
    const parser = new DOMParser();
    let data = parser.parseFromString('', 'text/html'); // Falkon: TrustedHTML
    data.body.innerHTML = getSelectedText().outerHTML;
    console.log(data)
    const unwanted = [
      'button', 'form', 'frame', 'frameset', 'iframe', 'textarea',
      'script', 'style',
      'select'];
    data = removeMediaElements(data, unwanted);
    data = new TurndownService().turndown(data);

    // Fall back to the last path segment when the page has no title.
    let title = document.title;
    if (!title) {
      const segments = location.pathname.split('/');
      title = segments[segments.length-1];
    }

    data = [
      `# [${title}](${location.href})`,
      getDescription(),
      data,
      '',
      '---',
      '',
      `This page was saved at ${time.toDateString()} ${time.toLocaleTimeString()} from [${location.hostname}](${location.href})`,
      'using [Paper Clip](https://greasyfork.org/en/scripts/465960-paper-clip) and converted into Markdown with [Turndown](https://mixmark-io.github.io/turndown/)',
      '', // the file ends with a line break
    ].join('\n');
    console.log(data)
    savePage(data, createFilename('md'), "text/plain");
  }
  416.  
  /**
   * Save the selected element as a cleaned-up XHTML file.
   *
   * Builds a fresh document, copies the page title and selected markup into
   * it, records provenance and imported meta tags in <head>, strips media,
   * scripts, forms and most attributes, then serializes the result with
   * XMLSerializer and hands it to savePage().
   */
  function generateXHTML() {
    const domParser = new DOMParser();
    let data = domParser.parseFromString('', 'text/html'); // Falkon: TrustedHTML
    // set title
    if (document.title.length > 0) {
      data.title = document.title;
    }
    // set base
    // NOTE do not "set base".
    // TODO Complete links of ./ and / etc. by fetching complete
    // url and replace href with it (it = complete url)
    // Declared locally; it was previously an implicit global.
    const base = data.createElement('base');
    base.href = data.head.baseURI; // location.href;
    data.head.append(base);

    // Provenance metadata generated by this script.
    const generatedMeta = [
      ['url', location.href],
      ['date', time],
      ['creator', namespace],
      ['user-agent', navigator.userAgent],
      //['connection-type', navigator.connection.effectiveType],
      ['content-type-sourced', document.contentType],
      ['charset-sourced', document.charset],
    ];
    for (const [name, content] of generatedMeta) {
      // Created in the output document, like every other node added to it.
      const meta = data.createElement('meta');
      meta.name = name;
      meta.content = content;
      data.head.append(meta);
    }

    // Meta tags copied from the page, stored under "<name>-imported".
    const importedMeta = ['viewport', 'description', 'keywords', 'generator'];
    for (const name of importedMeta) {
      const source = document.querySelector('meta[name="' + name + '" i]');
      if (!source) {
        console.warn(name + ': Not found.');
        continue;
      }
      const meta = data.createElement('meta');
      meta.name = name + '-imported';
      meta.content = source.content;
      data.head.append(meta);
    }

    if (document.dir == 'rtl') {
      data.dir = 'rtl';
    }
    data.body.innerHTML = getSelectedText().outerHTML;
    // Record media URLs as metadata before the media elements are removed.
    data = listMediaElements(data);
    const elementsToRemove = [
      'audio', 'embed', 'img', 'video', 'button',
      'form', 'frame', 'frameset', 'iframe', 'textarea',
      'svg', 'input', 'path',
      'script', 'style',
      'select'];
    data = removeMediaElements(data, elementsToRemove);
    data = removeAttributes(data);
    //data = replaceMediaByLinks(data);
    data = correctLinks(data);
    data = removeEmptyElements(data);
    data = removeCommentNodes(data);
    //data = removeWhitespaceFromNodes(data, ['code', 'pre']);
    //data = replaceCodeAndPre(data);
    //data = setStylesheet(data);
    data = new XMLSerializer().serializeToString(data);
    //data = formatPage(data);
    //data = minify(data);
    //data = removeComments(data);
    data = removeMultipleWhiteSpace(data);
    savePage(
      data,
      createFilename('xhtml'),
      "text/html"
    );
  }
  509.  
  510. // FIXME
  511. // body::-webkit-scrollbar{width:10.666666666666666px;height:10.666666666666666px;}
  /**
   * Add a <style> element to the head of the document being built.
   *
   * The stylesheet is created in and appended to `node`, not the live page,
   * so the page is left untouched and the rule ends up in the saved file.
   * The rule for code/pre is only filled in when `node` contains such an
   * element; otherwise the appended <style> stays empty.
   *
   * @param {Document} node - Document under construction.
   * @returns {Document} The same document.
   */
  function setStylesheet(node) {
    const cssStylesheet = node.createElement('style');
    cssStylesheet.type = 'text/css';
    if (node.querySelector('code, pre')) {
      cssStylesheet.textContent = 'code, pre {overflow: auto; display: grid; max-width: 100vw;}';
    }
    node.head.append(cssStylesheet);
    return node;
  }
  522.  
  523. // TODO Place plain text inside elements <code> <pre> (eliminate <span>, <br> etc.)
  524. // TODO Eliminate all elements without changing original text layout
  /**
   * Flatten every <code> and <pre> element to plain text content.
   *
   * The element itself is kept and only its children are replaced by their
   * text. Assigning `outerText` instead would replace the element with a
   * text node, which also removes it from the live collection while it is
   * being iterated, so elements would be skipped.
   *
   * @param {Document|Element} node - Root to search.
   * @returns {Document|Element} The same node.
   */
  function replaceCodeAndPre(node) { // correctCodeElements
    for (const tagName of ['code', 'pre']) {
      // Snapshot the live HTMLCollection before modifying the tree.
      for (const element of Array.from(node.getElementsByTagName(tagName))) {
        element.textContent = element.textContent;
      }
    }
    return node;
  }
  540.  
  /**
   * Replace every <img> by an <a> that links to the image.
   *
   * The image's attributes are copied onto the link, its href and its text
   * are set to the image source.
   *
   * @param {Document} node - Document to modify.
   * @returns {Document} The same document.
   */
  function replaceMediaByLinks(node) {
    for (const imgElement of node.querySelectorAll('img')) {
      const aElement = node.createElement('a');
      // Copy the attributes of the <img> element to the new <a> element
      for (const {name, value} of Array.from(imgElement.attributes)) {
        aElement.setAttribute(name, value);
      }
      // Set the link target on the element itself
      // (previously it was assigned to a property of the setAttribute function).
      aElement.href = imgElement.src;
      aElement.textContent = imgElement.src;
      // Replace the <img> element with the new <a> element
      imgElement.parentNode.replaceChild(aElement, imgElement);
    }
    return node;
  }
  558.  
  559. // TODO
  560. // Catch all elements with attribute
  561. // contains URL, and
  562. // starts with / (add hostname), and
  563. // contains / (add hostname with first parent path), and
  564. // validate using URL API
  /**
   * Record the URLs of media elements as <meta> tags in the document head.
   *
   * For each matching element the attributes src, data-img-url and
   * data-source are checked; every value not seen before becomes a meta
   * named "extracted-media-<element name>".
   *
   * @param {Document} node - Document to scan and annotate.
   * @returns {Document} The same document.
   */
  function listMediaElements(node) {
    const selectors = [
      'audio', 'embed', 'img', 'svg', 'video',
      'frame', 'frameset', 'iframe', '[data-source]',
    ];
    const urlAttributes = ['src', 'data-img-url', 'data-source'];
    const seen = new Set();
    for (const selector of selectors) {
      for (const element of node.querySelectorAll(selector)) {
        for (const attribute of urlAttributes) {
          const value = element.getAttribute(attribute);
          if (!value || seen.has(value)) {
            continue;
          }
          seen.add(value);
          const meta = node.createElement('meta');
          meta.name = `extracted-media-${element.nodeName.toLowerCase()}`; // Was the selector
          meta.content = value;
          node.head.append(meta);
        }
      }
    }
    return node;
  }
  590.  
  591. //Remove graphics, media and scripts
  /**
   * Remove every element matching one of the given selectors.
   *
   * For media selectors (audio, embed, frame, frameset, iframe, img, video)
   * an empty <a> placeholder is inserted right after the element before it
   * is removed.
   *
   * TODO Remove span and preserve its contents
   * (move span content to its parent element/node)
   * /questions/9848465/js-remove-a-tag-without-deleting-content
   *
   * FIXME Couldn't remove "iframe" for
   * https://www.dailymail.co.uk/health/article-3460321/How-Big-Pharma-greed-killing-tens-thousands-world-Patients-medicated-given-profitable-drugs-little-proven-benefits-leading-doctors-warn.html
   *
   * @param {Document} node - Document to clean.
   * @param {string[]} elements - Selectors (tag names) to remove.
   * @returns {Document} The same document.
   */
  function removeMediaElements(node, elements) {
    // Declared locally; it was previously an implicit global, which throws
    // a ReferenceError in strict mode and otherwise leaks onto the page.
    const media = ['audio', 'embed', 'frame', 'frameset', 'iframe', 'img', 'video'];
    for (const selector of elements) {
      const isMedia = media.includes(selector);
      for (const element of node.querySelectorAll(selector)) {
        if (isMedia) {
          const placeholder = node.createElement('a');
          element.parentNode.insertBefore(placeholder, element.nextSibling); // insertAfter
        }
        element.remove();
      }
    }
    return node;
  }
  623.  
  624. // Remove all attributes
  /**
   * Strip every attribute except href, name and id from all elements in <body>.
   *
   * @param {Document} node - Document to clean.
   * @returns {Document} The same document.
   */
  function removeAttributes(node) {
    // /questions/1870441/remove-all-attributes
    const kept = new Set(['href', 'name', 'id']);
    for (const element of node.querySelectorAll('body *')) {
      // `attributes` is live: removing while walking it by index skips the
      // attribute that follows each removed one. Iterate over a snapshot.
      for (const {name} of Array.from(element.attributes)) {
        if (!kept.has(name)) {
          element.removeAttribute(name);
        }
      }
    }
    return node;
  }
  641.  
  642. // Correct links for offline usage
  /**
   * Turn links that point into the same document into bare "#fragment"
   * links so they keep working offline.
   *
   * @param {Document} node - Document whose <a> elements are adjusted.
   * @returns {Document} The same document.
   */
  function correctLinks(node) {
    for (const anchor of node.querySelectorAll('a')) {
      const samePagePrefix = anchor.baseURI + '#';
      if (!anchor.href.startsWith(samePagePrefix)) {
        continue;
      }
      anchor.href = anchor.hash;
    }
    return node;
  }
  653.  
  /**
   * Remove every element in <body>, except <br>, whose text content is
   * empty or whitespace only.
   *
   * @param {Document} node - Document to clean.
   * @returns {Document} The same document.
   */
  function removeEmptyElements (node) {
    const blank = /^\s*$/;
    for (const element of node.body.querySelectorAll('*')) {
      if (element.tagName.toLowerCase() == 'br') {
        continue;
      }
      if (blank.test(element.textContent)) {
        element.remove();
      }
    }
    return node;
  }
  663.  
  /**
   * Remove all comment nodes from the document, logging "#comment" for each.
   *
   * @param {Document} node - Document to walk; also the root of the iteration.
   * @returns {Document} The same document.
   */
  function removeCommentNodes(node) {
    const iterator = node.createNodeIterator(
      node,                // root of the traversal
      NodeFilter.SHOW_ALL, // visit every node type
      null,
      false
    );
    for (let current = iterator.nextNode(); current; current = iterator.nextNode()) {
      if (current.nodeName != '#comment') {
        continue;
      }
      current.remove();
      console.log(current.nodeName);
    }
    return node;
  }
  681.  
  /**
   * Strip HTML comments from a markup string.
   *
   * @param {string} str - Markup.
   * @returns {string} Markup without "<!-- ... -->" sections.
   */
  function removeComments(str) {
    // Non-greedy, so each comment ends at its own closing "-->".
    const htmlComment = /<!--[\s\S]*?-->/g;
    return str.replace(htmlComment, '');
  }
  685.  
  /**
   * Trim the text of every text node below `node`, without descending into
   * elements whose lower-cased tag name is listed in `excludedTags`.
   *
   * @param {Node} node - Root of the subtree.
   * @param {string[]} excludedTags - Lower-case tag names to leave untouched.
   * @returns {Node} The same node.
   */
  function removeWhitespaceFromNodes(node, excludedTags) {
    function visit(current) {
      if (current.nodeType === Node.TEXT_NODE) {
        current.textContent = current.textContent.trim();
        return;
      }
      if (current.nodeType !== Node.ELEMENT_NODE) {
        return;
      }
      if (excludedTags.includes(current.tagName.toLowerCase())) {
        return;
      }
      for (let index = 0; index < current.childNodes.length; index++) {
        visit(current.childNodes[index]);
      }
    }
    visit(node);
    return node;
  }
  702.  
  /**
   * Collapse every run of whitespace into a single space, leaving complete
   * <code>...</code> and <pre>...</pre> blocks exactly as they are.
   *
   * @param {string} str - Text or markup.
   * @returns {string} The collapsed string.
   */
  function removeMultipleWhiteSpace(str) {
    // Alternative 1 matches a whole code/pre block (dot also matches line
    // breaks because of the "s" flag); alternative 2 matches whitespace.
    const codeBlockOrWhitespace = /(<(code|pre)[^>]*>.*?<\/\2>)|(\s+)/gs;
    return str.replace(codeBlockOrWhitespace, (match, codeBlock) => {
      return codeBlock ? codeBlock : " ";
    });
  }
  723.  
  724. // Get parent element of beginning (and end) of selected text
  725. // /questions/32515175/get-parent-element-of-beginning-and-end-of-selected-text
  /**
   * Return the first common ancestor of the parents of the nodes where the
   * current selection begins (anchor) and ends (focus).
   *
   * @returns {Node} The common ancestor found by findFirstCommonAncestor().
   */
  function getSelectedText() {
    const { anchorNode, focusNode } = document.getSelection();
    return findFirstCommonAncestor(anchorNode.parentNode, focusNode.parentNode);
  }
  738.  
  739. // find common parent
  740. // /questions/2453742/whats-the-best-way-to-find-the-first-common-parent-of-two-dom-nodes-in-javascri
  /**
   * Find the closest node that contains both given nodes.
   *
   * @param {Node} nodeA - First node.
   * @param {Node} nodeB - Second node.
   * @returns {Node} The commonAncestorContainer of a range spanning both.
   */
  function findFirstCommonAncestor(nodeA, nodeB) {
    const span = new Range();
    span.setStart(nodeA, 0);
    span.setEnd(nodeB, 0);
    // If nodeA comes after nodeB in the document the range collapses, i.e.
    // start and end sit at the same position, and its common ancestor would
    // likely just be nodeB's parent. Building the range the other way
    // round gives the real result.
    if (span.collapsed) {
      span.setStart(nodeB, 0);
      span.setEnd(nodeA, 0);
    }
    return span.commonAncestorContainer;
  }
  757.  
  758. // minify html
  759. // /questions/23284784/javascript-minify-html-regex
  760. // TODO Don't apply on code/pre
  /**
   * Minify an HTML string.
   *
   * @param {string} s - Markup; any falsy value yields "".
   * @returns {string} Markup without whitespace between tags and with every
   *   other whitespace run reduced to a single space.
   */
  function minify( s ){
    if (!s) {
      return "";
    }
    // Drop line breaks and spaces that sit directly between two tags.
    const joinedTags = s.replace(/\>[\r\n ]+\</g, "><");
    // Keep tags untouched, squeeze every other whitespace run.
    const squeezed = joinedTags.replace(/(<.*?>)|\s+/g, (match, tag) => {
      return tag ? tag : ' ';
    });
    return squeezed.trim();
  }
  768.  
  769. // format html
  770. // /questions/3913355/how-to-format-tidy-beautify-in-javascript
  771. // TODO Don't inset span in code/pre
  /**
   * Pretty-print an HTML string: one tag per line, nested tags indented
   * with tabs, lines ended with CRLF.
   *
   * @param {string} html - Markup.
   * @returns {string} The indented markup.
   */
  function formatPage(html) {
    const tab = '\t';
    const lines = [];
    let indent = '';
    // Splitting at "><" strips the brackets between tags; they are put back
    // around every piece below.
    for (const element of html.split(/>\s*</)) {
      // A closing tag steps one level out before it is written.
      if (/^\/\w/.test(element)) {
        indent = indent.substring(tab.length);
      }
      lines.push(indent + '<' + element + '>\r\n');
      // An opening tag that is not self-closed steps one level in.
      const opensLevel = /^<?\w[^>]*[^\/]$/.test(element);
      if (opensLevel && !element.startsWith("input")) {
        indent += tab;
      }
    }
    const result = lines.join('');
    // Cut the bracket added in front of the first piece and the bracket
    // plus CRLF added after the last one.
    return result.substring(1, result.length-3);
  }
  787.  
  /**
   * Return the page description as a Markdown second-level heading line.
   *
   * Looks at <meta name=description> first, then <meta itemprop=description>.
   *
   * @returns {string} "## <description>\n", or "" when neither meta exists.
   */
  function getDescription() {
    const selectors = ['meta[name=description]', 'meta[itemprop=description]'];
    for (const selector of selectors) {
      const meta = document.querySelector(selector);
      if (meta) {
        return '## ' + meta.content + '\n';
      }
    }
    return '';
  }
  800.  
  /**
   * Build the download file name: "<title>_<YYYY-MM-DD>_<HH-MM-SS>.<extension>",
   * all lower case.
   *
   * The title is the page title, or the last path segment when the page has
   * no title. The date and the time are both taken from `time` in local
   * time, so they always describe the same instant.
   *
   * @param {string} extension - File extension without the dot.
   * @returns {string} The file name.
   */
  function createFilename(extension) {
    const pad = (value) => String(value).padStart(2, '0');

    // Local date, to match the local time below (an ISO/UTC date could be
    // a day off around midnight).
    const day = [
      time.getFullYear(),
      pad(time.getMonth() + 1),
      pad(time.getDate())
    ].join('-');

    const now = [
      time.getHours(),
      time.getMinutes(),
      time.getSeconds()
    ].map(pad).join('-');

    let title;
    if (document.title) {
      title = document.title;
    } else {
      const segments = location.pathname.split('/');
      title = segments[segments.length-1];
    }

    // TODO ‘ ’ ·
    title = title
      .replace(/[\/?<>\\:*|'"\.,]/g, '') // characters unsafe in file names
      .replace(/[ -]/g, '_')             // spaces and hyphens become underscores
      .replace(/_+/g, '_');              // collapse runs of any length

    return `${title}_${day}_${now}.${extension}`.toLowerCase();
  }
  859.  
  860. // export file
  861. // /questions/4545311/download-a-file-by-jquery-ajax
  862. // /questions/43135852/javascript-export-to-text-file
  /**
   * Offer `fileData` as a download.
   *
   * savePage(fileData, fileName, fileType): wraps the data in a Blob of the
   * given MIME type, points a reusable <a> element at an object URL for it,
   * clicks the link and revokes the URL.
   */
  var savePage = (function () {
    // One detached link, created once and reused for every download.
    const link = document.createElement("a");
    function download(fileData, fileName, fileType) {
      const blob = new Blob([fileData], {type: fileType});
      const url = window.URL.createObjectURL(blob);
      link.href = url;
      link.download = fileName;
      link.click();
      window.URL.revokeObjectURL(url);
    }
    return download;
  }());