Paper Clip (Save as HTML, Markdown and Text)

Edit and save selection as clean HTML, Markdown or Text file optimized for printing. Hotkey: Command + Shift + S.

当前为 2024-01-29 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Paper Clip (Save as HTML, Markdown and Text)
  3. // @description Edit and save selection as clean HTML, Markdown or Text file optimized for printing. Hotkey: Command + Shift + S.
  4. // @author Schimon Jehudah, Adv.
  5. // @namespace i2p.schimon.paperclip
  6. // @homepageURL https://greasyfork.org/en/scripts/465960-paper-clip
  7. // @supportURL https://greasyfork.org/en/scripts/465960-paper-clip/feedback
  8. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  9. // @license MIT; https://opensource.org/licenses/MIT
  10. // @require https://unpkg.com/turndown/dist/turndown.js
  11. // @exclude devtools://*
  12. // @match file:///*
  13. // @match *://*/*
  14. // @version 24.01.29
  15. // @run-at document-end
  16. // @icon 
  17. // ==/UserScript==
  18.  
  19. /*
  20.  
  21. TODO
  22.  
  23. 0) ePUB and HTMLZ
  24.  
  25. 1) Display preview
  26.  
  27. 2) Bookmarklet
  28.  
  29. 3) jsPDF /parallax/jsPDF
  30.  
  31. 4) Remove style="background-image: url('https://torrentfreak.com/images/canada-featured.jpg');"
  32. https://torrentfreak.com/movie-companies-cannot-use-piracy-notice-scheme-to-facilitate-class-action-230629/
  33. 5) Save description too for MD and TXT. example: restoreprivacy.com
  34.  
  35. 6) Footnotes for TXT
  36. * Collect all hyperlinks (array)
  37. * Scan for text
  38. NO
  39. AGAIN
  40. * For each hyperlink detected, enter a pointer of some sort
  41. NO
  42. AGAIN
  43. * Detect hyperlink
  44. * Place links into an array
  45. * Replace each hyperlink by text and a number (i.e. [1])
  46. * Replace hyperlink
  47. * Append link to array
  48. * Extract all links from array as footnotes
  49.  
  50. FIXME
  51.  
  52. 1) https://vision.gel.ulaval.ca/~klein/duke3d/
  53.  
  54. 2) Replace element-img enclosed in element-a by href of element-a
  55. NO
  56. Better to place these as footnotes
  57.  
  58. 3) Rense.com & epubbooks.com
  59.  
  60. 4) Replace element img with attribute alt to element span
  61. https://datawrapper.dwcdn.net/qBzPR/6/
  62.  
  63. */
  64.  
  65. // Check whether HTML; otherwise, exit.
  66. //if (!document.contentType == 'text/html')
  67. if (document.doctype == null) return;
  68.  
  69. var
  70. originalBackground, originalColor,
  71. originalDisplay, originalOutline;
  72.  
  73. const time = new Date();
  74. const namespace = 'i2p.schimon.paperclip';
  75.  
  76. // FIXME set hotkey
  77. document.onkeyup = function(e) {
  78. //if (e.ctrlKey && e.shiftKey && e.which == 49) { // Ctrl + Shift + 1
  79. if (e.metaKey && e.shiftKey && e.which == 83) { // Command + Shift + S
  80. console.info('Saving selection to HTML.')
  81. generateXHTML();
  82. }
  83. };
  84.  
  85. // TODO https://community.arm.com/support-forums/f/architectures-and-processors-forum/5814/what-is-difference-between-arm7-and-arm-cortex-m-series/13573#13573
  86. // event listener
  87. // event "click" and "mouseup" are the most sensible, albeit not accurate
  88. // event "mousemove" is the most manipulative (per user), yet (almost) the most accurate
  89. // event "select" seem to work only inside element input
  // On every click: show the toolbar next to the pointer when text is selected
  // and no toolbar exists yet; remove an existing toolbar once nothing is selected.
  window.addEventListener('click', (event) => {
    //document.addEventListener('click',event => {
    const hasSelection = document.getSelection().toString().length > 0;
    const existing = document.getElementById(namespace);
    if (!existing && hasSelection) {
      const toolbar = createButton(event.pageX, event.pageY);
      // TODO Move "append"s to a function
      for (const type of ['close', 'save', 'edit', 'send']) {
        toolbar.append(actionButton(type));
      }
      document.body.append(toolbar);
      return;
    }
    if (existing && !hasSelection) {
      existing.remove();
    }
  }, {passive: true});
  107.  
  108. // TODO declare variables once
  109. // NOTE consider "mousedown"
  110. // NOTE consider moving this functionality into function createButton()
  // While the pointer moves, drop the toolbar as soon as the selection is empty.
  window.addEventListener('mousemove', () => {
    const selectedText = document.getSelection().toString();
    const toolbar = document.getElementById(namespace);
    if (!toolbar) return;
    if (selectedText.length === 0) {
      toolbar.remove();
    }
  });
  118.  
  /**
   * Create the floating toolbar container (not yet attached to the page).
   *
   * The element uses `namespace` both as its tag name and as its id, and is
   * positioned absolutely at (x, y - 50) in page coordinates.
   *
   * @param {number} x - Horizontal page coordinate for the left edge.
   * @param {number} y - Vertical page coordinate; the toolbar sits 50px above it.
   * @returns {HTMLElement} The styled, empty toolbar element.
   */
  function createButton(x, y) {
    const toolbar = document.createElement(namespace);
    toolbar.id = namespace;
    // toolbar.textContent = '📎'; // 🖇️ 💾
    Object.assign(toolbar.style, {
      // position
      position: 'absolute',
      left: `${x}px`,
      top: `${y - 50}px`,
      // appearance
      direction: 'ltr',
      fontFamily: 'system-ui', // cursive sans-serif emoji
      background: 'black', // cornflowerblue, grey, rosybrown
      border: 'thin solid white',
      borderRadius: '3px',
      padding: '1%',
      minWidth: '30px',
      minHeight: '30px',
      zIndex: 10000,
      opacity: 0.7,
      // center the content
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      outline: 'white', // none
      userSelect: 'none',
      cursor: 'default',
    });
    // Fade out when the pointer leaves, become fully opaque on hover.
    toolbar.onmouseleave = () => {
      toolbar.style.opacity = 0.27;
    };
    toolbar.onmouseover = () => {
      toolbar.style.opacity = 1;
    };
    return toolbar;
  }
  162.  
  /**
   * Build a `<scheme>:?subject=…&body=…` URI that references the current
   * selection. Subject and body are percent-encoded so that characters such
   * as `&`, `#` or `?` in the page title or the selected text cannot truncate
   * or corrupt the message.
   *
   * @param {string} scheme - URI scheme, e.g. 'mailto' or 'xmpp'.
   * @returns {string} The assembled URI.
   */
  function buildShareUri(scheme) {
    const selectedText = removeMultipleWhiteSpace(getSelectedText().outerText);
    const subject = `Content on ${location.hostname}`;
    const body = [
      document.title,
      selectedText,
      `${location.hostname}${location.pathname}`,
    ].join('\r\n\r\n');
    return `${scheme}:?subject=${encodeURIComponent(subject)}&body=${encodeURIComponent(body)}`;
  }

  /**
   * Create one toolbar entry (a styled <span>) for the given action type.
   *
   * Menu entries ('save', 'edit', 'send', 'back') replace the toolbar's
   * children with a sub-menu; the remaining entries perform an action on the
   * element returned by getSelectedText(). An unknown type yields a styled
   * span without text or handlers (apart from onmouseleave).
   *
   * @param {string} type - One of 'back', 'close', 'delete', 'edit',
   *   'editable', 'email', 'irc', 'markdown', 'text', 'xhtml', 'xmpp',
   *   'save', 'send'.
   * @returns {HTMLSpanElement} The configured entry.
   */
  function actionButton(type) {
    const item = document.createElement('span');
    item.id = `${namespace}-${type}`;
    //item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.padding = '3px';
    item.style.margin = '3px';
    //item.style.fontSize = '10px';
    item.style.fontWeight = 'bold';
    item.style.color = 'white';
    // Undo whatever highlight drawBorder() applied while hovering.
    item.onmouseleave = () => {resetStyle();};

    // Returns a click handler that swaps the toolbar content for a sub-menu.
    const openMenu = (...types) => () => {
      item.parentElement.replaceChildren(
        ...types.map((entry) => actionButton(entry))
      );
    };

    switch (type) {
      case 'back':
        item.textContent = '<';
        item.onclick = openMenu('close', 'save', 'edit', 'send');
        break;
      case 'close':
        item.textContent = 'x';
        item.title = 'Double-click to close';
        item.ondblclick = () => {item.parentElement.remove();};
        break;
      case 'delete':
        item.textContent = 'Delete';
        item.title = 'Double-click to delete content';
        item.ondblclick = () => {getSelectedText().remove();};
        item.onmouseenter = () => {drawBorder('darkred', 'rgb(255 182 182)', '2px dashed hotpink');};
        break;
      case 'edit':
        item.textContent = 'Edit';
        //item.style.cursor = 'context-menu';
        item.onclick = openMenu('back', 'delete', 'editable');
        break;
      case 'editable':
        item.onmouseenter = () => {drawBorder('darkblue', 'rgb(200 182 255)', '2px solid blue');};
        if (getSelectedText().contentEditable == 'true') {
          item.textContent = 'Stop Editing';
          item.title = 'Turn off edit mode';
        } else {
          item.textContent = 'Start Editing';
          item.title = 'Turn on edit mode';
        }
        item.onclick = () => {
          const texts = toggleEditeMode();
          item.textContent = texts[0];
          item.title = texts[1];
        };
        break;
      case 'email':
        item.textContent = 'Email';
        item.title = 'Send via Email as reference';
        // The selection is read at click time, so only this entry pays for it.
        item.onclick = () => {window.location = buildShareUri('mailto');};
        break;
      case 'irc':
        item.textContent = 'IRC';
        item.title = 'Send via IRC as reference';
        item.onclick = () => {alert('This button will be supported in next update');};
        break;
      case 'markdown':
        item.textContent = 'Markdown';
        item.title = 'Save to Markdown';
        item.onclick = () => {generateMD();}; //TODO URL reference to source URL
        break;
      case 'text':
        item.textContent = 'Text';
        item.title = 'Save to Plain Text';
        item.onclick = () => {generateTXT();};
        break;
      case 'xhtml':
        item.textContent = 'HTML';
        item.title = 'Save to HTML (valid XHTML)';
        item.onclick = () => {generateXHTML();};
        break;
      case 'xmpp':
        item.textContent = 'Jabber';
        item.title = 'Send via XMPP as reference';
        // NOTE(review): the URI keeps the original `xmpp:?subject=…&body=…`
        // shape; confirm against the XMPP URI query syntax.
        item.onclick = () => {window.location = buildShareUri('xmpp');};
        break;
      case 'save':
        item.textContent = 'Save';
        //item.style.cursor = 'context-menu';
        item.onmouseenter = () => {drawBorder('black', 'rgb(250 250 210)', '2px double rosybrown');};
        item.onclick = openMenu('back', 'xhtml', 'markdown', 'text');
        break;
      case 'send':
        item.textContent = 'Send';
        //item.style.cursor = 'context-menu';
        item.onclick = openMenu('back', 'email', 'irc', 'xmpp');
        break;
    }
    return item;
  }
  284.  
  /**
   * Flip the contentEditable state of the element returned by getSelectedText().
   *
   * @returns {string[]} `[label, tooltip]` for the toolbar entry that reflects
   *   the new state.
   */
  function toggleEditeMode() {
    const target = getSelectedText();
    const wasEditable = target.contentEditable === 'true';
    target.contentEditable = wasEditable ? 'false' : 'true';
    return wasEditable
      ? ['Continue Editing', 'Edit content']
      : ['Stop Editing', 'Turn off edit mode'];
  }
  297.  
  /**
   * Highlight the element returned by getSelectedText() and remember its
   * previous inline color, outline and background in the file-level
   * originalColor / originalOutline / originalBackground variables so that
   * resetStyle() can restore them.
   *
   * An outline is used because it does not affect layout or spacing.
   * Does nothing when the selection's common ancestor has no `style` object.
   *
   * @param {string} color - CSS text color to apply.
   * @param {string} background - CSS background to apply.
   * @param {string} outline - CSS outline shorthand to apply.
   */
  function drawBorder(color, background, outline) {
    const sel = getSelectedText();
    // The common ancestor may be a node without inline style; skip it
    // instead of throwing from a hover handler.
    if (!sel || !sel.style) return;
    originalColor = sel.style.color;
    originalOutline = sel.style.outline;
    originalBackground = sel.style.background;
    // Draw border around input without affecting style, layout or spacing
    // /questions/29990319/draw-border-around-input-without-affecting-style-layout-or-spacing
    sel.style.outline = outline;
    sel.style.background = background;
    sel.style.color = color;
  }
  319.  
  320. // TODO remove attribute 'style' of first element after 'body'
  321. // FIXME
  322. // http://gothicrichard.synthasite.com/what-i-fond-on-the-net.php
  323. // https://darknetdiaries.com/episode/65/
  /**
   * Restore the inline color, outline and background of the element returned
   * by getSelectedText() from the values last saved by drawBorder().
   */
  function resetStyle() {
    const { style } = getSelectedText();
    style.color = originalColor;
    style.outline = originalOutline;
    style.background = originalBackground;
  }
  330.  
  /**
   * Save the selection's text, followed by a provenance footer (creation
   * time, source URL, page title and a Paper Clip credit), as a .txt file.
   */
  function generateTXT() {
    const selectedText = getSelectedText().outerText;
    const footer = [
      '',
      '',
      `Created: ${time.toDateString()} ${time.toLocaleTimeString()}`,
      `Source: ${location.href}`,
      `Title: ${document.title}`,
      '',
      'Document generated using Paper Clip',
      'https://greasyfork.org/en/scripts/465960-paper-clip',
      'Save selected content into clean HTML, Markdown or Text',
      '',
    ].join('\n');
    savePage(
      selectedText + footer,
      createFilename('txt'),
      "text/plain"
    );
  }
  349.  
  /**
   * Convert the selection to Markdown with Turndown and save it as a .md file.
   *
   * The selection's HTML is copied into a scratch document, interactive and
   * script/style elements are stripped, and the result is wrapped with a
   * title heading, the page description (if any) and a provenance footer.
   * The debug console.log calls that dumped the scratch document and the
   * Markdown on every save have been removed.
   */
  function generateMD() {
    const scratch = new DOMParser().parseFromString('', 'text/html'); // Falkon: TrustedHTML
    scratch.body.innerHTML = getSelectedText().outerHTML;
    const elementsToRemove = [
      'button', 'form', 'frame', 'frameset', 'iframe', 'textarea',
      //'svg', 'input', 'path',
      'script', 'style',
      'select'];
    const cleaned = removeMediaElements(scratch, elementsToRemove);
    const body = new TurndownService().turndown(cleaned);

    // Fall back to the last path segment when the page has no title.
    const title = document.title
      ? document.title
      : location.pathname.split('/').pop();

    const markdown = [
      `# [${title}](${location.href})`,
      getDescription(),
      body,
      '',
      '---',
      '',
      `This page was saved at ${time.toDateString()} ${time.toLocaleTimeString()} from [${location.hostname}](${location.href})`,
      'using [Paper Clip](https://greasyfork.org/en/scripts/465960-paper-clip) and converted into Markdown with [Turndown](https://mixmark-io.github.io/turndown/)',
      '',
    ].join('\n');

    savePage(
      markdown,
      createFilename('md'),
      "text/plain"
    );
  }
  390.  
  /**
   * Save the selection as a cleaned, serialized HTML document.
   *
   * Steps: create a scratch document, copy the page title and a <base>,
   * record provenance <meta> tags, import selected <meta> tags from the page,
   * copy the selection's HTML, list media URLs as metadata, strip media,
   * scripts, attributes, empty elements and comments, serialize with
   * XMLSerializer, collapse whitespace and hand the result to savePage().
   */
  function generateXHTML() {
    let data = new DOMParser().parseFromString('', 'text/html'); // Falkon: TrustedHTML
    // set title
    if (document.title.length > 0) {
      data.title = document.title;
    }
    // set base
    // NOTE do not "set base".
    // TODO Complete links of ./ and / etc. by fetching complete
    // url and replace href with it (it = complete url)
    // Declared locally: the bare `base = …` assignment created an implicit
    // global and throws in strict mode.
    const base = data.createElement('base');
    base.href = data.head.baseURI; // location.href;
    data.head.append(base);

    // Provenance metadata describing where and how the copy was made.
    const sourcedMeta = [
      ['url', location.href],
      ['date', time],
      ['creator', namespace],
      ['user-agent', navigator.userAgent],
      //['connection-type', navigator.connection.effectiveType],
      ['content-type-sourced', document.contentType],
      ['charset-sourced', document.charset],
      //'character-count'
      //'word-count'
    ];
    for (const [name, content] of sourcedMeta) {
      const meta = data.createElement('meta');
      meta.name = name;
      meta.content = content;
      data.head.append(meta);
    }

    // Metadata copied from the page itself, when present.
    const importedMeta = [
      //'content-type',
      'viewport',
      'description',
      'keywords',
      'generator',
    ];
    for (const name of importedMeta) {
      const source = document.querySelector('meta[name="' + name + '" i]');
      // .querySelector('meta[http-equiv="' + name + '" i]')
      if (!source) {
        console.warn(name + ': Not found.');
        continue;
      }
      const meta = data.createElement('meta');
      meta.name = name + '-imported';
      meta.content = source.content;
      data.head.append(meta);
    }

    if (document.dir == 'rtl') {
      data.dir = 'rtl';
    }
    data.body.innerHTML = getSelectedText().outerHTML;
    // Media URLs must be listed before the media elements are removed.
    data = listMediaElements(data);
    const elementsToRemove = [
      'audio', 'embed', 'img', 'video', 'button',
      'form', 'frame', 'frameset', 'iframe', 'textarea',
      'svg', 'input', 'path',
      'script', 'style',
      'select'];
    data = removeMediaElements(data, elementsToRemove);
    data = removeAttributes(data);
    //data = replaceMediaByLinks(data);
    data = correctLinks(data);
    data = removeEmptyElements(data);
    data = removeCommentNodes(data);
    //data = removeWhitespaceFromNodes(data, ['code', 'pre']);
    //data = replaceCodeAndPre(data);
    //data = setStylesheet(data);
    data = new XMLSerializer().serializeToString(data);
    //data = formatPage(data);
    //data = minify(data);
    //data = removeComments(data);
    data = removeMultipleWhiteSpace(data);
    savePage(
      data,
      // NOTE xhtml is also valid
      createFilename('html'),
      "text/html"
    );
  }
  484.  
  485. // FIXME
  486. // body::-webkit-scrollbar{width:10.666666666666666px;height:10.666666666666666px;}
  /**
   * Add a stylesheet to the given document that keeps <code> and <pre>
   * blocks from overflowing, when the document contains such elements.
   *
   * The stylesheet is created in and appended to `node` itself; it was
   * previously appended to the live page's `document.head`, so it never
   * reached the saved copy and modified the page being viewed.
   *
   * @param {Document} node - Document to receive the stylesheet.
   * @returns {Document} The same document.
   */
  function setStylesheet(node) {
    if (!node.querySelector('code, pre')) {
      // Nothing to style; do not add an empty <style> element.
      return node;
    }
    const cssStylesheet = node.createElement('style');
    cssStylesheet.type = 'text/css';
    cssStylesheet.textContent = 'code, pre {overflow: auto; display: grid; max-width: 100vw;}';
    node.head.append(cssStylesheet);
    return node;
  }
  497.  
  498. // TODO Place plain text inside elements <code> <pre> (eliminate <span>, <br> etc.)
  499. // TODO Eliminate all elements without changing original text layout
  /**
   * Flatten the content of every <code> and <pre> element to plain text,
   * dropping nested markup while keeping the element itself.
   *
   * The earlier version assigned `outerText`, which replaces the element
   * with a text node; that shrank the live collection being iterated, so
   * every other element was skipped and the code/pre wrappers were lost.
   *
   * @param {Document|Element} node - Root to search.
   * @returns {Document|Element} The same root.
   */
  function replaceCodeAndPre(node) { // correctCodeElements
    // querySelectorAll returns a static list, so rewriting content while
    // iterating is safe.
    for (const element of node.querySelectorAll('code, pre')) {
      // Self-assignment replaces all children with one text node.
      element.textContent = element.textContent;
    }
    return node;
  }
  515.  
  /**
   * Replace every <img> in the document with an <a> that links to, and
   * shows, the image URL.
   *
   * The image's attributes are copied onto the link first; `href` is set
   * afterwards so it always points at the image source. (The earlier code
   * wrote `aElement.setAttribute.href = …`, which only set a property on the
   * function object and left the link without an href.)
   *
   * @param {Document} node - Document to modify.
   * @returns {Document} The same document.
   */
  function replaceMediaByLinks(node) {
    for (const imgElement of node.querySelectorAll('img')) {
      const aElement = node.createElement('a');
      // Copy the attributes of the <img> element to the new <a> element.
      for (const { name, value } of Array.from(imgElement.attributes)) {
        aElement.setAttribute(name, value);
      }
      aElement.href = imgElement.src;
      aElement.textContent = imgElement.src;
      // Replace the <img> element with the new <a> element.
      imgElement.parentNode.replaceChild(aElement, imgElement);
    }
    return node;
  }
  533.  
  534. // TODO
  535. // Catch all elements with attribute
  536. // contains URL, and
  537. // starts with / (add hostname), and
  538. // contains / (add hostname with first parent path), and
  539. // validate using URL API
  /**
   * Record the URLs of media elements as <meta> tags in the document head.
   *
   * For each matching element, the `src`, `data-img-url` and `data-source`
   * attributes are inspected; every distinct non-empty value produces one
   * `<meta name="extracted-media-<tag>" content="<value>">`.
   *
   * @param {Document} node - Document to scan and annotate.
   * @returns {Document} The same document.
   */
  function listMediaElements(node) {
    const selectors = [
      'audio', 'embed', 'img', 'svg', 'video',
      'frame', 'frameset', 'iframe', '[data-source]',
    ];
    const urlAttributes = ['src', 'data-img-url', 'data-source'];
    const seen = new Set();
    for (const selector of selectors) {
      for (const element of node.querySelectorAll(selector)) {
        for (const attribute of urlAttributes) {
          const value = element.getAttribute(attribute);
          if (!value || seen.has(value)) continue;
          seen.add(value);
          const meta = node.createElement('meta');
          meta.name = `extracted-media-${element.nodeName.toLowerCase()}`; // Was the selector
          meta.content = value;
          node.head.append(meta);
        }
      }
    }
    return node;
  }
  565.  
  566. //Remove graphics, media and scripts
  /**
   * Remove every element matching the given selectors from the document.
   *
   * For media-type selectors an (currently empty) <a> placeholder is inserted
   * where the element used to be; the code that would fill the placeholder
   * is still commented out upstream.
   *
   * `media` is now a local constant: the bare `media = […]` assignment
   * created an implicit global and throws in strict mode.
   *
   * @param {Document} node - Document to modify.
   * @param {string[]} elements - Selectors (tag names) to remove.
   * @returns {Document} The same document.
   */
  function removeMediaElements(node, elements) {
    /*
    TODO Remove span and preserve its contents
    Move span content to its parent element/node
    /questions/9848465/js-remove-a-tag-without-deleting-content

    FIXME Couldn't remove "iframe" for
    https://www.dailymail.co.uk/health/article-3460321/How-Big-Pharma-greed-killing-tens-thousands-world-Patients-medicated-given-profitable-drugs-little-proven-benefits-leading-doctors-warn.html
    */
    const media = new Set(['audio', 'embed', 'frame', 'frameset', 'iframe', 'img', 'video']);
    for (const selector of elements) {
      const isMedia = media.has(selector);
      for (const element of node.querySelectorAll(selector)) {
        if (isMedia) {
          const placeholder = node.createElement('a');
          //placeholder.textContent = `[HIDDEN MEDIA]`;
          //placeholder.title = `${selector} media element`;
          //placeholder.href = element.getAttribute('src');
          element.parentNode.insertBefore(placeholder, element.nextSibling); // insertAfter
        }
        element.remove();
      }
    }
    return node;
  }
  598.  
  599. // Remove all attributes
  /**
   * Strip all attributes except `href`, `name` and `id` from every element
   * inside <body>.
   *
   * Attribute names are snapshotted before removal. The earlier index loop
   * walked the live attribute map while deleting from it, so each removal
   * shifted the next attribute into the current index and it was skipped.
   *
   * @param {Document} node - Document to clean.
   * @returns {Document} The same document.
   */
  function removeAttributes(node) {
    // /questions/1870441/remove-all-attributes
    const kept = new Set(['href', 'name', 'id']);
    for (const element of node.querySelectorAll('body *')) {
      const names = Array.from(element.attributes, (attribute) => attribute.name);
      for (const name of names) {
        if (!kept.has(name)) {
          element.removeAttribute(name);
        }
      }
    }
    return node;
  }
  616.  
  617. // Correct links for offline usage
  /**
   * Turn links that point at a fragment of the page itself into bare
   * `#fragment` links so they keep working in the saved, offline copy.
   *
   * Any fragment on the element's base URI is ignored for the comparison;
   * otherwise a base such as `https://site/page#top` would never match
   * `https://site/page#section` and the link would stay absolute.
   *
   * @param {Document} node - Document whose <a> elements are rewritten.
   * @returns {Document} The same document.
   */
  function correctLinks(node) {
    for (const element of node.querySelectorAll('a')) {
      //if (element.hash) {
      //if (element.hostname + element.pathname == location.hostname + location.pathname) {
      const pageUrl = element.baseURI.split('#')[0];
      if (element.href.startsWith(pageUrl + '#')) {
        element.href = element.hash;
      }
    }
    return node;
  }
  628.  
  /**
   * Remove every element under <body> whose text content is empty or only
   * whitespace. <br> elements are kept.
   *
   * @param {Document} node - Document to clean.
   * @returns {Document} The same document.
   */
  function removeEmptyElements (node) {
    const blank = /^\s*$/;
    for (const element of node.body.querySelectorAll('*')) {
      const isLineBreak = element.tagName.toLowerCase() == 'br';
      if (isLineBreak) continue;
      if (blank.test(element.textContent)) {
        element.remove();
      }
    }
    return node;
  }
  638.  
  /**
   * Remove all comment nodes from the given document.
   *
   * The iterator is restricted to comments with NodeFilter.SHOW_COMMENT
   * instead of visiting every node and comparing names. The deprecated
   * fourth argument of createNodeIterator and the per-node debug log are
   * gone.
   *
   * @param {Document} node - Document to clean; also the iteration root.
   * @returns {Document} The same document.
   */
  function removeCommentNodes(node) {
    const commentIterator = node.createNodeIterator(node, NodeFilter.SHOW_COMMENT);
    // Collect first, then remove, so removal never interferes with traversal.
    const comments = [];
    let currentNode;
    while ((currentNode = commentIterator.nextNode())) {
      comments.push(currentNode);
    }
    for (const comment of comments) {
      comment.remove();
    }
    return node;
  }
  656.  
  /**
   * Strip HTML comments (`<!-- … -->`, including multi-line ones) from a string.
   *
   * @param {string} str - Markup to clean.
   * @returns {string} The markup without comments.
   */
  function removeComments(str) {
    const htmlComment = /<!--[\s\S]*?-->/g;
    const withoutComments = str.replace(htmlComment, '');
    return withoutComments;
  }
  660.  
  /**
   * Trim the text of every text node below `node`, skipping the subtrees of
   * elements whose lower-cased tag name is listed in `excludedTags`.
   *
   * Only text and element nodes are handled; any other node type is left
   * untouched and not descended into.
   *
   * @param {Node} node - Root of the subtree to process.
   * @param {string[]} excludedTags - Lower-case tag names to leave as they are.
   * @returns {Node} The same root node.
   */
  function removeWhitespaceFromNodes(node, excludedTags) {
    function visit(current) {
      if (current.nodeType === Node.TEXT_NODE) {
        current.textContent = current.textContent.trim();
        return;
      }
      if (current.nodeType !== Node.ELEMENT_NODE) return;
      if (excludedTags.includes(current.tagName.toLowerCase())) return;
      for (let index = 0; index < current.childNodes.length; index += 1) {
        visit(current.childNodes[index]);
      }
    }
    visit(node);
    return node;
  }
  677.  
  /**
   * Collapse every run of whitespace to a single space, except inside
   * `<code>…</code>` and `<pre>…</pre>` blocks, which are copied verbatim.
   *
   * @param {string} str - Text or serialized markup.
   * @returns {string} The text with whitespace collapsed.
   */
  function removeMultipleWhiteSpace(str) {
    // Alternative 1 captures a whole code/pre block; alternative 2 a whitespace run.
    const codeBlockOrWhitespace = /(<(code|pre)[^>]*>.*?<\/\2>)|(\s+)/gs;
    const keepCodeCollapseRest = (match, codeBlock) => (codeBlock ? codeBlock : " ");
    return str.replace(codeBlockOrWhitespace, keepCodeCollapseRest);
  }
  698.  
  699. // Get parent element of beginning (and end) of selected text
  700. // /questions/32515175/get-parent-element-of-beginning-and-end-of-selected-text
  /**
   * Return the closest common ancestor of the parents of the nodes where the
   * current selection begins (anchor) and ends (focus).
   *
   * @returns {Node} The common ancestor container, as computed by
   *   findFirstCommonAncestor().
   */
  function getSelectedText() {
    const { anchorNode, focusNode } = document.getSelection();
    return findFirstCommonAncestor(anchorNode.parentNode, focusNode.parentNode);
  }
  713.  
  714. // find common parent
  715. // /questions/2453742/whats-the-best-way-to-find-the-first-common-parent-of-two-dom-nodes-in-javascri
  /**
   * Find the closest common ancestor of two DOM nodes by spanning a Range
   * from one to the other.
   *
   * @param {Node} nodeA - First node.
   * @param {Node} nodeB - Second node.
   * @returns {Node} The range's commonAncestorContainer.
   */
  function findFirstCommonAncestor(nodeA, nodeB) {
    const span = new Range();
    const stretch = (from, to) => {
      span.setStart(from, 0);
      span.setEnd(to, 0);
    };
    stretch(nodeA, nodeB);
    // If nodeA comes after nodeB in the document the range ends up collapsed
    // (start and end at the same position), and its common ancestor would
    // not cover both nodes. Spanning in the opposite direction fixes that.
    if (span.collapsed) {
      stretch(nodeB, nodeA);
    }
    return span.commonAncestorContainer;
  }
  732.  
  733. // minify html
  734. // /questions/23284784/javascript-minify-html-regex
  735. // TODO Don't apply on code/pre
  /**
   * Minify an HTML string: drop line breaks and spaces between adjacent
   * tags, collapse remaining whitespace runs outside of tags to one space,
   * and trim the result.
   *
   * @param {string} s - Markup to minify; any falsy value yields "".
   * @returns {string} The minified markup.
   */
  function minify( s ){
    if (!s) {
      return "";
    }
    // Removes new lines and irrelevant spaces between tags, which might
    // affect layout and are better gone.
    const withoutGaps = s.replace(/\>[\r\n ]+\</g, "><");
    // Tags are matched first and copied unchanged; anything else that
    // matches is whitespace.
    const collapsed = withoutGaps.replace(/(<.*?>)|\s+/g, (match, tag) => {
      if (tag) {
        return tag;
      }
      return ' ';
    });
    return collapsed.trim();
  }
  743.  
  744. // format html
  745. // /questions/3913355/how-to-format-tidy-beautify-in-javascript
  746. // TODO Don't inset span in code/pre
  /**
   * Pretty-print an HTML string: one tag per line (CRLF line endings), with
   * tab indentation that grows after opening tags and shrinks at closing tags.
   *
   * @param {string} html - Markup to format.
   * @returns {string} The re-indented markup.
   */
  function formatPage(html) {
    const TAB = '\t';
    let depth = '';
    let output = '';
    for (const chunk of html.split(/>\s*</)) {
      // A chunk starting with "/name" is a closing tag: step back first.
      if (/^\/\w/.test(chunk)) {
        depth = depth.substring(TAB.length);
      }
      output += `${depth}<${chunk}>\r\n`;
      // Opening, non-self-closing tags (except <input>) indent what follows.
      const opensElement = /^<?\w[^>]*[^\/]$/.test(chunk) && !chunk.startsWith("input");
      if (opensElement) {
        depth += TAB;
      }
    }
    // Drop the extra "<" added before the first chunk and the ">\r\n" added
    // after the last one.
    return output.substring(1, output.length - 3);
  }
  762.  
  /**
   * Build a Markdown second-level heading from the page description.
   *
   * Looks for `<meta name=description>` first, then
   * `<meta itemprop=description>`.
   *
   * @returns {string} `'## <description>\n'`, or `''` when neither tag exists.
   */
  function getDescription() {
    const selectors = ['meta[name=description]', 'meta[itemprop=description]'];
    for (const selector of selectors) {
      const meta = document.querySelector(selector);
      if (meta) {
        return '## ' + meta.content + '\n';
      }
    }
    return '';
  }
  775.  
  /**
   * Build a lower-case file name of the form
   * `<title>_<YYYY-MM-DD>_<HH-MM-SS>.<extension>`.
   *
   * The title is the page title or, when that is empty, the last segment of
   * the URL path. Characters that are unsafe in file names are dropped and
   * every run of spaces, hyphens and underscores becomes one underscore
   * (the earlier single `/__/g` pass left doubled underscores behind, e.g.
   * for "a - b").
   *
   * Date and time are both taken from the local clock of the file-level
   * `time`; previously the date came from toISOString() (UTC) and the time
   * from local getters, which disagree around midnight.
   *
   * @param {string} extension - File extension without the leading dot.
   * @returns {string} The file name.
   */
  function createFilename(extension) {
    const twoDigits = (value) => String(value).padStart(2, '0');

    const day = [
      time.getFullYear(),
      twoDigits(time.getMonth() + 1), // getMonth() is zero-based
      twoDigits(time.getDate()),
    ].join('-');

    const now = [
      time.getHours(),
      time.getMinutes(),
      time.getSeconds(),
    ].map(twoDigits).join('-');

    const rawTitle = document.title
      ? document.title
      : location.pathname.split('/').pop();

    // TODO ‘ ’ ·
    const title = rawTitle
      .replace(/[\/?<>\\:*|'"\.,]/g, '')
      .replace(/[ \-_]+/g, '_');

    return `${title}_${day}_${now}.${extension}`.toLowerCase();
  }
  834.  
  835. // export file
  836. // /questions/4545311/download-a-file-by-jquery-ajax
  837. // /questions/43135852/javascript-export-to-text-file
  /**
   * Offer the given data to the user as a file download.
   *
   * A temporary object URL is created for the data and "clicked" through a
   * detached <a download> element. The URL is revoked after a delay rather
   * than immediately, since revoking it right after click() can cancel the
   * download before the browser has started reading the blob. The anchor is
   * created per call, so nothing touches the DOM API while the script loads.
   *
   * @param {BlobPart} fileData - Content of the file.
   * @param {string} fileName - Suggested file name.
   * @param {string} fileType - MIME type of the content.
   */
  function savePage(fileData, fileName, fileType) {
    const blob = new Blob([fileData], {type: fileType});
    const url = window.URL.createObjectURL(blob);
    const a = document.createElement("a");
    // document.body.appendChild(a);
    // a.style = "display: none";
    a.href = url;
    a.download = fileName;
    a.click();
    // Release the blob once the download has had time to start.
    setTimeout(() => {
      window.URL.revokeObjectURL(url);
    }, 10000);
  }