导出网页中的全部有效链接

点击右下角导出图标,自动爬取全部有效链接并导出为excel表格

当前为 2022-10-19 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Export All Useful Links
  3. // @name:zh-CN 导出网页中的全部有效链接
  4. // @namespace xcl
  5. // @version 1.2
  6. // @description:zh-CN 点击右下角导出图标,自动爬取全部有效链接并导出为excel表格
  7. // @author xcl
  8. // @match *://*/*
  9. // @run-at document-end
  10. // @grant none
  11. // @description Adds an export button at the bottom right of every page; clicking it collects all valid links on the page and exports them as an Excel (.xlsx) file.
  12. // ==/UserScript==
  13.  
  14.  
  /**
   * Turn a page URL into a string that is safe to use as a file name.
   *
   * The leading `http://` or `https://` scheme is dropped, then every
   * character from the forbidden set (`< > / \ | : * ? #`) is replaced by `_`.
   *
   * @param {string} url - URL to convert (the caller passes `window.location.href`).
   * @returns {string} The sanitized name, without a file extension.
   */
  function formatFilename(url) {
    // Anchor on the start so an "http://" embedded later in the URL
    // (e.g. inside a query string) is not mistaken for the scheme.
    const withoutScheme = url.replace(/^https?:\/\//, '');
    // A character class matches each symbol literally, so regex
    // metacharacters such as `*`, `?` and `|` need no special handling.
    return withoutScheme.replace(/[<>\/\\|:*?#]/g, '_');
  }
  29.  
  /**
   * Wrap a single worksheet in a workbook and serialize it into a Blob that
   * holds the final Excel file, ready to be downloaded through
   * `URL.createObjectURL`.
   *
   * @param {object} sheet - Worksheet object stored under `sheetName`.
   * @param {string} [sheetName] - Worksheet name; falls back to 'sheet1' when falsy.
   * @returns {Blob} Blob of type "application/octet-stream" with the workbook bytes.
   */
  function sheet2blob(sheet, sheetName) {
    // Convert a binary string into an ArrayBuffer, keeping the low byte of
    // every UTF-16 code unit.
    const toArrayBuffer = (binary) => {
      const buffer = new ArrayBuffer(binary.length);
      const bytes = new Uint8Array(buffer);
      let idx = 0;
      while (idx != binary.length) {
        bytes[idx] = binary.charCodeAt(idx) & 0xFF;
        idx += 1;
      }
      return buffer;
    };

    const name = sheetName || 'sheet1';
    const sheets = {};
    sheets[name] = sheet;
    const workbook = {
      SheetNames: [name],
      Sheets: sheets
    };

    // Options for generating the Excel file.
    const writeOptions = {
      // File type to generate.
      bookType: 'xlsx',
      // Whether to generate a Shared String Table. Per the original author's
      // note, enabling it slows generation down but is more compatible with
      // older iOS devices.
      bookSST: false,
      type: 'binary'
    };

    const binaryOutput = XLSX.write(workbook, writeOptions);
    return new Blob([toArrayBuffer(binaryOutput)], {
      type: "application/octet-stream"
    });
  }
  57.  
  /**
   * Convert an array-of-arrays into a worksheet and trigger a browser
   * download of it as `<filename>.xlsx`.
   *
   * @param {Array<Array<*>>} aoa - Rows of cell values, passed to `XLSX.utils.aoa_to_sheet`.
   * @param {string} filename - File name without extension.
   * @returns {void}
   */
  function downloadExcel(aoa, filename) {
    const sheet = XLSX.utils.aoa_to_sheet(aoa);
    console.log("正在导出表格");
    const blob = sheet2blob(sheet, "Sheet1");
    const url = URL.createObjectURL(blob);

    // A temporary, programmatically clicked <a download> starts the download.
    const link = document.createElement('a');
    link.href = url;
    link.download = `${filename}.xlsx`;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      // Always take the helper link out of the page again, even if the
      // click handler chain throws.
      document.body.removeChild(link);
      // Release the object URL so the Blob is not kept alive for the rest of
      // the page's lifetime; deferred so the click is fully dispatched first.
      setTimeout(() => URL.revokeObjectURL(url), 0);
    }
  }
  70.  
  /**
   * Filter the collected links, show them as an HTML table in a new window
   * and export them to an Excel file named after the current page URL.
   *
   * A link is kept when its URL is a non-empty string, differs from the
   * current page URL and does not use the `javascript:` scheme.
   *
   * @param {Array<{url: *, name: *}>} results - Collected links.
   * @returns {void}
   */
  function make_list(results) {
    // Link text and URLs come from the crawled page, so they must be escaped
    // before being written into the preview document as HTML.
    const escapeHtml = (value) => String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

    const currentUrl = window.location.href;
    const rows = results
      .filter(({ url }) =>
        // Non-string hrefs (e.g. SVG <a> elements expose an SVGAnimatedString)
        // cannot be exported as a link and are skipped.
        typeof url === 'string' &&
        url !== "" &&
        url !== currentUrl &&
        // Only reject the javascript: scheme itself, not every URL that
        // happens to contain the word "javascript".
        !/^javascript:/i.test(url))
      .map(({ url, name }) => [url, name]);

    const tableRows = rows
      .map(([url, name]) => `<tr><td> ${escapeHtml(url)} </td><td> ${escapeHtml(name)} </td></tr>`)
      .join('');
    const table = `<table><tbody>${tableRows}</tbody></table>`;

    // window.open returns null when the popup is blocked; the preview is then
    // skipped, but the Excel export must still run.
    const preview = window.open("");
    if (preview) {
      preview.document.write(table);
    }
    downloadExcel(rows, formatFilename(currentUrl));
  }
  84.  
  /**
   * Collect every `<a>` element of a document into `results`, then descend
   * into the documents of its `<iframe>` elements and do the same.
   *
   * @param {?Document} doc - Document to scan; null/undefined is skipped.
   * @param {Array<{name: string, url: *}>} results - Accumulator, appended to in place.
   * @returns {Array<{name: string, url: *}>} The same `results` array.
   */
  function inIframe(doc, results) {
    if (doc != null) {
      console.log(doc);

      // Anchors of this document first, in document order.
      for (const anchor of doc.querySelectorAll("a")) {
        // All whitespace is removed from the link text.
        const label = anchor.textContent.replace(/\t|\s+/g, "").trim();
        const target = anchor.href;
        results.push({ name: label, url: target });
      }

      // Then every nested frame; a frame without a reachable document yields
      // null here and is skipped by the guard above.
      for (const frame of doc.getElementsByTagName("iframe")) {
        inIframe(frame.contentDocument, results);
      }
    }
    return results;
  }
  103.  
  /**
   * Gather the links of the current document (including nested iframes) into
   * a fresh array and pass them on for display and export.
   *
   * @returns {void}
   */
  function get_links() {
    const collected = inIframe(document, []);
    make_list(collected);
  }
  109.  
  // Bootstrap: load the xlsx library into the page and add a fixed "导出"
  // (export) button whose click handler runs get_links().
  (function () {
    "use strict";
    console.log("正在执行");

    // Inject the xlsx build from the CDN; presumably this is what provides
    // the global `XLSX` object used by the export helpers.
    const xlsxScript = document.createElement('script');
    xlsxScript.setAttribute('type', 'text/javascript');
    xlsxScript.src = "https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js";
    document.documentElement.appendChild(xlsxScript);

    // Floating export button; the label is plain text, so textContent is
    // used rather than innerHTML.
    const exportBtn = document.createElement('button');
    exportBtn.textContent = "导出";
    exportBtn.className = "a-b-c-d-toTop";
    exportBtn.onclick = function () {
      get_links();
    };

    // Styling for the button. The original declared font-size twice
    // (15px, then 12px); only the last declaration takes effect, so the dead
    // one is dropped.
    const css = `.a-b-c-d-toTop{
  position: fixed;
  bottom: 10%;
  right: 5%;
  width: 50px;
  height: 50px;
  border-radius: 50%;
  z-index: 999;
  cursor: pointer;
  font-size: 12px;
  overflow: hidden;
  background: blue
  }`;
    const style = document.createElement('style');
    style.id = "a-b-c-d-style";
    // The legacy IE `style.styleSheet.cssText` fallback was removed: this
    // script already relies on ES2015 syntax (template literals, arrow
    // functions) that such browsers cannot parse, so the branch never ran.
    style.appendChild(document.createTextNode(css));

    const body = document.body;
    body.appendChild(exportBtn);
    body.appendChild(style);
  })();