导出网页中的全部有效链接

点击右下角导出图标,自动爬取全部有效链接并导出为excel表格

  1. // ==UserScript==
  2. // @name Export All Useful Links
  3. // @name:zh-CN 导出网页中的全部有效链接
  4. // @namespace xcl
  5. // @version 1.4
  6. // @description:zh-CN 点击右下角导出图标,自动爬取全部有效链接并导出为excel表格
  7. // @author xcl
  8. // @match *://*/*
  9. // @grant none
  10. // @noframes
  11. // @description Click the export button at the bottom-right of the page, pick the history text file, and every link on the page is exported to an Excel (.xlsx) file.
  12. // ==/UserScript==
  13.  
  14.  
  /**
   * Turns a page URL into a string that is safe to use as a file name.
   *
   * A leading `http://` or `https://` scheme is removed, then every character
   * from the set `< > / \ | : * ? #` is replaced with an underscore.
   *
   * @param {string} url - Absolute URL, e.g. `window.location.href`.
   * @returns {string} The sanitized name, without a file extension.
   */
  function formatFilename(url) {
    // The earlier version built `new RegExp("/" + ch + "/g")`, which matches
    // the literal text "/<ch>/g" rather than the character globally, so the
    // reserved characters were almost never replaced. One character class
    // with the `g` flag performs the intended substitution.
    return url
      .replace(/^https?:\/\//i, '')
      .replace(/[<>\/\\|:*?#]/g, '_');
  }
  29.  
  /**
   * Wraps one worksheet in a workbook, serializes it as .xlsx and returns the
   * bytes as a Blob that can be handed to URL.createObjectURL for download.
   *
   * Uses the global `XLSX` object for serialization.
   *
   * @param {object} sheet - Worksheet object; stored in the workbook as-is.
   * @param {string} [sheetName] - Tab name; any falsy value becomes 'sheet1'.
   * @returns {Blob} Blob of type "application/octet-stream".
   */
  function sheet2blob(sheet, sheetName) {
    const tabName = sheetName || 'sheet1';

    const book = { SheetNames: [tabName], Sheets: {} };
    book.Sheets[tabName] = sheet;

    // Writer options: produce an xlsx file, skip the Shared String Table
    // (the original author notes that enabling it slows generation but is
    // more compatible with old iOS devices), and return a binary string.
    const writeOptions = { bookType: 'xlsx', bookSST: false, type: 'binary' };
    const binary = XLSX.write(book, writeOptions);

    // Binary string -> bytes: keep only the low byte of every UTF-16 unit.
    const bytes = Uint8Array.from(
      { length: binary.length },
      (_, index) => binary.charCodeAt(index) & 0xFF
    );

    return new Blob([bytes.buffer], { type: "application/octet-stream" });
  }
  57.  
  /**
   * Converts an array-of-arrays into a worksheet and triggers a browser
   * download of it as `<filename>.xlsx`.
   *
   * Uses the global `XLSX` object and the `sheet2blob` helper.
   *
   * @param {Array<Array<*>>} aoa - Rows of cell values.
   * @param {string} filename - File name without the ".xlsx" extension.
   * @returns {void}
   */
  function downloadExcel(aoa, filename) {
    const sheet = XLSX.utils.aoa_to_sheet(aoa);
    console.log("正在导出表格");
    const blob = sheet2blob(sheet, "Sheet1");
    const url = URL.createObjectURL(blob);

    const link = document.createElement('a');
    link.href = url;
    link.download = `${filename}.xlsx`;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      // Always take the temporary link out of the page again, even if the
      // click throws.
      document.body.removeChild(link);
      // Object URLs keep their Blob alive until they are revoked; release it
      // after a short delay so the browser has time to start the download.
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    }
  }
  70.  
  /**
   * Filters the collected links and exports them as an Excel file named
   * after the current page.
   *
   * A link is kept only when its url is a non-empty string, differs from the
   * current page address and is not a `javascript:` pseudo-link.
   *
   * @param {Array<{name: string, url: string}>} results - Collected links.
   * @returns {void}
   */
  function make_list(results) {
    const currentPage = window.location.href;

    const rows = results
      .filter(({ url }) =>
        // Non-string urls would make the string checks below throw and abort
        // the whole export, so they are skipped.
        typeof url === 'string' &&
        url !== '' &&
        url !== currentPage &&
        // Only reject the javascript: scheme itself; a plain substring test
        // also discarded real pages such as ".../javascript/guide".
        !url.startsWith('javascript:'))
      .map(({ url, name }) => [url, name]);

    downloadExcel(rows, formatFilename(currentPage));
  }
  84.  
  /**
   * Collects every `<a>` of a document, then recurses into the documents of
   * its `<iframe>` elements, appending `{ name, url }` entries to `results`.
   *
   * `name` is the anchor's text with all whitespace removed.
   *
   * @param {Document|null|undefined} doc - Document to scan; null/undefined
   *   (e.g. an iframe whose `contentDocument` is null) is skipped.
   * @param {Array<{name: string, url: string}>} results - Accumulator; it is
   *   mutated in place.
   * @returns {Array<{name: string, url: string}>} The same `results` array.
   */
  function inIframe(doc, results) {
    if (doc == null) return results;

    doc.querySelectorAll("a").forEach((anchor) => {
      // HTML anchors expose `href` as a string, but SVG <a> elements also
      // match the "a" selector and expose an SVGAnimatedString object; read
      // its `baseVal` so every stored url is a plain string.
      const rawHref = anchor.href;
      let link = '';
      if (typeof rawHref === 'string') {
        link = rawHref;
      } else if (rawHref != null && typeof rawHref.baseVal === 'string') {
        link = rawHref.baseVal;
      }

      results.push({
        name: anchor.textContent.replace(/\s+/g, ""),
        url: link,
      });
    });

    const iframes = doc.getElementsByTagName("iframe");
    for (let i = 0; i < iframes.length; i++) {
      inIframe(iframes[i].contentDocument, results);
    }
    return results;
  }
  103.  
  /**
   * Writes `contents` through a file handle: opens a writable stream from
   * the handle, writes the data, then closes the stream.
   *
   * @param {object} fileHandle - Object providing `createWritable()`.
   * @param {*} contents - Data passed unchanged to the stream's `write()`.
   * @returns {Promise<void>} Settles once `close()` has completed.
   */
  async function writeFile(fileHandle, contents) {
    const stream = await fileHandle.createWritable();
    await stream.write(contents);
    await stream.close();
  }
  112.  
  /**
   * Entry point of the export: gathers the links of the current document
   * (including its iframes) via `inIframe`, starting from an empty list, and
   * passes the result to `make_list`.
   *
   * @returns {void}
   */
  function get_links() {
    make_list(inIframe(document, []));
  }
  118.  
  /**
   * `change` handler for a file input: reads the first selected file as text
   * and shows it in the element with id "contents".
   *
   * @param {Event} e - Change event whose `target.files` holds the selection.
   * @returns {void}
   */
  function readFile(e) {
    const file = e.target.files[0];
    if (!file) return;

    const reader = new FileReader();
    reader.onload = (loadEvent) => {
      const output = document.getElementById('contents');
      // Without this guard a missing element threw a TypeError inside the
      // load callback.
      if (output === null) return;
      // The file is read as plain text, so insert it as text. Assigning it
      // to innerHTML would let markup inside the file be parsed by the page.
      output.textContent = loadEvent.target.result;
    };
    reader.readAsText(file);
  }
  128.  
  /**
   * Triggers a browser download of `text` as a UTF-8 plain-text file.
   *
   * @param {string} text - File contents.
   * @param {string} fileName - Suggested name of the downloaded file.
   * @returns {void}
   */
  function downloadTxt(text, fileName) {
    const element = document.createElement('a');
    element.setAttribute('href', `data:text/plain;charset=utf-8,${encodeURIComponent(text)}`);
    element.setAttribute('download', fileName);
    element.style.display = 'none';

    // The hidden link was previously clicked without ever being attached to
    // the document; some browsers ignore click() on a detached anchor. Attach
    // it for the click and remove it afterwards, as downloadExcel does.
    document.body.appendChild(element);
    try {
      element.click();
    } finally {
      document.body.removeChild(element);
    }
  }
  136.  
  /**
   * Script entry point. It
   *   1. injects the SheetJS (XLSX) library into the page,
   *   2. adds a hidden file input used to pick the history text file,
   *   3. adds a fixed round button at the bottom-right that opens the picker.
   *
   * When a history file is chosen, the current page URL is looked up in it
   * (one URL per line). If it is already listed, the user is told so and
   * nothing is exported. Otherwise the links are exported via `get_links()`
   * and an updated history file is offered for download via `downloadTxt()`.
   */
  (function () {
    "use strict";
    console.log("正在执行");

    // Load the spreadsheet library that the export helpers use as `XLSX`.
    const script = document.createElement('script');
    script.setAttribute('type', 'text/javascript');
    script.src = "https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js";
    document.documentElement.appendChild(script);

    // Hidden picker for the history file.
    const input = document.createElement('input');
    input.id = "inp";
    input.type = "file";
    input.style.visibility = "hidden";
    input.addEventListener('change', (e) => {
      const file = e.target.files[0];
      // Clear the selection so choosing the same file again still fires a
      // `change` event; the File object already obtained stays usable.
      e.target.value = '';
      if (!file) return;

      const reader = new FileReader();
      reader.onload = (loadEvent) => {
        const currentUrl = window.location.href;
        let contents = loadEvent.target.result;

        // Trim each line so files saved with CRLF line endings still match.
        const alreadyExported = contents
          .split('\n')
          .some((line) => line.trim() === currentUrl);
        if (alreadyExported) {
          alert("提示:该网站已经爬取过!");
          return;
        }

        // Make sure the new entry starts on its own line even when the file
        // does not end with a newline.
        if (contents !== '' && !contents.endsWith('\n')) {
          contents += '\n';
        }
        contents += `${currentUrl}\n`;

        get_links();
        downloadTxt(contents, '历史记录.txt');
      };
      reader.readAsText(file);
    });

    // Floating export button.
    const toTopBtn = document.createElement('button');
    toTopBtn.innerHTML = "导出";
    toTopBtn.className = "a-b-c-d-toTop";
    toTopBtn.addEventListener('click', () => {
      // Use the element reference directly: looking it up by the id "inp"
      // could return an unrelated element of the host page with that id.
      input.click();
    });

    const style = document.createElement('style');
    style.id = "a-b-c-d-style";
    style.textContent = `.a-b-c-d-toTop{
  position: fixed;
  bottom: 10%;
  right: 5%;
  width: 50px;
  height: 50px;
  border-radius: 50%;
  z-index: 999;
  cursor: pointer;
  font-size: 12px;
  overflow: hidden;
  background: blue
  }`;

    const body = document.body;
    body.appendChild(input);
    body.appendChild(toTopBtn);
    body.appendChild(style);
  })();