导出网页中的全部有效链接

点击右下角导出图标,自动爬取全部有效链接并导出为 Excel 表格

当前为 2022-09-27 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Export All Useful Links
  3. // @name:zh-CN 导出网页中的全部有效链接
  4. // @namespace xcl
  5. // @version 1.0
  6. // @description:zh-CN 点击右下角导出图标,自动爬取全部有效链接并导出为excel表格
  7. // @author xcl
  8. // @match *://*/*
  9. // @grant none
  10. // @description Get all links from a website. right-click -> tampermonkey -> "Get All Links".
  11. // ==/UserScript==
  12.  
  13.  
  /**
   * Turn a page URL into a string that is safe to use as a file name.
   *
   * A leading `http://` or `https://` scheme is removed, then every character
   * that is reserved in common file systems (`< > / \ | : * ? # "`) is replaced
   * with an underscore.
   *
   * @param {string} url - URL to convert.
   * @returns {string} The sanitised name (without any file extension).
   */
  function formatFilename(url) {
    // A single global character class replaces every occurrence. Building the
    // pattern from the string "/" + ch + "/g" does not work: the slashes and
    // the "g" become part of the pattern instead of delimiters and a flag.
    return url
      .replace(/^https?:\/\//i, '')
      .replace(/[<>\/\\|:*?#"]/g, '_');
  }
  28.  
  // Wrap one worksheet in a workbook, serialise it as an .xlsx file and return
  // the bytes as a Blob (callers download it through URL.createObjectURL).
  // `sheetName` falls back to 'sheet1' when it is falsy.
  function sheet2blob(sheet, sheetName) {
    // Copy the low byte of every UTF-16 code unit of a binary string into an
    // ArrayBuffer.
    const toArrayBuffer = (binary) => {
      const bytes = new Uint8Array(binary.length);
      for (let idx = 0; idx < binary.length; idx += 1) {
        bytes[idx] = binary.charCodeAt(idx) & 0xFF;
      }
      return bytes.buffer;
    };

    const name = sheetName || 'sheet1';
    const sheets = {};
    sheets[name] = sheet;
    const workbook = { SheetNames: [name], Sheets: sheets };

    // Options for the workbook writer.
    const writeOptions = {
      bookType: 'xlsx', // output file format
      // Whether to emit a Shared String Table; according to the library docs,
      // enabling it is slower but more compatible with older iOS devices.
      bookSST: false,
      type: 'binary'
    };

    const serialized = XLSX.write(workbook, writeOptions);
    return new Blob([toArrayBuffer(serialized)], {
      type: "application/octet-stream"
    });
  }
  56.  
  /**
   * Convert an array of rows into a worksheet and trigger a browser download
   * of it as `<filename>.xlsx`.
   *
   * @param {Array<Array<*>>} aoa - Rows of cell values (array of arrays).
   * @param {string} filename - Download name without the `.xlsx` extension.
   */
  function downloadExcel(aoa, filename) {
    const sheet = XLSX.utils.aoa_to_sheet(aoa);
    console.log("正在导出表格");
    const blob = sheet2blob(sheet, "Sheet1");
    const url = URL.createObjectURL(blob);

    // A temporary anchor with a `download` attribute is the way to start a
    // download from script; it only needs to be in the DOM for the click.
    const link = document.createElement('a');
    link.href = url;
    link.download = `${filename}.xlsx`;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      document.body.removeChild(link);
      // Release the blob held by the object URL. Deferred so that the browser
      // has picked up the download before the URL becomes invalid.
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    }
  }
  69.  
  /**
   * Filter the collected links and export the remaining ones as a spreadsheet
   * named after the current page.
   *
   * Dropped entries: links pointing at the current page itself, empty URLs,
   * `javascript:` pseudo-links and entries whose `url` is not a string.
   * Each exported row is `[url, name]`.
   *
   * @param {Array<{name: string, url: string}>} results - Collected links.
   */
  function make_list(results) {
    const pageUrl = window.location.href;
    const rows = results
      .filter(({ url }) =>
        typeof url === 'string' &&
        url !== '' &&
        url !== pageUrl &&
        // Only reject the javascript: scheme; a substring check would also
        // throw away real pages such as /javascript-guide.
        !/^javascript:/i.test(url))
      .map(({ url, name }) => [url, name]);
    downloadExcel(rows, formatFilename(pageUrl));
  }
  79.  
  /**
   * Collect every `<a>` element of the document as `{ name, url }` and hand
   * the list to `make_list` for filtering and export.
   *
   * `name` is the anchor's text with runs of whitespace collapsed to a single
   * space and the ends trimmed; `url` is the anchor's `href` as a string.
   */
  function get_links() {
    const results = Array.from(document.querySelectorAll("a"), (anchor) => {
      const href = anchor.href;
      return {
        // Collapse rather than delete whitespace so multi-word link text
        // keeps its word boundaries.
        name: anchor.textContent.replace(/\s+/g, " ").trim(),
        // SVG <a> elements expose href as an object carrying `baseVal`
        // instead of a plain string; normalise both shapes to a string.
        url: typeof href === 'string' ? href : String((href && href.baseVal) || '')
      };
    });
    make_list(results);
  }
  93.  
  // Userscript entry point: loads the spreadsheet library from the CDN and
  // adds a fixed-position button to the page whose click handler calls
  // get_links().
  (() => {
    "use strict";
    console.log("正在执行");

    // Inject the xlsx bundle as a <script> element under the root element.
    const xlsxLoader = document.createElement('script');
    xlsxLoader.setAttribute('type', 'text/javascript');
    xlsxLoader.src = "https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js";
    document.documentElement.appendChild(xlsxLoader);

    // The export button itself.
    const exportButton = document.createElement('button');
    exportButton.innerHTML = "导出";
    exportButton.className = "a-b-c-d-toTop";
    exportButton.onclick = () => {
      get_links();
    };

    const pageBody = document.body;

    // Stylesheet for the button. font-size appears twice in the rule; the
    // later declaration (12px) is the one that takes effect.
    const styleElement = document.createElement('style');
    styleElement.id = "a-b-c-d-style";
    const cssText = [
      ".a-b-c-d-toTop{",
      "position: fixed;",
      "bottom: 10%;",
      "right: 5%;",
      "width: 50px;",
      "height: 50px;",
      "border-radius: 50%;",
      "font-size: 15px;",
      "z-index: 999;",
      "cursor: pointer;",
      "font-size: 12px;",
      "overflow: hidden;",
      "background: blue",
      "}"
    ].join("\n");

    // Use the element's `styleSheet.cssText` when that property exists,
    // otherwise append the rule as a text node.
    if (styleElement.styleSheet) {
      styleElement.styleSheet.cssText = cssText;
    } else {
      styleElement.appendChild(document.createTextNode(cssText));
    }

    pageBody.appendChild(exportButton);
    pageBody.appendChild(styleElement);
  })();