douyin-user-data-download

下载抖音用户主页数据!

当前为 2023-09-21 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name douyin-user-data-download
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.2.9
  5. // @description 下载抖音用户主页数据!
  6. // @author xxmdmst
  7. // @match https://www.douyin.com/user/*
  8. // @icon https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
  9. // @grant none
  10. // @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
  11. // @license MIT
  12. // ==/UserScript==
  13.  
  14. (function () {
  15. 'use strict';
  // Lazily built lookup used by str2gbk: index = UTF-16 code unit,
  // value = packed GBK byte pair, or 0xFFFF when the unit has no mapping.
  let table;

  /**
   * Build the UTF-16 -> GBK lookup table and store it in `table`.
   *
   * Every two-byte combination listed in the ranges below is packed into a
   * 16-bit entry (first byte in the low 8 bits, second byte in the high
   * 8 bits), the whole array is decoded in one go with the platform's GBK
   * TextDecoder, and the resulting characters are used as keys pointing back
   * at the packed byte pair.
   *
   * NOTE(review): decoding a Uint16Array as a byte stream yields
   * "first byte, second byte" only on a little-endian host - confirm if this
   * ever needs to run elsewhere.
   */
  function initGbkTable() {
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    // Each row: [first byte from, first byte to, second byte from, second byte to].
    const byteRanges = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];
    const packedPairs = new Uint16Array(23940);
    let filled = 0;

    byteRanges.forEach(([leadFrom, leadTo, trailFrom, trailTo]) => {
      for (let trail = trailFrom; trail <= trailTo; trail += 1) {
        // 0x7F is never used as a second byte.
        if (trail === 0x7F) {
          continue;
        }
        for (let lead = leadFrom; lead <= leadTo; lead += 1) {
          packedPairs[filled] = (trail << 8) | lead;
          filled += 1;
        }
      }
    });

    // Start with every slot marked "unmapped", then fill in what decodes.
    table = new Uint16Array(65536).fill(0xFFFF);
    const decoded = new TextDecoder('gbk').decode(packedPairs);
    for (let pos = 0; pos < decoded.length; pos += 1) {
      table[decoded.charCodeAt(pos)] = packedPairs[pos];
    }
  }
  49.  
  /**
   * Encode a string as GBK bytes using the lazily initialised `table`.
   *
   * @param {string} str - Text to encode.
   * @param {Object} [opt]
   * @param {function(number): Object} [opt.onAlloc] - Allocates the output
   *   buffer; called with `str.length * 2`. Defaults to `Buffer.allocUnsafe`
   *   when a `Buffer` global exists, otherwise to `new Uint8Array(len)`.
   * @param {function(number, string): number} [opt.onError] - Called with
   *   (index, str) for a code unit that has no mapping. Return -1 to stop
   *   encoding, a value above 0xFF to emit two bytes (low byte first), or any
   *   other value to emit one byte. Defaults to returning 63 ('?').
   * @returns {Object} `subarray(0, n)` of the allocated buffer, where n is the
   *   number of bytes written.
   */
  function str2gbk(str, opt = {}) {
    if (!table) {
      initGbkTable();
    }

    const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
    const fallbackAlloc = nodeAlloc
      ? (size) => nodeAlloc(size)
      : (size) => new Uint8Array(size);
    const allocate = opt.onAlloc || fallbackAlloc;
    const handleUnmapped = opt.onError || (() => 63);

    // Two bytes per UTF-16 code unit is the worst case.
    const out = allocate(str.length * 2);
    let written = 0;

    // Writes a packed pair: low 8 bits first, then the high 8 bits.
    const emitPair = (packed) => {
      out[written++] = packed;
      out[written++] = packed >> 8;
    };

    for (let pos = 0; pos < str.length; pos += 1) {
      const unit = str.charCodeAt(pos);

      // ASCII passes through unchanged.
      if (unit < 0x80) {
        out[written++] = unit;
        continue;
      }

      const packed = table[unit];
      if (packed !== 0xFFFF) {
        emitPair(packed);
        continue;
      }

      // Euro sign without a table entry is written as the single byte 0x80.
      if (unit === 8364) {
        out[written++] = 0x80;
        continue;
      }

      const replacement = handleUnmapped(pos, str);
      if (replacement === -1) {
        break;
      }
      if (replacement > 0xFF) {
        emitPair(replacement);
      } else {
        out[written++] = replacement;
      }
    }

    return out.subarray(0, written);
  }
  93.  
  // Posts captured from intercepted "/aweme/v1/web/aweme/post" responses,
  // in arrival order (see interceptResponse).
  let aweme_list = [];

  // Column headers of the user row in the exported CSV; the order matches the
  // values pushed into userData by extractDataFromScript.
  let userKey = [
    "昵称",
    "关注",
    "粉丝",
    "获赞",
    "抖音号",
    "IP属地",
    "年龄",
    "签名",
    "作品数",
    "主页",
  ];

  // Values for the user row; userData[0] is also used as the download file name.
  let userData = [];

  // Handle of the pending setTimeout that (re)creates the per-post buttons.
  let timer;

  // The fixed "batch image download" button; its label doubles as a progress display.
  let dimg_button;
  102.  
  /**
   * Read the profile payload embedded in the page's `RENDER_DATA` script tag
   * and append the user's details to `userData`, in the column order declared
   * by `userKey`. Afterwards schedule `createDownloadButton` one second later.
   *
   * Does nothing when the tag is missing or its content cannot be parsed.
   * Payload sections that carry no `user.user` object are skipped.
   */
  function extractDataFromScript() {
    const scriptTag = document.getElementById('RENDER_DATA');
    if (!scriptTag) return;

    let data;
    try {
      data = JSON.parse(decodeURIComponent(scriptTag.innerHTML));
    } catch (e) {
      // A malformed payload must not abort the rest of the page setup.
      console.log(e);
      return;
    }

    for (const prop in data) {
      if (prop === "_location" || prop === "app") {
        continue;
      }
      const section = data[prop];
      const userInfo = section && section.user && section.user.user;
      if (!userInfo) {
        continue;
      }

      // Prefer the custom ID; fall back to the short ID when none is set.
      const douyinId = userInfo.uniqueId ? userInfo.uniqueId : userInfo.shortId;
      // The signature is stored pre-quoted for CSV, so embedded quotes are doubled.
      const signature = userInfo.desc === undefined
        ? ''
        : String(userInfo.desc).replace(/"/g, '""');

      userData.push(
        userInfo.nickname,
        userInfo.followingCount,
        userInfo.mplatformFollowersCount,
        userInfo.totalFavorited,
        // Leading tab keeps spreadsheet apps from reformatting numeric IDs.
        '\t' + douyinId,
        userInfo.ipLocation,
        userInfo.age,
        '"' + signature + '"',
        userInfo.awemeCount,
        "https://www.douyin.com/user/" + userInfo.secUid
      );
    }

    // Replace any pending refresh instead of stacking a second one.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
    timer = setTimeout(() => createDownloadButton(), 1000);
  }
  120.  
  /**
   * Copy `text` to the clipboard via a temporary read-only textarea and
   * `document.execCommand("copy")`.
   *
   * @param {string} text - Text to copy.
   * @returns {boolean} The result of `execCommand`, or false when anything
   *   throws. The temporary textarea is removed from the page in every case.
   */
  function copyToClipboard(text) {
    let textarea = null;
    try {
      textarea = document.createElement("textarea");
      textarea.setAttribute('readonly', 'readonly');
      textarea.value = text;
      document.body.appendChild(textarea);
      textarea.select();
      return document.execCommand("copy");
    } catch (e) {
      console.log(e);
      return false;
    } finally {
      // Clean up even when select()/execCommand() threw, so failed copies
      // do not leave stray textareas in the document.
      if (textarea && textarea.parentNode) {
        textarea.parentNode.removeChild(textarea);
      }
    }
  }
  136.  
  /**
   * Open `url` in a new tab by clicking a temporary anchor element that is
   * attached to the document only for the duration of the click.
   *
   * @param {string} url - Address to open.
   */
  function openLink(url) {
    const anchor = Object.assign(document.createElement('a'), {
      href: url,
      target: "_blank",
    });
    const { body } = document;
    body.appendChild(anchor);
    anchor.click();
    body.removeChild(anchor);
  }
  145.  
  /**
   * Create a half-transparent button pinned to the right edge of its
   * (positioned) container. The button is returned detached; the caller
   * appends it.
   *
   * @param {string} text - Button label.
   * @param {string} top - CSS `top` offset, e.g. "21px".
   * @param {function(Event): void} func - Click handler.
   * @returns {HTMLButtonElement} The new button.
   */
  function createVideoButton(text, top, func) {
    const overlayButton = document.createElement("button");
    overlayButton.textContent = text;
    Object.assign(overlayButton.style, {
      position: "absolute",
      right: "0px",
      top,
      opacity: "0.5",
    });
    overlayButton.addEventListener("click", func);
    return overlayButton;
  }
  156.  
  /**
   * Attach per-post action buttons (copy link, open link, download, and - for
   * image posts - zip download) to every post card that has not been
   * processed yet.
   *
   * Card number i is paired with `aweme_list[i]`. Cards for which no post
   * data has been captured yet are left untouched and unmarked, so a later
   * call can pick them up once the data arrives.
   *
   * NOTE(review): pairing by position assumes the rendered cards and the
   * intercepted responses stay in the same order - confirm against the page.
   */
  function createDownloadButton() {
    // File-name base: the description with reserved characters stripped,
    // falling back to the post ID when nothing usable remains.
    const baseFileName = (aweme) => {
      const cleaned = aweme.desc ? aweme.desc.replace(/[\/:*?"<>|]/g, "") : "";
      return cleaned || aweme.awemeId;
    };

    // Hand a blob to the browser's download machinery, then release its URL.
    const saveBlob = (blob, fileName) => {
      const objectUrl = URL.createObjectURL(blob);
      const anchor = document.createElement('a');
      anchor.href = objectUrl;
      anchor.download = fileName;
      anchor.click();
      // Revoke a little later so the download has started before the URL dies.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
    };

    const targetNodes = document.querySelectorAll("ul.EZC0YBrG > li.Eie04v01 > div > a");
    for (let i = 0; i < targetNodes.length; i++) {
      const targetNode = targetNodes[i];
      if (targetNode.dataset.added) {
        continue;
      }
      const aweme = aweme_list[i];
      // No data for this card yet: skip before touching the DOM so that no
      // half-built, duplicated button set is left behind.
      if (!aweme) {
        continue;
      }

      const copyButton = createVideoButton("复制链接", "0px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        copyButton.textContent = copyToClipboard(aweme.url) ? "复制成功" : "复制失败";
        setTimeout(() => {
          copyButton.textContent = '复制链接';
        }, 2000);
      });
      targetNode.appendChild(copyButton);

      const openButton = createVideoButton("打开链接", "21px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        openLink(aweme.url);
      });
      targetNode.appendChild(openButton);

      const downloadButton = createVideoButton("下载", "42px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        const xhr = new XMLHttpRequest();
        xhr.open('GET', aweme.url.replace("http://", "https://"), true);
        xhr.responseType = 'blob';
        xhr.onload = () => {
          // Do not save an error page under a media file name.
          if (xhr.status < 200 || xhr.status >= 300) {
            downloadButton.textContent = "下载失败";
            return;
          }
          // Image posts expose their audio track here, hence the .mp3 suffix.
          saveBlob(xhr.response, baseFileName(aweme) + (aweme.images ? ".mp3" : ".mp4"));
        };
        xhr.onerror = () => {
          downloadButton.textContent = "下载失败";
        };
        xhr.onprogress = (progress) => {
          if (progress.lengthComputable) {
            downloadButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
          }
        };
        xhr.send();
      });
      targetNode.appendChild(downloadButton);

      if (aweme.images) {
        const zipButton = createVideoButton("图片打包下载", "63px", (event) => {
          event.preventDefault();
          event.stopPropagation();
          const zip = new JSZip();
          zipButton.textContent = "下载并打包中...";
          const downloads = aweme.images.map((link, index) => fetch(link)
            .then((response) => {
              if (!response.ok) {
                throw new Error(`HTTP ${response.status} for ${link}`);
              }
              return response.arrayBuffer();
            })
            .then((buffer) => {
              zip.file(`image_${index + 1}.jpg`, buffer);
            }));
          Promise.all(downloads)
            .then(() => zip.generateAsync({type: "blob"}))
            .then((content) => {
              saveBlob(content, baseFileName(aweme) + ".zip");
              zipButton.textContent = "图片打包完成";
            })
            .catch((err) => {
              // Surface the failure instead of leaving the label stuck on "in progress".
              console.log(err);
              zipButton.textContent = "图片打包失败";
            });
        });
        targetNode.appendChild(zipButton);
      }

      targetNode.dataset.added = true;
    }
  }
  232.  
  /**
   * Create a button fixed near the top-right corner of the viewport, append
   * it to the document body and return it.
   *
   * @param {string} title - Button label.
   * @param {string} [top] - CSS `top` offset; "60px" is used when omitted.
   * @returns {HTMLButtonElement} The appended button.
   */
  function createButton(title, top) {
    const offsetTop = top === undefined ? "60px" : top;
    const floatingButton = document.createElement('button');
    floatingButton.textContent = title;
    Object.assign(floatingButton.style, {
      position: 'fixed',
      right: '5px',
      top: offsetTop,
      // Keep the button above the page's own overlays.
      zIndex: '90000',
    });
    document.body.appendChild(floatingButton);
    return floatingButton;
  }
  244.  
  /**
   * Offer `txt` to the user as a file download.
   *
   * @param {string|Uint8Array} txt - File content; passed to `Blob` as its
   *   single part (downloadData passes either a string or encoded bytes).
   * @param {string} filename - Suggested file name; the characters
   *   / : * ? " < > | are stripped from it.
   */
  function txt2file(txt, filename) {
    const objectUrl = URL.createObjectURL(new Blob([txt], {type: 'text/plain'}));
    const anchor = Object.assign(document.createElement('a'), {
      href: objectUrl,
      download: filename.replace(/[\/:*?"<>|]/g, ""),
    });

    // The anchor is attached only for the duration of the click.
    document.body.appendChild(anchor);
    anchor.click();
    document.body.removeChild(anchor);

    URL.revokeObjectURL(objectUrl);
  }
  256.  
  /**
   * Export the collected user row and every captured post as a CSV file named
   * after `userData[0]`.
   *
   * Post descriptions are always quoted, with embedded quotes doubled; the
   * publish date and the download link are quoted only when they contain a
   * comma, a quote or a line break, so that such values cannot shift the
   * columns. The user row is joined as-is because its values are already
   * formatted when they are collected.
   *
   * @param {string} encoding - "gbk" converts the text with `str2gbk` before
   *   saving; any other value saves the text unchanged.
   */
  function downloadData(encoding) {
    const asText = (value) => (value === undefined || value === null ? "" : String(value));
    // Always-quoted CSV field.
    const quoted = (value) => '"' + asText(value).replace(/"/g, '""') + '"';
    // Quoted only when the content would otherwise break the row.
    const field = (value) => {
      const plain = asText(value);
      return /[",\r\n]/.test(plain) ? quoted(plain) : plain;
    };

    const lines = [
      userKey.join(","),
      userData.join(","),
      "",
      "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接",
    ];
    aweme_list.forEach((item) => {
      lines.push([
        quoted(item.desc),
        item.diggCount,
        item.commentCount,
        item.collectCount,
        item.shareCount,
        field(item.date),
        field(item.url),
      ].join(","));
    });

    let text = lines.join("\n") + "\n";
    if (encoding === "gbk") {
      text = str2gbk(text);
    }
    txt2file(text, userData[0] + ".csv");
  }
  268.  
  /**
   * Download the images of every captured image post, pack them into a single
   * zip (one folder per post, named "<position>.<description or post ID>")
   * and offer the archive as "<userData[0]>.zip".
   *
   * Progress and the final outcome are shown on `dimg_button`. When no
   * captured post has images an alert is shown instead. Any download or
   * packing failure is logged and reported on the button rather than left as
   * an unhandled rejection.
   *
   * @returns {Promise<void>} Resolves once the archive was offered, the alert
   *   was shown, or the failure was reported.
   */
  async function downloadImg() {
    const zip = new JSZip();
    let foundImages = false;

    try {
      for (const [position, aweme] of aweme_list.entries()) {
        if (!aweme.images) {
          continue;
        }
        foundImages = true;
        dimg_button.textContent = `${position + 1}.${aweme.desc}...`;
        const folderName = (position + 1) + "."
          + (aweme.desc ? aweme.desc.replace(/[\/:*?"<>|]/g, "") : aweme.awemeId);
        const folder = zip.folder(folderName);

        // Images of one post are fetched in parallel; posts are handled one
        // after another so the button can show which post is in progress.
        await Promise.all(aweme.images.map((link, imageIndex) => fetch(link)
          .then((res) => {
            // Do not store an HTTP error page as if it were an image.
            if (!res.ok) {
              throw new Error(`HTTP ${res.status} for ${link}`);
            }
            return res.arrayBuffer();
          })
          .then((buffer) => {
            folder.file(`image_${imageIndex + 1}.jpg`, buffer);
          })));
      }

      if (!foundImages) {
        alert("当前页面未发现图文链接");
        return;
      }

      dimg_button.textContent = "图片打包中...";
      const content = await zip.generateAsync({type: "blob"});
      const objectUrl = URL.createObjectURL(content);
      const link = document.createElement("a");
      link.href = objectUrl;
      link.download = userData[0] + ".zip";
      link.click();
      // Release the blob URL once the download has had time to start.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
      dimg_button.textContent = "图片打包完成";
    } catch (err) {
      console.log(err);
      dimg_button.textContent = "图片打包失败";
    }
  }
  300.  
  /**
   * Patch `XMLHttpRequest.prototype.send` so that every completed request to
   * "/aweme/v1/web/aweme/post" has its posts appended to `aweme_list`, after
   * which `createDownloadButton` is scheduled (debounced through `timer`).
   *
   * The hook is registered with `addEventListener`, so handlers the page
   * assigns to `onreadystatechange` keep working. Any failure while reading
   * a response is logged and swallowed so the page's own request handling is
   * never disturbed.
   */
  function interceptResponse() {
    const originalSend = XMLHttpRequest.prototype.send;

    // Reduce one raw API item to the fields the rest of the script uses.
    const toPost = (item) => {
      const stats = item.statistics || {};
      const playUrls = item.video && item.video.play_addr && item.video.play_addr.url_list;
      const images = Array.isArray(item.images)
        ? item.images.map((row) => {
          const urls = (row && row.url_list) || [];
          // Index 3 is the variant the script has always used; fall back to
          // the last available one when fewer are offered.
          return urls[3] || urls[urls.length - 1];
        })
        : null;
      return {
        "awemeId": item.aweme_id,
        "desc": item.desc,
        "diggCount": stats.digg_count,
        "commentCount": stats.comment_count,
        "collectCount": stats.collect_count,
        "shareCount": stats.share_count,
        "date": new Date(item.create_time * 1000).toLocaleString(),
        // Empty string keeps later string operations on the URL safe.
        "url": playUrls && playUrls.length ? playUrls[0] : "",
        "images": images
      };
    };

    const collectPosts = (xhr) => {
      if (xhr.readyState !== 4) {
        return;
      }
      try {
        // responseURL is the standard source of the request address; `_url`
        // is kept as a fallback. NOTE(review): nothing in this script sets
        // `_url` - presumably other code on the page does; confirm.
        const requestUrl = String(xhr.responseURL || xhr._url || "");
        if (requestUrl.indexOf("/aweme/v1/web/aweme/post") === -1) {
          return;
        }
        const json = typeof xhr.response === "string" ? JSON.parse(xhr.response) : xhr.response;
        const items = json && Array.isArray(json.aweme_list) ? json.aweme_list : [];
        aweme_list.push(...items.map(toPost));
      } catch (e) {
        console.log(e);
        return;
      }
      if (timer !== undefined) {
        clearTimeout(timer);
      }
      timer = setTimeout(() => createDownloadButton(), 500);
    };

    XMLHttpRequest.prototype.send = function (...args) {
      this.addEventListener("readystatechange", () => collectPosts(this));
      return originalSend.apply(this, args);
    };
  }
  337.  
  /**
   * Add a fixed button that, when clicked, keeps scrolling the page to the
   * bottom once per second until the bottom is reached.
   *
   * Only one scroll loop runs at a time: extra clicks while a loop is active
   * are ignored, and after the loop has stopped the button can start it again.
   */
  function scrollPageToBottom() {
    const SCROLL_DELAY = 1000; // Delay between scroll attempts, in milliseconds.
    let scrollInterval; // Id of the active loop; undefined while idle.

    function getScrollPosition() {
      return scrollY || pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0;
    }

    function scrollToBottom() {
      scrollTo(0, document.body.scrollHeight);
    }

    function hasReachedBottom() {
      return getScrollPosition() >= (document.body.scrollHeight - innerHeight);
    }

    function stopScrolling() {
      clearInterval(scrollInterval);
      scrollInterval = undefined;
    }

    function scrollLoop() {
      if (!hasReachedBottom()) {
        scrollToBottom();
        return;
      }
      console.log("Reached the bottom of the page!");
      stopScrolling();
    }

    function startScrolling() {
      // A second click must not start a second interval whose id would
      // overwrite the first one and make it impossible to stop.
      if (scrollInterval !== undefined) {
        return;
      }
      scrollInterval = setInterval(scrollLoop, SCROLL_DELAY);
    }

    const button = createButton('开启自动下拉到底', '60px');
    button.addEventListener('click', startScrolling);
  }
  370.  
  // Entry point: install the auto-scroll button and the XHR hook right away,
  // then build the export UI once the page has finished loading.
  scrollPageToBottom();
  interceptResponse();

  const setupExportUi = () => {
    try {
      extractDataFromScript();
    } catch (e) {
      // Missing or unexpected profile data must not prevent the buttons below.
      console.log(e);
    }
    createButton("下载已加载数据", "81px").addEventListener('click', () => downloadData("gbk"));
    dimg_button = createButton("图文批量打包下载", "102px");
    dimg_button.addEventListener('click', () => downloadImg());
  };

  // The script may be injected after the load event has already fired, in
  // which case a load handler would never run. addEventListener is used so
  // that handlers registered by the page itself are not replaced.
  if (document.readyState === 'complete') {
    setupExportUi();
  } else {
    window.addEventListener('load', setupExportUi);
  }
  379. })();