douyin-user-data-download

下载抖音用户主页数据!

当前为 2023-07-29 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name douyin-user-data-download
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.2.6.1
  5. // @description 下载抖音用户主页数据!
  6. // @author xxmdmst
  7. // @match https://www.douyin.com/user/*
  8. // @icon https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
  9. // @grant none
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  13. (function () {
  14. 'use strict';
  15. let table;
  16.  
  /**
   * Builds the lookup table used by `str2gbk` and stores it in the shared
   * `table` variable.
   *
   * Every double-byte GBK code listed in the ranges below is written into a
   * Uint16Array as `(trail << 8) | lead`, the whole array is decoded with
   * `TextDecoder('gbk')`, and each decoded UTF-16 code unit becomes an index
   * into `table` whose value is the 16-bit code it came from. Entries that are
   * never produced keep the sentinel 0xFFFF.
   *
   * NOTE(review): this assumes a little-endian platform (so the lead byte is
   * the first byte the decoder sees) and that every code decodes to exactly
   * one UTF-16 code unit, since decoded characters and codes are paired by
   * index.
   */
  function initGbkTable() {
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    // Each entry: [first lead byte, last lead byte, first trail byte, last trail byte]
    const byteRanges = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];
    const gbkCodes = new Uint16Array(23940);
    let filled = 0;

    byteRanges.forEach(([leadFirst, leadLast, trailFirst, trailLast]) => {
      for (let trail = trailFirst; trail <= trailLast; trail++) {
        // 0x7F is never used as a trail byte.
        if (trail === 0x7F) {
          continue;
        }
        for (let lead = leadFirst; lead <= leadLast; lead++) {
          gbkCodes[filled] = (trail << 8) | lead;
          filled += 1;
        }
      }
    });

    // The table is published before decoding, exactly as the sentinel-filled
    // array, so a decoder failure leaves an "everything unmapped" table behind.
    table = new Uint16Array(65536).fill(0xFFFF);
    const decoded = new TextDecoder('gbk').decode(gbkCodes);
    for (let idx = 0; idx < decoded.length; idx++) {
      table[decoded.charCodeAt(idx)] = gbkCodes[idx];
    }
  }
  48.  
  /**
   * Encodes a JavaScript string as GBK bytes.
   *
   * @param {string} str - Text to encode; processed one UTF-16 code unit at a time.
   * @param {Object} [opt] - Optional hooks.
   * @param {function(number): Uint8Array} [opt.onAlloc] - Allocates the output
   *   buffer for the given byte length. Defaults to `Buffer.allocUnsafe` when a
   *   `Buffer` function exists, otherwise to `new Uint8Array(len)`.
   * @param {function(number, string): number} [opt.onError] - Called with the
   *   index and the full string for a code unit that has no mapping. Return -1
   *   to stop encoding, a value above 0xFF to emit two bytes (low byte first),
   *   or any other value to emit one byte. Defaults to emitting 63 ('?').
   * @returns {Uint8Array} View over the bytes actually written.
   */
  function str2gbk(str, opt = {}) {
    // The lookup table is built lazily on first use.
    if (!table) {
      initGbkTable();
    }

    const UNMAPPED = 0xFFFF;
    const EURO_SIGN = 8364;

    const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
    const allocate = opt.onAlloc || (
      nodeAlloc
        ? (size) => nodeAlloc(size)
        : (size) => new Uint8Array(size)
    );
    const handleUnmapped = opt.onError || (() => 63);

    // No code unit produces more than two bytes.
    const out = allocate(str.length * 2);
    let written = 0;

    for (let pos = 0; pos < str.length; pos++) {
      const unit = str.charCodeAt(pos);

      // ASCII passes through unchanged.
      if (unit < 0x80) {
        out[written++] = unit;
        continue;
      }

      const mapped = table[unit];
      if (mapped !== UNMAPPED) {
        // Table values hold the first byte in the low 8 bits.
        out[written++] = mapped;
        out[written++] = mapped >> 8;
        continue;
      }

      // The euro sign is emitted as the single byte 0x80.
      if (unit === EURO_SIGN) {
        out[written++] = 0x80;
        continue;
      }

      const fallback = handleUnmapped(pos, str);
      if (fallback === -1) {
        break;
      }
      out[written++] = fallback;
      if (fallback > 0xFF) {
        out[written++] = fallback >> 8;
      }
    }

    return out.subarray(0, written);
  }
  92.  
  // Video records collected so far; stored on `window`, so the rest of the
  // script reads and reassigns it through the bare name `aweme_list`.
  window.aweme_list = [];

  // Flat list of profile values, filled in the same order as `userKey`.
  window.userData = [];

  // Column headers for the profile row of the exported CSV.
  const userKey = [
    "昵称",
    "关注",
    "粉丝",
    "获赞",
    "抖音号",
    "IP属地",
    "年龄",
    "签名",
    "作品数",
    "主页",
  ];

  // Handle of the pending setTimeout that (re)creates the per-video buttons.
  let timer;
  101.  
  /**
   * Seeds `userData` and `aweme_list` from the payload embedded in the page.
   *
   * The element with id `RENDER_DATA` is expected to contain URI-encoded JSON.
   * Every own property of that object except `_location` and `app` is treated
   * as a candidate profile entry; entries without `user.user` are skipped.
   * For each profile the ten values matching `userKey` are appended to
   * `userData`, and its posts are appended to `aweme_list`.
   *
   * Does nothing when the element is missing or its content cannot be parsed.
   * Otherwise it schedules `createDownloadButton` one second later.
   */
  function extractDataFromScript() {
    const scriptTag = document.getElementById('RENDER_DATA');
    if (!scriptTag) return;

    let data;
    try {
      data = JSON.parse(decodeURIComponent(scriptTag.innerHTML));
    } catch (e) {
      // A malformed payload must not abort the caller's remaining setup.
      console.error(e);
      return;
    }
    if (data === null || typeof data !== "object") return;

    for (const prop in data) {
      if (!Object.prototype.hasOwnProperty.call(data, prop) || prop === "_location" || prop === "app") {
        continue;
      }
      const user = data[prop];
      const userInfo = user && user.user && user.user.user;
      if (!userInfo) continue;

      // Double embedded quotes so the quoted signature stays one CSV field.
      const signature = userInfo.desc == null ? '' : String(userInfo.desc).replace(/"/g, '""');

      userData.push(
        userInfo.nickname, userInfo.followingCount, userInfo.mplatformFollowersCount,
        // Prefer the custom id; fall back to the short id only when it is empty.
        userInfo.totalFavorited, (userInfo.uniqueId || userInfo.shortId), userInfo.ipLocation,
        userInfo.age, '"' + signature + '"', userInfo.awemeCount, "https://www.douyin.com/user/" + userInfo.secUid
      );

      const posts = user.post && Array.isArray(user.post.data) ? user.post.data : [];
      const post_data = posts.map(item => {
        const playAddr = item.video && item.video.playAddr && item.video.playAddr[0];
        return Object.assign(
          {"awemeId": item.awemeId, "desc": item.desc},
          item.stats,
          {
            "date": new Date(item.createTime * 1000).toLocaleString(),
            // Keep the entry (with an empty url) so list positions still
            // line up with the rendered video cards.
            "url": playAddr ? "https:" + playAddr.src : ""
          });
      });
      aweme_list = aweme_list.concat(post_data);
    }

    if (timer !== undefined) {
      clearTimeout(timer);
    }
    timer = setTimeout(() => createDownloadButton(), 1000);
  }
  128.  
  /**
   * Copies `text` to the clipboard through a temporary read-only textarea and
   * `document.execCommand("copy")`.
   *
   * @param {string} text - Text to place on the clipboard.
   * @returns {boolean} The result of `execCommand`, or false when anything
   *   threw along the way. Never throws.
   */
  function copyToClipboard(text) {
    let textarea = null;
    let attached = false;
    try {
      textarea = document.createElement("textarea");
      textarea.setAttribute('readonly', 'readonly');
      textarea.value = text;
      document.body.appendChild(textarea);
      attached = true;
      textarea.select();
      return document.execCommand("copy");
    } catch (e) {
      console.log(e);
      return false;
    } finally {
      // Always detach the helper element, even when the copy attempt threw.
      if (attached) {
        document.body.removeChild(textarea);
      }
    }
  }
  144.  
  /**
   * Opens `url` by clicking a temporary anchor whose target is "_blank".
   * The anchor is attached to the body only for the duration of the click.
   *
   * @param {string} url - Address to open.
   */
  function openLink(url) {
    const anchor = Object.assign(document.createElement('a'), {
      href: url,
      target: "_blank",
    });
    const { body } = document;
    body.appendChild(anchor);
    anchor.click();
    body.removeChild(anchor);
  }
  153.  
  /**
   * Creates (but does not attach) a half-transparent button that is absolutely
   * positioned at the right edge of its future parent.
   *
   * @param {string} text - Button label.
   * @param {string} top - CSS `top` offset, e.g. "21px".
   * @param {function(Event): void} func - Click handler.
   * @returns {HTMLButtonElement} The new button.
   */
  function createVideoButton(text, top, func) {
    const btn = document.createElement("button");
    btn.textContent = text;
    Object.assign(btn.style, {
      position: "absolute",
      right: "0px",
      top: top,
      opacity: "0.5",
    });
    btn.addEventListener("click", func);
    return btn;
  }
  164.  
  /**
   * Adds "copy link", "open link" and "download" buttons to every video card
   * link on the page that has not been decorated yet.
   *
   * The card at position `i` in the matched node list is paired with
   * `aweme_list[i]`, looked up when the button is clicked. When no entry
   * exists at that position yet, the handlers fail softly instead of throwing.
   * Decorated links are marked through `dataset.added`, so the function can
   * be called repeatedly.
   */
  function createDownloadButton() {
    const targetNodes = document.querySelectorAll("ul.EZC0YBrG > li.Eie04v01 > div > a");
    for (let i = 0; i < targetNodes.length; i++) {
      const targetNode = targetNodes[i];
      if (targetNode.dataset.added) {
        continue;
      }

      const copyButton = createVideoButton("复制链接", "0px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        const item = aweme_list[i];
        const copied = item ? copyToClipboard(item.url) : false;
        copyButton.textContent = copied ? "复制成功" : "复制失败";
        setTimeout(() => {
          copyButton.textContent = '复制链接';
        }, 2000);
      });
      targetNode.appendChild(copyButton);

      const openButton = createVideoButton("打开链接", "21px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        const item = aweme_list[i];
        if (item) {
          openLink(item.url);
        }
      });
      targetNode.appendChild(openButton);

      const downloadButton = createVideoButton("下载", "42px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        const item = aweme_list[i];
        const showFailure = () => {
          downloadButton.textContent = "下载失败";
        };
        if (!item || !item.url) {
          showFailure();
          return;
        }

        const xhr = new XMLHttpRequest();
        xhr.open('GET', item.url.replace("http://", "https://"), true);
        xhr.responseType = 'blob';
        xhr.onload = () => {
          // Do not save an HTTP error body as if it were the video.
          if (xhr.status < 200 || xhr.status >= 300) {
            showFailure();
            return;
          }
          const objectUrl = window.URL.createObjectURL(xhr.response);
          const a = document.createElement('a');
          a.href = objectUrl;
          // Strip characters that are invalid in file names, and line breaks;
          // fall back to the id when nothing usable is left.
          const safeDesc = item.desc ? item.desc.replace(/[\\\/:*?"<>|\r\n]/g, "") : "";
          a.download = (safeDesc || item.awemeId) + ".mp4";
          a.click();
          // Release the blob once the browser has had time to start saving it.
          setTimeout(() => window.URL.revokeObjectURL(objectUrl), 1000);
        };
        xhr.onerror = showFailure;
        xhr.onprogress = (progress) => {
          if (progress.lengthComputable) {
            downloadButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
          }
        };
        xhr.send();
      });
      targetNode.appendChild(downloadButton);

      targetNode.dataset.added = true;
    }
  }
  212.  
  /**
   * Creates a fixed-position button near the right edge of the viewport and
   * appends it to the document body.
   *
   * @param {string} title - Button label.
   * @param {string} [top] - CSS `top` offset; "60px" when undefined.
   * @returns {HTMLButtonElement} The attached button.
   */
  function createButton(title, top) {
    const btn = document.createElement('button');
    btn.textContent = title;
    Object.assign(btn.style, {
      position: 'fixed',
      right: '5px',
      top: top === undefined ? "60px" : top,
      zIndex: '90000',
    });
    document.body.appendChild(btn);
    return btn;
  }
  224.  
  /**
   * Offers `txt` to the user as a file download.
   *
   * The content is wrapped in a `text/plain` Blob, exposed through an object
   * URL, and saved by clicking a temporary anchor. The object URL is revoked
   * right after the click.
   *
   * @param {string|Uint8Array} txt - File content (anything accepted as a Blob part).
   * @param {string} filename - Suggested name; the characters / : * ? " < > |
   *   are removed from it.
   */
  function txt2file(txt, filename) {
    const objectUrl = URL.createObjectURL(new Blob([txt], {type: 'text/plain'}));
    const anchor = Object.assign(document.createElement('a'), {
      href: objectUrl,
      download: filename.replace(/[\/:*?"<>|]/g, ""),
    });
    const { body } = document;
    body.appendChild(anchor);
    anchor.click();
    body.removeChild(anchor);
    URL.revokeObjectURL(objectUrl);
  }
  236.  
  /**
   * Exports the collected profile values and video records as a CSV file.
   *
   * Layout: the `userKey` header row, one row with all `userData` values, a
   * blank line, the video header row, then one row per `aweme_list` entry.
   * The file is named after the first `userData` value.
   *
   * @param {string} [encoding] - Pass "gbk" to convert the text with
   *   `str2gbk` before saving; any other value (including a click event
   *   object) leaves the text as a JavaScript string.
   */
  function downloadData(encoding) {
    // Always-quoted CSV field with embedded quotes doubled.
    const quote = (value) => '"' + (value == null ? "" : String(value)).replace(/"/g, '""') + '"';
    // Quotes only when the value would otherwise break the row, e.g. a
    // locale-formatted date that contains a comma.
    const quoteIfNeeded = (value) => {
      const field = value == null ? "" : String(value);
      return /[",\r\n]/.test(field) ? quote(field) : field;
    };

    let text = userKey.join(",") + "\n" + userData.join(",") + "\n\n";
    text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
    aweme_list.forEach(item => {
      text += [
        quote(item.desc), item.diggCount, item.commentCount,
        item.collectCount, item.shareCount, quoteIfNeeded(item.date), quoteIfNeeded(item.url)
      ].join(",") + "\n";
    });
    if (encoding === "gbk") {
      text = str2gbk(text);
    }
    // Avoid "undefined.csv" when no profile data was collected.
    const baseName = userData.length > 0 && userData[0] ? userData[0] : "douyin-user-data";
    txt2file(text, baseName + ".csv");
  }
  248.  
  /**
   * Wraps `XMLHttpRequest.prototype.send` so that completed requests whose URL
   * contains "/aweme/v1/web/aweme/post" have their `aweme_list` entries
   * appended to the shared `aweme_list`, after which `createDownloadButton`
   * is (re)scheduled one second later.
   *
   * The observer is registered with `addEventListener`, so a handler the page
   * assigned to `onreadystatechange` is left in place. Responses that cannot
   * be parsed or do not contain an `aweme_list` array are ignored.
   */
  function interceptResponse() {
    const originalSend = XMLHttpRequest.prototype.send;
    XMLHttpRequest.prototype.send = function (...args) {
      this.addEventListener("readystatechange", () => {
        if (this.readyState !== 4) return;

        // `_url` is not set anywhere in this script (presumably by other code
        // on the page); fall back to the standard `responseURL`.
        const url = String(this._url || this.responseURL || "");
        if (url.indexOf("/aweme/v1/web/aweme/post") === -1) return;

        let json;
        try {
          // With responseType "json" the response is already an object.
          json = typeof this.response === "string" ? JSON.parse(this.response) : this.response;
        } catch (e) {
          console.error(e);
          return;
        }
        if (!json || !Array.isArray(json.aweme_list)) return;

        const post_data = json.aweme_list.map(item => {
          const statistics = item.statistics || {};
          const urlList = item.video && item.video.play_addr && item.video.play_addr.url_list;
          return {
            "awemeId": item.aweme_id,
            "desc": item.desc,
            "diggCount": statistics.digg_count,
            "commentCount": statistics.comment_count,
            "collectCount": statistics.collect_count,
            "shareCount": statistics.share_count,
            "date": new Date(item.create_time * 1000).toLocaleString(),
            // Keep the entry (with an empty url) so list positions still
            // line up with the rendered video cards.
            "url": urlList && urlList.length > 0 ? urlList[0] : ""
          };
        });
        aweme_list = aweme_list.concat(post_data);

        if (timer !== undefined) {
          clearTimeout(timer);
        }
        timer = setTimeout(() => createDownloadButton(), 1000);
      });
      return originalSend.apply(this, args);
    };
  }
  279.  
  /**
   * Adds a button that, once clicked, scrolls the window to the bottom every
   * `SCROLL_DELAY` milliseconds until a tick finds the page already at the
   * bottom.
   *
   * Clicking again while scrolling is in progress has no effect; after the
   * loop has stopped, a new click starts it again.
   */
  function scrollPageToBottom() {
    const SCROLL_DELAY = 1000; // Delay between scroll actions (in milliseconds)
    let scrollInterval; // Handle of the running interval; undefined while idle.

    function getScrollPosition() {
      return scrollY || pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0;
    }

    function scrollToBottom() {
      scrollTo(0, document.body.scrollHeight);
    }

    function hasReachedBottom() {
      return getScrollPosition() >= (document.body.scrollHeight - innerHeight);
    }

    function scrollLoop() {
      if (!hasReachedBottom()) {
        scrollToBottom();
        return;
      }
      console.log("Reached the bottom of the page!");
      clearInterval(scrollInterval);
      scrollInterval = undefined;
    }

    function startScrolling() {
      // A second interval would overwrite the first handle, which could then
      // never be cleared.
      if (scrollInterval !== undefined) {
        return;
      }
      scrollInterval = setInterval(scrollLoop, SCROLL_DELAY);
    }

    const button = createButton('开启自动下拉到底', '60px');
    button.addEventListener('click', startScrolling);
  }
  312.  
  // Script start-up: add the auto-scroll button and hook XHR right away, then
  // read the embedded page data and add the export buttons once the page has
  // finished loading.
  scrollPageToBottom();
  interceptResponse();

  const onPageLoaded = () => {
    try {
      extractDataFromScript();
    } catch (e) {
      // The export buttons must still appear when the embedded data is unusable.
      console.error(e);
    }
    createButton("下载已加载数据(wps)", "81px").addEventListener('click', () => downloadData());
    createButton("下载gbk数据(office)", "102px").addEventListener('click', () => downloadData("gbk"));
  };

  // Use addEventListener so other load handlers are not replaced, and run at
  // once when the load event has already fired before this script executed.
  if (document.readyState === "complete") {
    onPageLoaded();
  } else {
    window.addEventListener("load", onPageLoaded);
  }
  322. })();