douyin-user-data-download

下载抖音用户主页数据!

当前为 2024-06-03 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name douyin-user-data-download
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.3.1
  5. // @description 下载抖音用户主页数据!
  6. // @author xxmdmst
  7. // @match https://www.douyin.com/user/*
  8. // @icon https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
  9. // @grant none
  10. // @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
  11. // @license MIT
  12. // ==/UserScript==
  13.  
  14. (function() {
  // Lookup table indexed by UTF-16 code unit; each entry is a packed GBK code
  // or 0xFFFF when no mapping exists. Stays undefined until initGbkTable() runs.
  let table;

  /**
   * Builds the `table` lookup used to convert UTF-16 code units to GBK bytes.
   *
   * Every two-byte sequence in the listed ranges is enumerated, the whole
   * buffer is decoded in one go with the platform's `gbk` TextDecoder, and the
   * resulting characters are used as indices into `table`.
   *
   * Each packed code keeps the first byte in the low 8 bits and the second
   * byte in the high 8 bits, so the raw bytes of the Uint16Array only form
   * valid GBK pairs when the platform stores the low byte first.
   */
  function initGbkTable() {
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    // Each row: [first-byte min, first-byte max, second-byte min, second-byte max].
    const byteRanges = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];
    const packedCodes = new Uint16Array(23940);
    let count = 0;

    byteRanges.forEach(([firstMin, firstMax, secondMin, secondMax]) => {
      for (let second = secondMin; second <= secondMax; second++) {
        // 0x7F is excluded as a second byte.
        if (second === 0x7F) {
          continue;
        }
        for (let first = firstMin; first <= firstMax; first++) {
          packedCodes[count] = (second << 8) | first;
          count += 1;
        }
      }
    });

    table = new Uint16Array(65536).fill(0xFFFF);
    const decoded = new TextDecoder('gbk').decode(packedCodes);
    for (let pos = 0; pos < decoded.length; pos++) {
      table[decoded.charCodeAt(pos)] = packedCodes[pos];
    }
  }
  48.  
  /**
   * Encodes a JavaScript string as GBK bytes.
   *
   * @param {string} str - Text to encode.
   * @param {{onAlloc?: function(number): Uint8Array, onError?: function(number, string): number}} [opt]
   *   `onAlloc(len)` supplies the output buffer (defaults to
   *   `Buffer.allocUnsafe` when `Buffer` exists, otherwise a `Uint8Array`).
   *   `onError(index, str)` is called for a code unit with no mapping and
   *   returns a replacement code; -1 stops encoding. The default returns
   *   63 ('?').
   * @returns {Uint8Array} View over the bytes actually written.
   */
  function str2gbk(str, opt = {}) {
    if (!table) {
      initGbkTable();
    }

    const nodeAllocUnsafe = typeof Buffer === 'function' && Buffer.allocUnsafe;
    const fallbackAlloc = nodeAllocUnsafe
      ? (size) => nodeAllocUnsafe(size)
      : (size) => new Uint8Array(size);
    const fallbackOnError = () => 63;
    const allocate = opt.onAlloc || fallbackAlloc;
    const handleUnmapped = opt.onError || fallbackOnError;

    // Each UTF-16 code unit yields at most two output bytes.
    const out = allocate(str.length * 2);
    let written = 0;

    for (let pos = 0; pos < str.length; pos++) {
      const unit = str.charCodeAt(pos);

      // ASCII passes through unchanged.
      if (unit < 0x80) {
        out[written++] = unit;
        continue;
      }

      const packed = table[unit];
      if (packed !== 0xFFFF) {
        // Low 8 bits are emitted first, then the high 8 bits.
        out[written++] = packed;
        out[written++] = packed >> 8;
        continue;
      }

      // U+20AC (euro sign) is written as the single byte 0x80.
      if (unit === 8364) {
        out[written++] = 0x80;
        continue;
      }

      const replacement = handleUnmapped(pos, str);
      if (replacement === -1) {
        break;
      }
      if (replacement > 0xFF) {
        out[written++] = replacement;
        out[written++] = replacement >> 8;
      } else {
        out[written++] = replacement;
      }
    }

    return out.subarray(0, written);
  }
  // Post records captured from intercepted responses, in arrival order.
  const aweme_list = [];

  // Status <span> elements showing the loaded-post and image-post counts.
  let numMsg1;
  let numMsg2;

  // Column headers for the user row of the exported CSV.
  const userKey = [
    "昵称", "关注", "粉丝", "获赞",
    "抖音号", "IP属地", "性别",
    "位置", "签名", "作品数", "主页"
  ];

  // Values for the user row, in the same order as userKey.
  const userData = [];

  // Pending timeout id used to debounce createDownloadButton.
  let timer;

  // The "batch image download" button; its label doubles as a progress display.
  let dimg_button;
  /**
   * Creates a semi-transparent button pinned to the right edge of its
   * positioned ancestor. The caller is responsible for attaching it.
   *
   * @param {string} text - Button label.
   * @param {string} top - CSS `top` offset, e.g. "21px".
   * @param {function(Event): void} func - Click handler.
   * @returns {HTMLButtonElement} The new, unattached button.
   */
  function createVideoButton(text, top, func) {
    const overlayButton = document.createElement("button");
    overlayButton.textContent = text;
    Object.assign(overlayButton.style, {
      position: "absolute",
      right: "0px",
      top: top,
      opacity: "0.5",
    });
    overlayButton.addEventListener("click", func);
    return overlayButton;
  }
  /**
   * Adds "copy link", "open link", "download" and (for image posts) "zip
   * images" buttons to every post anchor in the user's post list.
   *
   * Anchors are matched to captured posts by position: the i-th anchor uses
   * aweme_list[i]. An anchor whose post has not been captured yet is skipped
   * without being marked, so a later call can still decorate it. Decorated
   * anchors are marked with data-added so they are processed only once.
   */
  function createDownloadButton() {
    // File name: first 20 chars of the description with characters that are
    // illegal in file names removed; falls back to the post id when that
    // leaves nothing.
    const fileNameFor = (aweme, extension) => {
      const cleaned = (aweme.desc || "").slice(0, 20).replace(/[\/:*?"<>|\s]/g, "");
      return (cleaned || aweme.awemeId) + extension;
    };

    // Saves a blob through a temporary <a download> and releases the object
    // URL afterwards so large downloads do not stay pinned in memory.
    const saveBlob = (blob, fileName) => {
      const objectUrl = URL.createObjectURL(blob);
      const anchor = document.createElement("a");
      anchor.href = objectUrl;
      anchor.download = fileName;
      anchor.click();
      setTimeout(() => URL.revokeObjectURL(objectUrl), 10000);
    };

    const targetNodes = document.querySelectorAll("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] > li a");
    for (let i = 0; i < targetNodes.length; i++) {
      const targetNode = targetNodes[i];
      if (targetNode.dataset.added) {
        continue;
      }
      const aweme = aweme_list[i];
      if (!aweme) {
        // No data for this anchor yet; leave it unmarked for the next pass.
        continue;
      }

      const copyButton = createVideoButton("复制链接", "0px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        navigator.clipboard.writeText(aweme.url).then(() => {
          copyButton.textContent = "复制成功";
        }).catch(() => {
          copyButton.textContent = "复制失败";
        });
        // Restore the label after showing the result for a moment.
        setTimeout(() => {
          copyButton.textContent = '复制链接';
        }, 2000);
      });
      targetNode.appendChild(copyButton);

      const openButton = createVideoButton("打开链接", "21px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        openLink(aweme.url);
      });
      targetNode.appendChild(openButton);

      const downloadButton = createVideoButton("下载", "42px", (event) => {
        event.preventDefault();
        event.stopPropagation();
        const xhr = new XMLHttpRequest();
        xhr.open('GET', aweme.url.replace("http://", "https://"), true);
        xhr.responseType = 'blob';
        xhr.onload = () => {
          // Do not save an error page as a media file.
          if (xhr.status < 200 || xhr.status >= 300) {
            downloadButton.textContent = "下载失败";
            return;
          }
          // For image posts the play address is saved with an .mp3 extension.
          saveBlob(xhr.response, fileNameFor(aweme, aweme.images ? ".mp3" : ".mp4"));
        };
        xhr.onerror = () => {
          downloadButton.textContent = "下载失败";
        };
        xhr.onprogress = (progress) => {
          if (progress.lengthComputable) {
            downloadButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
          }
        };
        xhr.send();
      });
      targetNode.appendChild(downloadButton);

      if (aweme.images) {
        const zipButton = createVideoButton("图片打包下载", "63px", (event) => {
          event.preventDefault();
          event.stopPropagation();
          const zip = new JSZip();
          zipButton.textContent = "下载并打包中...";
          const downloads = aweme.images.map((link, index) => {
            return fetch(link)
              .then((response) => response.arrayBuffer())
              .then((buffer) => {
                zip.file(`image_${index + 1}.jpg`, buffer);
              });
          });
          Promise.all(downloads)
            .then(() => zip.generateAsync({type: "blob"}))
            .then((content) => {
              saveBlob(content, fileNameFor(aweme, ".zip"));
              zipButton.textContent = "图片打包完成";
            })
            .catch((error) => {
              // Surface the failure instead of leaving the button stuck.
              console.error(error);
              zipButton.textContent = "图片打包失败";
            });
        });
        targetNode.appendChild(zipButton);
      }

      targetNode.dataset.added = true;
    }
  }
  /**
   * Patches XMLHttpRequest.prototype.send so that completed responses from
   * the post-list and user-profile endpoints are captured.
   *
   * - Post-list responses are appended to aweme_list, the counters are
   *   refreshed, and createDownloadButton is scheduled (debounced by 500 ms).
   * - Profile responses replace the contents of userData.
   *
   * The hook is attached with addEventListener so any onreadystatechange
   * handler set by the page keeps working. The request URL is read from
   * `_url` when something else on the page has set it, otherwise from the
   * standard `responseURL`.
   */
  function interceptResponse() {
    const originalSend = XMLHttpRequest.prototype.send;
    // Requests that already carry the listener (an XHR object may be reused).
    const hooked = new WeakSet();

    // Converts one raw post item into the flat record used by the UI and CSV.
    const toPostRecord = (item) => ({
      "awemeId": item.aweme_id,
      "desc": (item.desc || "").replace(/[^\x00-\x7F\u4E00-\u9FFF\uFF00-\uFFEF]+/g, " ").trim(),
      "diggCount": item.statistics.digg_count,
      "commentCount": item.statistics.comment_count,
      "collectCount": item.statistics.collect_count,
      "shareCount": item.statistics.share_count,
      "date": new Date(item.create_time * 1000).toLocaleString(),
      "url": item.video.play_addr.url_list[0],
      // Take the last URL of each image without mutating the response data.
      "images": item.images ? item.images.map(row => row.url_list[row.url_list.length - 1]) : null
    });

    const handlePostList = (json) => {
      const items = Array.isArray(json.aweme_list) ? json.aweme_list : [];
      aweme_list.push(...items.map(toPostRecord));
      // The status elements are created on page load and may not exist yet.
      if (numMsg1) {
        numMsg1.innerText = `已加载${aweme_list.length}条`;
      }
      if (numMsg2) {
        numMsg2.innerText = `图集${aweme_list.filter(a=>a.images).length}条`;
      }
      if (timer !== undefined) {
        clearTimeout(timer);
      }
      timer = setTimeout(createDownloadButton, 500);
      if (dimg_button) {
        dimg_button.textContent = "图文批量打包下载";
      }
    };

    const handleUserProfile = (json) => {
      const userInfo = json.user;
      if (!userInfo) {
        return;
      }
      // Replace rather than append so a repeated profile response does not
      // duplicate the user row.
      userData.length = 0;
      userData.push(
        userInfo.nickname, userInfo.following_count, userInfo.mplatform_followers_count,
        userInfo.total_favorited, '\t' + (userInfo.unique_id ? userInfo.unique_id : userInfo.short_id), userInfo.ip_location, userInfo.gender === 2 ? "女" : "男",
        `${userInfo.city || ''}${userInfo.district || ''}`,
        // Double embedded quotes so the signature stays one CSV field.
        '"' + (userInfo.signature ? userInfo.signature : '').replace(/"/g, '""') + '"',
        userInfo.aweme_count, "https://www.douyin.com/user/" + userInfo.sec_uid
      );
    };

    XMLHttpRequest.prototype.send = function () {
      const self = this;
      if (!hooked.has(self)) {
        hooked.add(self);
        self.addEventListener("readystatechange", function () {
          if (self.readyState !== 4) {
            return;
          }
          const url = String(self._url || self.responseURL || "");
          const isPostList = url.indexOf("/aweme/v1/web/aweme/post") > -1;
          const isProfile = !isPostList && url.indexOf("/aweme/v1/web/user/profile/other") > -1;
          if (!isPostList && !isProfile) {
            return;
          }

          let json;
          try {
            if (typeof self.response === "string") {
              json = JSON.parse(self.response);
            } else if (self.responseType === "json") {
              json = self.response;
            }
            if (!json) {
              return;
            }
            if (isPostList) {
              handlePostList(json);
            } else {
              handleUserProfile(json);
            }
          } catch (error) {
            // Never let a malformed payload break the page's own request.
            console.error("douyin-user-data-download: failed to process " + url, error);
          }
        });
      }
      originalSend.apply(this, arguments);
    };
  }
  interceptResponse();
  233.  
  234. // function copyToClipboard(text) {
  235. // return navigator.clipboard.writeText(text);
  236. // }
  /**
   * Opens a URL in a new tab by clicking a temporary <a target="_blank">
   * that is attached to the document only for the duration of the click.
   *
   * @param {string} url - Address to open.
   */
  function openLink(url) {
    const anchor = Object.assign(document.createElement('a'), {
      href: url,
      target: "_blank",
    });
    const host = document.body;
    host.appendChild(anchor);
    anchor.click();
    host.removeChild(anchor);
  }
  /**
   * Saves text (or raw bytes) as a downloaded file.
   *
   * The base name is limited to 20 characters and stripped of characters
   * that are illegal in file names. The extension (from the last dot on) is
   * kept intact, so a long base name can no longer cut off ".csv".
   *
   * @param {string|Uint8Array} txt - File content, passed to Blob as-is.
   * @param {string} filename - Desired file name including extension.
   */
  function txt2file(txt, filename) {
    const stripIllegal = (part) => part.replace(/[\/:*?"<>|\s]/g, "");
    const name = String(filename);
    const dotIndex = name.lastIndexOf(".");
    // A leading dot (index 0) is treated as part of the base name.
    const baseName = dotIndex > 0 ? name.slice(0, dotIndex) : name;
    const extension = dotIndex > 0 ? name.slice(dotIndex) : "";

    const blob = new Blob([txt], {type: 'text/plain'});
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = stripIllegal(baseName.slice(0, 20)) + stripIllegal(extension);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  }
  /**
   * Exports the captured user row and all captured posts as a CSV download.
   *
   * The description and date columns are quoted, with embedded quotes
   * doubled, so descriptions containing `"` and locale date strings
   * containing commas do not shift columns.
   *
   * @param {string} encoding - "gbk" to encode the file with str2gbk; any
   *   other value leaves the text as a string for the Blob to encode.
   */
  function downloadData(encoding) {
    const csvQuote = (value) => '"' + String(value).replace(/"/g, '""') + '"';

    const postLines = aweme_list.map((item) => [
      csvQuote(item.desc), item.diggCount, item.commentCount,
      item.collectCount, item.shareCount, csvQuote(item.date), item.url
    ].join(",") + "\n");

    let text = userKey.join(",") + "\n" + userData.join(",") + "\n\n";
    text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
    text += postLines.join("");

    if (encoding === "gbk") {
      text = str2gbk(text);
    }
    // userData is empty until a profile response has been captured.
    txt2file(text, (userData[0] || "douyin") + ".csv");
  }
  /**
   * Creates a semi-transparent button fixed near the right edge of the
   * viewport and appends it to the document body.
   *
   * @param {string} title - Button label.
   * @param {string} [top] - CSS `top` offset; "60px" when omitted.
   * @returns {HTMLButtonElement} The attached button.
   */
  function createButton(title, top) {
    const offsetTop = top === undefined ? "60px" : top;
    const floatingButton = document.createElement('button');
    floatingButton.textContent = title;
    Object.assign(floatingButton.style, {
      position: 'fixed',
      right: '5px',
      top: offsetTop,
      zIndex: '90000',
      opacity: "0.5",
    });
    document.body.appendChild(floatingButton);
    return floatingButton;
  }
  280.  
  /**
   * Adds the "download loaded data" button together with a "gbk" checkbox
   * and its label. Clicking the button exports the CSV, GBK-encoded when the
   * checkbox is ticked.
   */
  function createDownloadAllData() {
    const gbkLabel = document.createElement('label');
    gbkLabel.setAttribute('for', 'gbk');
    gbkLabel.innerText = 'gbk';
    Object.assign(gbkLabel.style, {
      position: 'fixed',
      right: '86px',
      top: '81px',
      color: 'white',
      zIndex: '90000',
      opacity: "0.8",
    });

    const gbkCheckbox = document.createElement('input');
    gbkCheckbox.setAttribute('type', 'checkbox');
    gbkCheckbox.setAttribute('id', 'gbk');
    Object.assign(gbkCheckbox.style, {
      position: 'fixed',
      right: '106px',
      top: '84px',
      zIndex: '90000',
    });

    document.body.appendChild(gbkLabel);
    document.body.appendChild(gbkCheckbox);

    const exportButton = createButton("下载已加载数据", "81px");
    exportButton.addEventListener('click', () => {
      const encoding = gbkCheckbox.checked ? "gbk" : "";
      downloadData(encoding);
    });
  }
  /**
   * Adds the auto-scroll toggle button and the two status labels
   * (numMsg1 and numMsg2).
   *
   * While auto-scroll is on, the page is scrolled to the bottom once per
   * second until the scroll position reaches the end of the document.
   */
  function createScrollPageToBottom() {
    let scrollInterval;

    // Builds one of the white, fixed-position status labels.
    const makeStatusLabel = (text, top) => {
      const label = document.createElement('span');
      label.innerText = text;
      Object.assign(label.style, {
        color: 'white',
        position: 'fixed',
        right: '98px',
        top: top,
        zIndex: '90000',
        opacity: "0.5",
      });
      return label;
    };

    const stopScrolling = () => {
      clearInterval(scrollInterval);
      scrollInterval = null;
    };

    const button = createButton('开启自动下拉到底', '60px');

    // One tick: scroll further if not at the bottom, otherwise stop.
    const scrollLoop = () => {
      const scrollPosition = scrollY || pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0;
      const height = document.body.scrollHeight - innerHeight;
      if (scrollPosition < height) {
        scrollTo(0, document.body.scrollHeight);
        return;
      }
      stopScrolling();
      button.textContent = "已下拉到底!";
    };

    button.addEventListener('click', () => {
      if (scrollInterval) {
        stopScrolling();
        button.textContent = "开启自动下拉到底";
        return;
      }
      scrollInterval = setInterval(scrollLoop, 1000);
      button.textContent = "停止自动下拉";
    });

    numMsg1 = makeStatusLabel('已加载', '60px');
    document.body.appendChild(numMsg1);

    numMsg2 = makeStatusLabel('', '102px');
    document.body.appendChild(numMsg2);
  }
  /**
   * Downloads the images of every captured image post and saves them as one
   * zip, with one folder per post.
   *
   * Progress and the final outcome are shown on dimg_button. Posts are
   * processed one after another; the images within a post are fetched in
   * parallel. A failed fetch or zip step is reported on the button instead
   * of being lost as an unhandled rejection.
   *
   * @returns {Promise<void>} Resolves once the download has been triggered
   *   or the failure has been reported.
   */
  async function downloadImg() {
    const stripIllegal = (name) => name.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "");

    const imagePosts = aweme_list.filter(a => a.images);
    if (imagePosts.length === 0) {
      alert("当前页面未发现图文链接");
      return;
    }

    const zip = new JSZip();
    try {
      for (const [index, aweme] of imagePosts.entries()) {
        dimg_button.textContent = `${index + 1}.${aweme.desc}...`;
        // Fall back to the post id when the description is empty or
        // sanitises to nothing.
        const folderName = stripIllegal(aweme.desc || "") || aweme.awemeId;
        const folder = zip.folder((index + 1) + "." + folderName);
        await Promise.all(aweme.images.map((link, imageIndex) => {
          return fetch(link)
            .then((res) => res.arrayBuffer())
            .then((buffer) => {
              folder.file(`image_${imageIndex + 1}.jpg`, buffer);
            });
        }));
      }

      dimg_button.textContent = "图片打包中...";
      const content = await zip.generateAsync({type: "blob"});
      const objectUrl = URL.createObjectURL(content);
      const link = document.createElement("a");
      link.href = objectUrl;
      // userData is empty until a profile response has been captured.
      link.download = (stripIllegal(String(userData[0] || "")) || "douyin") + ".zip";
      link.click();
      // Release the blob after the browser has had time to start the download.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 10000);
      dimg_button.textContent = "图片打包完成";
    } catch (error) {
      console.error(error);
      dimg_button.textContent = "图片打包失败";
    }
  }
  /**
   * Creates the floating controls: the CSV export button with its gbk
   * checkbox, the auto-scroll button with status labels, and the batch image
   * download button.
   */
  function initDownloadUi() {
    createDownloadAllData();
    createScrollPageToBottom();
    dimg_button = createButton("图文批量打包下载", "102px");
    dimg_button.addEventListener('click', downloadImg);
  }

  // Use a listener instead of assigning window.onload so the page's own
  // handler is neither replaced nor able to replace this one, and run
  // immediately if the load event has already fired.
  if (document.readyState === "complete") {
    initDownloadUi();
  } else {
    window.addEventListener("load", initDownloadUi, { once: true });
  }
  382. })();