douyin-user-data-download

下载抖音用户主页数据!

当前为 2024-06-04 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name douyin-user-data-download
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.3.3
  5. // @description 下载抖音用户主页数据!
  6. // @author xxmdmst
  7. // @match https://www.douyin.com/*
  8. // @icon https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
  9. // @grant none
  10. // @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
  11. // @license MIT
  12. // ==/UserScript==
  13.  
  14. (function() {
  // Lazily built lookup table indexed by UTF-16 code unit. Each entry holds a
  // packed GBK byte pair (second byte in the high 8 bits, first byte in the low
  // 8 bits) or 0xFFFF when no two-byte mapping was recorded for that code unit.
  let table;

  /**
   * Builds the code-unit -> GBK lookup and stores it in `table`.
   *
   * Every two-byte sequence in the ranges below is enumerated, the whole list
   * is decoded in one pass with `TextDecoder('gbk')`, and the character decoded
   * at each position is mapped back to the byte pair at the same position.
   */
  function initGbkTable() {
    // Each row is [firstByteMin, firstByteMax, secondByteMin, secondByteMax].
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    const byteRanges = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];
    // 23940 is the number of byte pairs the ranges above enumerate.
    const pairs = new Uint16Array(23940);
    let filled = 0;

    byteRanges.forEach(([firstMin, firstMax, secondMin, secondMax]) => {
      for (let second = secondMin; second <= secondMax; second += 1) {
        // 0x7F is never used as a second byte.
        if (second === 0x7F) {
          continue;
        }
        for (let first = firstMin; first <= firstMax; first += 1) {
          pairs[filled] = (second << 8) | first;
          filled += 1;
        }
      }
    });

    table = new Uint16Array(65536).fill(0xFFFF);

    // NOTE(review): decode() reads the typed array's underlying bytes, so the
    // first byte only comes first on little-endian platforms - confirm this is
    // acceptable for every target browser.
    const decoded = new TextDecoder('gbk').decode(pairs);
    for (let pos = 0; pos < decoded.length; pos += 1) {
      table[decoded.charCodeAt(pos)] = pairs[pos];
    }
  }
  48.  
  /**
   * Encodes a string as GBK bytes using the lazily initialised `table`.
   *
   * @param {string} str - Text to encode.
   * @param {Object} [opt] - Optional hooks.
   * @param {function(number): Uint8Array} [opt.onAlloc] - Allocates the output
   *   buffer; defaults to `Buffer.allocUnsafe` when `Buffer` exists, otherwise
   *   a `Uint8Array`.
   * @param {function(number, string): number} [opt.onError] - Called with the
   *   index and the string for a code unit that has no mapping. Returning -1
   *   stops encoding; a value above 0xFF is written as two bytes (low byte
   *   first); anything else is written as one byte. Defaults to 63 ('?').
   * @returns {Uint8Array} View over the bytes actually written.
   */
  function str2gbk(str, opt = {}) {
    if (!table) {
      initGbkTable();
    }

    const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
    const fallbackAlloc = nodeAlloc
      ? (size) => nodeAlloc(size)
      : (size) => new Uint8Array(size);
    const allocate = opt.onAlloc || fallbackAlloc;
    const handleUnmapped = opt.onError || (() => 63);

    // Two bytes per code unit is the worst case.
    const out = allocate(str.length * 2);
    let written = 0;
    const emit = (value) => {
      out[written] = value;
      written += 1;
    };
    // Packed pairs keep the first byte in the low 8 bits, so it is emitted first.
    const emitPair = (packed) => {
      emit(packed);
      emit(packed >> 8);
    };

    for (let pos = 0; pos < str.length; pos += 1) {
      const unit = str.charCodeAt(pos);
      if (unit < 0x80) {
        emit(unit);
        continue;
      }

      const packed = table[unit];
      if (packed !== 0xFFFF) {
        emitPair(packed);
        continue;
      }

      // U+20AC (euro sign) is written as the single byte 0x80.
      if (unit === 8364) {
        emit(0x80);
        continue;
      }

      const replacement = handleUnmapped(pos, str);
      if (replacement === -1) {
        break;
      }
      if (replacement > 0xFF) {
        emitPair(replacement);
      } else {
        emit(replacement);
      }
    }

    return out.subarray(0, written);
  }
  // Posts collected from intercepted post-list responses, in arrival order.
  let aweme_list = [];
  // Status labels; assigned once createScrollPageToBottom() has built them.
  let numMsg1;
  let numMsg2;
  // Column headers for the profile row of the exported CSV.
  let userKey = [
    "昵称",
    "关注",
    "粉丝",
    "获赞",
    "抖音号",
    "IP属地",
    "性别",
    "位置",
    "签名",
    "作品数",
    "主页",
  ];
  // Profile values, in the same order as userKey.
  let userData = [];
  // Handle of the pending createDownloadButton() timeout.
  let timer;
  // The batch image-pack button; assigned during page start-up.
  let dimg_button;
  /**
   * Creates a half-transparent button pinned to the right edge of its
   * positioned parent.
   *
   * @param {string} text - Button caption.
   * @param {string} top - CSS `top` offset, e.g. "21px".
   * @param {function(Event): void} func - Click handler.
   * @returns {HTMLButtonElement} The button; the caller attaches it.
   */
  function createVideoButton(text, top, func) {
    const overlayButton = document.createElement("button");
    overlayButton.textContent = text;
    Object.assign(overlayButton.style, {
      position: "absolute",
      right: "0px",
      top,
      opacity: "0.5",
    });
    overlayButton.addEventListener("click", func);
    return overlayButton;
  }
  /**
   * Opens `url` in a new tab by clicking a temporary anchor that is attached to
   * the document only for the duration of the click.
   *
   * @param {string} url - Address to open.
   */
  function openLink(url) {
    const anchor = Object.assign(document.createElement('a'), {
      href: url,
      target: "_blank",
    });
    const { body } = document;
    body.appendChild(anchor);
    anchor.click();
    body.removeChild(anchor);
  }
  /**
   * Adds the per-post action buttons ("打开视频源", "下载视频" and, for image
   * posts, "图片打包下载") to every post anchor in the profile grid that has not
   * been decorated yet.
   *
   * Anchors are paired with `aweme_list` entries by position. An anchor whose
   * entry has not been captured yet is left unmarked so that a later call can
   * decorate it, instead of throwing half-way through and duplicating buttons.
   */
  function createDownloadButton() {
    // File name stem: sanitized description, falling back to the post id.
    const baseName = (aweme) => {
      const cleaned = (aweme.desc || "").slice(0, 20).replace(/[\/:*?"<>|\s]/g, "");
      return cleaned || aweme.awemeId;
    };

    // Starts a browser download for `blob` and releases its object URL.
    const saveBlob = (blob, filename) => {
      const objectUrl = URL.createObjectURL(blob);
      const anchor = document.createElement("a");
      anchor.href = objectUrl;
      anchor.download = filename;
      anchor.click();
      URL.revokeObjectURL(objectUrl);
    };

    const swallowClick = (event) => {
      event.preventDefault();
      event.stopPropagation();
    };

    const targetNodes = document.querySelectorAll("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] > li a");
    for (let i = 0; i < targetNodes.length; i++) {
      const targetNode = targetNodes[i];
      if (targetNode.dataset.added) {
        continue;
      }
      const aweme = aweme_list[i];
      if (!aweme) {
        // No captured data for this grid position yet; retry on the next pass.
        continue;
      }

      const openButton = createVideoButton("打开视频源", "0px", (event) => {
        swallowClick(event);
        openLink(aweme.url);
      });
      targetNode.appendChild(openButton);

      const downloadButton = createVideoButton("下载视频", "21px", (event) => {
        swallowClick(event);
        const xhr = new XMLHttpRequest();
        xhr.open('GET', aweme.url.replace("http://", "https://"), true);
        xhr.responseType = 'blob';
        xhr.onload = () => {
          // Do not save an error page as if it were the media file.
          if (xhr.status < 200 || xhr.status >= 300) {
            downloadButton.textContent = "下载失败";
            return;
          }
          // For image posts the play address is saved with an .mp3 extension.
          saveBlob(xhr.response, baseName(aweme) + (aweme.images ? ".mp3" : ".mp4"));
        };
        xhr.onerror = () => {
          downloadButton.textContent = "下载失败";
        };
        xhr.onprogress = (progress) => {
          if (progress.lengthComputable) {
            downloadButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
          }
        };
        xhr.send();
      });
      targetNode.appendChild(downloadButton);

      if (aweme.images) {
        const zipButton = createVideoButton("图片打包下载", "42px", (event) => {
          swallowClick(event);
          const zip = new JSZip();
          zipButton.textContent = "下载并打包中...";
          const downloads = aweme.images.map((link, index) => fetch(link)
            .then((response) => {
              if (!response.ok) {
                throw new Error(`HTTP ${response.status} for ${link}`);
              }
              return response.arrayBuffer();
            })
            .then((buffer) => {
              zip.file(`image_${index + 1}.jpg`, buffer);
            }));
          Promise.all(downloads)
            .then(() => zip.generateAsync({type: "blob"}))
            .then((content) => {
              saveBlob(content, baseName(aweme) + ".zip");
              zipButton.textContent = "图片打包完成";
            })
            .catch((error) => {
              console.error("douyin-user-data-download: image pack failed", error);
              zipButton.textContent = "图片打包失败";
            });
        });
        targetNode.appendChild(zipButton);
      }

      targetNode.dataset.added = true;
    }
  }
  /**
   * Patches `XMLHttpRequest.prototype.send` so that finished responses from the
   * post-list and profile endpoints are captured.
   *
   * - Post-list responses are appended to `aweme_list`, the status labels are
   *   refreshed when they exist, and `createDownloadButton` is scheduled.
   * - Profile responses replace the contents of `userData`, so the row always
   *   lines up with `userKey`.
   *
   * The hook is registered with `addEventListener`, leaving any
   * `onreadystatechange` handler set by the page untouched.
   */
  function interceptResponse() {
    const POST_LIST_PATH = "/aweme/v1/web/aweme/post";
    const PROFILE_PATH = "/aweme/v1/web/user/profile/other";
    const originalSend = XMLHttpRequest.prototype.send;
    // Requests that already carry the listener (an XHR object can be re-sent).
    const hooked = new WeakSet();

    // Returns the parsed body, or null when the response is empty.
    const readJson = (xhr) => {
      const body = xhr.response;
      if (body === "" || body == null) {
        return null;
      }
      return typeof body === "string" ? JSON.parse(body) : body;
    };

    // Reduces one API item to the fields the script exports.
    const toPost = (item) => ({
      "awemeId": item.aweme_id,
      "desc": (item.desc || "").replace(/[^\x00-\x7F\u4E00-\u9FFF\uFF00-\uFFEF]+/g, " ").trim(),
      "diggCount": item.statistics.digg_count,
      "commentCount": item.statistics.comment_count,
      "collectCount": item.statistics.collect_count,
      "shareCount": item.statistics.share_count,
      "date": new Date(item.create_time * 1000).toLocaleString(),
      "url": item.video.play_addr.url_list[0],
      // Last entry of each image's url_list, read without mutating the response.
      "images": item.images ? item.images.map((row) => row.url_list[row.url_list.length - 1]) : null,
    });

    // Builds the profile row in the same order as userKey.
    const toUserRow = (userInfo) => [
      userInfo.nickname,
      userInfo.following_count,
      userInfo.mplatform_followers_count,
      userInfo.total_favorited,
      '\t' + (userInfo.unique_id ? userInfo.unique_id : userInfo.short_id),
      userInfo.ip_location,
      userInfo.gender === 2 ? "女" : "男",
      `${userInfo.city || ""}${userInfo.district || ""}`,
      '"' + (userInfo.signature ? userInfo.signature : '').replace(/"/g, '""') + '"',
      userInfo.aweme_count,
      "https://www.douyin.com/user/" + userInfo.sec_uid,
    ];

    const handleFinished = (xhr) => {
      // `_url` is only present when something else has set it; responseURL is
      // the standard fallback.
      const requestUrl = String(xhr._url || xhr.responseURL || "");
      const isPostList = requestUrl.indexOf(POST_LIST_PATH) > -1;
      const isProfile = !isPostList && requestUrl.indexOf(PROFILE_PATH) > -1;
      if (!isPostList && !isProfile) {
        return;
      }
      try {
        const json = readJson(xhr);
        if (!json) {
          return;
        }
        if (isPostList) {
          if (!Array.isArray(json.aweme_list)) {
            return;
          }
          aweme_list.push(...json.aweme_list.map(toPost));
          // The labels and the batch button only exist after page start-up.
          if (numMsg1) {
            numMsg1.innerText = `已加载${aweme_list.length}条`;
          }
          if (numMsg2) {
            numMsg2.innerText = `图集${aweme_list.filter(a=>a.images).length}条`;
          }
          if (timer !== undefined) {
            clearTimeout(timer);
          }
          timer = setTimeout(createDownloadButton, 500);
          if (dimg_button) {
            dimg_button.textContent = "图文批量打包下载";
          }
        } else if (json.user) {
          userData.length = 0;
          userData.push(...toUserRow(json.user));
        }
      } catch (error) {
        console.error("douyin-user-data-download: failed to process " + requestUrl, error);
      }
    };

    XMLHttpRequest.prototype.send = function (...args) {
      const xhr = this;
      if (!hooked.has(xhr)) {
        hooked.add(xhr);
        xhr.addEventListener("readystatechange", () => {
          if (xhr.readyState === 4) {
            handleFinished(xhr);
          }
        });
      }
      return originalSend.apply(xhr, args);
    };
  }
  // Installed immediately so requests issued before page start-up are captured.
  // The guard only matters where XMLHttpRequest does not exist.
  if (typeof XMLHttpRequest !== "undefined") {
    interceptResponse();
  }
  239.  
  /**
   * Saves `txt` as a file download.
   *
   * The name is sanitized for the file system. Only the part before the last
   * dot is limited to 20 characters, so the extension survives truncation.
   *
   * @param {string|Uint8Array} txt - File content (text, or encoded bytes).
   * @param {string} filename - Desired name, including the extension.
   */
  function txt2file(txt, filename) {
    const illegalChars = /[\/:*?"<>|\s]/g;
    const dotIndex = filename.lastIndexOf(".");
    const hasExtension = dotIndex > 0;
    const stem = hasExtension ? filename.slice(0, dotIndex) : filename;
    const extension = hasExtension ? filename.slice(dotIndex).replace(illegalChars, "") : "";

    const blob = new Blob([txt], {type: 'text/plain'});
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = stem.slice(0, 20).replace(illegalChars, "") + extension;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  }
  /**
   * Exports the captured profile row and post list as a CSV download.
   *
   * Post descriptions are always quoted with embedded quotes doubled; dates and
   * links are quoted only when they contain a comma, quote or line break, so a
   * locale-formatted date cannot shift the columns.
   *
   * @param {string} encoding - "gbk" to encode the text with str2gbk; any other
   *   value leaves it as a string.
   */
  function downloadData(encoding) {
    const quote = (value) => '"' + String(value == null ? "" : value).replace(/"/g, '""') + '"';
    const csvField = (value) => {
      const text = String(value == null ? "" : value);
      return /[",\r\n]/.test(text) ? quote(text) : text;
    };

    // userData entries are already formatted for CSV when they are captured.
    let text = userKey.join(",") + "\n" + userData.join(",") + "\n\n";
    text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
    aweme_list.forEach((item) => {
      text += [
        quote(item.desc),
        item.diggCount,
        item.commentCount,
        item.collectCount,
        item.shareCount,
        csvField(item.date),
        csvField(item.url),
      ].join(",") + "\n";
    });
    if (encoding === "gbk") {
      text = str2gbk(text);
    }
    // Fall back to a fixed name when no profile response has been captured.
    const owner = userData.length > 0 && userData[0] ? userData[0] : "douyin";
    txt2file(text, owner + ".csv");
  }
  /**
   * Creates a half-transparent button fixed to the right edge of the viewport
   * and appends it to the document body.
   *
   * @param {string} title - Button caption.
   * @param {string} [top] - CSS `top` offset; "60px" when omitted.
   * @returns {HTMLButtonElement} The attached button.
   */
  function createButton(title, top) {
    const topOffset = top === undefined ? "60px" : top;
    const floatingButton = Object.assign(document.createElement('button'), {
      textContent: title,
    });
    Object.assign(floatingButton.style, {
      position: 'fixed',
      right: '5px',
      top: topOffset,
      zIndex: '90000',
      opacity: "0.5",
    });
    document.body.appendChild(floatingButton);
    return floatingButton;
  }
  275.  
  /**
   * Adds the "下载已加载数据" button together with a "gbk" checkbox and its
   * label. Clicking the button exports with encoding "gbk" when the box is
   * ticked and "" otherwise.
   */
  function createDownloadAllData() {
    const CHECKBOX_ID = 'gbk';

    const gbkLabel = document.createElement('label');
    gbkLabel.setAttribute('for', CHECKBOX_ID);
    gbkLabel.innerText = 'gbk';
    Object.assign(gbkLabel.style, {
      position: 'fixed',
      right: '86px',
      top: '81px',
      color: 'white',
      zIndex: '90000',
      opacity: "0.8",
    });

    const gbkToggle = document.createElement('input');
    gbkToggle.setAttribute('type', 'checkbox');
    gbkToggle.setAttribute('id', CHECKBOX_ID);
    Object.assign(gbkToggle.style, {
      position: 'fixed',
      right: '106px',
      top: '84px',
      zIndex: '90000',
    });

    [gbkLabel, gbkToggle].forEach((node) => {
      document.body.appendChild(node);
    });

    const exportButton = createButton("下载已加载数据", "81px");
    exportButton.addEventListener('click', () => {
      const encoding = gbkToggle.checked ? "gbk" : "";
      downloadData(encoding);
    });
  }
  /**
   * Adds the auto-scroll toggle button and the two status labels (`numMsg1`,
   * `numMsg2`).
   *
   * While enabled, the page is scrolled to the bottom every 1200 ms until the
   * number of captured posts reaches `userData[9]` and the element after the
   * post list contains text.
   */
  function createScrollPageToBottom() {
    const END_MARKER_SELECTOR = "div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] + div div";
    let scrollInterval;

    // Creates one fixed-position status label and attaches it to the page.
    function createStatusLabel(text, top) {
      const label = document.createElement('span');
      label.innerText = text;
      Object.assign(label.style, {
        color: 'white',
        position: 'fixed',
        right: '98px',
        top,
        zIndex: '90000',
        opacity: "0.5",
      });
      document.body.appendChild(label);
      return label;
    }

    const button = createButton('开启自动下拉', '60px');

    // Stops the timer and shows `caption` on the toggle button.
    function stopScrolling(caption) {
      clearInterval(scrollInterval);
      scrollInterval = null;
      button.textContent = caption;
    }

    function scrollLoop() {
      // The marker element may not exist yet; treat that as "no end text"
      // instead of throwing on every tick.
      const endMarker = document.querySelector(END_MARKER_SELECTOR);
      const endText = endMarker ? endMarker.innerText : "";
      if (aweme_list.length < userData[9] || !endText) {
        scrollTo(0, document.body.scrollHeight);
      } else {
        stopScrolling("已加载全部!");
      }
    }

    button.addEventListener('click', () => {
      if (!scrollInterval) {
        scrollInterval = setInterval(scrollLoop, 1200);
        button.textContent = "停止自动下拉";
      } else {
        stopScrolling("开启自动下拉");
      }
    });

    numMsg1 = createStatusLabel('已加载', '60px');
    numMsg2 = createStatusLabel('', '102px');
  }
  /**
   * Downloads the images of every captured image post into one zip (one folder
   * per post) and offers it as a download named after the profile.
   *
   * Shows an alert when no image post has been captured. Progress and the
   * final state (success or failure) are written to `dimg_button`.
   *
   * @returns {Promise<void>} Settles after the zip was offered or the failure
   *   was reported; it does not reject on download errors.
   */
  async function downloadImg() {
    const illegalChars = /[\/:*?"<>|\s]/g;
    const imagePosts = aweme_list.filter((aweme) => aweme.images);
    if (imagePosts.length === 0) {
      alert("当前页面未发现图文链接");
      return;
    }

    const zip = new JSZip();
    try {
      for (const [index, aweme] of imagePosts.entries()) {
        const desc = aweme.desc || "";
        dimg_button.textContent = `${index + 1}.${desc.slice(0,20)}...`;
        // Trailing dots and digits are stripped from the folder name; the post
        // id is used when nothing usable is left.
        const cleaned = desc.replace(illegalChars, "").slice(0, 20).replace(/[.\d]+$/g, "");
        const folder = zip.folder((index + 1) + "." + (cleaned || aweme.awemeId));
        await Promise.all(aweme.images.map(async (link, imageIndex) => {
          const res = await fetch(link);
          if (!res.ok) {
            throw new Error(`HTTP ${res.status} for ${link}`);
          }
          folder.file(`image_${imageIndex + 1}.jpg`, await res.arrayBuffer());
        }));
      }

      dimg_button.textContent = "图片打包中...";
      const content = await zip.generateAsync({type: "blob"});
      // Fall back to a fixed name when no profile response has been captured.
      const owner = typeof userData[0] === "string" && userData[0] ? userData[0] : "douyin";
      const objectUrl = URL.createObjectURL(content);
      const link = document.createElement("a");
      link.href = objectUrl;
      link.download = owner.slice(0, 20).replace(illegalChars, "") + ".zip";
      link.click();
      URL.revokeObjectURL(objectUrl);
      dimg_button.textContent = "图片打包完成";
    } catch (error) {
      console.error("douyin-user-data-download: batch image pack failed", error);
      dimg_button.textContent = "图片打包失败";
    }
  }
  /**
   * Adds a "下载" entry with an "打开视频源" item next to the clarity setting of
   * the currently playing video player.
   *
   * The page is re-checked every 500 ms. Entries left over from other videos
   * are removed once the current video already has its own entry.
   */
  function douyinVideoDownloader() {
    const downloadOption = [{name: '打开视频源', id: 'toLink'}];

    // Shows a notice. `commonFunctionObject` is not defined by this script, so
    // it is used only when something else provides it.
    function notify(message) {
      if (typeof commonFunctionObject !== 'undefined'
        && commonFunctionObject
        && typeof commonFunctionObject.webToast === 'function') {
        commonFunctionObject.webToast({"message": message, "background": "#141414"});
      } else {
        alert(message);
      }
    }

    // Picks the autoplaying <video>: the first one while a modal is open
    // ("modal_id" in the URL), otherwise the last one.
    function findPlayingVideo() {
      const autoplaying = Array.from(document.querySelectorAll('video'))
        .filter((video) => video.getAttribute('autoplay') !== null);
      return location.href.indexOf('modal_id') !== -1
        ? autoplaying[0]
        : autoplaying[autoplaying.length - 1];
    }

    // Derives an id for the video from its URL, falling back to the player id
    // attribute.
    function resolveVideoId(url, video) {
      const cdnMatch = url.match(/^(https:)?\/\/.+\.com\/([a-zA-Z0-9]+)\/[a-zA-Z0-9]+\/video/);
      if (cdnMatch && cdnMatch[2]) {
        return cdnMatch[2];
      }
      const blobMatch = url.match(/blob:https:\/\/www.douyin.com\/(.*)/);
      if (blobMatch && blobMatch[1]) {
        return blobMatch[1];
      }
      return video.getAttribute('data-xgplayerid');
    }

    function run() {
      const videoContainer = findPlayingVideo();
      if (!videoContainer) return;

      // Playback address: the first child's src when present, else the
      // <video> element's own src.
      const firstChild = videoContainer.children.length > 0 ? videoContainer.children[0] : null;
      const url = firstChild && firstChild.src ? firstChild.src : (videoContainer.src || '');
      const videoId = resolveVideoId(url, videoContainer);

      const playerRoot = videoContainer.parentNode && videoContainer.parentNode.parentNode;
      const playContainer = playerRoot ? playerRoot.querySelector('.xg-right-grid') : null;
      if (!playContainer) return;

      const entryId = 'scriptVideoDownload' + videoId;

      // Browsing videos from a profile can leave several entries behind; keep
      // only the one that belongs to the current video.
      if (playContainer.querySelector('#' + entryId)) {
        playContainer.querySelectorAll('.xgplayer-playclarity-setting').forEach((node) => {
          const btn = node.querySelector('.btn');
          if (node.id !== entryId && btn && btn.innerText === '下载') {
            node.parentNode.removeChild(node);
          }
        });
        return;
      }

      const playClarityDom = playContainer.querySelector('.xgplayer-playclarity-setting');
      if (!playClarityDom) return;
      if (!playClarityDom.querySelector('.btn')) return;

      const onSearchPage = () => location.href.indexOf('search') !== -1;

      const downloadDom = playClarityDom.cloneNode(true);
      downloadDom.setAttribute('id', entryId);
      downloadDom.style = onSearchPage()
        ? 'margin-top:0px;padding-top:100px;'
        : 'margin-top:-68px;padding-top:100px;padding-left:20px;padding-right:20px;';

      const downloadText = downloadDom.querySelector('.btn');
      downloadText.innerText = '下载';
      downloadText.style = 'font-size:14px;font-weight:600;';
      downloadText.setAttribute('id', 'zhmDouyinDownload' + videoId);

      const firstIcon = playContainer.querySelector('xg-icon:nth-of-type(1)');
      const detail = firstIcon ? firstIcon.children[0] : null;
      let linkUrl = (detail && detail.getAttribute('href')) || location.href;
      if (linkUrl.indexOf('www.douyin.com') === -1) {
        linkUrl = '//www.douyin.com' + linkUrl;
      }
      downloadText.setAttribute('data-url', linkUrl);
      downloadText.removeAttribute('target');
      downloadText.setAttribute('href', 'javascript:void(0);');

      const virtualDom = downloadDom.querySelector('.virtual');
      let toLinkDom = null;
      if (virtualDom) {
        downloadDom.onmouseover = function () {
          virtualDom.style = onSearchPage()
            ? 'display:block !important;margin-bottom:37px;'
            : 'display:block !important';
        };
        downloadDom.onmouseout = function () {
          virtualDom.style = 'display:none !important';
        };

        // Replace the cloned menu content with this script's items, built as
        // elements rather than as an HTML string.
        virtualDom.textContent = '';
        downloadOption.forEach((item) => {
          if (item.id !== 'toLink') return;
          const entry = document.createElement('div');
          entry.style = 'text-align:center;';
          entry.className = 'item ' + item.id;
          entry.id = item.id + videoId;
          entry.textContent = item.name;
          virtualDom.appendChild(entry);
          toLinkDom = entry;
        });
      }
      playClarityDom.after(downloadDom);

      // Open the source directly; blob: addresses cannot be opened this way.
      if (toLinkDom) {
        toLinkDom.addEventListener('click', function () {
          if (url.match(/^blob/)) {
            notify("加密视频地址,无法直接打开");
          } else {
            window.open(url);
          }
        });
      }
    }

    setInterval(run, 500);
  }
  /**
   * Starts the player download entry on every matched page and, on user
   * profile pages, adds the export, auto-scroll and batch image-pack controls.
   */
  function initPageControls() {
    douyinVideoDownloader();
    if (location.href.startsWith("https://www.douyin.com/user/")) {
      createDownloadAllData();
      createScrollPageToBottom();
      dimg_button = createButton("图文批量打包下载", "102px");
      dimg_button.addEventListener('click', downloadImg);
    }
  }

  // addEventListener leaves any `window.onload` handler of the page in place,
  // and the readyState check covers injection after `load` has already fired.
  if (document.readyState === 'complete') {
    initPageControls();
  } else {
    window.addEventListener('load', initPageControls);
  }
  494. })();