Pinterest.com Backup Original Files

Download all original images from your Pinterest.com profile. The script creates an entry in the Greasemonkey menu: go to one of your boards, scroll down to the last image, and click the option in the menu.

This is the version submitted on 2022-07-17. See the latest version.

  1. // ==UserScript==
  2. // @name Pinterest.com Backup Original Files
  3. // @description Download all original images from your Pinterest.com profile. Creates an entry in the Greasemonkey menu, just go to one of your boards, scroll down to the last image and click the option in the menu.
  4. // @namespace cuzi
  5. // @license MIT
  6. // @version 19.0.1
  7. // @include https://*.pinterest.*
  8. // @grant GM_xmlhttpRequest
  9. // @grant GM_registerMenuCommand
  10. // @grant GM.xmlHttpRequest
  11. // @grant GM.registerMenuCommand
  12. // @require https://greasemonkey.github.io/gm4-polyfill/gm4-polyfill.js
  13. // @require https://cdn.jsdelivr.net/npm/jszip@3.9.1/dist/jszip.min.js
  14. // @require https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js
  15. // @connect pinterest.com
  16. // @connect pinterest.de
  17. // @connect pinimg.com
  18. // @icon https://s.pinimg.com/webapp/logo_trans_144x144-5e37c0c6.png
  19. // ==/UserScript==
  20.  
/* globals JSZip, saveAs, GM, MouseEvent */

// Time to wait between every scroll to the bottom (in milliseconds)
const scrollPause = 1000

// Guard and handle of the auto-scroll run: null while idle, set to true by
// prepareForDownloading() as soon as a run is requested, and later replaced by
// the id returned from window.setInterval(scrollDown, ...)
let scrollIV = null
// scrollTop of the page as recorded at the end of the previous scrollDown() call
let lastScrollY = null
// Number of consecutive scrollDown() calls in which the page did not move
let noChangesFor = 0
  29.  
  30. function prepareForDownloading () {
  31. if (scrollIV !== null) {
  32. return
  33. }
  34.  
  35. document.scrollingElement.scrollTo(0, 0)
  36. collectActive = true
  37. scrollIV = true
  38. collectImages()
  39.  
  40. if (!window.confirm('The script needs to scroll down to the end of the page. It will start downloading once the end is reached.\n\nOnly images that are already visible can be downloaded.\n\n\u2757 Keep this tab open (visible) \u2757')) {
  41. return
  42. }
  43.  
  44. const div = document.querySelector('.downloadoriginal123button')
  45. div.style.position = 'fixed'
  46. div.style.top = '30%'
  47. div.style.zIndex = 100
  48. div.innerHTML = 'Collecting images... (keep this tab visible)<br>'
  49.  
  50. const startDownloadButton = div.appendChild(document.createElement('button'))
  51. startDownloadButton.appendChild(document.createTextNode('Stop scrolling & start downloading'))
  52. startDownloadButton.addEventListener('click', function () {
  53. window.clearInterval(scrollIV)
  54. downloadOriginals()
  55. })
  56.  
  57. const statusImageCollector = div.appendChild(document.createElement('div'))
  58. statusImageCollector.setAttribute('id', 'statusImageCollector')
  59.  
  60. document.scrollingElement.scrollTo(0, document.scrollingElement.scrollHeight)
  61.  
  62. window.setTimeout(function () {
  63. scrollIV = window.setInterval(scrollDown, scrollPause)
  64. }, 1000)
  65. }
  66.  
  67. function scrollDown () {
  68. if (document.hidden) {
  69. // Tab is hidden, don't do anyhting
  70. return
  71. }
  72. if (noChangesFor > 2) {
  73. console.log('noChangesFor > 2')
  74. window.clearInterval(scrollIV)
  75. window.setTimeout(downloadOriginals, 1000)
  76. } else {
  77. console.log('noChangesFor <= 2')
  78. document.scrollingElement.scrollTo(0, document.scrollingElement.scrollTop + 500)
  79. if (document.scrollingElement.scrollTop === lastScrollY) {
  80. noChangesFor++
  81. console.log('noChangesFor++')
  82. } else {
  83. noChangesFor = 0
  84. console.log('noChangesFor = 0')
  85. }
  86. }
  87. lastScrollY = document.scrollingElement.scrollTop
  88. }
  89.  
// Entries gathered by collectImages() for the current page
let entryList = []
// URL that entryList belongs to; collectImages() empties the list when the location changes
let url = document.location.href
// collectImages() returns immediately while this is false
let collectActive = false
// Board and user name (raw and with every non-alphanumeric character replaced by "_");
// filled in by downloadOriginals()
let boardName = ''
let boardNameEscaped = ''
let userName = ''
let userNameEscaped = ''
// Time at which the script was loaded; used in the overview files and in the zip file name
const startTime = new Date()
// Default values of an entry; collectImages() shallow-copies this object for every image
const entryTemplate = {
  images: [],
  title: null,
  link: null,
  description: null,
  note: null,
  sourceLink: null
}
  106.  
  107. function collectImages () {
  108. if (!collectActive) return
  109. if (url !== document.location.href) {
  110. // Reset on new page
  111. url = document.location.href
  112. entryList = []
  113. }
  114.  
  115. const imgs = document.querySelectorAll('.gridCentered a[href^="/pin/"] img')
  116. for (let i = 0; i < imgs.length; i++) {
  117. if (imgs[i].clientWidth < 100) {
  118. // Skip small images, these are user profile photos
  119. continue
  120. }
  121. if (!('mouseOver' in imgs[i].dataset)) {
  122. // Fake mouse over to load source link
  123. const mouseOverEvent = new MouseEvent('mouseover', {
  124. bubbles: true,
  125. cancelable: true
  126. })
  127.  
  128. imgs[i].dispatchEvent(mouseOverEvent)
  129. imgs[i].dataset.mouseOver = true
  130. }
  131.  
  132. const entry = Object.assign({}, entryTemplate)
  133. entry.images = [imgs[i].src.replace(/\/\d+x\//, '/originals/'), imgs[i].src]
  134.  
  135. if (imgs[i].alt) {
  136. entry.description = imgs[i].alt
  137. }
  138.  
  139. const pinWrapper = parentQuery(imgs[i], '[data-test-id="pinWrapper"]') || parentQuery(imgs[i], '[role="listitem"]') || parentQuery(imgs[i], '[draggable="true"]')
  140. if (pinWrapper) {
  141. // find metadata
  142. const aText = Array.from(pinWrapper.querySelectorAll('a[href*="/pin/"]')).filter(a => a.firstChild.nodeType === a.TEXT_NODE)
  143. if (aText.length > 0 && aText[0]) {
  144. entry.title = aText[0].textContent.trim()
  145. entry.link = aText[0].href.toString()
  146. } else if (pinWrapper.querySelector('a[href*="/pin/"]')) {
  147. entry.link = pinWrapper.querySelector('a[href*="/pin/"]').href.toString()
  148. }
  149. const aNotes = Array.from(pinWrapper.querySelectorAll('a[href*="/pin/"]')).filter(a => a.querySelector('div[title]'))
  150. if (aNotes.length > 0 && aNotes[0]) {
  151. entry.note = aNotes[0].textContent.trim()
  152. }
  153.  
  154. if (pinWrapper.querySelector('[data-test-id="pinrep-source-link"] a')) {
  155. entry.sourceLink = pinWrapper.querySelector('[data-test-id="pinrep-source-link"] a').href.toString()
  156. }
  157. }
  158.  
  159. if (imgs[i].srcset) {
  160. // e.g. srcset="https://i-h2.pinimg.com/236x/15/87/ae/abcdefg1234.jpg 1x, https://i-h2.pinimg.com/474x/15/87/ae/abcdefg1234.jpg 2x, https://i-h2.pinimg.com/736x/15/87/ae/abcdefg1234.jpg 3x, https://i-h2.pinimg.com/originals/15/87/ae/abcdefg1234.png 4x"
  161.  
  162. let goodUrl = false
  163. let quality = -1
  164. const srcset = imgs[i].srcset.split(', ')
  165. for (let j = 0; j < srcset.length; j++) {
  166. const pair = srcset[j].split(' ')
  167. const q = parseInt(pair[1].replace('x'))
  168. if (q > quality) {
  169. goodUrl = pair[0]
  170. quality = q
  171. }
  172. if (pair[0].indexOf('/originals/') !== -1) {
  173. break
  174. }
  175. }
  176. if (goodUrl && quality !== -1) {
  177. entry.images[0] = goodUrl
  178. }
  179. }
  180.  
  181. let exists = false
  182. for (let j = 0; j < entryList.length; j++) {
  183. if (entryList[j].images[0] === entry.images[0] && entryList[j].images[1] === entry.images[1]) {
  184. exists = true
  185. entryList[j] = entry // replace with newer entry
  186. break
  187. }
  188. }
  189. if (!exists) {
  190. entryList.push(entry)
  191. console.debug(imgs[i].parentNode)
  192. console.debug(entry)
  193. }
  194. }
  195. const statusImageCollector = document.getElementById('statusImageCollector')
  196. if (statusImageCollector) {
  197. statusImageCollector.innerHTML = `Collected ${entryList.length} images`
  198. }
  199. }
  200.  
  201. function addButton () {
  202. if (document.querySelector('.downloadoriginal123button')) {
  203. return
  204. }
  205.  
  206. if (document.querySelector('[data-test-id="board-header"]') && document.querySelectorAll('.gridCentered a[href^="/pin/"] img').length) {
  207. const button = document.createElement('div')
  208. button.type = 'button'
  209. button.classList.add('downloadoriginal123button')
  210. button.setAttribute('style', `
  211. position: absolute;
  212. display: block;
  213. background: white;
  214. border: none;
  215. padding: 5px;
  216. text-align: center;
  217. cursor:pointer;
  218. `)
  219. button.innerHTML = `
  220. <div class="buttonText" style="background: #efefef;border: #efefef 1px solid;border-radius: 24px;padding: 5px;font-size: xx-large;color: #111;width: 62px; height: 58px;">\u2B73</div>
  221. <div style="font-weight: 700;color: #111;font-size: 12px;">Download<br>originals</div>
  222. `
  223. button.addEventListener('click', prepareForDownloading)
  224. document.querySelector('[data-test-id="board-header"]').appendChild(button)
  225. try {
  226. const buttons = document.querySelectorAll('[role="button"] a[href*="/more-ideas/"],[data-test-id="board-header"] [role="button"]')
  227. const rect = buttons[buttons.length - 1].getBoundingClientRect()
  228. button.style.top = rect.top - 2 + 'px'
  229. button.style.left = rect.left - rect.width + 300 + 'px'
  230. } catch (e) {
  231. console.warn(e)
  232. try {
  233. const title = document.querySelector('h1')
  234. const rect = title.getBoundingClientRect()
  235. button.style.top = rect.top - 2 + 'px'
  236. button.style.left = rect.left - 120 + 'px'
  237. } catch (e) {
  238. console.warn(e)
  239. }
  240. }
  241. }
  242. }
  243.  
// Menu entry as an alternative to the button on the page
GM.registerMenuCommand('Pinterest.com - backup originals', prepareForDownloading)
// Try to place the button now and re-check every second (addButton() returns early once it exists)
addButton()
window.setInterval(addButton, 1000)
// Poll the page for images; collectImages() does nothing until collectActive is set
window.setInterval(collectImages, 400)
  248.  
  249. function downloadOriginals () {
  250. try {
  251. boardName = document.querySelector('h1').textContent.trim()
  252. boardNameEscaped = boardName.replace(/[^a-z0-9]/gi, '_')
  253. } catch (e1) {
  254. try {
  255. boardName = document.location.pathname.replace(/^\//, '').replace(/\/$/, '').split('/').pop()
  256. boardNameEscaped = boardName.replace(/[^a-z0-9]/gi, '_')
  257. } catch (e2) {
  258. boardName = 'board-' + Math.random()
  259. boardNameEscaped = boardName
  260. }
  261. }
  262. try {
  263. userName = document.location.href.match(/\.(\w{2,3})\/(.*?)\//)[2]
  264. userNameEscaped = userName.replace(/[^a-z0-9]/gi, '_')
  265. } catch (e) {
  266. try {
  267. userName = document.location.pathname.replace(/^\//, '').replace(/\/$/, '').split('/').shift()
  268. userNameEscaped = userName.replace(/[^a-z0-9]/gi, '_')
  269. } catch (e2) {
  270. userName = 'user'
  271. userNameEscaped = userName
  272. }
  273. }
  274.  
  275. collectImages()
  276. collectActive = false
  277.  
  278. const lst = entryList.slice()
  279.  
  280. const total = lst.length
  281. let zip = new JSZip()
  282. const fileNameSet = new Set()
  283.  
  284. // Create folders
  285. const imagesFolder = zip.folder('images')
  286. const errorFolder = zip.folder('error_thumbnails')
  287. const markdownOut = []
  288. const htmlOut = []
  289.  
  290. document.body.style.padding = '3%'
  291. document.body.innerHTML = '<h1><span id="counter">' + (total - lst.length) + '</span>/' + total + ' downloaded</h1><br>(Keep this tab visible)<br>' + '</div><progress id="status"></progress> image download<br><progress id="total" value="0" max="' + total + '"></progress> total progress<pre id="statusmessage"></pre>'
  292. document.scrollingElement.scrollTo(0, 0)
  293. const pre = document.getElementById('statusmessage')
  294. const statusbar = document.getElementById('status')
  295. const totalbar = document.getElementById('total')
  296. const h1 = document.getElementById('counter');
  297.  
  298. (async function work () {
  299. document.title = (total - lst.length) + '/' + total + ' downloaded'
  300. h1.innerHTML = totalbar.value = total - lst.length
  301. statusbar.removeAttribute('value')
  302. statusbar.removeAttribute('max')
  303.  
  304. if (lst.length === 0) {
  305. document.title = 'Generating zip file...'
  306. document.body.innerHTML = '<h1>Generating zip file...</h1><progress id="gen_zip_progress"></progress>'
  307. }
  308. if (lst.length > 0) {
  309. const entry = lst.pop()
  310. const urls = entry.images
  311. let fileName = null
  312. const prettyFilename = (s) => safeFileName(s.substr(0, 200)).substr(0, 110).replace(/^[^\w]+/, '').replace(/[^\w]+$/, '')
  313. if (entry.title) {
  314. fileName = prettyFilename(entry.title)
  315. } else if (entry.description) {
  316. fileName = prettyFilename(entry.description)
  317. } else if (entry.note) {
  318. fileName = prettyFilename(entry.note)
  319. } else if (entry.sourceLink) {
  320. fileName = prettyFilename(entry.sourceLink.split('/').slice(3).join('-'))
  321. }
  322.  
  323. if (!fileName) {
  324. fileName = urls[0].split('/').pop()
  325. } else {
  326. fileName = fileName + '.' + urls[0].split('/').pop().split('.').pop()
  327. }
  328.  
  329. while (fileNameSet.has(fileName.toLowerCase())) {
  330. const parts = fileName.split('.')
  331. parts.splice(parts.length - 1, 0, parseInt(Math.random() * 10000).toString())
  332. fileName = parts.join('.')
  333. }
  334. fileNameSet.add(fileName.toLowerCase())
  335.  
  336. pre.innerHTML = fileName
  337. GM.xmlHttpRequest({
  338. method: 'GET',
  339. url: urls[0],
  340. responseType: 'arraybuffer',
  341. onload: async function (response) {
  342. const s = String.fromCharCode.apply(null, new Uint8Array(response.response.slice(0, 125)))
  343. if (s.indexOf('<Error>') !== -1) {
  344. // Download thumbnail to error folder
  345. if (!('isError' in entry) || !entry.isError) {
  346. const errorEntry = Object.assign({}, entry)
  347. errorEntry.images = [urls[1]]
  348. errorEntry.isError = true
  349. // TODO change title? of error entry
  350. lst.push(errorEntry)
  351. }
  352. } else {
  353. // Save file to zip
  354. entry.fileName = fileName
  355. entry.fileNameUrl = markdownEncodeURIComponent(fileName)
  356. if (!('isError' in entry) || !entry.isError) {
  357. imagesFolder.file(fileName, response.response)
  358. entry.filePath = 'images/' + fileName
  359. entry.fileUrl = 'images/' + entry.fileNameUrl
  360. await addMetadata('successful', entry, htmlOut, markdownOut)
  361. } else {
  362. errorFolder.file(fileName, response.response)
  363. entry.filePath = 'error_thumbnails/' + fileName
  364. entry.fileUrl = 'error_thumbnails/' + entry.fileNameUrl
  365. await addMetadata('error', entry, htmlOut, markdownOut)
  366. }
  367. }
  368.  
  369. work()
  370. },
  371. onprogress: function (progress) {
  372. try {
  373. statusbar.max = progress.total
  374. statusbar.value = progress.loaded
  375. } catch (e) { }
  376. }
  377. })
  378. } else {
  379. // Create html and markdown overview
  380. htmlOut.unshift(`
  381. <style>
  382. th,td {
  383. word-wrap: break-word;
  384. max-width: 25em
  385. }
  386. tr:nth-child(2n+2){
  387. background-color:#f0f0f0
  388. }
  389. </style>
  390.  
  391. <h1>${escapeXml(boardName)}</h1>
  392. <h3>
  393. ${escapeXml(userName)}
  394. <br>
  395. <time datetime="${startTime.toISOString()}" title=""${startTime.toString()}">
  396. ${startTime.toLocaleDateString(undefined, { year: 'numeric', month: 'long', day: 'numeric' })}
  397. </time>:
  398. <a href="${escapeXml(document.location.href)}">${escapeXml(document.location.href)}</a>
  399. </h3>
  400.  
  401. <table border="1">
  402. <tr>
  403. <th>Title</th>
  404. <th>Image</th>
  405. <th>Pinterest</th>
  406. <th>Source</th>
  407. <th>Description</th>
  408. <th>Notes</th>
  409. </tr>
  410. `)
  411. htmlOut.push('</table>')
  412. zip.file('index.html', htmlOut.join('\n'))
  413. markdownOut.unshift(`
  414. # ${escapeMD(boardName)}
  415.  
  416. ### ${escapeXml(userName)}
  417.  
  418. ${startTime.toLocaleDateString(undefined, { year: 'numeric', month: 'long', day: 'numeric' })}: ${document.location.href}
  419.  
  420. | Title | Image | Pinterest | Source | Description | Notes |
  421. |---|---|---|---|---|---|`)
  422.  
  423. zip.file('README.md', markdownOut.join('\n'))
  424.  
  425. // Done. Open ZIP file
  426. let zipfilename
  427. try {
  428. const d = startTime || new Date()
  429. zipfilename = userNameEscaped + '_' + boardNameEscaped + '_' + d.getFullYear() + '-' + ((d.getMonth() + 1) > 9 ? '' : '0') + (d.getMonth() + 1) + '-' + (d.getDate() > 9 ? '' : '0') + d.getDate() +
  430. '_' + (d.getHours() > 9 ? '' : '0') + d.getHours() + '-' + (d.getMinutes() > 9 ? '' : '0') + d.getMinutes()
  431. } catch (e) {
  432. zipfilename = 'board'
  433. }
  434. zipfilename += '.zip'
  435. const content = await zip.generateAsync({ type: 'blob' }) // TODO catch errors
  436. zip = null
  437. const h = document.createElement('h1')
  438. h.appendChild(document.createTextNode('Click here to Download'))
  439. h.style = 'cursor:pointer; color:blue; background:white; text-decoration:underline'
  440. document.body.appendChild(h)
  441. const genZipProgress = document.getElementById('gen_zip_progress')
  442. if (genZipProgress) {
  443. genZipProgress.remove()
  444. }
  445. h.addEventListener('click', function () {
  446. saveAs(content, zipfilename)
  447. })
  448. saveAs(content, zipfilename)
  449. }
  450. })()
  451. }
  452.  
  453. function addMetadata (status, e, htmlOut, markdownOut) {
  454. return new Promise((resolve) => {
  455. writeMetadata(status, e, htmlOut, markdownOut)
  456. resolve()
  457. })
  458. }
  459.  
  460. function writeMetadata (status, entry, htmlOut, markdownOut) {
  461. // XML escape all values for html
  462. const entryEscaped = Object.fromEntries(Object.entries(entry).map(entry => {
  463. const escapedValue = escapeXml(entry[1])
  464. return [entry[0], escapedValue]
  465. }))
  466.  
  467. // Shorten source link title
  468. let sourceA = ''
  469. if (entry.sourceLink) {
  470. let sourceTitle = decodeURI(entry.sourceLink)
  471. if (sourceTitle.length > 160) {
  472. sourceTitle = sourceTitle.substring(0, 155) + '\u2026'
  473. }
  474. sourceA = `<a href="${entryEscaped.sourceLink}">${escapeXml(sourceTitle)}</a>`
  475. }
  476.  
  477. // HTML table entry
  478. htmlOut.push(` <tr>
  479. <th id="${entryEscaped.fileNameUrl}">
  480. <a href="#${entryEscaped.fileNameUrl}">${entryEscaped.title || entryEscaped.description || entryEscaped.fileName}</a
  481. </th>
  482. <td>
  483. <a href="${entryEscaped.fileUrl}">
  484. <img style="max-width:250px; max-height:250px" src="${entryEscaped.fileUrl}" alt="${entryEscaped.description || entryEscaped.filePath}">
  485. </a>
  486. </td>
  487. <td>
  488. <a href="${entryEscaped.link}">${entryEscaped.link}</a>
  489. </td>
  490. <td>
  491. ${sourceA}
  492. </td>
  493. <td>${entryEscaped.description}</td>
  494. <td>${entryEscaped.note}</td>
  495. </tr>
  496. `)
  497.  
  498. // Shorten source link title
  499. let sourceLink = entry.sourceLink || ''
  500. if (entry.sourceLink) {
  501. let sourceTitle = decodeURI(entry.sourceLink)
  502. if (sourceTitle.length > 160) {
  503. sourceTitle = sourceTitle.substring(0, 155) + '\u2026'
  504. }
  505. sourceLink = `[${escapeMD(sourceTitle)}](${entry.sourceLink})`
  506. }
  507.  
  508. // Markdown
  509. markdownOut.push(`| ${escapeMD(entry.title || entry.description || entry.fileName)}` +
  510. ` | ![${escapeMD(entry.description || entry.fileName)}](${entry.fileUrl})` +
  511. ` | ${entry.link || ''}` +
  512. ` | ${sourceLink}` +
  513. ` | ${escapeMD(entry.description || '')}` +
  514. ` | ${escapeMD(entry.note || '')}` + ' |')
  515. }
  516.  
  517. function parentQuery (node, q) {
  518. const parents = [node.parentElement]
  519. node = node.parentElement.parentElement
  520. while (node) {
  521. const lst = node.querySelectorAll(q)
  522. for (let i = 0; i < lst.length; i++) {
  523. if (parents.indexOf(lst[i]) !== -1) {
  524. return lst[i]
  525. }
  526. }
  527. parents.push(node)
  528. node = node.parentElement
  529. }
  530. return null
  531. }
  532.  
  533. function safeFileName (s) {
  534. const blacklist = /[<>:'"/\\|?*\u0000\n\r\t]/g // eslint-disable-line no-control-regex
  535. s = s.replace(blacklist, ' ').trim().replace(/^\.+/, '').replace(/\.+$/, '')
  536. return s.replace(/\s+/g, ' ').trim()
  537. }
  538.  
  539. function escapeXml (unsafe) {
  540. // https://stackoverflow.com/a/27979933/
  541. const s = (unsafe || '').toString()
  542. return s.replace(/[<>&'"\n\t]/gim, function (c) {
  543. switch (c) {
  544. case '<': return '&lt;'
  545. case '>': return '&gt;'
  546. case '&': return '&amp;'
  547. case '\'': return '&apos;'
  548. case '"': return '&quot;'
  549. case '\n': return '<br>'
  550. case '\t': return ' '
  551. }
  552. })
  553. }
  554.  
  555. function escapeMD (unsafe) {
  556. // Markdown escape
  557. const s = (unsafe || '').toString()
  558. return s.replace(/\W/gim, function (c) {
  559. switch (c) {
  560. case '<': return '&lt;'
  561. case '>': return '&gt;'
  562. case '&': return '&amp;'
  563. case '\'': return '\\\''
  564. case '"': return '\\"'
  565. case '*': return '\\*'
  566. case '[': return '\\['
  567. case ']': return '\\]'
  568. case '(': return '\\('
  569. case ')': return '\\)'
  570. case '{': return '\\{'
  571. case '}': return '\\}'
  572. case '`': return '\\`'
  573. case '!': return '\\!'
  574. case '|': return '\\|'
  575. case '#': return '\\#'
  576. case '+': return '\\+'
  577. case '-': return '\\-'
  578. case '\r': return ' '
  579. case '\n': return '<br>'
  580. default: return c
  581. }
  582. }).trim()
  583. }
  584.  
  585. function markdownEncodeURIComponent (s) {
  586. return encodeURIComponent(s).replace(/[[\](){}`!]/g, function (c) {
  587. switch (c) {
  588. case '[': return '%5B'
  589. case ']': return '%5D'
  590. case '(': return '%28'
  591. case ')': return '%29'
  592. case '{': return '%7B'
  593. case '}': return '%7D'
  594. case '`': return '%60'
  595. case '!': return '%21'
  596. }
  597. })
  598. }