archive.today Archiver - URL Queue Manager

Automate archiving with smart queue. Export filename now dynamically detects the dominant domain (e.g. instagram, twitter) and username.

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子 或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴 或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴 或 Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         archive.today Archiver - URL Queue Manager
// @namespace    http://archive.today/
// @version      1.4.0
// @description  Automate archiving with smart queue. Export filename now dynamically detects the dominant domain (e.g. instagram, twitter) and username.
// @author       Claude (Anthropic) & Gemini
// @icon         https://archive.is/favicon.ico
// @match        https://archive.ph/*
// @match        https://archive.today/*
// @match        https://archive.is/*
// @match        https://archive.vn/*
// @match        https://archive.fo/*
// @match        https://archive.li/*
// @match        https://archive.md/*
// @grant        GM_setValue
// @grant        GM_getValue
// @grant        GM_deleteValue
// @license      MIT
// ==/UserScript==

(function () {
  'use strict';

  /* ===== CONFIG ===== */
  // Mirror origins (each ends with a slash) that getNextDomain() rotates through.
  const ARCHIVE_DOMAINS = ['ph', 'is', 'today', 'vn', 'fo', 'li', 'md']
    .map((tld) => `https://archive.${tld}/`);

  // NOTE(review): the next three constants are not referenced anywhere else in this script.
  const BASE_WIP_POLL_MS = 90000;
  const MAX_BACKOFF_EXP = 6;
  const BACKOFF_JITTER_RATIO = 0.12;

  // Delay (ms) before processQueue() is re-invoked after an item has been settled.
  const PROCESS_DELAY = 2000;

  // Default bounds (ms) for the randomized delay returned by humanDelay().
  const MIN_REQUEST_DELAY = 2000;
  const MAX_REQUEST_DELAY = 5000;

  // Period (ms) of the interval started by startProgressMonitor().
  const CAPTCHA_CHECK_INTERVAL_MS = 2000;

  // checkDailyLimit() pauses processing once this many URLs were processed today.
  const DAILY_LIMIT = 150;

  // When false, getNextDomain() always returns the first mirror.
  const ENABLE_DOMAIN_ROTATION = true;

  /* ===== GM Storage Wrappers ===== */
  /**
   * Reads a value from userscript storage.
   * Falls back to `def` when GM_getValue yields undefined or throws.
   */
  function gmGet(key, def) {
    try {
      const stored = GM_getValue(key);
      if (stored === undefined) {
        return def;
      }
      return stored;
    } catch (err) {
      return def;
    }
  }
  /**
   * Writes a value to userscript storage (best effort).
   * A failing GM_setValue never propagates to the caller, but the failure is reported on the
   * console so that lost state can be diagnosed instead of disappearing silently.
   * @param {string} key - Storage key.
   * @param {*} val - Value handed to GM_setValue unchanged.
   */
  function gmSet(key, val) {
    try {
      GM_setValue(key, val);
    } catch (err) {
      console.warn('[ArchiveQueue]', `GM_setValue failed for "${key}"`, err);
    }
  }
  /**
   * Removes a key from userscript storage (best effort).
   * A failing GM_deleteValue never propagates to the caller, but it is reported on the console
   * rather than being dropped silently.
   * @param {string} key - Storage key to remove.
   */
  function gmDelete(key) {
    try {
      GM_deleteValue(key);
    } catch (err) {
      console.warn('[ArchiveQueue]', `GM_deleteValue failed for "${key}"`, err);
    }
  }

  /* ===== Utils / Storage ===== */
  /** Prints its arguments through console.log, prefixed with the script tag. */
  function log(...args) {
    const tagged = ['[ArchiveQueue]'].concat(args);
    console.log.apply(console, tagged);
  }
  /** Prints its arguments through console.debug, prefixed with the script tag. */
  function dbg(...args) {
    const tagged = ['[ArchiveQueue]'].concat(args);
    console.debug.apply(console, tagged);
  }

  /** Returns the JSON-decoded value stored under 'archiveQueue', or [] when it cannot be parsed. */
  function getQueue() {
    let queue;
    try {
      queue = JSON.parse(gmGet('archiveQueue', '[]'));
    } catch (err) {
      queue = [];
    }
    return queue;
  }
  /** Stores `q` as JSON under 'archiveQueue', then refreshes the overlay. */
  function saveQueue(q) {
    const serialized = JSON.stringify(q);
    gmSet('archiveQueue', serialized);
    updateOverlay();
  }

  /** Returns the JSON-decoded value stored under 'processedUrls', or [] when it cannot be parsed. */
  function getProcessed() {
    let processed;
    try {
      processed = JSON.parse(gmGet('processedUrls', '[]'));
    } catch (err) {
      processed = [];
    }
    return processed;
  }
  /**
   * Adds `url` to the processed list when it is not there yet; a new entry also bumps the
   * daily counter. The overlay is refreshed in every case.
   */
  function saveProcessed(url) {
    const known = getProcessed();
    const alreadyRecorded = known.includes(url);
    if (!alreadyRecorded) {
      known.push(url);
      const payload = JSON.stringify(known);
      gmSet('processedUrls', payload);
      updateDailyStats();
    }
    updateOverlay();
  }

  /** Returns the JSON-decoded value stored under 'skippedUrls', or [] when it cannot be parsed. */
  function getSkipped() {
    let skipped;
    try {
      skipped = JSON.parse(gmGet('skippedUrls', '[]'));
    } catch (err) {
      skipped = [];
    }
    return skipped;
  }
  /** Adds `url` to the skipped list when it is not there yet, then refreshes the overlay. */
  function saveSkipped(url) {
    const known = getSkipped();
    const alreadyRecorded = known.includes(url);
    if (!alreadyRecorded) {
      known.push(url);
      const payload = JSON.stringify(known);
      gmSet('skippedUrls', payload);
    }
    updateOverlay();
  }

  /** Returns the JSON-decoded value stored under 'restrictedUrls', or [] when it cannot be parsed. */
  function getRestricted() {
    let restricted;
    try {
      restricted = JSON.parse(gmGet('restrictedUrls', '[]'));
    } catch (err) {
      restricted = [];
    }
    return restricted;
  }
  /**
   * Adds `url` to the restricted list when it is not there yet, then refreshes the overlay.
   * The stored list stays a plain array of URL strings (exportRestricted joins it with
   * newlines), so `reason` is only written to the debug log.
   * @param {string} url - URL to record as restricted.
   * @param {string} [reason='unknown'] - Short tag describing why the URL was classified.
   */
  function saveRestricted(url, reason = 'unknown') {
    const known = getRestricted();
    if (!known.includes(url)) {
      known.push(url);
      // Write only when the list actually changed, like saveSkipped/saveProcessed do.
      gmSet('restrictedUrls', JSON.stringify(known));
      dbg('Restricted:', url, 'reason:', reason);
    }
    updateOverlay();
  }

  /* ===== Session Flags ===== */
  /** Reads `key` from sessionStorage; returns null when the access throws. */
  function getSessionFlag(key) {
    let value = null;
    try {
      value = sessionStorage.getItem(key);
    } catch (err) {
      value = null;
    }
    return value;
  }
  /**
   * Stores `val` under `key` in sessionStorage; a falsy `val` removes the key instead.
   * Storage exceptions are ignored.
   */
  function setSessionFlag(key, val) {
    try {
      if (!val) {
        sessionStorage.removeItem(key);
        return;
      }
      sessionStorage.setItem(key, val);
    } catch (err) {
      // Best effort: storage failures are deliberately ignored.
    }
  }
  /** Removes `key` from sessionStorage; storage exceptions are ignored. */
  function removeSessionFlag(key) {
    try {
      sessionStorage.removeItem(key);
    } catch (err) {
      // Best effort: storage failures are deliberately ignored.
    }
  }

  /* ===== Daily Limit ===== */
  /**
   * Returns the stored daily counter object when its `date` equals today's
   * `Date#toDateString()`; otherwise (other day, missing or unparsable data) returns a fresh
   * `{ date: today, processed: 0 }`.
   */
  function getDailyStats() {
    const today = new Date().toDateString();
    const fresh = { date: today, processed: 0 };
    try {
      const stored = JSON.parse(gmGet('aq_daily_stats', '{}'));
      return stored.date === today ? stored : fresh;
    } catch (err) {
      return fresh;
    }
  }
  /** Increments today's processed counter, persists it, and refreshes the overlay. */
  function updateDailyStats() {
    const todayStats = getDailyStats();
    todayStats.processed++;
    const payload = JSON.stringify(todayStats);
    gmSet('aq_daily_stats', payload);
    updateOverlay();
  }
  /**
   * Checks today's processed count against DAILY_LIMIT.
   *
   * When the limit is reached: shows an alert (once per browser session, tracked by the
   * 'aq_limit_alerted' flag), sets 'processingPaused', clears 'aq_paused_for_captcha',
   * refreshes the overlay and returns false.
   *
   * The "already alerted" flag goes through the guarded session-flag helpers rather than raw
   * sessionStorage, so a storage exception cannot abort the caller.
   *
   * @returns {boolean} true when processing may continue, false when the limit is reached.
   */
  function checkDailyLimit() {
    const stats = getDailyStats();
    if (stats.processed >= DAILY_LIMIT) {
      if (!getSessionFlag('aq_limit_alerted')) {
        alert(`Daily limit of ${DAILY_LIMIT} URLs reached.`);
        setSessionFlag('aq_limit_alerted', '1');
      }
      setSessionFlag('processingPaused', '1');
      removeSessionFlag('aq_paused_for_captcha');
      updateOverlay();
      return false;
    }
    return true;
  }

  /* ===== Helpers ===== */
  /**
   * Returns the archive mirror to use for the next request and advances the persisted
   * round-robin index ('aq_domain_index').
   *
   * A stored index that is not an integer within [0, ARCHIVE_DOMAINS.length) — corrupt data,
   * or a leftover from a longer mirror list — is treated as 0. Without this check the lookup
   * would yield `undefined`, and a NaN index would be written back as 'NaN' and never recover.
   *
   * @returns {string} A mirror origin ending with a slash.
   */
  function getNextDomain() {
    if (!ENABLE_DOMAIN_ROTATION) return ARCHIVE_DOMAINS[0];
    try {
      const count = ARCHIVE_DOMAINS.length;
      const stored = Number.parseInt(gmGet('aq_domain_index', '0'), 10);
      const isUsable = Number.isInteger(stored) && stored >= 0 && stored < count;
      const index = isUsable ? stored : 0;
      gmSet('aq_domain_index', String((index + 1) % count));
      return ARCHIVE_DOMAINS[index];
    } catch (e) {
      return ARCHIVE_DOMAINS[0];
    }
  }
  /**
   * Picks a random integer delay in the inclusive range [min, max].
   * @param {number} [min=MIN_REQUEST_DELAY] - Lower bound.
   * @param {number} [max=MAX_REQUEST_DELAY] - Upper bound.
   * @returns {number} The chosen delay.
   */
  function humanDelay(min = MIN_REQUEST_DELAY, max = MAX_REQUEST_DELAY) {
    const span = max - min + 1;
    const offset = Math.floor(Math.random() * span);
    return offset + min;
  }
  /**
   * Reports whether the current page looks like a CAPTCHA challenge: either a
   * reCAPTCHA-related element is present, or the lower-cased page text contains one of the
   * known phrases.
   * @returns {boolean}
   */
  function detectCaptcha() {
    const pageText = (document.body?.innerText || '').toLowerCase();
    const widget = document.querySelector('iframe[src*="recaptcha"], .g-recaptcha, [data-sitekey]');
    if (widget) return true;
    const phrases = ["i'm not a robot", 'captcha', 'security check'];
    return phrases.some((phrase) => pageText.includes(phrase));
  }

  /**
   * Wipes all state owned by this script: the persisted lists and counters in userscript
   * storage, and the script's own session flags.
   *
   * Only the flags this script sets are removed. A blanket `sessionStorage.clear()` would
   * also delete whatever the archive site itself keeps there, and it would throw if storage
   * access is blocked; removeSessionFlag is guarded against that.
   */
  function clearForReplace() {
    const storedKeys = [
      'archiveQueue', 'processedUrls', 'skippedUrls', 'restrictedUrls',
      'aq_last_wip_reload', 'aq_daily_stats', 'aq_domain_index',
    ];
    const sessionKeys = [
      'processingPaused', 'aq_paused_for_captcha', 'aq_processing',
      'forceSaveUrl', 'aq_limit_alerted',
    ];
    for (const key of storedKeys) gmDelete(key);
    for (const key of sessionKeys) removeSessionFlag(key);
  }
  /** Asks for confirmation; when given, wipes all script state and reloads the page. */
  function clearAll() {
    const confirmed = confirm('Clear ALL lists (Queue, Processed, Skipped, Restricted)?');
    if (confirmed) {
      clearForReplace();
      location.reload();
    }
  }

  /* ===== UI ===== */
  /**
   * Builds the floating control panel (#aq-overlay) once per page, wires its buttons and the
   * hidden file input used by the two import buttons, then renders the current status.
   * Does nothing when the panel already exists.
   */
  function createOverlay() {
    if (document.getElementById('aq-overlay')) return;

    const panel = document.createElement('div');
    panel.id = 'aq-overlay';
    Object.assign(panel.style, {
      position: 'fixed',
      top: '18px',
      right: '18px',
      zIndex: 999999,
      background: 'rgba(255,255,255,0.97)',
      border: '1px solid #888',
      padding: '10px',
      fontFamily: 'sans-serif',
      fontSize: '13px',
      color: '#222',
      borderRadius: '8px',
      boxShadow: '0 4px 18px rgba(0,0,0,0.2)',
      maxWidth: '380px',
      maxHeight: '80vh',
      overflowY: 'auto'
    });
    panel.innerHTML = `
      <div style="display:flex;justify-content:space-between;align-items:center">
        <strong>archive.today Queue</strong>
        <span id="aq-close" style="cursor:pointer;font-weight:bold">×</span>
      </div>
      <div style="display:grid;grid-template-columns:repeat(2,1fr);gap:6px;margin-top:8px">
        <button id="aq-add">Add URLs</button> <button id="aq-edit">Edit Queue</button>
        <button id="aq-resume">Resume</button> <button id="aq-pause">Pause</button>
        <button id="aq-export">Export Restricted</button> <button id="aq-clear">Clear All</button>
        <button id="aq-import-merge">Import (merge)</button> <button id="aq-import-replace">Import (replace)</button>
      </div>
      <div id="aq-input" style="display:none;margin-top:8px">
        <textarea id="aq-text" style="width:100%;height:80px" placeholder="URLs..."></textarea>
        <div style="display:flex;gap:6px;margin-top:6px"><button id="aq-save">Save</button><button id="aq-cancel">Cancel</button></div>
      </div>
      <div id="aq-edit-area" style="display:none;margin-top:8px">
        <textarea id="aq-edit-text" style="width:100%;height:120px"></textarea>
        <div style="display:flex;gap:6px;margin-top:6px"><button id="aq-update">Update</button><button id="aq-edit-cancel">Cancel</button></div>
      </div>
      <pre id="aq-status" style="white-space:pre-wrap;margin-top:8px;padding:8px;background:#f6f6f6;border-radius:6px"></pre>
      <div id="aq-message" style="font-size:12px;color:#b40010;margin-top:6px"></div>
    `;
    document.body.appendChild(panel);

    // Lookups are scoped to the panel itself.
    const byId = (id) => panel.querySelector('#' + id);
    const setDisplay = (id, value) => { byId(id).style.display = value; };

    // Button id -> click handler, assigned in declaration order.
    const clickHandlers = {
      'aq-close': () => { panel.style.display = 'none'; },
      'aq-add': () => {
        setDisplay('aq-input', 'block');
        setDisplay('aq-edit-area', 'none');
      },
      'aq-edit': () => {
        setDisplay('aq-edit-area', 'block');
        setDisplay('aq-input', 'none');
        byId('aq-edit-text').value = getQueue().join('\n');
      },
      'aq-resume': () => {
        removeSessionFlag('processingPaused');
        removeSessionFlag('aq_paused_for_captcha');
        updateOverlay();
        processQueue();
      },
      'aq-pause': () => {
        setSessionFlag('processingPaused', '1');
        updateOverlay();
      },
      'aq-export': exportRestricted,
      'aq-clear': clearAll,
      'aq-save': saveInput,
      'aq-cancel': () => { setDisplay('aq-input', 'none'); },
      'aq-update': updateQueue,
      'aq-edit-cancel': () => { setDisplay('aq-edit-area', 'none'); },
    };
    for (const [id, handler] of Object.entries(clickHandlers)) {
      byId(id).onclick = handler;
    }

    // One hidden file input serves both import buttons; the mode travels in its dataset.
    const fileInput = document.createElement('input');
    fileInput.type = 'file';
    fileInput.id = 'aq-file-input';
    fileInput.style.display = 'none';
    panel.appendChild(fileInput);
    const openPicker = (mode) => () => {
      fileInput.dataset.mode = mode;
      fileInput.click();
    };
    byId('aq-import-merge').onclick = openPicker('merge');
    byId('aq-import-replace').onclick = openPicker('replace');
    fileInput.onchange = handleFileImport;

    updateOverlay();
  }

  /**
   * Re-renders the overlay: list sizes, today's counter and ACTIVE/IDLE state go into
   * #aq-status; the reason for a pause (if any) goes into #aq-message. Elements that are
   * missing from the page are skipped.
   */
  function updateOverlay() {
    const queueCount = getQueue().length;
    const processedCount = getProcessed().length;
    const skippedCount = getSkipped().length;
    const restrictedCount = getRestricted().length;
    const stats = getDailyStats();

    const statusEl = document.getElementById('aq-status');
    if (statusEl) {
      const state = getSessionFlag('aq_processing') ? 'ACTIVE' : 'IDLE';
      statusEl.textContent = `Queue: ${queueCount}\nProcessed: ${processedCount} | Skipped: ${skippedCount}\nRestricted: ${restrictedCount}\nToday: ${stats.processed}/${DAILY_LIMIT}\nState: ` + state;
    }

    const messageEl = document.getElementById('aq-message');
    if (!messageEl) return;

    const captcha = detectCaptcha();
    const paused = !!getSessionFlag('processingPaused');
    const pausedForCaptcha = !!getSessionFlag('aq_paused_for_captcha');

    // Precedence while paused: daily limit, then CAPTCHA, then a manual pause.
    const describePause = () => {
      if (stats.processed >= DAILY_LIMIT) return `PAUSED - Daily limit reached.`;
      if (captcha || pausedForCaptcha) return 'PAUSED - CAPTCHA detected. Auto-resuming...';
      return 'PAUSED by user';
    };
    messageEl.textContent = paused ? describePause() : '';
  }

  /**
   * Appends the non-empty, trimmed lines of the "Add URLs" textarea to the queue, hides the
   * input area and kicks off processing shortly after. Does nothing when no line remains.
   */
  function saveInput() {
    const raw = document.getElementById('aq-text').value || '';
    const urls = raw
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean);
    if (!urls.length) return;

    saveQueue(getQueue().concat(urls));
    document.getElementById('aq-input').style.display = 'none';
    updateOverlay();
    setTimeout(processQueue, 250);
  }
  /**
   * Replaces the queue with the non-empty, trimmed lines of the "Edit Queue" textarea (an
   * empty textarea empties the queue), hides the edit area and kicks off processing shortly
   * after.
   */
  function updateQueue() {
    const raw = document.getElementById('aq-edit-text').value || '';
    const urls = raw
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean);

    saveQueue(urls);
    document.getElementById('aq-edit-area').style.display = 'none';
    updateOverlay();
    setTimeout(processQueue, 250);
  }

  // --- Dynamic Export Logic ---
  /**
   * Finds the Instagram username that appears most often among post URLs of the form
   * `instagram.com/<username>/p/...`.
   *
   * The tally uses a prototype-less object. With a plain `{}` a username such as
   * "constructor" would start from the inherited Object.prototype member instead of 0, and
   * "__proto__" would be dropped entirely.
   *
   * @param {string[]} urls - URLs to inspect; non-matching entries are ignored.
   * @returns {string} The most frequent username, or '' when no URL matches. On a tie the
   *   username that comes later in key order wins.
   */
  function findMostCommonUsername(urls) {
    const counts = Object.create(null);
    const postPattern = /instagram\.com\/([^/]+)\/p\//;
    for (const url of urls) {
      const username = url.match(postPattern)?.[1];
      if (username) {
        counts[username] = (counts[username] || 0) + 1;
      }
    }
    const names = Object.keys(counts);
    if (!names.length) return '';
    return names.reduce((best, name) => (counts[best] > counts[name] ? best : name));
  }

  /**
   * Finds the most frequent "site name" among the URLs, where the site name is the hostname
   * label directly left of the last one (e.g. "instagram" for www.instagram.com). A
   * single-label hostname is used as is.
   *
   * The tally uses a prototype-less object so that a label such as "constructor" cannot
   * collide with a member inherited from Object.prototype.
   *
   * Limitation: two-part public suffixes are not recognized ("bbc.co.uk" counts as "co").
   *
   * @param {string[]} urls - URLs to inspect; entries `new URL()` rejects are skipped.
   * @returns {string} The most frequent site name, or 'domain' when no URL could be parsed.
   *   On a tie the name that comes later in key order wins.
   */
  function findMostCommonDomain(urls) {
    const counts = Object.create(null);
    for (const url of urls) {
      let hostname;
      try {
        hostname = new URL(url).hostname.toLowerCase();
      } catch (e) {
        // Not a parsable absolute URL: skipped on purpose.
        continue;
      }
      const labels = hostname.split('.');
      let name = labels.length > 1 ? labels[labels.length - 2] : hostname;
      // A bare "www.<tld>" host has no real site label; look one further left if possible.
      if (name === 'www') name = labels.length > 2 ? labels[labels.length - 3] : 'unknown';
      counts[name] = (counts[name] || 0) + 1;
    }
    const names = Object.keys(counts);
    if (!names.length) return 'domain';
    return names.reduce((best, name) => (counts[best] > counts[name] ? best : name));
  }

  /**
   * Downloads the restricted-URL list as a text file, one URL per line.
   *
   * File name: `<username>_<domain>-restricted-urls_<YYYY-MM-DD>.txt`; the `<username>_` part
   * is left out when findMostCommonUsername() finds none. Shows an alert and does nothing
   * else when the list is empty.
   *
   * The object URL is revoked on a timer, not right after click(): the browser may not have
   * started reading the blob yet, and revoking too early can cancel the download.
   */
  function exportRestricted() {
    const urls = getRestricted();
    if (!urls.length) return alert('No restricted URLs.');

    const username = findMostCommonUsername(urls);
    const domain = findMostCommonDomain(urls);
    const date = new Date().toISOString().slice(0, 10);
    const prefix = username ? `${username}_${domain}` : domain;
    const filename = `${prefix}-restricted-urls_${date}.txt`;

    const blob = new Blob([urls.join('\n')], { type: 'text/plain' });
    const objectUrl = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename;
    link.click();
    setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
  }

  /**
   * `change` handler of the hidden file input: reads the chosen text file and either merges
   * its non-empty lines into the queue or (mode "replace", after confirmation) wipes all
   * state and starts from those lines. The mode comes from `evt.target.dataset.mode`.
   *
   * The input's value is reset on every exit path once the read has finished, including
   * "Empty file" and a declined confirmation. Without the reset, picking the same file again
   * would not fire another `change` event.
   *
   * @param {Event} evt - The change event of the file input.
   */
  function handleFileImport(evt) {
    const input = evt.target;
    const file = input.files[0];
    if (!file) return;
    const mode = input.dataset.mode;

    const reader = new FileReader();
    reader.onload = (e) => {
      input.value = '';
      const lines = e.target.result.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
      if (!lines.length) return alert('Empty file');
      if (mode === 'replace') {
        if (!confirm('Replace queue?')) return;
        clearForReplace();
      }
      saveQueue(mode === 'replace' ? lines : getQueue().concat(lines));
      setTimeout(processQueue, 250);
    };
    reader.onerror = () => {
      // A failed read leaves the queue untouched; still allow re-selecting the file.
      input.value = '';
      log('Import failed: could not read file');
    };
    reader.readAsText(file);
  }

  /* ===== Logic ===== */
  /**
   * Starts a periodic watchdog that only acts while 'aq_paused_for_captcha' is set. It
   * resumes processing when the list sizes have changed since the previous snapshot, or when
   * the page looks like a WIP/result page and no CAPTCHA is detected any more.
   */
  function startProgressMonitor() {
    const snapshot = () => ({
      p: getProcessed().length,
      s: getSkipped().length,
      r: getRestricted().length,
    });
    const resume = () => {
      removeSessionFlag('processingPaused');
      removeSessionFlag('aq_paused_for_captcha');
      updateOverlay();
      processQueue();
    };

    let previous = snapshot();

    setInterval(() => {
      if (!getSessionFlag('aq_paused_for_captcha')) return;

      const current = snapshot();
      const progressed = ['p', 's', 'r'].some((field) => current[field] !== previous[field]);
      if (progressed) {
        log('Watchdog: Progress detected. Resuming.');
        previous = current;
        resume();
        return;
      }

      const onResultPage = location.pathname.startsWith('/wip/')
        || document.getElementById('SHARE_LONGLINK')
        || document.querySelector('.THUMBS-BLOCK');
      if (onResultPage && !detectCaptcha()) {
        resume();
      }
    }, CAPTCHA_CHECK_INTERVAL_MS);
  }

  /**
   * Advances the queue by one step. Stops early when paused, over the daily limit, or when a
   * URL is already in flight ('aq_processing'). An item already present in the processed,
   * skipped or restricted list is dropped and the function reschedules itself; otherwise the
   * item is marked in flight and the page navigates to `<mirror><url>` after a random delay.
   */
  function processQueue() {
    if (getSessionFlag('processingPaused')) {
      updateOverlay();
      return;
    }
    if (!checkDailyLimit()) return;
    if (getSessionFlag('aq_processing')) return;

    const queue = getQueue();
    if (queue.length === 0) {
      // An empty value makes setSessionFlag remove the flag.
      setSessionFlag('aq_processing', '');
      updateOverlay();
      return;
    }

    const [next] = queue;
    const processed = getProcessed();
    const skipped = getSkipped();
    const restricted = getRestricted();
    const alreadyHandled = [processed, restricted, skipped].some((list) => list.includes(next));

    if (alreadyHandled) {
      log('Already handled:', next);
      queue.shift();
      saveQueue(queue);
      setTimeout(processQueue, PROCESS_DELAY);
      return;
    }

    setSessionFlag('aq_processing', '1');
    setSessionFlag('forceSaveUrl', next);
    updateOverlay();

    const target = getNextDomain() + next;
    const navigate = () => { window.location.href = target; };
    setTimeout(navigate, humanDelay());
  }

  /**
   * Handles the page shown after navigating to `<mirror>/<url>`. If an "archive this url"
   * link exists it is followed; if a history list (#row0) or a "redirected" notice is
   * present the page is passed to handleFinalPage(); otherwise the queue is retried after
   * three seconds.
   */
  function handlePreCheckPage() {
    const queue = getQueue();
    if (queue.length === 0) {
      removeSessionFlag('aq_processing');
      return;
    }

    // Prefer the href pattern; fall back to matching the link text.
    const archiveLink = document.querySelector('a[href^="/?url="]')
      || Array.from(document.querySelectorAll('a')).find((anchor) =>
        anchor.textContent.toLowerCase().includes('archive this url'));

    if (archiveLink) {
      log('Pre-check: Found "archive this url" link. Clicking...');
      setSessionFlag('forceSaveUrl', queue[0]);
      try {
        archiveLink.click();
      } catch (e) {
        window.location.href = archiveLink.href || getNextDomain();
      }
      return;
    }

    if (document.getElementById('row0')) {
      log('Pre-check: History list (#row0) detected. Handing off to Final handler.');
      handleFinalPage();
      return;
    }

    const pageText = document.body.innerText.toLowerCase();
    if (pageText.includes('redirected')) {
      handleFinalPage();
      return;
    }

    setTimeout(processQueue, 3000);
  }

  /**
   * Classifies the page reached for the URL at the head of the queue and settles that URL.
   *
   * Checks, in order:
   *  1. history list (#row0): restricted when the row title contains a known failure phrase
   *     or the row's redirect target is a generic instagram.com/p/ URL, otherwise skipped;
   *  2. "this page was last archived" dialog: skipped;
   *  3. share link or thumbnails present: processed;
   *  4. restricted-content phrases in the page text: restricted;
   *  5. "redirected to" in the page text: restricted;
   *  6. a "save" button: clicked, and the URL stays in flight;
   *  7. anything else: restricted ('unknown').
   *
   * Every settling branch goes through one helper, so each of them — including the final
   * "unknown" one — clears 'forceSaveUrl'. A stale 'forceSaveUrl' would make
   * handleHomepage() re-submit an already settled URL on a later visit to the homepage.
   */
  function handleFinalPage() {
    const q = getQueue();
    if (!q.length) { removeSessionFlag('aq_processing'); return; }
    const current = q[0];
    const body = (document.body.innerText || '').toLowerCase();

    // Records the outcome, dequeues the URL, clears the in-flight flags, schedules the next item.
    const settle = (record) => {
      record();
      q.shift();
      saveQueue(q);
      removeSessionFlag('forceSaveUrl');
      removeSessionFlag('aq_processing');
      setTimeout(processQueue, PROCESS_DELAY);
    };

    const historyRow = document.getElementById('row0');
    if (historyRow) {
      const titleLink = historyRow.querySelector('.TEXT-BLOCK a');
      const titleText = (titleLink ? titleLink.innerText : '').trim().toLowerCase();

      const redirectLink = historyRow.querySelector('ul > li > a');
      const redirectUrl = (redirectLink ? redirectLink.textContent : '').trim();

      log('History row. Title:', titleText, 'Redirect:', redirectUrl);

      const failKeys = ["post isn't available", "page not found", "login • instagram", "not available"];
      const isGenericRedirect = redirectUrl.includes('instagram.com/p/');
      const failed = failKeys.some((k) => titleText.includes(k)) || isGenericRedirect;

      log(failed ? '-> Restricted' : '-> Skipped');
      settle(() => (failed ? saveRestricted(current, 'history-fail') : saveSkipped(current)));
      return;
    }

    const already = document.querySelector('#DIVALREADY, #DIVALREADY2, div[role="dialog"]');
    if (already && (already.innerText || '').toLowerCase().includes('this page was last archived')) {
      settle(() => saveSkipped(current));
      return;
    }

    if (document.getElementById('SHARE_LONGLINK') || document.querySelector('.THUMBS-BLOCK')) {
      settle(() => saveProcessed(current));
      return;
    }

    const restrictedPhrases = ['restricted photo', "post isn't available", 'profile may have been removed'];
    if (restrictedPhrases.some((phrase) => body.includes(phrase))) {
      settle(() => saveRestricted(current, 'restricted-content'));
      return;
    }

    if (body.includes('redirected to')) {
      settle(() => saveRestricted(current, 'redirected'));
      return;
    }

    // Not archived yet: press the page's save button and let the next page load decide.
    const btn = Array.from(document.querySelectorAll('input[type="submit"], button'))
      .find((e) => (e.value || e.innerText || '').toLowerCase().includes('save'));
    if (btn) { setTimeout(() => btn.click(), 80); return; }

    log('Unknown -> Restricted');
    settle(() => saveRestricted(current, 'unknown'));
  }

  /**
   * Handles a /wip/ (work in progress) page. Once #SHARE_LONGLINK is present the forced URL
   * is recorded as processed and the page follows the share link's input value (or, without
   * one, goes back to the queue). Until then the page is reloaded every 15 seconds.
   */
  function handleWipPage() {
    const queue = getQueue();
    if (queue.length === 0) {
      removeSessionFlag('aq_processing');
      return;
    }

    const forcedUrl = getSessionFlag('forceSaveUrl');
    const shareBox = document.getElementById('SHARE_LONGLINK');
    if (!shareBox) {
      setTimeout(() => location.reload(), 15000);
      return;
    }

    if (forcedUrl) saveProcessed(forcedUrl);
    removeSessionFlag('forceSaveUrl');
    removeSessionFlag('aq_processing');

    const shareUrl = shareBox.querySelector('input')?.value;
    if (shareUrl) {
      window.location.href = shareUrl;
    } else {
      setTimeout(processQueue, PROCESS_DELAY);
    }
  }

  /**
   * Handles the homepage / submit form. With a pending 'forceSaveUrl' the URL field is filled
   * in and the first submit button is clicked shortly after; without one the in-flight flag
   * is cleared and the queue is retried after a second.
   */
  function handleHomepage() {
    const pendingUrl = getSessionFlag('forceSaveUrl');
    if (!pendingUrl) {
      removeSessionFlag('aq_processing');
      setTimeout(processQueue, 1000);
      return;
    }

    const urlField = document.querySelector('input[name="url"]');
    if (!urlField) return;

    urlField.value = pendingUrl;
    setTimeout(() => document.querySelector('input[type="submit"]').click(), 100);
  }

  /**
   * Per-page entry point: sets up the overlay and the watchdog, then — unless processing is
   * paused — dispatches to the handler that matches the current page.
   */
  function mainRouter() {
    createOverlay();
    updateOverlay();
    startProgressMonitor();

    if (getSessionFlag('processingPaused')) return;

    const path = location.pathname;

    if (path.startsWith('/wip/')) {
      return handleWipPage();
    }

    const isRootOrSubmit = path.length < 2 || path.startsWith('/submit/');
    if (isRootOrSubmit || document.querySelector('input[name="url"]')) {
      return handleHomepage();
    }

    // "No results" inside #CONTENT, or a history list: both are pre-check pages.
    const looksLikePreCheck =
      (document.getElementById('CONTENT') && document.body.innerText.includes('No results'))
      || document.getElementById('row0');
    if (looksLikePreCheck) {
      return handlePreCheckPage();
    }

    // The requested URL is embedded in the path.
    const isUrlPath = ['/https://', '/http://'].some((prefix) => path.startsWith(prefix));
    if (isUrlPath) {
      handlePreCheckPage();
      return;
    }

    handleFinalPage();
  }

  // Run the router as soon as the DOM is available.
  if (document.readyState !== 'loading') {
    mainRouter();
  } else {
    window.addEventListener('DOMContentLoaded', mainRouter);
  }

  // One second after start, give the queue an extra nudge unless processing is paused.
  setTimeout(() => {
    if (getSessionFlag('processingPaused')) return;
    processQueue();
  }, 1000);

})();