Title Fetcher Optimized

Observes table changes and adds Title and Description columns filled with data fetched from each linked page, using only GM_xmlhttpRequest.

当前为 2024-12-01 提交的版本,查看 最新版本

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.4.3
// @description  Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license      MIT
// ==/UserScript==

/**
 * Reads and JSON-parses a localStorage entry without throwing.
 * @param {string} key - localStorage key to read.
 * @returns {*} The parsed value, or null when the entry is missing, unreadable or not valid JSON.
 */
const readStoredJson = key => {
    try {
        return JSON.parse(localStorage.getItem(key));
    } catch (err) {
        console.warn(`[Title Fetcher] Ignoring unreadable localStorage entry "${key}"`, err);
        return null;
    }
};

// Persisted fetch results keyed by URL; anything other than a plain object is discarded.
const storedCache = readStoredJson('cache');
const cache = storedCache !== null && typeof storedCache === 'object' && !Array.isArray(storedCache)
    ? storedCache
    : {};
const MAX_CACHE_SIZE = 5000;

// Writes the cache back to localStorage; a failed write (e.g. quota exceeded) is logged
// instead of thrown so it cannot break the response handler that triggered it.
const saveCache = () => {
    try {
        localStorage.setItem('cache', JSON.stringify(cache));
    } catch (err) {
        console.warn('[Title Fetcher] Could not persist the cache', err);
    }
};

// Trim cache if necessary: drop entries from the front of Object.keys() order until
// at most MAX_CACHE_SIZE remain, then persist the trimmed cache.
const cacheKeys = Object.keys(cache);
if (cacheKeys.length > MAX_CACHE_SIZE) {
    cacheKeys.slice(0, cacheKeys.length - MAX_CACHE_SIZE).forEach(k => delete cache[k]);
    saveCache();
}

// In-memory (per page load) memo of range-request support, keyed by URL origin.
const supportsRangeCache = {};

// Domains that are never fetched; user edits are persisted under 'GFD_skipDomains'.
const storedSkipDomains = readStoredJson('GFD_skipDomains');
let skipDomains = Array.isArray(storedSkipDomains)
    ? storedSkipDomains
    : ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];

// Create and append textarea for skipDomains (comma-separated list, editable by the user)
const textarea = Object.assign(document.createElement('textarea'), {
    value: skipDomains.join(', '),
    title: "Домены для которых никогда не собирать тайтл и деск"
});
// Fall back to <body> so a missing #navbar-header does not abort the whole script.
const textareaHost = document.querySelector("#navbar-header") ?? document.body;
textareaHost.appendChild(textarea);

// Pending debounced refresh, if any.
let refreshTimer;
textarea.addEventListener('input', () => {
    // Apply and persist the new list immediately...
    skipDomains = textarea.value.split(',').map(d => d.trim()).filter(Boolean);
    localStorage.setItem('GFD_skipDomains', JSON.stringify(skipDomains));
    // ...but re-process the table only once typing pauses: refreshing on every keystroke
    // would restart the fetch for every uncached row each time a character is typed.
    clearTimeout(refreshTimer);
    refreshTimer = setTimeout(() => refreshTable(), 300);
});

/**
 * Trims a URL string and makes sure it starts with a scheme.
 * @param {string} url - Raw URL text, possibly padded with whitespace.
 * @returns {string} The trimmed URL, prefixed with "http://" unless it already
 *   begins with http:// or https:// (case-insensitive).
 */
const normalizeUrl = url => {
    const trimmed = url.trim();
    const hasScheme = /^https?:\/\//i.test(trimmed);
    if (hasScheme) {
        return trimmed;
    }
    return `http://${trimmed}`;
};

/**
 * Builds the request headers used to present the request as a given crawler.
 * @param {?string} ua - Crawler key ('Googlebot' or 'YandexBot'); anything else means "no override".
 * @returns {Object} An object setting both 'User-Agent' and 'X-User-Agent' to the crawler's
 *   user-agent string, or an empty object when `ua` is falsy or not a known key.
 */
const getUserAgentHeaders = ua => {
    const agents = {
        // The original string ended with a stray unbalanced ')', removed here.
        'Googlebot': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36',
        'YandexBot': 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)'
    };
    const agentString = agents[ua];
    // The typeof check rejects unknown keys (undefined) and inherited members such as 'toString',
    // so header values can never be undefined or a function.
    return typeof agentString === 'string'
        ? { 'User-Agent': agentString, 'X-User-Agent': agentString }
        : {};
};

/**
 * Decodes HTML character references in a string.
 *
 * Handles the named entities listed below, decimal (&#8212;) and hexadecimal (&#x2014;)
 * numeric references, and numeric references whose ampersand was itself escaped (&amp;#39;).
 * Unknown named entities and out-of-range numeric references are left untouched.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @returns {string} The text with recognised entities replaced by their characters.
 */
const decodeEntities = str => {
    const entities = {
        '&nbsp;': ' ', '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&apos;': "'",
        '&copy;': '©', '&reg;': '®', '&euro;': '€', '&trade;': '™', '&mdash;': '—', '&ndash;': '–',
        '&uarr;': '↑', '&darr;': '↓', '&larr;': '←', '&rarr;': '→', '&harr;': '↔', '&bull;': '•',
        '&hellip;': '…', '&laquo;': '«', '&raquo;': '»', '&lsquo;': '‘', '&rsquo;': '’',
        '&ldquo;': '“', '&rdquo;': '”', '&frasl;': '⁄', '&times;': '×', '&divide;': '÷', '&para;': '¶'
    };
    const MAX_CODE_POINT = 0x10FFFF;
    return str.replace(/&(?:amp;)?#(?:[xX]([0-9a-fA-F]+)|(\d+));|&\w+;/g, (match, hex, dec) => {
        if (hex !== undefined || dec !== undefined) {
            const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
            // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF such as emoji,
            // but throws on values past U+10FFFF, so those are passed through unchanged.
            return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : match;
        }
        return entities[match] || match;
    });
};

/**
 * Shows a value in a result cell, mirrors it into the tooltip, and keeps the error
 * styling in sync with the value.
 * @param {Element} cell - Element whose textContent and title are set.
 * @param {*} text - Value to display; null/undefined render as an empty string.
 */
const updateCell = (cell, text) => {
    const shown = String(text ?? '');
    cell.textContent = shown;
    cell.title = shown;
    // toggle() rather than add(): a cell that previously showed an error must lose the
    // error class once it is updated with a normal value.
    cell.classList.toggle('GFD_title_error', shown.startsWith('Error'));
};

/**
 * Extracts the page title and meta description from (possibly truncated) HTML text.
 * @param {string} text - HTML source.
 * @returns {{title: (string|undefined), description: (string|undefined)}} Trimmed values;
 *   a field is undefined when the corresponding tag was not found.
 */
const extractContent = text => {
    const titleMatch = text.match(/<title[^>]*>([^<]*)<\/title>/i);

    // The content value is captured up to its *matching* quote, so an apostrophe inside a
    // double-quoted value (content="It's ...") no longer cuts the description short.
    // Both attribute orders are accepted, with other attributes allowed in between.
    // The closing '>' is deliberately not required, because range requests may cut the tag off.
    const nameFirst = /<meta\b[^>]*?\sname=["']description["'][^>]*?\scontent=(?:"([^"]*)"|'([^']*)')/i;
    const contentFirst = /<meta\b[^>]*?\scontent=(?:"([^"]*)"|'([^']*)')[^>]*?\sname=["']description["']/i;
    const metaMatch = text.match(nameFirst) || text.match(contentFirst);

    return {
        title: titleMatch?.[1].trim(),
        // Group 1 holds a double-quoted value, group 2 a single-quoted one.
        description: (metaMatch?.[1] ?? metaMatch?.[2])?.trim()
    };
};

/**
 * Fetches a page and writes its title and description into the two given cells.
 *
 * A cached result for `url` is shown immediately without any request. Otherwise both cells
 * display a "Fetching... N" counter that ticks once per second until the request settles.
 *
 * @param {string} url - Page to fetch; also the cache key.
 * @param {Element} cellTitle - Cell that receives the title.
 * @param {Element} cellDesc - Cell that receives the description.
 * @param {?string} range - Value for the Range request header, or a falsy value for none.
 * @param {?string} ua - Crawler key passed to getUserAgentHeaders, or a falsy value for none.
 */
const fetchData = (url, cellTitle, cellDesc, range, ua) => {
    const cached = cache[url];
    if (cached) {
        updateCell(cellTitle, cached.title);
        updateCell(cellDesc, cached.description || '-');
        return;
    }

    // Progress indicator, refreshed every second while the request is pending.
    cellDesc.textContent = 'Fetching... 0';
    cellTitle.textContent = 'Fetching... 0';
    let elapsed = 0;
    const ticker = setInterval(() => {
        elapsed += 1;
        const label = `Fetching... ${elapsed}`;
        cellTitle.textContent = label;
        cellDesc.textContent = label;
    }, 1000);
    const stopTicker = () => clearInterval(ticker);

    const headers = {};
    if (range) {
        headers.Range = range;
    }
    Object.assign(headers, getUserAgentHeaders(ua));

    const onLoaded = res => {
        stopTicker();

        if (res.status !== 200 && res.status !== 206) {
            handleError(url, cellTitle, cellDesc, range, ua, res.status);
            return;
        }

        const { title, description } = extractContent(res.responseText);
        if (!title) {
            if (range === 'bytes=0-1024') {
                // The first kilobyte had no <title>; retry once with a larger slice.
                fetchData(url, cellTitle, cellDesc, 'bytes=0-2048', ua);
                return;
            }
            updateCell(cellTitle, 'Title not found');
            updateCell(cellDesc, 'Description not found');
            return;
        }

        const decodedTitle = decodeEntities(title);
        const decodedDesc = decodeEntities(description || '-');
        updateCell(cellTitle, decodedTitle);
        updateCell(cellDesc, decodedDesc);
        cache[url] = { title: decodedTitle, description: decodedDesc };
        saveCache();
    };

    const onFailed = () => {
        stopTicker();
        handleError(url, cellTitle, cellDesc, range, ua, 'Network Error');
    };

    const onTimedOut = () => {
        stopTicker();
        updateCell(cellTitle, 'Request timed out');
        updateCell(cellDesc, 'Request timed out');
    };

    GM_xmlhttpRequest({
        method: 'GET',
        url,
        headers,
        onload: onLoaded,
        onerror: onFailed,
        ontimeout: onTimedOut,
        timeout: 10000
    });
};

/**
 * Reacts to a failed fetch by retrying with the next identity in the chain
 * Googlebot -> YandexBot -> no user-agent override; once the chain is exhausted
 * the failure is written into both cells.
 *
 * @param {string} url - URL that failed.
 * @param {Element} cellTitle - Title cell for this URL.
 * @param {Element} cellDesc - Description cell for this URL.
 * @param {?string} range - Range header value used by the failed attempt (reused on retry).
 * @param {?string} ua - Crawler key used by the failed attempt.
 * @param {(number|string)} status - HTTP status or error label shown as "Error: <status>".
 */
const handleError = (url, cellTitle, cellDesc, range, ua, status) => {
    switch (ua) {
        case 'Googlebot':
            fetchData(url, cellTitle, cellDesc, range, 'YandexBot');
            break;
        case 'YandexBot':
            fetchData(url, cellTitle, cellDesc, range, null);
            break;
        default: {
            const message = `Error: ${status}`;
            updateCell(cellTitle, message);
            updateCell(cellDesc, message);
        }
    }
};

/**
 * Determines whether the server behind `url` advertises byte-range support.
 *
 * Sends one HEAD request per origin and looks for an "Accept-Ranges: bytes" response header.
 * The pending promise itself is memoised in supportsRangeCache, so many URLs on the same
 * origin processed in one burst share a single HEAD request instead of each sending one.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} Resolves true when byte ranges are advertised; resolves false on
 *   network error, timeout or abort. Rejects only if `url` cannot be parsed.
 */
const checkRangeSupport = async url => {
    const origin = new URL(url).origin;

    if (supportsRangeCache[origin] === undefined) {
        supportsRangeCache[origin] = new Promise(resolve => {
            GM_xmlhttpRequest({
                method: 'HEAD',
                url,
                headers: getUserAgentHeaders('Googlebot'),
                // Without a timeout (and its handler) an unresponsive server would leave the
                // promise pending forever and the row would never be filled in.
                timeout: 10000,
                onload: res => resolve(/Accept-Ranges:\s*bytes/i.test(res.responseHeaders)),
                onerror: () => resolve(false),
                ontimeout: () => resolve(false),
                onabort: () => resolve(false)
            });
        });
    }

    return supportsRangeCache[origin];
};

/**
 * Fills the title/description cells for one link: skipped domains get "-", cached URLs are
 * shown from the cache, everything else is fetched (with a small byte range when the
 * server supports range requests), starting with the Googlebot identity.
 *
 * @param {string} url - Link target as found in the table.
 * @param {Element} cellTitle - Cell that receives the title.
 * @param {Element} cellDesc - Cell that receives the description.
 * @returns {Promise<void>}
 */
const processUrl = async (url, cellTitle, cellDesc) => {
    const normalized = normalizeUrl(url);

    let domain;
    try {
        domain = new URL(normalized).hostname.replace(/^www\./, '');
    } catch {
        // Callers do not await or catch this promise, so an unparseable href would otherwise
        // surface as an unhandled rejection and leave the cells blank.
        updateCell(cellTitle, 'Error: Invalid URL');
        updateCell(cellDesc, 'Error: Invalid URL');
        return;
    }

    if (skipDomains.includes(domain)) {
        updateCell(cellTitle, '-');
        updateCell(cellDesc, '-');
        return;
    }

    if (cache[normalized]) {
        updateCell(cellTitle, cache[normalized].title);
        updateCell(cellDesc, cache[normalized].description || '-');
        return;
    }

    const supportsRange = await checkRangeSupport(normalized);
    fetchData(normalized, cellTitle, cellDesc, supportsRange ? 'bytes=0-1024' : null, 'Googlebot');
};

/**
 * Adds "Title" and "Description" columns to a table and starts filling them in.
 *
 * Header cells are added once (detected via the .title-header class). Every body row that
 * does not yet contain a .title-cell gets two new cells inserted before its last cell; the
 * link is then looked up in the row's second cell and handed to processUrl.
 *
 * @param {Element} table - Table element to decorate.
 */
const processTable = table => {
    const headerRow = table.querySelector('thead tr');
    if (headerRow && !headerRow.querySelector('.title-header')) {
        for (const label of ['Title', 'Description']) {
            const th = document.createElement('th');
            th.textContent = label;
            th.classList.add(label.toLowerCase() + '-header');
            headerRow.insertBefore(th, headerRow.lastElementChild);
        }
    }

    // Inserts <td class="<cls>-cell"><div></div></td> before the row's last cell and
    // returns the inner div, which is what later receives the text.
    const addCell = (row, cls) => {
        const td = document.createElement('td');
        td.classList.add(`${cls}-cell`);
        const inner = document.createElement('div');
        td.appendChild(inner);
        row.insertBefore(td, row.lastElementChild);
        return inner;
    };

    for (const row of table.querySelectorAll('tbody tr')) {
        // Already decorated on an earlier pass.
        if (row.querySelector('.title-cell')) {
            continue;
        }

        const titleDiv = addCell(row, 'title');
        const descDiv = addCell(row, 'description');

        const anchor = row.cells[1]?.querySelector('a');
        if (anchor) {
            processUrl(anchor.href, titleDiv, descDiv);
            continue;
        }
        updateCell(titleDiv, '-');
        updateCell(descDiv, 'No link');
    }
};

/**
 * Re-evaluates every row of every table#myTable against the current skipDomains list,
 * re-running processUrl for rows with a link and writing "-" / "No link" otherwise.
 */
const refreshTable = () => {
    document.querySelectorAll('table#myTable').forEach(table => {
        table.querySelectorAll('tbody tr').forEach(row => {
            const cellTitle = row.querySelector('.title-cell div');
            const cellDesc = row.querySelector('.description-cell div');
            // A row that has not been given its title/description cells yet has nothing to
            // refresh; passing null cells on would throw inside updateCell.
            if (!cellTitle || !cellDesc) return;

            const link = row.cells[1]?.querySelector('a');
            if (link) processUrl(link.href, cellTitle, cellDesc);
            else {
                updateCell(cellTitle, '-');
                updateCell(cellDesc, 'No link');
            }
        });
    });
};

// Runs processTable over every table#myTable currently in the document.
const processAllTables = () => {
    for (const table of document.querySelectorAll('table#myTable')) {
        processTable(table);
    }
};

// Initial processing
processAllTables();

// Observe mutations to handle dynamic changes: any child-list change anywhere under <body>
// triggers another pass over the tables.
const observer = new MutationObserver(() => {
    processAllTables();
});
observer.observe(document.body, { childList: true, subtree: true });