Title Fetcher Optimized

Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only

当前为 2024-12-01 提交的版本,查看 最新版本

您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.4.3
// @description  Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license      MIT
// ==/UserScript==

const cache = JSON.parse(localStorage.getItem('cache')) || {};
const MAX_CACHE_SIZE = 5000;

// Trim cache if necessary
if (Object.keys(cache).length > MAX_CACHE_SIZE) {
    Object.keys(cache).slice(0, Object.keys(cache).length - MAX_CACHE_SIZE).forEach(k => delete cache[k]);
    localStorage.setItem('cache', JSON.stringify(cache));
}

const saveCache = () => localStorage.setItem('cache', JSON.stringify(cache));
const supportsRangeCache = {};
let skipDomains = JSON.parse(localStorage.getItem('GFD_skipDomains')) || ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];

// Create and append textarea for skipDomains
const textarea = Object.assign(document.createElement('textarea'), {
    value: skipDomains.join(', '),
    title: "Домены для которых никогда не собирать тайтл и деск"
});
document.querySelector("#navbar-header").appendChild(textarea);

textarea.addEventListener('input', () => {
    skipDomains = textarea.value.split(',').map(d => d.trim()).filter(Boolean);
    localStorage.setItem('GFD_skipDomains', JSON.stringify(skipDomains));
    refreshTable();
});

// Normalize URLs
const normalizeUrl = url => /^https?:\/\//i.test(url.trim()) ? url.trim() : `http://${url.trim()}`;

// Get headers based on user agent
const getUserAgentHeaders = ua => {
    const agents = {
        'Googlebot': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36)',
        'YandexBot': 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)'
    };
    return ua ? { 'User-Agent': agents[ua], 'X-User-Agent': agents[ua] } : {};
};

// Decode HTML entities
const decodeEntities = str => {
    const entities = {
        '&nbsp;': ' ', '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&apos;': "'",
        '&copy;': '©', '&reg;': '®', '&euro;': '€', '&trade;': '™', '&mdash;': '—', '&ndash;': '–',
        '&uarr;': '↑', '&darr;': '↓', '&larr;': '←', '&rarr;': '→', '&harr;': '↔', '&bull;': '•',
        '&hellip;': '…', '&laquo;': '«', '&raquo;': '»', '&lsquo;': '‘', '&rsquo;': '’',
        '&ldquo;': '“', '&rdquo;': '”', '&frasl;': '⁄', '&times;': '×', '&divide;': '÷', '&para;': '¶'
    };
    return str.replace(/&amp;#(\d+);|&#(\d+);|&\w+;/g, (match, dec1, dec2) => {
        if (dec1) return String.fromCharCode(dec1);
        if (dec2) return String.fromCharCode(dec2);
        return entities[match] || match;
    });
};

// Update cell content
const updateCell = (cell, text) => {
    cell.textContent = text;
    cell.title = text;
    if (text.startsWith('Error')) cell.classList.add('GFD_title_error');
};

// Extract title and description from HTML
const extractContent = text => ({
    title: (text.match(/<title[^>]*>([^<]*)<\/title>/i) || [])[1]?.trim(),
    description: (text.match(/<meta\s+name=["']description["']\s+content=["']([^"']*)["']/i) || [])[1]?.trim()
});

// Fetch data with optional range and user agent
const fetchData = (url, cellTitle, cellDesc, range, ua) => {
    if (cache[url]) {
        updateCell(cellTitle, cache[url].title);
        updateCell(cellDesc, cache[url].description || '-');
        return;
    }

    cellTitle.textContent = cellDesc.textContent = 'Fetching... 0';
    let seconds = 0;
    const timer = setInterval(() => {
        seconds++;
        cellTitle.textContent = `Fetching... ${seconds}`;
        cellDesc.textContent = `Fetching... ${seconds}`;
    }, 1000);

    GM_xmlhttpRequest({
        method: 'GET',
        url,
        headers: { ...(range ? { 'Range': range } : {}), ...getUserAgentHeaders(ua) },
        onload: res => {
            clearInterval(timer);
            if ([200, 206].includes(res.status)) {
                const { title, description } = extractContent(res.responseText);
                if (title) {
                    updateCell(cellTitle, decodeEntities(title));
                    updateCell(cellDesc, decodeEntities(description || '-'));
                    cache[url] = { title: decodeEntities(title), description: decodeEntities(description || '-') };
                    saveCache();
                } else if (range === 'bytes=0-1024') {
                    fetchData(url, cellTitle, cellDesc, 'bytes=0-2048', ua);
                } else {
                    updateCell(cellTitle, 'Title not found');
                    updateCell(cellDesc, 'Description not found');
                }
            } else handleError(url, cellTitle, cellDesc, range, ua, res.status);
        },
        onerror: () => { clearInterval(timer); handleError(url, cellTitle, cellDesc, range, ua, 'Network Error'); },
        ontimeout: () => { clearInterval(timer); updateCell(cellTitle, 'Request timed out'); updateCell(cellDesc, 'Request timed out'); },
        timeout: 10000
    });
};

// Handle fetch errors with retries
const handleError = (url, cellTitle, cellDesc, range, ua, status) => {
    if (ua === 'Googlebot') {
        fetchData(url, cellTitle, cellDesc, range, 'YandexBot');
    } else if (ua === 'YandexBot') {
        fetchData(url, cellTitle, cellDesc, range, null);
    } else {
        updateCell(cellTitle, `Error: ${status}`);
        updateCell(cellDesc, `Error: ${status}`);
    }
};

// Check if server supports range requests
const checkRangeSupport = url => new Promise(resolve => {
    const domain = new URL(url).origin;
    if (supportsRangeCache[domain] !== undefined) return resolve(supportsRangeCache[domain]);

    GM_xmlhttpRequest({
        method: 'HEAD',
        url,
        headers: getUserAgentHeaders('Googlebot'),
        onload: res => {
            const supports = /Accept-Ranges:\s*bytes/i.test(res.responseHeaders);
            supportsRangeCache[domain] = supports;
            resolve(supports);
        },
        onerror: () => { supportsRangeCache[domain] = false; resolve(false); }
    });
});

// Process each URL
const processUrl = async (url, cellTitle, cellDesc) => {
    const normalized = normalizeUrl(url);
    const domain = new URL(normalized).hostname.replace(/^www\./, '');

    if (skipDomains.includes(domain)) {
        updateCell(cellTitle, '-');
        updateCell(cellDesc, '-');
        return;
    }

    if (cache[normalized]) {
        updateCell(cellTitle, cache[normalized].title);
        updateCell(cellDesc, cache[normalized].description || '-');
        return;
    }

    const supportsRange = await checkRangeSupport(normalized);
    fetchData(normalized, cellTitle, cellDesc, supportsRange ? 'bytes=0-1024' : null, 'Googlebot');
};

// Process the table by adding headers and cells
const processTable = table => {
    const header = table.querySelector('thead tr');
    if (header && !header.querySelector('.title-header')) {
        ['Title', 'Description'].forEach(text => {
            const th = document.createElement('th');
            th.textContent = text;
            th.classList.add(text.toLowerCase() + '-header');
            header.insertBefore(th, header.lastElementChild);
        });
    }

    table.querySelectorAll('tbody tr').forEach(row => {
        if (!row.querySelector('.title-cell')) {
            const cells = ['title', 'description'].map(cls => {
                const td = document.createElement('td');
                td.classList.add(`${cls}-cell`);
                const div = document.createElement('div');
                td.appendChild(div);
                row.insertBefore(td, row.lastElementChild);
                return div;
            });

            const link = row.cells[1]?.querySelector('a');
            if (link) processUrl(link.href, cells[0], cells[1]);
            else {
                updateCell(cells[0], '-');
                updateCell(cells[1], 'No link');
            }
        }
    });
};

// Refresh table based on updated skipDomains
const refreshTable = () => {
    document.querySelectorAll('table#myTable').forEach(table => {
        table.querySelectorAll('tbody tr').forEach(row => {
            const cellTitle = row.querySelector('.title-cell div');
            const cellDesc = row.querySelector('.description-cell div');
            const link = row.cells[1]?.querySelector('a');
            if (link) processUrl(link.href, cellTitle, cellDesc);
            else {
                updateCell(cellTitle, '-');
                updateCell(cellDesc, 'No link');
            }
        });
    });
};

// Initial processing
document.querySelectorAll('table#myTable').forEach(processTable);

// Observe mutations to handle dynamic changes
const observer = new MutationObserver(() => document.querySelectorAll('table#myTable').forEach(processTable));
observer.observe(document.body, { childList: true, subtree: true });