Title Fetcher Optimized

Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only

当前为 2024-11-21 提交的版本,查看 最新版本

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.3
// @description  Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license MIT
// ==/UserScript==

// Load the persisted title cache from localStorage.
// A missing, corrupt, or non-object stored value must not abort the whole
// script, so every failure path falls back to an empty cache.
const titles = (() => {
    try {
        const stored = JSON.parse(localStorage.getItem('titles'));
        if (stored !== null && typeof stored === 'object' && !Array.isArray(stored)) {
            return stored;
        }
    } catch (err) {
        console.warn('Title Fetcher: ignoring unreadable title cache', err);
    }
    return {};
})();
// Domains to exclude from fetching
const skipDomains = ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];
// Write the in-memory title cache back to localStorage
const saveTitles = () => localStorage.setItem('titles', JSON.stringify(titles));

const supportsRangeCache = {}; // Cache to store whether a domain supports range requests

/**
 * Make sure a URL string carries an explicit http(s) scheme.
 *
 * @param {string} url - Raw URL text; surrounding whitespace is trimmed.
 * @returns {string} The trimmed URL, prefixed with "http://" when it does not
 *     already start with "http://" or "https://" (case-insensitive).
 */
const normalizeUrl = (url) => {
    const trimmed = url.trim();
    if (/^https?:\/\//i.test(trimmed)) {
        return trimmed;
    }
    return 'http://' + trimmed;
};

/**
 * Build the request headers that impersonate a crawler user agent.
 *
 * @param {?string} userAgent - 'Googlebot' selects the Googlebot string; any
 *     other truthy value selects the YandexBot string; a falsy value means
 *     "send no user-agent override".
 * @returns {Object<string, string>} An object with 'User-Agent' and
 *     'X-User-Agent' set to the same string, or an empty object.
 */
const getUserAgentHeaders = (userAgent) => {
    if (!userAgent) {
        return {};
    }
    // The Googlebot string previously ended with a stray ")" that left the
    // parentheses unbalanced; it now ends at "Safari/537.36".
    const agentString = userAgent === 'Googlebot'
        ? 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36'
        : 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
    return {
        'User-Agent': agentString,
        'X-User-Agent': agentString,
    };
};

/**
 * Show a fetched title in a cell, both as its visible text and as its tooltip.
 *
 * @param {Object} cell - Element-like object whose `textContent` and `title` are set.
 * @param {string} title - Title text to display.
 */
const updateCellWithTitle = (cell, title) => {
    Object.assign(cell, { textContent: title, title });
};

/**
 * Show an error message in a cell and flag the cell with the error class.
 *
 * @param {Object} cell - Element-like object; its `textContent` and `title`
 *     are set and 'GFD_title_error' is added to its `classList`.
 * @param {string} message - Error text to display.
 */
const updateCellWithError = (cell, message) => {
    for (const prop of ['textContent', 'title']) {
        cell[prop] = message;
    }
    cell.classList.add('GFD_title_error');
};

// Named character references decoded in titles; anything not listed here is
// left in the text exactly as written.
const NAMED_TITLE_ENTITIES = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00A0'],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
    ['laquo', '\u00AB'],
    ['raquo', '\u00BB'],
    ['hellip', '\u2026'],
    ['copy', '\u00A9'],
    ['reg', '\u00AE'],
    ['trade', '\u2122'],
]);

/**
 * Extract the text of the first <title> element from an HTML fragment.
 *
 * Numeric character references and the common named ones listed in
 * NAMED_TITLE_ENTITIES are decoded so the cell shows "Tom & Jerry" rather than
 * "Tom &amp; Jerry". Unknown or out-of-range references are left untouched.
 *
 * @param {string} text - HTML source, possibly truncated by a range request.
 * @returns {?string} The decoded, trimmed title text, or null when `text` is
 *     not a string or contains no complete <title>...</title> pair.
 */
const extractTitle = (text) => {
    if (typeof text !== 'string') {
        return null;
    }
    // \b keeps tags such as <titlebar> from being mistaken for <title>.
    const match = /<title\b[^>]*>([^<]*)<\/title\s*>/i.exec(text);
    if (!match) {
        return null;
    }
    const decoded = match[1].replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (entity, body) => {
        if (body[0] !== '#') {
            return NAMED_TITLE_ENTITIES.get(body) ?? entity;
        }
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws outside the Unicode range, so guard it.
        const isValid = codePoint > 0 && codePoint <= 0x10ffff;
        return isValid ? String.fromCodePoint(codePoint) : entity;
    });
    return decoded.trim();
};

/**
 * Start (or restart) the "Fetching... N" seconds counter shown in a cell.
 *
 * The interval id is stored on `cell.timerId` and the start timestamp on
 * `cell.fetchStartTime`, which is what stopFetchingTimer reads.
 *
 * @param {Object} cell - Element-like object that displays the counter.
 */
const startFetchingTimer = (cell) => {
    // A retry may start a new timer while the previous one is still running.
    // Clear it first: otherwise its id is overwritten, it can never be
    // stopped, and it keeps rewriting the cell text after the title is set.
    if (cell.timerId) {
        clearInterval(cell.timerId);
    }
    let seconds = 0;
    cell.textContent = `Fetching... ${seconds}`;
    cell.timerId = setInterval(() => {
        seconds += 1;
        cell.textContent = `Fetching... ${seconds}`;
    }, 1000);
    cell.fetchStartTime = Date.now();
};

/**
 * Stop the cell's seconds counter, if one is running, and report how long the
 * fetch has been going.
 *
 * @param {Object} cell - Object carrying `timerId` and `fetchStartTime`.
 * @returns {number} Whole seconds elapsed since `cell.fetchStartTime`
 *     (NaN when no start time was ever recorded on the cell).
 */
const stopFetchingTimer = (cell) => {
    const { timerId } = cell;
    if (timerId) {
        clearInterval(timerId);
        cell.timerId = null;
    }
    const elapsedMs = Date.now() - cell.fetchStartTime;
    return Math.floor(elapsedMs / 1000);
};

/**
 * React to a failed title fetch by walking the user-agent fallback chain.
 *
 * Googlebot failures are retried one second later as YandexBot, YandexBot
 * failures are retried immediately with no user-agent override, and a failure
 * of that last attempt is written into the cell as "Error: <status>".
 * Every retry requests the range 'bytes=0-2048'.
 *
 * @param {string} url - URL whose fetch failed.
 * @param {Object} cell - Cell that displays the result.
 * @param {?string} range - Range of the failed request (not used here).
 * @param {?string} userAgent - User agent name used by the failed request.
 * @param {number|string} status - HTTP status or a short failure description.
 */
const handleFetchError = (url, cell, range, userAgent, status) => {
    stopFetchingTimer(cell);
    switch (userAgent) {
        case 'Googlebot':
            // Retry with YandexBot user agent after a short pause
            setTimeout(() => fetchTitleWithRange(url, cell, 'bytes=0-2048', 'YandexBot'), 1000);
            break;
        case 'YandexBot':
            // Final fallback without specifying user agent
            fetchTitleWithRange(url, cell, 'bytes=0-2048', null);
            break;
        default:
            updateCellWithError(cell, `Error: ${status}`);
    }
};

/**
 * Determine whether the server behind `url` advertises byte-range support,
 * by sending a HEAD request through GM_xmlhttpRequest and looking for
 * "Accept-Ranges: bytes" in the response headers.
 *
 * The verdict is cached per origin in supportsRangeCache, so only the first
 * completed check for an origin hits the network. The returned promise always
 * settles: network errors, timeouts and aborts all count as "no range
 * support" instead of leaving the caller waiting forever.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} Resolves to true when byte ranges are advertised.
 *     Rejects only if `url` cannot be parsed by the URL constructor.
 */
const checkSupportsRange = (url) => {
    return new Promise((resolve) => {
        const origin = new URL(url).origin;
        if (supportsRangeCache[origin] !== undefined) {
            resolve(supportsRangeCache[origin]);
            return;
        }

        // Record the verdict for the whole origin and settle the promise.
        const settle = (supportsRange) => {
            supportsRangeCache[origin] = supportsRange;
            resolve(supportsRange);
        };

        GM_xmlhttpRequest({
            method: 'HEAD',
            url: url,
            headers: getUserAgentHeaders('Googlebot'),
            timeout: 10000, // 10 seconds, same budget as the title request
            onload: (res) => {
                const acceptRangesMatch = (res.responseHeaders || '').match(/Accept-Ranges:\s*(\w+)/i);
                // Always store a real boolean, never the null of a failed match.
                settle(acceptRangesMatch !== null && acceptRangesMatch[1].toLowerCase() === 'bytes');
            },
            onerror: () => settle(false),
            ontimeout: () => settle(false),
            onabort: () => settle(false),
        });
    });
};

/**
 * Fetch a page through GM_xmlhttpRequest and show its <title> in `cell`.
 *
 * A title already present in the `titles` cache is shown without any request.
 * Otherwise a GET is issued (10 second timeout) while the cell shows a
 * seconds counter:
 *   - 200 or 206 with a title: the title is shown, cached and persisted.
 *   - 206 without a title for the initial 'bytes=0-1024' range: one retry
 *     with 'bytes=0-2048'.
 *   - 200 or 206 without a title otherwise: "Title not found".
 *   - any other status, or a network error: delegated to handleFetchError,
 *     which walks the user-agent fallback chain.
 *   - timeout: "Request timed out".
 *
 * @param {string} url - Page URL; normalized with normalizeUrl before use.
 * @param {Object} cell - Cell that displays progress and the result.
 * @param {?string} [range='bytes=0-1024'] - Value for the Range header; pass
 *     null to request the whole document with no Range header at all.
 * @param {?string} [userAgent='Googlebot'] - Crawler to impersonate; null
 *     sends no user-agent override.
 */
const fetchTitleWithRange = (url, cell, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
    const normalizedUrl = normalizeUrl(url);

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    startFetchingTimer(cell);

    // Only send Range when one was requested; a literal "Range: null" header
    // is not a valid range specification.
    const headers = {
        ...(range ? { 'Range': range } : {}),
        ...getUserAgentHeaders(userAgent),
    };

    GM_xmlhttpRequest({
        method: 'GET',
        url: normalizedUrl,
        headers,
        onload: (res) => {
            // Stop the counter on every path, including the retry below, so a
            // restarted timer never leaves the previous interval running.
            stopFetchingTimer(cell);

            const isPartial = res.status === 206; // Partial Content
            if (!isPartial && res.status !== 200) {
                handleFetchError(url, cell, range, userAgent, res.status);
                return;
            }

            const title = extractTitle(res.responseText);
            if (title) {
                updateCellWithTitle(cell, title);
                titles[normalizedUrl] = title;
                saveTitles();
            } else if (isPartial && range === 'bytes=0-1024') {
                // Try with a larger range
                fetchTitleWithRange(url, cell, 'bytes=0-2048', userAgent);
            } else {
                updateCellWithError(cell, `Title not found`);
            }
        },
        onerror: () => {
            stopFetchingTimer(cell);
            handleFetchError(url, cell, range, userAgent, 'Network Error');
        },
        ontimeout: () => {
            stopFetchingTimer(cell);
            updateCellWithError(cell, `Request timed out`);
        },
        timeout: 10000 // 10 seconds timeout
    });
};

/**
 * Resolve the title for one link and display it in `cell`.
 *
 * Order of checks: unparsable URL ("Invalid URL" error in the cell), skipped
 * domain ("-"), cached title, and finally a network fetch whose Range header
 * depends on whether the server advertises range support.
 *
 * @param {string} url - Link target as found in the table.
 * @param {Object} cell - Cell that displays the result.
 * @returns {Promise<void>} Settles once the fetch has been started or the
 *     cell has been filled from a shortcut path.
 */
const processUrl = async (url, cell) => {
    const normalizedUrl = normalizeUrl(url);

    // Hrefs such as "javascript:void(0)" do not parse once prefixed with
    // "http://". Callers do not await this function, so report the problem in
    // the cell rather than letting it surface as an unhandled rejection.
    let hostname;
    try {
        ({ hostname } = new URL(normalizedUrl));
    } catch (err) {
        updateCellWithError(cell, 'Invalid URL');
        return;
    }
    const domain = hostname.replace(/^www\./, '');

    if (skipDomains.includes(domain)) {
        cell.textContent = '-';
        cell.title = 'Skipped';
        return;
    }

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    // Ask for only the first bytes when the server supports range requests;
    // otherwise fetch without a range.
    const supportsRange = await checkSupportsRange(normalizedUrl);
    const range = supportsRange ? 'bytes=0-1024' : null;
    fetchTitleWithRange(normalizedUrl, cell, range, 'Googlebot');
};

/**
 * Add the "Title" column to a table and start a title lookup for each new row.
 *
 * The header cell and the per-row cells are inserted just before the last
 * existing cell. Rows that already contain a '.title-cell' are left alone, so
 * the function can be called repeatedly on the same table.
 *
 * @param {Element} table - Table element to augment.
 */
const processTable = table => {
    const headerRow = table.querySelector('thead tr');
    const needsHeader = headerRow && !headerRow.querySelector('.title-header');
    if (needsHeader) {
        const titleHeader = document.createElement('th');
        titleHeader.textContent = 'Title';
        titleHeader.classList.add('title-header');
        headerRow.insertBefore(titleHeader, headerRow.lastElementChild);
    }

    for (const row of table.querySelectorAll('tbody tr')) {
        // Already processed on an earlier pass.
        if (row.querySelector('.title-cell')) continue;

        const titleCell = document.createElement('td');
        titleCell.classList.add('title-cell');
        row.insertBefore(titleCell, row.lastElementChild);

        const content = document.createElement('div');
        content.classList.add('title-content');
        titleCell.appendChild(content);

        // The link is read from the row's second cell.
        const anchor = row.cells[1]?.querySelector('a');
        if (anchor) processUrl(anchor.href, content);
    }
};

// Run processTable on every table matching 'table#myTable' currently in the document.
const processAllTables = () => {
    document.querySelectorAll('table#myTable').forEach(processTable);
};

// Handle the tables that are already present when the script starts.
processAllTables();

// Re-scan on any child-list change under <body> so tables and rows added
// later get their title cells too.
const observer = new MutationObserver(processAllTables);
observer.observe(document.body, { childList: true, subtree: true });