Title Fetcher Optimized

Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only

当前为 2024-11-21 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子、Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.3
// @description  Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license MIT
// ==/UserScript==

// Initialize titles cache from localStorage
const titles = JSON.parse(localStorage.getItem('titles')) || {};
const skipDomains = ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"]; // Domains to exclude from fetching
const saveTitles = () => localStorage.setItem('titles', JSON.stringify(titles));

const supportsRangeCache = {}; // Cache to store whether a domain supports range requests

// Helper function to normalize the URL
const normalizeUrl = (url) => {
    return /^https?:\/\//i.test(url.trim()) ? url.trim() : 'http://' + url.trim();
};

// Helper function to get user agent headers
const getUserAgentHeaders = (userAgent) => {
    const headers = {};
    if (userAgent) {
        const agentString = userAgent === 'Googlebot' ?
              'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36)' :
        'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
        headers['User-Agent'] = agentString;
        headers['X-User-Agent'] = agentString;
    }
    return headers;
};

// Helper function to update the cell with the title
const updateCellWithTitle = (cell, title) => {
    cell.textContent = title;
    cell.title = title;
};

// Helper function to update the cell with an error
const updateCellWithError = (cell, message) => {
    cell.textContent = message;
    cell.title = message;
    cell.classList.add('GFD_title_error');
};

// Helper function to extract the title from response text
const extractTitle = (text) => {
    const match = text.match(/<title[^>]*>([^<]*)<\/title>/i);
    return match ? match[1].trim() : null;
};

// Helper function to start fetching timer
const startFetchingTimer = (cell) => {
    let seconds = 0;
    cell.textContent = `Fetching... ${seconds}`;
    cell.timerId = setInterval(() => {
        seconds += 1;
        cell.textContent = `Fetching... ${seconds}`;
    }, 1000);
    cell.fetchStartTime = Date.now();
};

// Helper function to stop fetching timer and return elapsed seconds
const stopFetchingTimer = (cell) => {
    if (cell.timerId) {
        clearInterval(cell.timerId);
        cell.timerId = null;
    }
    const elapsedSeconds = Math.floor((Date.now() - cell.fetchStartTime) / 1000);
    return elapsedSeconds;
};

// Error handling function
const handleFetchError = (url, cell, range, userAgent, status) => {
    const elapsedSeconds = stopFetchingTimer(cell);
    if (userAgent === 'Googlebot') {
        // Retry with YandexBot user agent
        setTimeout(() => fetchTitleWithRange(url, cell, 'bytes=0-2048', 'YandexBot'), 1000);
    } else if (userAgent === 'YandexBot') {
        // Final fallback without specifying user agent
        fetchTitleWithRange(url, cell, 'bytes=0-2048', null);
    } else {
        updateCellWithError(cell, `Error: ${status}`);
    }
};

// Function to check if the server supports range requests using GM_xmlhttpRequest
const checkSupportsRange = (url) => {
    return new Promise((resolve) => {
        const domain = new URL(url).origin;
        if (supportsRangeCache[domain] !== undefined) {
            resolve(supportsRangeCache[domain]);
            return;
        }

        GM_xmlhttpRequest({
            method: 'HEAD',
            url: url,
            headers: getUserAgentHeaders('Googlebot'),
            onload: (res) => {
                const acceptRangesMatch = res.responseHeaders.match(/Accept-Ranges:\s*(\w+)/i);
                const supportsRange = acceptRangesMatch && acceptRangesMatch[1].toLowerCase() === 'bytes';
                supportsRangeCache[domain] = supportsRange;
                resolve(supportsRange);
            },
            onerror: () => {
                supportsRangeCache[domain] = false;
                resolve(false);
            }
        });
    });
};

// Function to fetch the title using GM_xmlhttpRequest with range requests
const fetchTitleWithRange = (url, cell, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
    const normalizedUrl = normalizeUrl(url);

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    startFetchingTimer(cell);

    GM_xmlhttpRequest({
        method: 'GET',
        url: normalizedUrl,
        headers: {
            'Range': range,
            ...getUserAgentHeaders(userAgent),
        },
        onload: (res) => {
            if (res.status === 206) { // Partial Content
                const title = extractTitle(res.responseText);
                if (title) {
                    stopFetchingTimer(cell);
                    updateCellWithTitle(cell, title);
                    titles[normalizedUrl] = title;
                    saveTitles();
                } else if (range === 'bytes=0-1024') {
                    // Try with a larger range
                    fetchTitleWithRange(url, cell, 'bytes=0-2048', userAgent);
                } else {
                    stopFetchingTimer(cell);
                    updateCellWithError(cell, `Title not found`);
                }
            } else if (res.status === 200) { // OK, full content
                const title = extractTitle(res.responseText);
                if (title) {
                    stopFetchingTimer(cell);
                    updateCellWithTitle(cell, title);
                    titles[normalizedUrl] = title;
                    saveTitles();
                } else {
                    stopFetchingTimer(cell);
                    updateCellWithError(cell, `Title not found`);
                }
            } else {
                stopFetchingTimer(cell);
                handleFetchError(url, cell, range, userAgent, res.status);
            }
        },
        onerror: () => {
            stopFetchingTimer(cell);
            handleFetchError(url, cell, range, userAgent, 'Network Error');
        },
        ontimeout: () => {
            stopFetchingTimer(cell);
            updateCellWithError(cell, `Request timed out`);
        },
        timeout: 10000 // 10 seconds timeout
    });
};

// Main function to process each URL
const processUrl = async (url, cell) => {
    const normalizedUrl = normalizeUrl(url);
    const domain = new URL(normalizedUrl).hostname.replace(/^www\./, '');

    if (skipDomains.includes(domain)) {
        cell.textContent = '-';
        cell.title = 'Skipped';
        return;
    }

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    const supportsRange = await checkSupportsRange(normalizedUrl);
    if (supportsRange) {
        // Use range requests with GM_xmlhttpRequest
        fetchTitleWithRange(normalizedUrl, cell, 'bytes=0-1024', 'Googlebot');
    } else {
        // Server does not support range requests, attempt without range
        fetchTitleWithRange(normalizedUrl, cell, null, 'Googlebot');
    }
};

// Function to process the table
const processTable = table => {
    const header = table.querySelector('thead tr');
    if (header && !header.querySelector('.title-header')) {
        const th = document.createElement('th');
        th.textContent = 'Title';
        th.classList.add('title-header');
        header.insertBefore(th, header.lastElementChild);
    }
    table.querySelectorAll('tbody tr').forEach(row => {
        if (!row.querySelector('.title-cell')) {
            const cell = document.createElement('td');
            cell.classList.add('title-cell');
            row.insertBefore(cell, row.lastElementChild);

            const div = document.createElement('div');
            div.classList.add('title-content');
            cell.appendChild(div);

            const link = row.cells[1]?.querySelector('a');
            if (link) processUrl(link.href, div);
        }
    });
};

// Initialize the script by processing existing tables
document.querySelectorAll('table#myTable').forEach(processTable);

// Observe mutations to dynamically process new tables or rows
const observer = new MutationObserver(() => {
    document.querySelectorAll('table#myTable').forEach(processTable);
});
observer.observe(document.body, { childList: true, subtree: true });