Title Fetcher Optimized

Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only

目前為 2024-11-21 提交的版本,檢視 最新版本

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.3
// @description  Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license MIT
// ==/UserScript==

// Title cache, keyed by normalized URL, restored from localStorage.
// A missing, corrupted, or non-object entry must not abort the whole script,
// so any read/parse failure falls back to an empty cache.
const titles = (() => {
    try {
        const stored = JSON.parse(localStorage.getItem('titles'));
        const isPlainObject = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
        return isPlainObject ? stored : {};
    } catch (err) {
        console.warn('Title Fetcher: could not restore the title cache, starting empty.', err);
        return {};
    }
})();

// Domains to exclude from fetching (compared against the hostname without a leading "www.")
const skipDomains = ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];

// Persists the title cache. Persistence is best-effort: a full or disabled
// storage must not break the request callback that triggered the save.
const saveTitles = () => {
    try {
        localStorage.setItem('titles', JSON.stringify(titles));
    } catch (err) {
        console.warn('Title Fetcher: could not persist the title cache.', err);
    }
};

const supportsRangeCache = {}; // Cache to store whether a domain supports range requests

/**
 * Makes sure a URL string carries an explicit http(s) scheme.
 *
 * @param {string} url - Raw URL text; surrounding whitespace is ignored.
 * @returns {string} The trimmed URL, prefixed with "http://" when it does not
 *     already start with "http://" or "https://" (case-insensitive).
 */
const normalizeUrl = (url) => {
    const trimmed = url.trim();
    const hasHttpScheme = /^https?:\/\//i.test(trimmed);
    if (hasHttpScheme) {
        return trimmed;
    }
    return 'http://' + trimmed;
};

/**
 * Builds the request headers used to present the request as a search-engine crawler.
 *
 * @param {?string} userAgent - 'Googlebot' selects the Googlebot string; any other
 *     truthy value selects the YandexBot string; a falsy value means "send no
 *     user-agent override".
 * @returns {Object<string, string>} Headers carrying the agent string in both
 *     'User-Agent' and 'X-User-Agent', or an empty object when no agent is requested.
 */
const getUserAgentHeaders = (userAgent) => {
    if (!userAgent) {
        return {};
    }
    // The Googlebot string previously ended with a stray ")" after "Safari/537.36",
    // which made it differ from the user agent the real crawler sends.
    const agentString = userAgent === 'Googlebot'
        ? 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36'
        : 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
    return {
        'User-Agent': agentString,
        'X-User-Agent': agentString,
    };
};

/**
 * Shows a fetched title in a cell, both as its visible text and as its tooltip.
 *
 * @param {Object} cell - Element-like object whose `textContent` and `title` are set.
 * @param {string} title - The title to display.
 */
const updateCellWithTitle = (cell, title) => {
    Object.assign(cell, { textContent: title, title });
};

/**
 * Shows an error message in a cell (text and tooltip) and flags the cell with
 * the 'GFD_title_error' class.
 *
 * @param {Object} cell - Element-like object with `textContent`, `title` and `classList`.
 * @param {string} message - The message to display.
 */
const updateCellWithError = (cell, message) => {
    for (const prop of ['textContent', 'title']) {
        cell[prop] = message;
    }
    cell.classList.add('GFD_title_error');
};

/**
 * Pulls the contents of the first <title> element out of an HTML fragment.
 *
 * @param {string} text - HTML source (possibly truncated).
 * @returns {?string} The trimmed title text, or null when no complete
 *     <title>…</title> pair without a nested "<" is present.
 */
const extractTitle = (text) => {
    const found = text.match(/<title[^>]*>([^<]*)<\/title>/i);
    if (!found) {
        return null;
    }
    const [, rawTitle] = found;
    return rawTitle.trim();
};

/**
 * Starts a once-per-second "Fetching... N" counter in the cell and records the
 * start time on the cell.
 *
 * Sets `cell.timerId` (interval handle) and `cell.fetchStartTime` (ms timestamp).
 *
 * @param {Object} cell - Element-like object that displays the counter.
 */
const startFetchingTimer = (cell) => {
    // A retry may restart the counter on a cell whose previous interval is still
    // running. Overwriting timerId without clearing it would orphan that interval,
    // and it would keep rewriting the cell text forever.
    if (cell.timerId) {
        clearInterval(cell.timerId);
    }

    let seconds = 0;
    cell.textContent = `Fetching... ${seconds}`;
    cell.timerId = setInterval(() => {
        seconds += 1;
        cell.textContent = `Fetching... ${seconds}`;
    }, 1000);
    cell.fetchStartTime = Date.now();
};

/**
 * Stops the cell's running counter, if any, and reports how long the fetch took.
 *
 * @param {Object} cell - Element-like object carrying `timerId` and `fetchStartTime`.
 * @returns {number} Whole seconds elapsed since `cell.fetchStartTime`
 *     (NaN when no start time was recorded).
 */
const stopFetchingTimer = (cell) => {
    const { timerId, fetchStartTime } = cell;
    if (timerId) {
        clearInterval(timerId);
        cell.timerId = null;
    }
    const elapsedMs = Date.now() - fetchStartTime;
    return Math.floor(elapsedMs / 1000);
};

/**
 * Decides what to do after a failed title request, walking the user-agent
 * fallback chain: Googlebot -> YandexBot (after a 1 s pause) -> no user agent
 * -> give up and show the error.
 *
 * @param {string} url - URL that failed.
 * @param {Object} cell - Cell that displays the result.
 * @param {?string} range - Range of the failed request (currently unused; every
 *     retry asks for 'bytes=0-2048').
 * @param {?string} userAgent - User agent of the failed request.
 * @param {number|string} status - HTTP status or a description, shown on final failure.
 */
const handleFetchError = (url, cell, range, userAgent, status) => {
    stopFetchingTimer(cell);

    switch (userAgent) {
        case 'Googlebot':
            // Retry with YandexBot user agent
            setTimeout(() => fetchTitleWithRange(url, cell, 'bytes=0-2048', 'YandexBot'), 1000);
            break;
        case 'YandexBot':
            // Final fallback without specifying user agent
            fetchTitleWithRange(url, cell, 'bytes=0-2048', null);
            break;
        default:
            updateCellWithError(cell, `Error: ${status}`);
    }
};

/**
 * Probes, via a HEAD request, whether the server behind `url` advertises byte
 * range support ("Accept-Ranges: bytes"). The answer is cached per origin in
 * `supportsRangeCache`, so each origin is probed at most once per answer.
 *
 * The returned promise always settles: network errors, aborts and timeouts all
 * count as "no range support" instead of leaving the caller waiting forever.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} Resolves to true when byte ranges are advertised.
 *     Rejects only if `url` cannot be parsed by `new URL`.
 */
const checkSupportsRange = (url) => {
    return new Promise((resolve) => {
        const origin = new URL(url).origin;
        if (supportsRangeCache[origin] !== undefined) {
            resolve(supportsRangeCache[origin]);
            return;
        }

        // Record the verdict before resolving so later callers hit the cache.
        const settle = (supportsRange) => {
            supportsRangeCache[origin] = supportsRange;
            resolve(supportsRange);
        };
        const settleUnsupported = () => settle(false);

        GM_xmlhttpRequest({
            method: 'HEAD',
            url: url,
            headers: getUserAgentHeaders('Googlebot'),
            // Without a timeout a silent server would stall the row indefinitely.
            timeout: 10000,
            onload: (res) => {
                // responseHeaders may be absent; treat that as "no header found".
                const acceptRangesMatch = (res.responseHeaders || '').match(/Accept-Ranges:\s*(\w+)/i);
                settle(acceptRangesMatch !== null && acceptRangesMatch[1].toLowerCase() === 'bytes');
            },
            onerror: settleUnsupported,
            ontimeout: settleUnsupported,
            onabort: settleUnsupported,
        });
    });
};

/**
 * Fetches a page (optionally only its first bytes) with GM_xmlhttpRequest,
 * extracts its <title>, shows it in `cell` and caches it in `titles`.
 *
 * Flow:
 *  - cached title -> shown immediately, no request;
 *  - 200/206 with a title -> shown, cached and persisted;
 *  - 206 without a title on the initial 'bytes=0-1024' range -> one retry with
 *    'bytes=0-2048';
 *  - 200/206 without a title otherwise -> "Title not found";
 *  - any other status or a network error -> delegated to handleFetchError;
 *  - timeout (10 s) -> "Request timed out".
 *
 * @param {string} url - Page URL (normalized internally).
 * @param {Object} cell - Cell that displays progress and the result.
 * @param {?string} [range='bytes=0-1024'] - Value for the Range header; a falsy
 *     value sends no Range header at all.
 * @param {?string} [userAgent='Googlebot'] - Crawler identity passed to
 *     getUserAgentHeaders; null sends no user-agent override.
 */
const fetchTitleWithRange = (url, cell, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
    const normalizedUrl = normalizeUrl(url);

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    startFetchingTimer(cell);

    const headers = {
        // Only send Range when one was requested; a null range must not be
        // serialized into the header.
        ...(range ? { 'Range': range } : {}),
        ...getUserAgentHeaders(userAgent),
    };

    GM_xmlhttpRequest({
        method: 'GET',
        url: normalizedUrl,
        headers,
        onload: (res) => {
            // Every outcome ends this attempt, so stop its counter up front. This
            // also prevents a retry from stacking a second interval on the cell.
            stopFetchingTimer(cell);

            const isPartial = res.status === 206; // Partial Content
            if (!isPartial && res.status !== 200) {
                handleFetchError(url, cell, range, userAgent, res.status);
                return;
            }

            const title = extractTitle(res.responseText);
            if (title) {
                updateCellWithTitle(cell, title);
                titles[normalizedUrl] = title;
                saveTitles();
                return;
            }

            if (isPartial && range === 'bytes=0-1024') {
                // The title may lie just past the first kilobyte: try a larger range
                fetchTitleWithRange(url, cell, 'bytes=0-2048', userAgent);
                return;
            }

            updateCellWithError(cell, `Title not found`);
        },
        onerror: () => {
            stopFetchingTimer(cell);
            handleFetchError(url, cell, range, userAgent, 'Network Error');
        },
        ontimeout: () => {
            stopFetchingTimer(cell);
            updateCellWithError(cell, `Request timed out`);
        },
        timeout: 10000 // 10 seconds timeout
    });
};

/**
 * Entry point for one table row: decides whether the link's title is skipped,
 * served from cache, or fetched, and fills `cell` accordingly.
 *
 * @param {string} url - Link target taken from the row.
 * @param {Object} cell - Element that displays the title or status.
 * @returns {Promise<void>} Settles once the fetch has been started (not finished).
 */
const processUrl = async (url, cell) => {
    const normalizedUrl = normalizeUrl(url);

    // This function is called fire-and-forget, so a URL that cannot be parsed
    // would otherwise surface as an unhandled rejection and leave the cell blank.
    let domain;
    try {
        domain = new URL(normalizedUrl).hostname.replace(/^www\./, '');
    } catch (err) {
        updateCellWithError(cell, 'Invalid URL');
        return;
    }

    if (skipDomains.includes(domain)) {
        cell.textContent = '-';
        cell.title = 'Skipped';
        return;
    }

    if (titles[normalizedUrl]) {
        updateCellWithTitle(cell, titles[normalizedUrl]);
        return;
    }

    // Ask for only the first kilobyte when the server advertises byte ranges;
    // otherwise pass null to signal "no range".
    const supportsRange = await checkSupportsRange(normalizedUrl);
    const range = supportsRange ? 'bytes=0-1024' : null;
    fetchTitleWithRange(normalizedUrl, cell, range, 'Googlebot');
};

/**
 * Adds a "Title" column to a table, inserted just before the last column, and
 * starts title resolution for every row that has not been handled yet.
 *
 * The 'title-header' and 'title-cell' classes double as "already processed"
 * markers, so calling this repeatedly on the same table is safe.
 *
 * @param {HTMLTableElement} table - Table to augment.
 */
const processTable = table => {
    const headerRow = table.querySelector('thead tr');
    const needsHeader = headerRow && !headerRow.querySelector('.title-header');
    if (needsHeader) {
        const titleHeader = document.createElement('th');
        titleHeader.textContent = 'Title';
        titleHeader.classList.add('title-header');
        headerRow.insertBefore(titleHeader, headerRow.lastElementChild);
    }

    for (const row of table.querySelectorAll('tbody tr')) {
        // Rows that already carry a title cell were handled on an earlier pass.
        if (row.querySelector('.title-cell')) {
            continue;
        }

        const titleCell = document.createElement('td');
        titleCell.classList.add('title-cell');
        row.insertBefore(titleCell, row.lastElementChild);

        const content = document.createElement('div');
        content.classList.add('title-content');
        titleCell.appendChild(content);

        // The link is looked up in the row's second cell (index 1).
        const link = row.cells[1]?.querySelector('a');
        if (link) {
            processUrl(link.href, content);
        }
    }
};

// Finds every target table currently in the document and augments it.
const processAllTables = () => {
    for (const table of document.querySelectorAll('table#myTable')) {
        processTable(table);
    }
};

// Handle whatever is already on the page...
processAllTables();

// ...and re-scan on any DOM change so tables or rows added later get a title too.
const observer = new MutationObserver(() => {
    processAllTables();
});
observer.observe(document.body, { childList: true, subtree: true });