Title Fetcher Optimized

Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only

目前為 2024-11-28 提交的版本,檢視 最新版本

// ==UserScript==
// @name         Title Fetcher Optimized
// @namespace    coolakov
// @version      2.4.2
// @description  Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only
// @author       GreatFireDragon
// @match        https://coolakov.ru/tools/most_promoted/
// @grant        GM_xmlhttpRequest
// @connect      *
// @icon         https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at       document-end
// @license      MIT
// ==/UserScript==

// Parses a JSON string read from localStorage. Returns `fallback` when the entry
// is absent (null/empty) or is not valid JSON, so a corrupt entry cannot abort the script.
const parseStoredJson = (raw, fallback) => {
    if (!raw) return fallback;
    try {
        return JSON.parse(raw);
    } catch (err) {
        console.warn('Ignoring unparsable localStorage value:', err);
        return fallback;
    }
};

// Title/description cache keyed by normalized URL, restored from localStorage key 'cache'
const cache = parseStoredJson(localStorage.getItem('cache'), null) || {};
// Persists the cache; a storage failure (e.g. quota exceeded) is logged instead of
// propagating into the request callback that triggered the save.
const saveCache = () => {
    try {
        localStorage.setItem('cache', JSON.stringify(cache));
    } catch (err) {
        console.warn('Could not persist the title cache:', err);
    }
};
const supportsRangeCache = {}; // Cache to store whether a domain supports range requests

// Get the skip domains from localStorage (or use the defaults if not set or unreadable)
const defaultSkipDomains = ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];
const storedDomains = localStorage.getItem('GFD_skipDomains');
const parsedDomains = parseStoredJson(storedDomains, null);
const skipDomains = Array.isArray(parsedDomains) ? parsedDomains : defaultSkipDomains;

// Textarea that lets the user edit the comma-separated skip list
const textarea = document.createElement('textarea');
textarea.value = skipDomains.join(', ');
textarea.title = "Домены для которых никогда не собирать татйл и деск";
const navbarHeader = document.querySelector("#navbar-header");
if (navbarHeader) {
    navbarHeader.appendChild(textarea);
} else {
    // Without the navbar the editor cannot be shown, but title fetching still works.
    console.warn('#navbar-header not found; skip-domain editor is not displayed');
}
textarea.addEventListener('change', () => {
    const domains = textarea.value.split(',').map(d => d.trim()).filter(Boolean);
    localStorage.setItem('GFD_skipDomains', JSON.stringify(domains));
    // Mutate the shared array in place so rows processed after the edit use the
    // new list immediately instead of only after a page reload.
    skipDomains.length = 0;
    skipDomains.push(...domains);
});
console.log('Skip Domains:', skipDomains);



// Trims the given URL and guarantees it carries a scheme: input that already
// starts with http:// or https:// (case-insensitive) is returned trimmed,
// anything else gets 'http://' prepended.
const normalizeUrl = (url) => {
    const trimmed = url.trim();
    if (/^https?:\/\//i.test(trimmed)) {
        return trimmed;
    }
    return 'http://' + trimmed;
};

// Builds the request headers that impersonate a search-engine crawler.
//   userAgent: 'Googlebot' selects the Googlebot string; any other truthy value
//              selects the YandexBot string; a falsy value yields no headers.
// Returns an object with 'User-Agent' and 'X-User-Agent' set to the same string,
// or an empty object when no user agent is requested.
const getUserAgentHeaders = (userAgent) => {
    if (!userAgent) {
        return {};
    }
    // The Googlebot string ends at "Safari/537.36"; the previous value carried a
    // stray trailing ")" that made the header malformed.
    const googlebotAgent = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36';
    const yandexbotAgent = 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
    const agentString = userAgent === 'Googlebot' ? googlebotAgent : yandexbotAgent;
    return {
        'User-Agent': agentString,
        'X-User-Agent': agentString,
    };
};

// Shows `title` in the given cell, both as its visible text and as its hover tooltip.
const updateTitleCell = (cell, title) => {
    Object.assign(cell, { textContent: title, title });
};

// Shows `description` in the given cell: writes it to the cell's text first,
// then mirrors it into the tooltip (`title`) attribute.
const updateDescriptionCell = (cell, description) => {
    for (const prop of ['textContent', 'title']) {
        cell[prop] = description;
    }
};

// Puts an error message into the cell (text and tooltip) and tags the cell with
// the 'GFD_title_error' class.
const updateCellWithError = (cell, message) => {
    Object.assign(cell, { textContent: message, title: message });
    cell.classList.add('GFD_title_error');
};

// Pulls the text of the first <title>…</title> element out of an HTML string.
// Returns the trimmed title text, or null when no complete title element is present.
const extractTitle = (text) => {
    const found = text.match(/<title[^>]*>([^<]*)<\/title>/i);
    if (!found) {
        return null;
    }
    const [, rawTitle] = found;
    return rawTitle.trim();
};

// Pulls the content of <meta name="description" content="…"> out of an HTML string.
// Only the attribute order name-then-content is recognised.
// Returns the trimmed content, or null when no such tag (with a closed content
// attribute) is found.
const extractDescription = (text) => {
    // The content value must end at the same quote character that opened it (\1),
    // so an apostrophe inside a double-quoted description (or a double quote inside
    // a single-quoted one) no longer truncates the result.
    const match = text.match(/<meta\s+name=["']description["']\s+content=(["'])([\s\S]*?)\1/i);
    return match ? match[2].trim() : null;
};

// Starts the "Fetching... N" progress indicator for a title/description cell pair.
//   cellTitle:       element that shows the title; it also carries the timer state
//                    (`timerId`, `fetchStartTime`) as ad-hoc properties.
//   cellDescription: element that shows the description.
// Both cells are refreshed once per second until the timer is stopped.
const startFetchingTimer = (cellTitle, cellDescription) => {
    // A retry can start a new fetch while the previous indicator is still running.
    // Clear it first, otherwise the old interval id is overwritten and that interval
    // keeps rewriting the cells forever.
    if (cellTitle.timerId) {
        clearInterval(cellTitle.timerId);
        cellTitle.timerId = null;
    }

    let seconds = 0;
    const render = () => {
        const label = `Fetching... ${seconds}`;
        cellTitle.textContent = label;
        cellDescription.textContent = label;
    };

    render();
    cellTitle.timerId = setInterval(() => {
        seconds += 1;
        render();
    }, 1000);
    cellTitle.fetchStartTime = Date.now();
};

// Cancels the progress interval stored on `cellTitle` (if one is active) and reports
// how many whole seconds have passed since `cellTitle.fetchStartTime`.
// `cellDescription` is accepted for symmetry with startFetchingTimer but is not used.
const stopFetchingTimer = (cellTitle, cellDescription) => {
    const { timerId, fetchStartTime } = cellTitle;
    if (timerId) {
        clearInterval(timerId);
        cellTitle.timerId = null;
    }
    const elapsedMs = Date.now() - fetchStartTime;
    return Math.floor(elapsedMs / 1000);
};

// Reacts to a failed fetch by walking the user-agent fallback chain:
//   'Googlebot' -> retry as 'YandexBot' after a one-second pause,
//   'YandexBot' -> retry immediately with no spoofed user agent,
//   otherwise   -> give up and show `Error: <status>` in both cells.
// Every retry requests 'bytes=0-2048'; the `range` argument itself is not consulted.
const handleFetchError = (url, cellTitle, cellDescription, range, userAgent, status) => {
    // Make sure the progress indicator is not left running.
    stopFetchingTimer(cellTitle, cellDescription);

    switch (userAgent) {
        case 'Googlebot':
            setTimeout(() => fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', 'YandexBot'), 1000);
            return;
        case 'YandexBot':
            // null (rather than undefined) keeps the default 'Googlebot' from kicking in again.
            fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', null);
            return;
        default:
            for (const cell of [cellTitle, cellDescription]) {
                updateCellWithError(cell, `Error: ${status}`);
            }
    }
};

// Checks whether the server behind `url` advertises byte-range support.
// Sends a HEAD request (with the Googlebot user agent) and looks for an
// "Accept-Ranges: bytes" response header. The answer is cached per origin in
// `supportsRangeCache`, so each origin is probed at most once after an answer arrives.
// Returns a Promise that always resolves to a boolean; every failure mode
// (unparsable URL, network error, timeout, abort) resolves to false.
const checkSupportsRange = (url) => {
    return new Promise((resolve) => {
        let origin;
        try {
            origin = new URL(url).origin;
        } catch (err) {
            // An unparsable URL cannot be probed; report "no range support".
            resolve(false);
            return;
        }

        const cached = supportsRangeCache[origin];
        if (cached !== undefined) {
            resolve(cached);
            return;
        }

        // Records the answer for the origin and settles the promise.
        const settle = (supportsRange) => {
            supportsRangeCache[origin] = supportsRange;
            resolve(supportsRange);
        };

        GM_xmlhttpRequest({
            method: 'HEAD',
            url: url,
            headers: getUserAgentHeaders('Googlebot'),
            // Without a timeout (and the handlers below) a stalled or aborted request
            // would leave the promise pending forever and the row would never be filled.
            timeout: 10000,
            onload: (res) => {
                // Anchor to the start of a header line so e.g. "X-Accept-Ranges" is not matched.
                const acceptRangesMatch = (res.responseHeaders || '').match(/^Accept-Ranges:\s*(\w+)/im);
                settle(Boolean(acceptRangesMatch) && acceptRangesMatch[1].toLowerCase() === 'bytes');
            },
            onerror: () => settle(false),
            ontimeout: () => settle(false),
            onabort: () => settle(false),
        });
    });
};

// Fetches a page with GM_xmlhttpRequest and fills the title/description cells.
//   url:             page address; it is passed through normalizeUrl before use.
//   cellTitle:       element that receives the title (also carries the progress-timer state).
//   cellDescription: element that receives the description.
//   range:           value for the Range request header; a falsy value (e.g. null)
//                    requests the document without a Range header.
//   userAgent:       crawler to impersonate, forwarded to getUserAgentHeaders.
// A cached result is applied without any request. On a 200/206 response the title and
// description are extracted, shown, cached and persisted. When no title is found in
// the first 1024 bytes, the request is repeated once with 'bytes=0-2048'. Non-OK
// statuses and network errors are delegated to handleFetchError; a timeout is
// reported directly in the cells.
const fetchDataWithRange = (url, cellTitle, cellDescription, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
    const normalizedUrl = normalizeUrl(url);

    const cached = cache[normalizedUrl];
    if (cached) {
        updateTitleCell(cellTitle, cached.title);
        updateDescriptionCell(cellDescription, cached.description || '-');
        return;
    }

    startFetchingTimer(cellTitle, cellDescription);

    const headers = { ...getUserAgentHeaders(userAgent) };
    // Only send a Range header when a range was actually requested; a literal
    // "Range: null" is not a valid header value.
    if (range) {
        headers.Range = range;
    }

    GM_xmlhttpRequest({
        method: 'GET',
        url: normalizedUrl,
        headers,
        onload: (res) => {
            // Stop the indicator before anything else, so that the retry below starts a
            // fresh timer instead of stacking a second interval on top of a running one.
            stopFetchingTimer(cellTitle, cellDescription);

            if (res.status !== 206 && res.status !== 200) { // neither Partial Content nor OK
                handleFetchError(url, cellTitle, cellDescription, range, userAgent, res.status);
                return;
            }

            const text = res.responseText || '';
            const title = extractTitle(text);
            if (title) {
                const description = extractDescription(text) || '-';
                updateTitleCell(cellTitle, title);
                updateDescriptionCell(cellDescription, description);
                cache[normalizedUrl] = { title, description };
                saveCache();
            } else if (range === 'bytes=0-1024') {
                // Try with a larger range
                fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', userAgent);
            } else {
                updateCellWithError(cellTitle, `Title not found`);
                updateCellWithError(cellDescription, `Description not found`);
            }
        },
        onerror: () => {
            stopFetchingTimer(cellTitle, cellDescription);
            handleFetchError(url, cellTitle, cellDescription, range, userAgent, 'Network Error');
        },
        ontimeout: () => {
            stopFetchingTimer(cellTitle, cellDescription);
            updateCellWithError(cellTitle, `Request timed out`);
            updateCellWithError(cellDescription, `Request timed out`);
        },
        timeout: 10000 // 10 seconds timeout
    });
};

// Main function to process each URL: decides how the title/description for one link
// are obtained and writes the outcome into the two cells.
//   - hosts listed in skipDomains (compared after stripping a leading "www.") get '-',
//   - cached URLs are filled from the cache,
//   - everything else is fetched, with a 1024-byte range request when the server
//     advertises range support and without a range otherwise.
// Returns a Promise that never rejects: callers invoke this without awaiting it, so any
// failure (e.g. an unparsable URL) is shown in the cells instead of surfacing as an
// unhandled rejection that leaves the row blank.
const processUrl = async (url, cellTitle, cellDescription) => {
    try {
        const normalizedUrl = normalizeUrl(url);
        const domain = new URL(normalizedUrl).hostname.replace(/^www\./, '');

        if (skipDomains.includes(domain)) {
            updateTitleCell(cellTitle, '-');
            updateDescriptionCell(cellDescription, '-');
            return;
        }

        const cached = cache[normalizedUrl];
        if (cached) {
            updateTitleCell(cellTitle, cached.title);
            updateDescriptionCell(cellDescription, cached.description || '-');
            return;
        }

        const supportsRange = await checkSupportsRange(normalizedUrl);
        // null means "no Range header": the server does not support range requests.
        const range = supportsRange ? 'bytes=0-1024' : null;
        fetchDataWithRange(normalizedUrl, cellTitle, cellDescription, range, 'Googlebot');
    } catch (err) {
        console.error('Title Fetcher: failed to process', url, err);
        const message = `Error: ${err && err.message ? err.message : err}`;
        updateCellWithError(cellTitle, message);
        updateCellWithError(cellDescription, message);
    }
};

// Adds the "Title" and "Description" columns to a table and fills them in.
// The two header cells are inserted once (guarded by the 'title-header' class), and
// every body row that does not yet contain a '.title-cell' receives a title cell and a
// description cell, each placed just before the row's last cell and each wrapping an
// inner <div>. Rows whose second cell holds a link are handed to processUrl with the
// inner divs as targets; rows without such a link are marked '-' / 'No link'.
const processTable = table => {
    // Creates a <th> with the given label/class and puts it before the last header cell.
    const addHeaderCell = (headerRow, label, className) => {
        const th = document.createElement('th');
        th.textContent = label;
        th.classList.add(className);
        headerRow.insertBefore(th, headerRow.lastElementChild);
    };

    // Creates a classed <td> before the row's last cell and gives it an inner <div>.
    const addBodyCell = (row, className) => {
        const td = document.createElement('td');
        td.classList.add(className);
        row.insertBefore(td, row.lastElementChild);
        const inner = td.appendChild(document.createElement('div'));
        return { td, inner };
    };

    const headerRow = table.querySelector('thead tr');
    if (headerRow && !headerRow.querySelector('.title-header')) {
        addHeaderCell(headerRow, 'Title', 'title-header');
        addHeaderCell(headerRow, 'Description', 'description-header');
    }

    for (const row of table.querySelectorAll('tbody tr')) {
        // Rows that already carry a title cell were handled on an earlier pass.
        if (row.querySelector('.title-cell')) {
            continue;
        }

        const titleCell = addBodyCell(row, 'title-cell');
        const descriptionCell = addBodyCell(row, 'description-cell');

        // The link is looked up only after the new cells have been inserted.
        const link = row.cells[1]?.querySelector('a');
        if (link) {
            processUrl(link.href, titleCell.inner, descriptionCell.inner);
        } else {
            updateTitleCell(titleCell.td, '-');
            updateDescriptionCell(descriptionCell.td, 'No link');
        }
    }
};

// Runs processTable over every table#myTable currently in the document.
const processAllTables = () => {
    document.querySelectorAll('table#myTable').forEach(processTable);
};

// Handle the tables that exist when the script starts
processAllTables();

// Re-run on any child-list mutation anywhere under <body>, so tables or rows added
// later are picked up as well (rows already processed are skipped by processTable).
const observer = new MutationObserver(() => {
    processAllTables();
});
observer.observe(document.body, { childList: true, subtree: true });