// ==UserScript==
// @name Title Fetcher Optimized
// @namespace coolakov
// @version 2.4.3
// @description Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only
// @author GreatFireDragon
// @match https://coolakov.ru/tools/most_promoted/
// @grant GM_xmlhttpRequest
// @connect *
// @icon https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at document-end
// @license MIT
// ==/UserScript==
// Upper bound on the number of URL entries kept in the persisted cache.
const MAX_CACHE_SIZE = 5000;

// Title/description cache keyed by URL, persisted in localStorage under the 'cache' key.
// Unreadable or non-object stored data is discarded instead of aborting the whole script.
const cache = (() => {
  try {
    const stored = JSON.parse(localStorage.getItem('cache'));
    const isPlainRecord = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
    return isPlainRecord ? stored : {};
  } catch (err) {
    console.warn('[Title Fetcher] Stored cache is unreadable, starting empty:', err);
    return {};
  }
})();

/**
 * Persists the in-memory cache to localStorage.
 * Storage failures (for example an exceeded quota) are logged rather than thrown,
 * because this runs inside request callbacks where a throw would be lost anyway.
 */
const saveCache = () => {
  try {
    localStorage.setItem('cache', JSON.stringify(cache));
  } catch (err) {
    console.warn('[Title Fetcher] Could not persist cache:', err);
  }
};

// Trim cache if necessary: drop the entries that come first in key order
// until only MAX_CACHE_SIZE remain, then persist the trimmed cache.
{
  const cachedUrls = Object.keys(cache);
  const overflow = cachedUrls.length - MAX_CACHE_SIZE;
  if (overflow > 0) {
    cachedUrls.slice(0, overflow).forEach(key => delete cache[key]);
    saveCache();
  }
}
// Per-origin memo of range-support answers; read and written by checkRangeSupport.
const supportsRangeCache = {};

// Domains used when nothing usable is stored under 'GFD_skipDomains'.
const DEFAULT_SKIP_DOMAINS = ['megamarket.ru', 'market.yandex.ru', 'ozon.ru', 'ozon.by', 'avito.ru'];

// Domains for which no title/description is ever fetched. Reassigned when the textarea is edited.
// Falls back to the defaults when the stored value is missing, not an array, or not valid JSON.
let skipDomains = (() => {
  try {
    const stored = JSON.parse(localStorage.getItem('GFD_skipDomains'));
    return Array.isArray(stored) ? stored : DEFAULT_SKIP_DOMAINS;
  } catch (err) {
    console.warn('[Title Fetcher] Stored skip list is unreadable, using defaults:', err);
    return DEFAULT_SKIP_DOMAINS;
  }
})();

// Create and append textarea for skipDomains.
// The tooltip (Russian) reads: "Domains for which title and description are never collected".
const textarea = Object.assign(document.createElement('textarea'), {
  value: skipDomains.join(', '),
  title: "Домены для которых никогда не собирать тайтл и деск"
});

// Prefer the navbar header; fall back to <body> so a missing header does not abort the script.
(document.querySelector('#navbar-header') ?? document.body).appendChild(textarea);

// On every edit: re-parse the comma-separated list, persist it, and re-evaluate all rows.
textarea.addEventListener('input', () => {
  skipDomains = textarea.value
    .split(',')
    .map(entry => entry.trim())
    .filter(Boolean);
  localStorage.setItem('GFD_skipDomains', JSON.stringify(skipDomains));
  refreshTable();
});
/**
 * Trims a URL and makes sure it carries an HTTP(S) scheme.
 *
 * @param {string} url - Raw URL text, possibly padded with whitespace or lacking a scheme.
 * @returns {string} The trimmed URL, prefixed with `http://` when it did not already
 *   start with `http://` or `https://` (checked case-insensitively).
 */
const normalizeUrl = url => {
  const trimmed = url.trim();
  const hasHttpScheme = /^https?:\/\//i.test(trimmed);
  if (hasHttpScheme) {
    return trimmed;
  }
  return `http://${trimmed}`;
};
/**
 * Builds the request headers that impersonate a known crawler.
 *
 * @param {?string} ua - Crawler key: 'Googlebot' or 'YandexBot'. Any other value
 *   (including null/undefined) selects no impersonation.
 * @returns {Object<string, string>} `User-Agent` and `X-User-Agent` headers carrying the
 *   crawler's user-agent string, or an empty object when `ua` is not a known crawler.
 */
const getUserAgentHeaders = ua => {
  // A Map (rather than an object literal) so that keys such as 'constructor' never resolve.
  const agents = new Map([
    ['Googlebot', 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36'],
    ['YandexBot', 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)'],
  ]);
  const agent = agents.get(ua);
  if (agent === undefined) {
    // Unknown crawler: send no header at all instead of a header whose value is undefined.
    return {};
  }
  // NOTE(review): the duplicate X-User-Agent header is presumably a fallback for environments
  // that refuse to override User-Agent — confirm against the userscript manager in use.
  return { 'User-Agent': agent, 'X-User-Agent': agent };
};
// Named character references understood by decodeEntities, keyed by the bare entity name
// (the text between the ampersand and the semicolon). Keying by bare name keeps this table
// intact even if the file is ever passed through an HTML-unescaping step.
const NAMED_ENTITIES = new Map([
  // Non-breaking space is rendered as an ordinary space.
  ['nbsp', ' '], ['amp', '&'], ['lt', '<'], ['gt', '>'], ['quot', '"'], ['apos', "'"],
  ['copy', '©'], ['reg', '®'], ['euro', '€'], ['trade', '™'], ['mdash', '—'], ['ndash', '–'],
  ['uarr', '↑'], ['darr', '↓'], ['larr', '←'], ['rarr', '→'], ['harr', '↔'], ['bull', '•'],
  ['hellip', '…'], ['laquo', '«'], ['raquo', '»'], ['lsquo', '‘'], ['rsquo', '’'],
  ['ldquo', '“'], ['rdquo', '”'], ['frasl', '⁄'], ['times', '×'], ['divide', '÷'], ['para', '¶'],
]);

/**
 * Decodes HTML character references in a string.
 *
 * Handles decimal and hexadecimal numeric references as well as the named references
 * listed in NAMED_ENTITIES. Unknown names and out-of-range code points are left untouched.
 *
 * @param {string} str - Text that may contain character references.
 * @returns {string} The text with every recognised reference replaced by its character.
 */
const decodeEntities = str => {
  // Groups: 1 = decimal digits, 2 = hexadecimal digits, 3 = entity name.
  const reference = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/g;
  return str.replace(reference, (match, dec, hex, name) => {
    if (name !== undefined) {
      return NAMED_ENTITIES.get(name) ?? match;
    }
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // fromCodePoint handles characters outside the BMP but throws above U+10FFFF.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
};
/**
 * Writes text into a result cell and mirrors it in the tooltip.
 *
 * The 'GFD_title_error' class is kept in sync with the text: it is set when the text
 * starts with 'Error' and cleared otherwise, so a cell that recovers after a failed
 * attempt does not stay styled as an error.
 *
 * @param {Element} cell - Element whose textContent, title and classList are updated.
 * @param {?string} text - Text to show; null/undefined is shown as an empty string.
 */
const updateCell = (cell, text) => {
  const shown = text == null ? '' : String(text);
  cell.textContent = shown;
  cell.title = shown;
  cell.classList.toggle('GFD_title_error', shown.startsWith('Error'));
};
/**
 * Extracts the page title and meta description from raw HTML text.
 *
 * The description is taken from the first <meta> tag whose `name` attribute equals
 * "description" (case-insensitive). Attributes may appear in any order, may be mixed
 * with other attributes, and a quoted value may contain the other kind of quote.
 *
 * @param {string} text - HTML source (possibly only the beginning of a document).
 * @returns {{title: (string|undefined), description: (string|undefined)}} Trimmed values;
 *   a field is undefined when the corresponding tag was not found.
 */
const extractContent = text => {
  const titleMatch = /<title[^>]*>([^<]*)<\/title>/i.exec(text);

  // A whole <meta ...> tag; quoted attribute values are consumed as units so that a '>'
  // inside a value does not end the tag early.
  const metaTag = /<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi;
  // One attribute: name, then a double-quoted, single-quoted or bare value.
  const attribute = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

  let description;
  for (const [tag] of text.matchAll(metaTag)) {
    let name;
    let content;
    for (const [, attr, doubleQuoted, singleQuoted, bare] of tag.matchAll(attribute)) {
      const value = doubleQuoted ?? singleQuoted ?? bare;
      const key = attr.toLowerCase();
      if (key === 'name' && name === undefined) {
        name = value;
      } else if (key === 'content' && content === undefined) {
        content = value;
      }
    }
    if (name?.trim().toLowerCase() === 'description' && content !== undefined) {
      description = content.trim();
      break;
    }
  }

  return { title: titleMatch?.[1].trim(), description };
};
/**
 * Loads a page and fills the two result cells with its title and description.
 *
 * A cached entry for `url` is shown immediately without any request. Otherwise both cells
 * show a "Fetching... N" counter that advances once per second until the request ends.
 * On a 200/206 response with a title, the decoded values are shown and cached. When no
 * title is found and the request used the range 'bytes=0-1024', the request is repeated
 * with 'bytes=0-2048'. Other statuses and network errors are delegated to handleError.
 *
 * @param {string} url - URL to request; also the cache key.
 * @param {Element} cellTitle - Cell that receives the title.
 * @param {Element} cellDesc - Cell that receives the description.
 * @param {?string} range - Value for the Range header, or a falsy value for no Range header.
 * @param {?string} ua - Crawler key passed to getUserAgentHeaders.
 */
const fetchData = (url, cellTitle, cellDesc, range, ua) => {
  const showBoth = (titleText, descText) => {
    updateCell(cellTitle, titleText);
    updateCell(cellDesc, descText);
  };

  const cached = cache[url];
  if (cached) {
    showBoth(cached.title, cached.description || '-');
    return;
  }

  // Progress counter shown in both cells while the request is in flight.
  let elapsed = 0;
  cellDesc.textContent = 'Fetching... 0';
  cellTitle.textContent = 'Fetching... 0';
  const ticker = setInterval(() => {
    elapsed += 1;
    const label = `Fetching... ${elapsed}`;
    cellTitle.textContent = label;
    cellDesc.textContent = label;
  }, 1000);

  const headers = range ? { 'Range': range } : {};
  Object.assign(headers, getUserAgentHeaders(ua));

  const handleResponse = res => {
    clearInterval(ticker);
    const isOk = res.status === 200 || res.status === 206;
    if (!isOk) {
      handleError(url, cellTitle, cellDesc, range, ua, res.status);
      return;
    }
    const { title, description } = extractContent(res.responseText);
    if (title) {
      const entry = {
        title: decodeEntities(title),
        description: decodeEntities(description || '-'),
      };
      showBoth(entry.title, entry.description);
      cache[url] = entry;
      saveCache();
      return;
    }
    if (range === 'bytes=0-1024') {
      // The title may lie beyond the first chunk: retry once with a larger range.
      fetchData(url, cellTitle, cellDesc, 'bytes=0-2048', ua);
      return;
    }
    showBoth('Title not found', 'Description not found');
  };

  GM_xmlhttpRequest({
    method: 'GET',
    url,
    headers,
    timeout: 10000,
    onload: handleResponse,
    onerror: () => {
      clearInterval(ticker);
      handleError(url, cellTitle, cellDesc, range, ua, 'Network Error');
    },
    ontimeout: () => {
      clearInterval(ticker);
      showBoth('Request timed out', 'Request timed out');
    },
  });
};
/**
 * Reacts to a failed fetch by retrying with the next identity, or reporting the error.
 *
 * The retry order is Googlebot, then YandexBot, then no crawler identity. Once a request
 * made without a crawler identity (or with an unrecognised one) fails, both cells show
 * `Error: <status>`.
 *
 * @param {string} url - URL that failed.
 * @param {Element} cellTitle - Title cell for this URL.
 * @param {Element} cellDesc - Description cell for this URL.
 * @param {?string} range - Range header value used by the failed request; reused on retry.
 * @param {?string} ua - Crawler key used by the failed request.
 * @param {(number|string)} status - HTTP status or a short failure label.
 */
const handleError = (url, cellTitle, cellDesc, range, ua, status) => {
  switch (ua) {
    case 'Googlebot':
      fetchData(url, cellTitle, cellDesc, range, 'YandexBot');
      return;
    case 'YandexBot':
      fetchData(url, cellTitle, cellDesc, range, null);
      return;
    default: {
      const message = `Error: ${status}`;
      updateCell(cellTitle, message);
      updateCell(cellDesc, message);
    }
  }
};
// HEAD probes currently in flight, keyed by origin, so that concurrent callers share one request.
const pendingRangeChecks = new Map();

/**
 * Checks whether the server behind `url` advertises byte-range support.
 *
 * Sends a HEAD request and looks for an `Accept-Ranges: bytes` response header. The answer
 * is memoised per origin in supportsRangeCache; callers that ask about the same origin while
 * a probe is still running share that probe. A network error or a timeout counts as
 * "not supported", so the returned promise always settles.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} Resolves to true when byte ranges are advertised.
 *   Rejects only when `url` cannot be parsed.
 */
const checkRangeSupport = async url => {
  const { origin } = new URL(url);
  if (supportsRangeCache[origin] !== undefined) {
    return supportsRangeCache[origin];
  }

  const inFlight = pendingRangeChecks.get(origin);
  if (inFlight) {
    return inFlight;
  }

  const probe = new Promise(resolve => {
    const settle = supports => {
      supportsRangeCache[origin] = supports;
      pendingRangeChecks.delete(origin);
      resolve(supports);
    };
    GM_xmlhttpRequest({
      method: 'HEAD',
      url,
      headers: getUserAgentHeaders('Googlebot'),
      // Without a timeout a stalled server would leave the row waiting forever.
      timeout: 10000,
      onload: res => settle(/Accept-Ranges:\s*bytes/i.test(res.responseHeaders)),
      onerror: () => settle(false),
      ontimeout: () => settle(false),
    });
  });
  // Only remember the probe while it is actually pending.
  if (supportsRangeCache[origin] === undefined) {
    pendingRangeChecks.set(origin, probe);
  }
  return probe;
};
/**
 * Resolves the title and description for one link and writes them into its cells.
 *
 * Order of checks: an unparseable URL is reported in the cells; a host listed in
 * skipDomains (compared after removing a leading "www.") gets '-' in both cells; a cached
 * entry is shown directly; otherwise the server is probed for range support and the page is
 * fetched as Googlebot, using the range 'bytes=0-1024' when ranges are supported.
 *
 * @param {string} url - Link target as found in the table.
 * @param {Element} cellTitle - Cell that receives the title.
 * @param {Element} cellDesc - Cell that receives the description.
 * @returns {Promise<void>} Settles once the cells are filled or the fetch has been started.
 */
const processUrl = async (url, cellTitle, cellDesc) => {
  const normalized = normalizeUrl(url);

  let domain;
  try {
    domain = new URL(normalized).hostname.replace(/^www\./, '');
  } catch {
    // Callers do not await this function, so report the problem in the row instead of
    // letting it surface as an unhandled rejection with blank cells.
    updateCell(cellTitle, 'Error: Invalid URL');
    updateCell(cellDesc, 'Error: Invalid URL');
    return;
  }

  if (skipDomains.includes(domain)) {
    updateCell(cellTitle, '-');
    updateCell(cellDesc, '-');
    return;
  }

  const cached = cache[normalized];
  if (cached) {
    updateCell(cellTitle, cached.title);
    updateCell(cellDesc, cached.description || '-');
    return;
  }

  const supportsRange = await checkRangeSupport(normalized);
  fetchData(normalized, cellTitle, cellDesc, supportsRange ? 'bytes=0-1024' : null, 'Googlebot');
};
/**
 * Adds the "Title" and "Description" columns to a table and starts filling them.
 *
 * Header cells are inserted once, before the header row's last cell. Every body row that
 * has no '.title-cell' yet gets two new cells (each wrapping a <div>) inserted before its
 * last cell. The row's link is then looked up in its second cell: when present it is passed
 * to processUrl together with the two <div>s, otherwise the <div>s show '-' and 'No link'.
 * Rows that already have a '.title-cell' are left alone, so repeated calls are harmless.
 *
 * @param {Element} table - Table element to augment.
 */
const processTable = table => {
  const headerRow = table.querySelector('thead tr');
  const needsHeaders = headerRow && !headerRow.querySelector('.title-header');
  if (needsHeaders) {
    for (const label of ['Title', 'Description']) {
      const headerCell = document.createElement('th');
      headerCell.textContent = label;
      headerCell.classList.add(`${label.toLowerCase()}-header`);
      headerRow.insertBefore(headerCell, headerRow.lastElementChild);
    }
  }

  // Inserts one result cell before the row's last cell and returns its inner <div>.
  const addResultCell = (row, kind) => {
    const holder = document.createElement('td');
    holder.classList.add(`${kind}-cell`);
    const content = document.createElement('div');
    holder.appendChild(content);
    row.insertBefore(holder, row.lastElementChild);
    return content;
  };

  for (const row of table.querySelectorAll('tbody tr')) {
    if (row.querySelector('.title-cell')) {
      continue;
    }
    const titleDiv = addResultCell(row, 'title');
    const descDiv = addResultCell(row, 'description');

    // Looked up after the insertion, exactly like the row is seen from then on.
    const anchor = row.cells[1]?.querySelector('a');
    if (anchor) {
      processUrl(anchor.href, titleDiv, descDiv);
      continue;
    }
    updateCell(titleDiv, '-');
    updateCell(descDiv, 'No link');
  }
};
/**
 * Re-evaluates every already-augmented row, e.g. after the skip list changed.
 *
 * For each body row of every `table#myTable`, the row's link (looked up in its second cell)
 * is passed to processUrl again; rows without a link show '-' and 'No link'. Rows that do
 * not have both result cells yet are skipped — there is nothing to refresh in them.
 */
const refreshTable = () => {
  document.querySelectorAll('table#myTable').forEach(table => {
    table.querySelectorAll('tbody tr').forEach(row => {
      const cellTitle = row.querySelector('.title-cell div');
      const cellDesc = row.querySelector('.description-cell div');
      if (!cellTitle || !cellDesc) {
        // Not augmented yet: passing null cells on would throw.
        return;
      }

      const link = row.cells[1]?.querySelector('a');
      if (link) {
        processUrl(link.href, cellTitle, cellDesc);
        return;
      }
      updateCell(cellTitle, '-');
      updateCell(cellDesc, 'No link');
    });
  });
};
// Augments every matching table currently in the document.
const processAllTables = () => document.querySelectorAll('table#myTable').forEach(processTable);

// Initial processing
processAllTables();

// Elements this script inserts itself; their insertion must not trigger another scan.
const OWN_ELEMENTS_SELECTOR = '.title-cell, .description-cell, .title-header, .description-header';

// True when a mutation involves an element that did not come from this script.
// Pure text-node churn (such as the per-second "Fetching..." counter) is ignored.
const involvesForeignElement = ({ addedNodes, removedNodes }) =>
  [...addedNodes].some(
    node => node.nodeType === Node.ELEMENT_NODE && !node.matches(OWN_ELEMENTS_SELECTOR)
  ) || [...removedNodes].some(node => node.nodeType === Node.ELEMENT_NODE);

// Observe mutations to handle dynamic changes. Tables are only re-scanned when the page
// itself added or removed elements, not for every text update this script makes.
const observer = new MutationObserver(mutations => {
  if (mutations.some(involvesForeignElement)) {
    processAllTables();
  }
});
observer.observe(document.body, { childList: true, subtree: true });