// ==UserScript==
// @name Title Fetcher Optimized
// @namespace coolakov
// @version 2.3
// @description Observes table changes and adds a new column with fetched link titles efficiently using GM_xmlhttpRequest only
// @author GreatFireDragon
// @match https://coolakov.ru/tools/most_promoted/
// @grant GM_xmlhttpRequest
// @connect *
// @icon https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at document-end
// @license MIT
// ==/UserScript==
// Title cache (normalized URL -> page title) restored from localStorage.
// A missing, corrupt or non-object stored value falls back to an empty cache
// instead of aborting the whole script with a JSON.parse error.
const titles = (() => {
  try {
    const stored = JSON.parse(localStorage.getItem('titles'));
    const isPlainObject = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
    return isPlainObject ? stored : {};
  } catch (err) {
    console.warn('Title Fetcher: ignoring unreadable "titles" cache', err);
    return {};
  }
})();
const skipDomains = ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"]; // Domains to exclude from fetching
// Persist the title cache. Storage failures (e.g. quota exceeded) are logged
// rather than thrown, because this runs inside request callbacks after the
// cell has already been updated.
const saveTitles = () => {
  try {
    localStorage.setItem('titles', JSON.stringify(titles));
  } catch (err) {
    console.warn('Title Fetcher: could not persist "titles" cache', err);
  }
};
const supportsRangeCache = {}; // Cache to store whether a domain supports range requests
// Return the trimmed URL, prefixing "http://" when it does not already start
// with an http:// or https:// scheme (the scheme check is case-insensitive).
const normalizeUrl = (url) => {
  const trimmed = url.trim();
  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed;
  }
  return `http://${trimmed}`;
};
// Build the crawler identification headers for a request.
//   userAgent: 'Googlebot' selects the Googlebot string; any other truthy
//              value selects the YandexBot string; a falsy value (null,
//              undefined, '') yields no headers at all.
// Returns a fresh object with the same string under both 'User-Agent' and
// 'X-User-Agent', or an empty object.
const getUserAgentHeaders = (userAgent) => {
  if (!userAgent) {
    return {};
  }
  // The Googlebot string must not end with ")": the only parenthesised group
  // is the "(KHTML, like Gecko; compatible; Googlebot/2.1; ...)" comment.
  const agentString = userAgent === 'Googlebot'
    ? 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36'
    : 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
  return {
    'User-Agent': agentString,
    'X-User-Agent': agentString,
  };
};
// Show a fetched title in the cell: the same string becomes both the visible
// text and the hover tooltip.
const updateCellWithTitle = (cell, title) => {
  Object.assign(cell, { textContent: title, title });
};
// Show a failure message in the cell (as text and tooltip) and tag the cell
// with the GFD_title_error class.
const updateCellWithError = (cell, message) => {
  for (const prop of ['textContent', 'title']) {
    cell[prop] = message;
  }
  const { classList } = cell;
  classList.add('GFD_title_error');
};
// Extract the contents of the first <title> element from an HTML fragment.
//   text: response body; anything that is not a string is treated as
//         "no title" instead of throwing.
// Returns the title with runs of whitespace (including newlines) collapsed
// to single spaces and the ends trimmed, '' for an empty <title>, or null
// when no complete <title>...</title> pair is present.
const extractTitle = (text) => {
  if (typeof text !== 'string') {
    return null;
  }
  // The tag name must be exactly "title": it is followed either by ">" or by
  // whitespace and attributes, so "<title-bar>" style tags do not match.
  const match = text.match(/<title(?:\s[^>]*)?>([^<]*)<\/title\s*>/i);
  if (!match) {
    return null;
  }
  return match[1].replace(/\s+/g, ' ').trim();
};
// Start (or restart) the "Fetching... N" seconds counter shown in the cell.
// The interval id is stored on cell.timerId and the start timestamp on
// cell.fetchStartTime. If a counter is already running on this cell it is
// cleared first; otherwise its id would be overwritten and the old interval
// would keep rewriting the cell text forever, even after a title is shown.
const startFetchingTimer = (cell) => {
  if (cell.timerId) {
    clearInterval(cell.timerId);
  }
  let seconds = 0;
  const render = () => {
    cell.textContent = `Fetching... ${seconds}`;
  };
  render();
  cell.timerId = setInterval(() => {
    seconds += 1;
    render();
  }, 1000);
  cell.fetchStartTime = Date.now();
};
// Cancel the cell's seconds counter, if one is recorded in cell.timerId, and
// report how many whole seconds have passed since cell.fetchStartTime.
const stopFetchingTimer = (cell) => {
  const { timerId } = cell;
  if (timerId) {
    clearInterval(timerId);
    cell.timerId = null;
  }
  const elapsedMs = Date.now() - cell.fetchStartTime;
  return Math.floor(elapsedMs / 1000);
};
// Decide what to do after a failed fetch, based on which user agent was used:
//   'Googlebot' -> retry as YandexBot after a one-second delay
//   'YandexBot' -> retry immediately without any crawler user agent
//   otherwise   -> give up and show "Error: <status>" in the cell
// The cell's timer is stopped first. `range` is accepted but not consulted:
// both retries always request 'bytes=0-2048'.
const handleFetchError = (url, cell, range, userAgent, status) => {
  stopFetchingTimer(cell);
  switch (userAgent) {
    case 'Googlebot':
      setTimeout(() => fetchTitleWithRange(url, cell, 'bytes=0-2048', 'YandexBot'), 1000);
      break;
    case 'YandexBot':
      fetchTitleWithRange(url, cell, 'bytes=0-2048', null);
      break;
    default:
      updateCellWithError(cell, `Error: ${status}`);
  }
};
// Probe, with a HEAD request sent as Googlebot, whether the server behind
// `url` advertises "Accept-Ranges: bytes". The answer is cached per origin in
// supportsRangeCache.
//   url: absolute URL; an unparsable URL makes the returned promise reject
//        (the URL constructor throws inside the executor).
// Resolves to a boolean. Network errors, timeouts, aborts and responses
// without headers all resolve to false, so the promise always settles and
// callers are never left waiting on a hung request.
const checkSupportsRange = (url) => {
  return new Promise((resolve) => {
    const domain = new URL(url).origin;
    const cached = supportsRangeCache[domain];
    if (cached !== undefined) {
      resolve(cached);
      return;
    }
    // Record the answer for the origin and settle the promise in one step.
    const settle = (supportsRange) => {
      supportsRangeCache[domain] = supportsRange;
      resolve(supportsRange);
    };
    GM_xmlhttpRequest({
      method: 'HEAD',
      url: url,
      headers: getUserAgentHeaders('Googlebot'),
      timeout: 10000, // same 10 second limit as the title request
      onload: (res) => {
        // responseHeaders may be absent; treat that as "no Accept-Ranges".
        const acceptRangesMatch = /Accept-Ranges:\s*(\w+)/i.exec(res.responseHeaders ?? '');
        settle(acceptRangesMatch !== null && acceptRangesMatch[1].toLowerCase() === 'bytes');
      },
      onerror: () => settle(false),
      ontimeout: () => settle(false),
      onabort: () => settle(false),
    });
  });
};
// Fetch a page with GM_xmlhttpRequest and show its <title> in the cell.
//   url:       page URL (normalized before use and used as the cache key)
//   cell:      element whose text/tooltip displays progress, title or error
//   range:     value for the Range header; a falsy value (e.g. null) sends
//              no Range header at all. Defaults to 'bytes=0-1024'.
//   userAgent: 'Googlebot', 'YandexBot' or null (see getUserAgentHeaders).
// Cached titles are shown without a request. On a 200/206 response the title
// is extracted, displayed, cached and persisted. A 206 answer to the default
// 'bytes=0-1024' range that contains no title is retried once with
// 'bytes=0-2048'. Other statuses and network errors go to handleFetchError;
// a timeout (10 s) is reported in the cell directly.
const fetchTitleWithRange = (url, cell, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
  const normalizedUrl = normalizeUrl(url);
  if (titles[normalizedUrl]) {
    updateCellWithTitle(cell, titles[normalizedUrl]);
    return;
  }
  // Only send Range when one was requested; a null range must not turn into
  // a literal "Range: null" header.
  const headers = {
    ...(range ? { 'Range': range } : {}),
    ...getUserAgentHeaders(userAgent),
  };
  startFetchingTimer(cell);
  GM_xmlhttpRequest({
    method: 'GET',
    url: normalizedUrl,
    headers,
    onload: (res) => {
      // Every outcome ends the current counter. This includes the retry
      // below, which starts a fresh counter for the new request.
      stopFetchingTimer(cell);
      const isPartial = res.status === 206; // Partial Content
      if (!isPartial && res.status !== 200) {
        handleFetchError(url, cell, range, userAgent, res.status);
        return;
      }
      const title = extractTitle(res.responseText);
      if (title) {
        updateCellWithTitle(cell, title);
        titles[normalizedUrl] = title;
        saveTitles();
      } else if (isPartial && range === 'bytes=0-1024') {
        // The first kilobyte did not contain the title; try a larger range
        fetchTitleWithRange(url, cell, 'bytes=0-2048', userAgent);
      } else {
        updateCellWithError(cell, `Title not found`);
      }
    },
    onerror: () => {
      stopFetchingTimer(cell);
      handleFetchError(url, cell, range, userAgent, 'Network Error');
    },
    ontimeout: () => {
      stopFetchingTimer(cell);
      updateCellWithError(cell, `Request timed out`);
    },
    timeout: 10000 // 10 seconds timeout
  });
};
// Resolve the title for one link and display it in `cell`.
//   url:  link target as found in the table
//   cell: element that receives the title, '-' for skipped domains, or an
//         error message
// Hosts listed in skipDomains (after stripping a leading "www.") are not
// fetched, and cached titles are shown immediately. Otherwise the server is
// probed for range support and the title is requested with a byte range
// when supported, or without one when not.
// The returned promise never rejects: callers do not await it, so failures
// are shown in the cell instead of surfacing as unhandled rejections.
const processUrl = async (url, cell) => {
  const normalizedUrl = normalizeUrl(url);
  let domain;
  try {
    domain = new URL(normalizedUrl).hostname.replace(/^www\./, '');
  } catch (err) {
    // e.g. non-http links that cannot be parsed once "http://" is prepended
    updateCellWithError(cell, 'Invalid URL');
    return;
  }
  if (skipDomains.includes(domain)) {
    cell.textContent = '-';
    cell.title = 'Skipped';
    return;
  }
  if (titles[normalizedUrl]) {
    updateCellWithTitle(cell, titles[normalizedUrl]);
    return;
  }
  try {
    const supportsRange = await checkSupportsRange(normalizedUrl);
    // A null range means "fetch without a Range header"
    const range = supportsRange ? 'bytes=0-1024' : null;
    fetchTitleWithRange(normalizedUrl, cell, range, 'Googlebot');
  } catch (err) {
    console.error('Title Fetcher: request could not be started', err);
    updateCellWithError(cell, `Error: ${err?.message ?? err}`);
  }
};
// Add the "Title" column to a table: a header cell (once) and, for every body
// row that does not have one yet, a title cell whose inner div is handed to
// processUrl together with the link found in the row's second cell.
// New cells are inserted just before the row's last element.
const processTable = table => {
  const headerRow = table.querySelector('thead tr');
  const headerMissing = headerRow && !headerRow.querySelector('.title-header');
  if (headerMissing) {
    const headerCell = document.createElement('th');
    headerCell.textContent = 'Title';
    headerCell.classList.add('title-header');
    headerRow.insertBefore(headerCell, headerRow.lastElementChild);
  }
  for (const row of table.querySelectorAll('tbody tr')) {
    // Rows that already carry a title cell were handled on an earlier pass
    if (row.querySelector('.title-cell')) {
      continue;
    }
    const titleCell = document.createElement('td');
    titleCell.classList.add('title-cell');
    row.insertBefore(titleCell, row.lastElementChild);
    const content = document.createElement('div');
    content.classList.add('title-content');
    titleCell.appendChild(content);
    const anchor = row.cells[1]?.querySelector('a');
    if (anchor) {
      processUrl(anchor.href, content);
    }
  }
};
// Run processTable over every table#myTable currently in the document.
const processAllTables = () => {
  document.querySelectorAll('table#myTable').forEach(processTable);
};
// Handle whatever is already on the page when the script starts...
processAllTables();
// ...then repeat on any DOM change under <body>, so tables or rows added
// later get their title cells too (processTable skips rows already done).
const observer = new MutationObserver(() => {
  processAllTables();
});
observer.observe(document.body, { childList: true, subtree: true });