// ==UserScript==
// @name Title Fetcher Optimized
// @namespace coolakov
// @version 2.4.2
// @description Observes table changes and adds new columns with fetched link titles and descriptions efficiently using GM_xmlhttpRequest only
// @author GreatFireDragon
// @match https://coolakov.ru/tools/most_promoted/
// @grant GM_xmlhttpRequest
// @connect *
// @icon https://www.google.com/s2/favicons?sz=64&domain=coolakov.ru
// @run-at document-end
// @license MIT
// ==/UserScript==
// Title/description cache keyed by normalized URL, persisted in localStorage under 'cache'.
// A missing, corrupt, or non-object stored value must not abort the whole script,
// so any such case falls back to an empty cache.
const cache = (() => {
  try {
    const stored = JSON.parse(localStorage.getItem('cache'));
    const isPlainObject = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
    return isPlainObject ? stored : {};
  } catch (err) {
    console.warn('Title Fetcher: ignoring unreadable cache', err);
    return {};
  }
})();
// Writes the in-memory cache back to localStorage. Storage failures (for example a full
// quota) are logged instead of thrown so they cannot interrupt the response handlers
// that call this function.
const saveCache = () => {
  try {
    localStorage.setItem('cache', JSON.stringify(cache));
  } catch (err) {
    console.warn('Title Fetcher: could not persist cache', err);
  }
};
const supportsRangeCache = {}; // In-memory memo: URL origin -> whether the server advertises byte-range support
// Domains that are never fetched. The list is persisted in localStorage under
// 'GFD_skipDomains' and is editable through a textarea added to the page.
const storedDomains = localStorage.getItem('GFD_skipDomains');
// Parses the stored JSON list. Returns null when nothing usable is stored (missing key,
// malformed JSON, or a non-array value) so the caller can fall back to the defaults.
const parseStoredDomains = (raw) => {
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : null;
  } catch (err) {
    console.warn('Title Fetcher: ignoring unreadable GFD_skipDomains', err);
    return null;
  }
};
const skipDomains = parseStoredDomains(storedDomains) ?? ['megamarket.ru', "market.yandex.ru", "ozon.ru", "ozon.by", "avito.ru"];
const textarea = document.createElement('textarea');
textarea.value = skipDomains.join(', ');
textarea.title = "Домены для которых никогда не собирать татйл и деск";
// Fall back to <body> when the expected header container is missing, so that a page
// layout change cannot stop the rest of the script with a TypeError.
const textareaHost = document.querySelector("#navbar-header") ?? document.body;
textareaHost.appendChild(textarea);
textarea.addEventListener('change', () => {
  const domains = textarea.value.split(',').map(d => d.trim()).filter(Boolean);
  localStorage.setItem('GFD_skipDomains', JSON.stringify(domains));
  // Replace the contents of the live array in place so the edit applies to rows
  // processed from now on, not only after the next page load.
  skipDomains.splice(0, skipDomains.length, ...domains);
});
console.log('Skip Domains:', skipDomains);
/**
 * Makes sure a URL string carries an explicit scheme.
 *
 * Surrounding whitespace is removed first. Input that already starts with
 * `http://` or `https://` (case-insensitive) is returned trimmed but otherwise
 * untouched; anything else gets `http://` prepended.
 *
 * @param {string} url - Raw URL text.
 * @returns {string} The trimmed URL, guaranteed to start with a scheme prefix.
 */
const normalizeUrl = (url) => {
  const trimmed = url.trim();
  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed;
  }
  return `http://${trimmed}`;
};
/**
 * Builds the headers that present a request as coming from a search-engine crawler.
 *
 * @param {?string} userAgent - `'Googlebot'` selects the Googlebot string; any other
 *   truthy value selects the YandexBot string; a falsy value means no crawler headers.
 * @returns {Object<string, string>} A fresh object that is either empty or carries the
 *   same agent string under both `User-Agent` and `X-User-Agent`.
 */
const getUserAgentHeaders = (userAgent) => {
  if (!userAgent) {
    return {};
  }
  // The Googlebot string follows Google's published format; the previous literal ended
  // with a stray ")" after "Safari/537.36".
  const agentString = userAgent === 'Googlebot'
    ? 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/118.0.5993.70 Safari/537.36'
    : 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
  return {
    'User-Agent': agentString,
    // NOTE(review): presumably a fallback for servers/proxies that inspect this header — confirm.
    'X-User-Agent': agentString,
  };
};
/**
 * Shows a title in a cell: the same text becomes the visible content and the tooltip.
 *
 * @param {Object} cell - Element-like object whose `textContent` and `title` are assigned.
 * @param {string} title - Text to display.
 */
const updateTitleCell = (cell, title) => {
  // Key order matters: textContent is assigned first, then title.
  Object.assign(cell, { textContent: title, title });
};
/**
 * Shows a description in a cell: the same text becomes the visible content and the tooltip.
 *
 * @param {Object} cell - Element-like object whose `textContent` and `title` are assigned.
 * @param {string} description - Text to display.
 */
const updateDescriptionCell = (cell, description) => {
  for (const property of ['textContent', 'title']) {
    cell[property] = description;
  }
};
/**
 * Shows an error message in a cell (as content and tooltip) and tags the cell
 * with the `GFD_title_error` class.
 *
 * @param {Object} cell - Element-like object with `textContent`, `title` and `classList`.
 * @param {string} message - Error text to display.
 */
const updateCellWithError = (cell, message) => {
  Object.assign(cell, { textContent: message, title: message });
  cell.classList.add('GFD_title_error');
};
/**
 * Pulls the text of the first `<title>…</title>` element out of an HTML string.
 *
 * The title text itself must not contain `<`; the tag name is matched
 * case-insensitively and may carry attributes.
 *
 * @param {string} text - HTML (possibly only the first bytes of a document).
 * @returns {?string} The trimmed title text (may be empty), or null when no complete
 *   title element is present.
 */
const extractTitle = (text) => {
  const captured = text.match(/<title[^>]*>([^<]*)<\/title>/i)?.[1];
  if (captured === undefined) {
    return null;
  }
  return captured.trim();
};
/**
 * Pulls the `content` of the first `<meta name="description" …>` tag out of an HTML string.
 *
 * Compared with a single fixed-order pattern this accepts the attributes in any order,
 * tolerates extra attributes between them, and lets a value quoted with one kind of quote
 * contain the other kind (e.g. `content="It's here"`), which previously got truncated.
 * Attribute names and the `description` value are matched case-insensitively.
 *
 * @param {string} text - HTML (possibly only the first bytes of a document).
 * @returns {?string} The trimmed description (may be empty), or null when no description
 *   meta tag with a complete quoted `content` attribute is present.
 */
const extractDescription = (text) => {
  // Quote-aware scan of each <meta …> tag: a quoted value is consumed as a unit, so a ">"
  // or the other quote character inside it does not end the tag. An unterminated quote
  // (response cut off by the byte range) simply ends the captured tag before it.
  const metaTags = text.match(/<meta\s(?:[^>"']|"[^"]*"|'[^']*')*/gi) ?? [];
  for (const tag of metaTags) {
    if (!/\sname\s*=\s*(["'])\s*description\s*\1/i.test(tag)) {
      continue;
    }
    // The backreference makes the value end at the same quote character that opened it.
    const content = tag.match(/\scontent\s*=\s*(["'])(.*?)\1/is);
    if (content) {
      return content[2].trim();
    }
  }
  return null;
};
/**
 * Starts the once-per-second "Fetching... N" progress display for a pair of cells.
 *
 * The interval id and the start timestamp are stored on `cellTitle` as `timerId` and
 * `fetchStartTime`.
 *
 * @param {Object} cellTitle - Title cell; also carries the timer state.
 * @param {Object} cellDescription - Description cell; only its text is updated.
 */
const startFetchingTimer = (cellTitle, cellDescription) => {
  // A retry may start a new timer on a cell whose previous timer was never stopped.
  // Clear it first: otherwise its id is overwritten and the orphaned interval keeps
  // replacing the cell text every second, even after the final result was shown.
  if (cellTitle.timerId) {
    clearInterval(cellTitle.timerId);
  }
  let seconds = 0;
  const render = () => {
    const label = `Fetching... ${seconds}`;
    cellTitle.textContent = label;
    cellDescription.textContent = label;
  };
  render();
  cellTitle.timerId = setInterval(() => {
    seconds += 1;
    render();
  }, 1000);
  cellTitle.fetchStartTime = Date.now();
};
/**
 * Stops the progress timer stored on the title cell and reports how long it ran.
 *
 * @param {Object} cellTitle - Cell carrying `timerId` and `fetchStartTime`.
 * @param {Object} cellDescription - Accepted for symmetry with the start helper; not read.
 * @returns {number} Whole seconds elapsed since `fetchStartTime` (NaN if it was never set).
 */
const stopFetchingTimer = (cellTitle, cellDescription) => {
  const { timerId } = cellTitle;
  if (timerId) {
    clearInterval(timerId);
    cellTitle.timerId = null;
  }
  const elapsedMs = Date.now() - cellTitle.fetchStartTime;
  return Math.floor(elapsedMs / 1000);
};
/**
 * Reacts to a failed fetch by walking the user-agent fallback chain:
 * Googlebot -> (after 1 s) YandexBot -> no crawler headers -> show the error.
 *
 * Every retry uses the 'bytes=0-2048' range regardless of the range that failed.
 *
 * @param {string} url - URL that failed.
 * @param {Object} cellTitle - Title cell (also carries the progress timer).
 * @param {Object} cellDescription - Description cell.
 * @param {?string} range - Range of the failed attempt; currently not consulted.
 * @param {?string} userAgent - Crawler identity used by the failed attempt.
 * @param {number|string} status - HTTP status or a short failure label, shown to the user.
 */
const handleFetchError = (url, cellTitle, cellDescription, range, userAgent, status) => {
  stopFetchingTimer(cellTitle, cellDescription);

  switch (userAgent) {
    case 'Googlebot':
      // Give the server a short pause before retrying as YandexBot.
      setTimeout(() => fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', 'YandexBot'), 1000);
      break;
    case 'YandexBot':
      // Last attempt: no crawler user agent at all.
      fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', null);
      break;
    default: {
      const message = `Error: ${status}`;
      updateCellWithError(cellTitle, message);
      updateCellWithError(cellDescription, message);
    }
  }
};
// In-flight HEAD probes keyed by origin; each entry lists the resolvers waiting for that probe.
const pendingRangeChecks = new Map();
/**
 * Finds out whether the server behind `url` advertises byte-range support
 * (`Accept-Ranges: bytes`) by sending a HEAD request through GM_xmlhttpRequest.
 *
 * The answer is memoized per origin in `supportsRangeCache`. Calls made while a probe for
 * the same origin is still running share that probe instead of sending another request.
 * Network errors, aborts and timeouts all count as "no range support", so the returned
 * promise always settles.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} Resolves to true only when the response carries
 *   `Accept-Ranges: bytes`. Rejects only if `url` cannot be parsed.
 */
const checkSupportsRange = (url) => {
  return new Promise((resolve) => {
    const origin = new URL(url).origin;
    if (supportsRangeCache[origin] !== undefined) {
      resolve(supportsRangeCache[origin]);
      return;
    }
    const waiting = pendingRangeChecks.get(origin);
    if (waiting) {
      waiting.push(resolve);
      return;
    }
    pendingRangeChecks.set(origin, [resolve]);

    let settled = false;
    // Records the answer once and releases every caller waiting on this origin.
    const settle = (supportsRange) => {
      if (settled) {
        return;
      }
      settled = true;
      supportsRangeCache[origin] = supportsRange;
      const resolvers = pendingRangeChecks.get(origin) ?? [];
      pendingRangeChecks.delete(origin);
      resolvers.forEach((notify) => notify(supportsRange));
    };
    const settleUnsupported = () => settle(false);

    try {
      GM_xmlhttpRequest({
        method: 'HEAD',
        url: url,
        headers: getUserAgentHeaders('Googlebot'),
        // Without a timeout a server that never answers would leave the promise pending forever.
        timeout: 10000,
        onload: (res) => {
          const acceptRangesMatch = (res.responseHeaders ?? '').match(/Accept-Ranges:\s*(\w+)/i);
          // Always store a real boolean, never the null produced by a failed match.
          settle(acceptRangesMatch !== null && acceptRangesMatch[1].toLowerCase() === 'bytes');
        },
        onerror: settleUnsupported,
        ontimeout: settleUnsupported,
        onabort: settleUnsupported,
      });
    } catch (err) {
      // A synchronous failure must not strand the callers already queued for this origin.
      console.warn('Title Fetcher: range probe could not be started', err);
      settleUnsupported();
    }
  });
};
/**
 * Fetches a page through GM_xmlhttpRequest, extracts its title and meta description, and
 * writes them into the two cells. Successful results are stored in `cache` and persisted.
 *
 * Flow: cached URLs are rendered immediately. Otherwise a GET is sent (10 s timeout) while a
 * progress timer runs. If the first 'bytes=0-1024' slice contains no title, the request is
 * repeated once with 'bytes=0-2048'. Non-200/206 responses and network errors are handed to
 * `handleFetchError`, which drives the user-agent fallback chain; timeouts are shown directly.
 *
 * @param {string} url - Page URL; normalized before use.
 * @param {Object} cellTitle - Cell receiving the title (also carries the progress timer).
 * @param {Object} cellDescription - Cell receiving the description.
 * @param {?string} [range='bytes=0-1024'] - Value for the Range header; pass null to request
 *   the whole document without a Range header.
 * @param {?string} [userAgent='Googlebot'] - Crawler identity for `getUserAgentHeaders`;
 *   null sends no crawler headers.
 */
const fetchDataWithRange = (url, cellTitle, cellDescription, range = 'bytes=0-1024', userAgent = 'Googlebot') => {
  const normalizedUrl = normalizeUrl(url);
  const cached = cache[normalizedUrl];
  if (cached) {
    updateTitleCell(cellTitle, cached.title);
    updateDescriptionCell(cellDescription, cached.description || '-');
    return;
  }

  // Stops the progress timer and marks both cells as failed.
  const showError = (titleMessage, descriptionMessage) => {
    stopFetchingTimer(cellTitle, cellDescription);
    updateCellWithError(cellTitle, titleMessage);
    updateCellWithError(cellDescription, descriptionMessage);
  };

  // Only send a Range header when a range was actually requested; a null range used to go
  // out as the literal header value "null".
  const headers = {
    ...(range ? { Range: range } : {}),
    ...getUserAgentHeaders(userAgent),
  };

  startFetchingTimer(cellTitle, cellDescription);
  GM_xmlhttpRequest({
    method: 'GET',
    url: normalizedUrl,
    headers,
    timeout: 10000, // 10 seconds
    onload: (res) => {
      if (res.status !== 206 && res.status !== 200) { // neither Partial Content nor OK
        stopFetchingTimer(cellTitle, cellDescription);
        handleFetchError(url, cellTitle, cellDescription, range, userAgent, res.status);
        return;
      }
      const html = res.responseText ?? '';
      const title = extractTitle(html);
      if (title) {
        const description = extractDescription(html) || '-';
        stopFetchingTimer(cellTitle, cellDescription);
        updateTitleCell(cellTitle, title);
        updateDescriptionCell(cellDescription, description);
        cache[normalizedUrl] = { title, description };
        saveCache();
      } else if (range === 'bytes=0-1024') {
        // Stop the running timer before retrying with a larger slice: the retry starts its
        // own timer and the old interval would otherwise be left running.
        stopFetchingTimer(cellTitle, cellDescription);
        fetchDataWithRange(url, cellTitle, cellDescription, 'bytes=0-2048', userAgent);
      } else {
        showError(`Title not found`, `Description not found`);
      }
    },
    onerror: () => {
      stopFetchingTimer(cellTitle, cellDescription);
      handleFetchError(url, cellTitle, cellDescription, range, userAgent, 'Network Error');
    },
    ontimeout: () => {
      showError(`Request timed out`, `Request timed out`);
    },
  });
};
/**
 * Fills the title/description cells for one link.
 *
 * Order of checks: URLs that cannot be parsed are reported in the cells; hosts listed in
 * `skipDomains` (compared without a leading "www.") get "-"; cached URLs are rendered from
 * `cache`; everything else is fetched, with a byte range only when the server advertises
 * range support.
 *
 * Failures are shown in the cells rather than rejecting, because the caller does not await
 * the returned promise.
 *
 * @param {string} url - Link target.
 * @param {Object} cellTitle - Cell receiving the title.
 * @param {Object} cellDescription - Cell receiving the description.
 * @returns {Promise<void>}
 */
const processUrl = async (url, cellTitle, cellDescription) => {
  const normalizedUrl = normalizeUrl(url);

  let domain;
  try {
    domain = new URL(normalizedUrl).hostname.replace(/^www\./, '');
  } catch (err) {
    updateCellWithError(cellTitle, 'Invalid URL');
    updateCellWithError(cellDescription, 'Invalid URL');
    return;
  }

  if (skipDomains.includes(domain)) {
    updateTitleCell(cellTitle, '-');
    updateDescriptionCell(cellDescription, '-');
    return;
  }

  const cached = cache[normalizedUrl];
  if (cached) {
    updateTitleCell(cellTitle, cached.title);
    updateDescriptionCell(cellDescription, cached.description || '-');
    return;
  }

  try {
    const supportsRange = await checkSupportsRange(normalizedUrl);
    // Without range support the whole document is requested (null = no Range header).
    const range = supportsRange ? 'bytes=0-1024' : null;
    fetchDataWithRange(normalizedUrl, cellTitle, cellDescription, range, 'Googlebot');
  } catch (err) {
    console.error('Title Fetcher: could not process', normalizedUrl, err);
    stopFetchingTimer(cellTitle, cellDescription);
    updateCellWithError(cellTitle, `Error: ${err.message}`);
    updateCellWithError(cellDescription, `Error: ${err.message}`);
  }
};
/**
 * Adds "Title" and "Description" columns to a table and starts filling them.
 *
 * Both columns are inserted just before the last existing column. Header cells get the
 * classes `title-header` / `description-header`, body cells `title-cell` /
 * `description-cell`, and each body cell holds a wrapper <div> that receives the text.
 * Rows that already contain a `.title-cell` are left alone, so the function can be called
 * repeatedly on the same table.
 *
 * @param {Element} table - Table element to extend.
 */
const processTable = table => {
  // Creates an element with the given class and inserts it before the parent's last child.
  const insertBeforeLast = (parent, tagName, className) => {
    const element = document.createElement(tagName);
    element.classList.add(className);
    parent.insertBefore(element, parent.lastElementChild);
    return element;
  };

  const header = table.querySelector('thead tr');
  if (header && !header.querySelector('.title-header')) {
    insertBeforeLast(header, 'th', 'title-header').textContent = 'Title';
    insertBeforeLast(header, 'th', 'description-header').textContent = 'Description';
  }

  table.querySelectorAll('tbody tr').forEach(row => {
    if (row.querySelector('.title-cell')) {
      return;
    }
    // Look the link up before inserting the new cells, so that index 1 always refers to
    // the row's own second column and never to one of the cells added below.
    const link = row.cells[1]?.querySelector('a');

    const titleBox = insertBeforeLast(row, 'td', 'title-cell')
      .appendChild(document.createElement('div'));
    const descriptionBox = insertBeforeLast(row, 'td', 'description-cell')
      .appendChild(document.createElement('div'));

    if (!link) {
      // Write into the wrapper divs, exactly like the fetched results do; writing to the
      // <td> itself would discard the wrapper that was just created.
      updateTitleCell(titleBox, '-');
      updateDescriptionCell(descriptionBox, 'No link');
      return;
    }

    // processUrl is async and not awaited here; surface unexpected failures in the row.
    processUrl(link.href, titleBox, descriptionBox).catch((err) => {
      console.error('Title Fetcher: failed to process', link.href, err);
      updateCellWithError(titleBox, `Error: ${err.message}`);
      updateCellWithError(descriptionBox, `Error: ${err.message}`);
    });
  });
};
// Adds the Title/Description columns to every matching table currently in the document.
const processAllTables = () => {
  document.querySelectorAll('table#myTable').forEach(processTable);
};
// Handle the tables that exist when the script starts.
processAllTables();
// Re-run whenever the page adds or replaces nodes, so new tables and rows get their columns.
// Mutations whose target lies inside the script's own cells (the once-per-second
// "Fetching..." text updates and the final results) are skipped; reacting to them would
// rescan every table on each tick of each running timer.
const observer = new MutationObserver((mutations) => {
  const isOwnCell = (node) =>
    node instanceof Element && node.closest('.title-cell, .description-cell') !== null;
  if (mutations.some(({ target }) => !isOwnCell(target))) {
    processAllTables();
  }
});
observer.observe(document.body, { childList: true, subtree: true });