Google Scholar to free PDFs

Adds Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, CORE, and EuropePMC to Google Scholar results. (v1.31: Restored original working logic for Sci-Hub & Sci-Net)

当前为 2025-07-29 提交的版本,查看 最新版本

// ==UserScript==
// @name         Google Scholar to free PDFs
// @namespace    ScholarToSciHub
// @version      1.29
// @description  Adds Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, CORE, and EuropePMC to Google Scholar results. (v1.31: Restored original working logic for Sci-Hub & Sci-Net)
// @author       Bui Quoc Dung
// @match        https://scholar.google.*/*
// @license      AGPL-3.0-or-later
// @grant        GM.xmlHttpRequest
// @connect      *
// ==/UserScript==

// Matches DOI-like tokens of the form "10.<4+ digits>[.<digits>...]/<suffix>"
// anywhere in a string. The dots are escaped so only a literal "." separates
// the numeric parts; unescaped, "." would match any character and plain digit
// runs such as "10012345/view" would be mistaken for DOIs. The suffix stops at
// whitespace and at the characters " & ' < >.
const DOI_REGEX = /\b(10\.\d{4,}(?:\.\d+)*\/(?:(?!["&'<>])\S)+)\b/gi;

/**
 * Builds the URL for one of the external endpoints used by this script.
 *
 * @param {string} type - Endpoint key, e.g. 'scihub', 'crossref', 'core_api'.
 * @param {object} [opts] - Values interpolated into the URL. Which of `doi`,
 *     `title`, `href`, `query` and `accid` are read depends on `type`.
 * @returns {string} The endpoint URL, or '#' when `type` is not a known key.
 */
const getURL = (type, opts = {}) => {
    const { doi, title, href, query, accid } = opts;
    // Missing values are encoded as an empty string.
    const encode = (value) => encodeURIComponent(value || '');

    // Each entry is a thunk so only the requested URL is ever assembled.
    const builders = new Map([
        ['scihub', () => 'https://tesble.com/'], // Base URL only
        ['libgen_search', () => `https://libgen.bz/index.php?req=${encode(query)}`],
        ['libgen_edition', () => 'https://libgen.bz/' + href],
        ['anna_check', () => `https://annas-archive.org/search?index=journals&q=${encode(doi)}`],
        ['anna_scidb', () => `https://annas-archive.org/scidb/${doi}`],
        ['scinet', () => 'https://sci-net.xyz/'], // Base URL only
        ['crossref', () => `https://api.crossref.org/works?query.title=${encode(title)}&rows=1`],
        ['semantic_title', () => `https://api.semanticscholar.org/graph/v1/paper/search?fields=title,url,openAccessPdf&limit=1&query=${encode(title)}`],
        ['semantic_doi', () => `https://api.semanticscholar.org/graph/v1/paper/DOI:${encode(doi)}?fields=title,url,openAccessPdf`],
        ['semantic_fallback', () => `https://www.semanticscholar.org/search?q=${encode(doi || title)}`],
        ['unpaywall_api', () => `https://api.unpaywall.org/v2/${encode(doi)}?email=support%40unpaywall.org`],
        ['unpaywall_fallback', () => 'https://unpaywall.org/'],
        ['core_api', () => `https://api.core.ac.uk/v3/search/works/?q=doi:${encode(doi)}`],
        ['core_fallback', () => `https://core.ac.uk/search/?q=${encode(doi)}`],
        ['europepmc_api', () => `https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=DOI:${encode(doi)}&format=json`],
        ['europepmc_pdf', () => `https://europepmc.org/backend/ptpmcrender.fcgi?accid=${accid}&blobtype=pdf`],
        ['europepmc_fallback', () => `https://europepmc.org/search?query=${encode(doi)}`],
    ]);

    const build = builders.get(type);
    return build ? build() : '#';
};

/**
 * Promise wrapper around GM.xmlHttpRequest.
 *
 * @param {object} details - Request options passed through to
 *     GM.xmlHttpRequest. Any onload/onerror/onabort/ontimeout callbacks it
 *     carries are replaced by the promise's own handlers.
 * @returns {Promise<object>} Resolves with the response object on load;
 *     rejects with the event passed to the error, abort or timeout callback.
 */
const httpRequest = (details) => new Promise((resolve, reject) => {
    GM.xmlHttpRequest({
        ...details,
        onload: resolve,
        onerror: reject,
        // Settle on abort/timeout as well; otherwise the promise would stay
        // pending forever and every caller awaiting it would hang.
        onabort: reject,
        ontimeout: reject,
    });
});

/**
 * Replaces a placeholder element with a link that opens in a new tab.
 *
 * @param {Element} span - Placeholder to replace (via `replaceWith`).
 * @param {string} text - Link label; a leading "[PDF]", "[Chat]" or "[Maybe]"
 *     tag anywhere in the label is rendered in bold.
 * @param {string} href - Link target.
 * @param {boolean} [isNo=false] - When true the link is coloured gray.
 */
function updateLink(span, text, href, isNo = false) {
    const anchor = document.createElement('a');
    anchor.href = href;
    anchor.target = '_blank';
    anchor.rel = 'noopener noreferrer';
    anchor.innerHTML = text.replace(/\[(PDF|Chat|Maybe)\]/g, '<b>[$1]</b>');

    anchor.style.fontSize = '15px';
    anchor.style.color = isNo ? 'gray' : '';

    span.replaceWith(anchor);
}

/**
 * Appends a gray "Loading..." placeholder to `container`.
 *
 * @param {Element} container - Element that receives the placeholder.
 * @returns {Element} Whatever `container.appendChild` returns for the new div.
 */
const addLoadingIndicator = (container) => {
    const placeholder = document.createElement('div');
    placeholder.textContent = 'Loading...';
    placeholder.style = 'margin-bottom: 4px; color: gray; font-size: 15px;';
    return container.appendChild(placeholder);
};

/**
 * Tries to determine the DOI of a search result.
 *
 * Sources are tried in order: the link's own URL, the body of the linked
 * page, and finally a Crossref title query using the link text.
 *
 * @param {HTMLAnchorElement} titleLink - Title link of the result; its `href`
 *     and `textContent` are read.
 * @returns {Promise<string|null>} The DOI, or null when none could be found.
 */
async function fetchDOI(titleLink) {
    // Trims a matched token to the DOI prefix plus the suffix up to the next
    // whitespace, "/", "?", "#", "<" or ">"; falls back to the trimmed input.
    const cleanDOI = (doi) => (doi.match(/^10\.\d{4,}(?:\.\d+)*\/[^\s\/?#<>]+/) || [doi.trim()])[0];

    const fromUrl = titleLink.href.match(DOI_REGEX);
    if (fromUrl) return cleanDOI(fromUrl[0]);

    try {
        const res = await httpRequest({ method: 'GET', url: titleLink.href });
        const fromBody = res.responseText.match(DOI_REGEX);
        if (fromBody) return cleanDOI(fromBody[0]);
    } catch (err) {
        // The linked page could not be fetched or read. This is an expected
        // outcome, so fall through to the Crossref lookup instead of giving up.
    }

    try {
        const crRes = await httpRequest({ method: 'GET', url: getURL('crossref', { title: titleLink.textContent.trim() }) });
        return JSON.parse(crRes.responseText).message.items?.[0]?.DOI || null;
    } catch (err) {
        return null;
    }
}


/**
 * Probes the Sci-Hub mirror for a paper and replaces `span` with the result.
 *
 * The article URL is tried first, then the DOI. A response counts as a hit
 * when its text contains "iframe" or "embed".
 *
 * @param {string|null} doi - DOI of the paper, if known.
 * @param {string} href - URL of the article page.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkSciHub(doi, href, span) {
    const mirror = getURL('scihub', {});

    for (const identifier of [href, doi]) {
        if (!identifier) continue;
        const target = mirror + identifier;
        try {
            const response = await httpRequest({ method: 'GET', url: target });
            if (/iframe|embed/.test(response.responseText)) {
                updateLink(span, '[PDF] Sci-Hub', target);
                return;
            }
        } catch (e) {
            // Best effort: a failed probe simply moves on to the next identifier.
        }
    }

    updateLink(span, '[No] Sci-Hub', mirror + (doi || ''), true);
}

/**
 * Probes Sci-net for a paper and replaces `span` with the result.
 *
 * The article URL is tried before the DOI. A response counts as a hit when
 * its text contains "iframe", "pdf" or "embed".
 *
 * @param {string|null} doi - DOI of the paper, if known.
 * @param {string} href - URL of the article page.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkSciNet(doi, href, span) {
    const root = getURL('scinet', {});
    const candidates = [href, doi].filter(Boolean);

    for (const candidate of candidates) {
        const probeUrl = root + candidate;
        try {
            const { responseText } = await httpRequest({ method: 'GET', url: probeUrl });
            const looksLikeViewer = /iframe|pdf|embed/.test(responseText);
            if (looksLikeViewer) {
                updateLink(span, '[PDF] Sci-net', probeUrl);
                return;
            }
        } catch (e) {
            // Best effort: ignore the failure and try the next candidate.
        }
    }

    updateLink(span, '[No] Sci-net', root + (doi || ''), true);
}


/**
 * Looks the paper up on LibGen and replaces `span` with the result.
 *
 * The title is searched first, then the DOI when one is available. The
 * placeholder is replaced exactly once: with a "[PDF]" link to the successful
 * search, or with a gray "[No]" link to the last query that makes sense
 * (the DOI when known, the title otherwise).
 *
 * @param {string} title - Paper title used as the first search query.
 * @param {string|null} doi - DOI used as the second search query, if known.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
const checkLibgen = async (title, doi, span) => {
    // Resolves to true (and installs the "[PDF]" link) when the search page
    // lists an edition whose detail page contains a table.
    const trySearch = async (query) => {
        try {
            const res = await httpRequest({ method: 'GET', url: getURL('libgen_search', { query }) });
            const doc = new DOMParser().parseFromString(res.responseText, 'text/html');
            const linkEl = doc.querySelector('.table.table-striped a[href^="edition.php?id="]');
            if (!linkEl) return false;
            const detailRes = await httpRequest({ method: 'GET', url: getURL('libgen_edition', { href: linkEl.getAttribute('href') }) });
            if (new DOMParser().parseFromString(detailRes.responseText, 'text/html').querySelector('table')) {
                updateLink(span, '[PDF] LibGen', getURL('libgen_search', { query }));
                return true;
            }
        } catch (e) { console.error('LibGen error', e); }
        return false;
    };

    // Stop at the first hit so a successful search is never followed by a
    // second, contradictory "[No]" update of the same placeholder.
    if (await trySearch(title)) return;
    if (doi && await trySearch(doi)) return;

    updateLink(span, '[No] LibGen', getURL('libgen_search', { query: doi || title }), true);
};

/**
 * Asks the Semantic Scholar API for an open-access PDF and replaces `span`.
 *
 * The title search is tried first; the DOI lookup is only attempted when the
 * title search yields no PDF URL and a DOI was supplied.
 *
 * @param {string} title - Paper title.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @param {string|null} [doi=null] - DOI of the paper, if known.
 * @returns {Promise<void>}
 */
async function checkSemanticScholar(title, span, doi = null) {
    // Resolves to true after installing the "[PDF]" link; any request or
    // parse failure is treated as "no PDF here".
    const lookup = async (url) => {
        try {
            const response = await httpRequest({ method: 'GET', url });
            const payload = JSON.parse(response.responseText);
            // A single-paper reply carries openAccessPdf at the top level,
            // a search reply carries it on the first element of `data`.
            const pdfUrl = payload?.openAccessPdf?.url || payload?.data?.[0]?.openAccessPdf?.url;
            if (pdfUrl) {
                updateLink(span, '[PDF] Semantic', pdfUrl);
                return true;
            }
        } catch (err) {}
        return false;
    };

    if (await lookup(getURL('semantic_title', { title }))) return;
    if (doi && await lookup(getURL('semantic_doi', { doi }))) return;

    updateLink(span, '[No] Semantic', getURL('semantic_fallback', { doi, title }), true);
}

/**
 * Queries the Unpaywall API for `doi` and replaces `span` with the result.
 *
 * @param {string} doi - DOI of the paper.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkUnpaywall(doi, span) {
    const reportMissing = () => updateLink(span, '[No] Unpaywall', getURL('unpaywall_fallback'), true);

    try {
        const response = await httpRequest({ method: 'GET', url: getURL('unpaywall_api', { doi }) });
        const record = JSON.parse(response.responseText);
        const hasOpenCopy = record?.is_oa && record.best_oa_location?.url;
        if (!hasOpenCopy) {
            reportMissing();
            return;
        }
        updateLink(span, '[PDF] Unpaywall', record.best_oa_location.url);
    } catch {
        // Request, parse or update failure: show the gray fallback link.
        reportMissing();
    }
}

/**
 * Checks Anna's Archive for `doi` and replaces `span` with the result.
 *
 * The journal search page is fetched first. When it reports "Rate limited"
 * the check is re-scheduled five seconds later, up to ten times. When the
 * search page appears to list the DOI, the SciDB page is fetched and the link
 * is labelled "[PDF]" if it contains viewer markup, "[Maybe]" otherwise.
 *
 * @param {string} doi - DOI of the paper.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @param {number} [retry=0] - Number of rate-limit retries already made.
 * @returns {Promise<void>} Settles before a scheduled retry runs.
 */
async function checkAnna(doi, span, retry = 0) {
    const searchUrl = getURL('anna_check', { doi });
    const scidbUrl = getURL('anna_scidb', { doi });
    const toDocument = (html) => new DOMParser().parseFromString(html, 'text/html');

    try {
        const searchResponse = await httpRequest({ method: 'GET', url: searchUrl });
        const searchDoc = toDocument(searchResponse.responseText);

        const rateLimited = searchDoc.body.textContent.includes("Rate limited");
        if (rateLimited && retry < 10) {
            setTimeout(() => checkAnna(doi, span, retry + 1), 5000);
            return;
        }

        const isListed = searchDoc.querySelector('.mt-4.uppercase.text-xs.text-gray-500')
            || Array.from(searchDoc.querySelectorAll('div.text-gray-500')).some((div) => div.textContent.includes(doi));
        if (!isListed) {
            updateLink(span, '[No] Anna', searchUrl, true);
            return;
        }

        const scidbResponse = await httpRequest({ method: 'GET', url: scidbUrl });
        const viewer = toDocument(scidbResponse.responseText).querySelector('.pdfViewer, #viewerContainer, iframe');
        const label = viewer ? '[PDF] Anna' : '[Maybe] Anna';
        updateLink(span, label, scidbUrl);
    } catch {
        updateLink(span, '[No] Anna', searchUrl, true);
    }
}

/**
 * Searches the CORE API for `doi` and replaces `span` with the result.
 *
 * Only a result whose `doi` equals the requested one (case-insensitively)
 * is accepted; its `downloadUrl` becomes the "[PDF]" link.
 *
 * @param {string} doi - DOI of the paper.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkCore(doi, span) {
    const apiUrl = getURL('core_api', { doi });
    const reportMissing = () => updateLink(span, '[No] Core', getURL('core_fallback', { doi }), true);

    try {
        const response = await httpRequest({ method: 'GET', url: apiUrl, headers: { accept: 'application/json' } });
        const payload = JSON.parse(response.responseText);
        const sameDoi = (record) => record?.doi?.toLowerCase() === doi.toLowerCase();
        const downloadUrl = payload?.results?.find(sameDoi)?.downloadUrl;

        if (!downloadUrl) {
            reportMissing();
            return;
        }
        updateLink(span, '[PDF] Core', downloadUrl);
    } catch {
        reportMissing();
    }
}

/**
 * Searches Europe PMC for `doi` and replaces `span` with the result.
 *
 * A record is accepted only when its `doi` equals the requested one
 * (case-insensitively). An open-access record with at least one full-text id
 * links to the rendered PDF for the first id; otherwise an entry of the
 * full-text list with `documentStyle === 'pdf'` and a `url` is used.
 *
 * @param {string} doi - DOI of the paper.
 * @param {Element} span - Placeholder replaced through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkEuropePMC(doi, span) {
    const searchUrl = getURL('europepmc_api', { doi });
    const reportMissing = () => updateLink(span, '[No] EuropePMC', getURL('europepmc_fallback', { doi }), true);

    try {
        const response = await httpRequest({ method: 'GET', url: searchUrl });
        const payload = JSON.parse(response.responseText);
        const record = payload?.resultList?.result?.find((entry) => entry.doi?.toLowerCase() === doi.toLowerCase());
        const fullTextIds = record?.fullTextIdList?.fullTextId;

        if (record?.isOpenAccess === 'Y' && fullTextIds?.length) {
            updateLink(span, '[PDF] EuropePMC', getURL('europepmc_pdf', { accid: fullTextIds[0] }));
            return;
        }

        // NOTE(review): the branch above uses the list entries as accession
        // ids, while this one reads `documentStyle`/`url` properties from
        // them. Confirm against the API which shape the entries really have.
        const pdfEntry = fullTextIds?.find((entry) => entry.documentStyle === 'pdf' && entry.url);
        if (pdfEntry?.url) {
            updateLink(span, '[PDF] EuropePMC', pdfEntry.url);
            return;
        }

        reportMissing();
    } catch {
        reportMissing();
    }
}

/**
 * Decorates one search result with a link placeholder per service and starts
 * all availability checks for it.
 *
 * Results without a title link are skipped, and a result whose button
 * container already carries the `scihub-processed` class is left alone.
 *
 * @param {Element} result - Result element of the search page.
 * @returns {Promise<void>} Settles once the DOI lookup has finished and the
 *     checks have been started; the checks themselves are not awaited.
 */
async function processEntry(result) {
    const titleLink = result.querySelector('.gs_rt a');
    if (!titleLink) return;

    // Reuse the existing button container, or build one at the top of the
    // result when there is none.
    let buttonContainer = result.querySelector('.gs_or_ggsm');
    if (!buttonContainer) {
        const wrapper = result.insertBefore(document.createElement('div'), result.firstChild);
        wrapper.className = 'gs_ggs gs_fl';
        wrapper.innerHTML = '<div class="gs_ggsd"><div class="gs_or_ggsm"></div></div>';
        buttonContainer = wrapper.querySelector('.gs_or_ggsm');
    }

    // Mark synchronously, before the first await, so overlapping runs never
    // decorate the same result twice.
    if (buttonContainer.classList.contains('scihub-processed')) return;
    buttonContainer.classList.add('scihub-processed');

    // One inner array per visual row; placeholders are stored as "<name>Span".
    const serviceLayout = [['semantic', 'unpaywall'], ['scihub', 'libgen'], ['anna', 'scinet'], ['core', 'europepmc']];
    const serviceSpans = {};
    for (const rowServices of serviceLayout) {
        const rowElement = document.createElement('span');
        rowElement.style = 'display: inline-flex; gap: 6px;';
        const row = buttonContainer.appendChild(rowElement);
        for (const name of rowServices) {
            serviceSpans[`${name}Span`] = addLoadingIndicator(row);
        }
    }

    const doi = await fetchDOI(titleLink);

    // These services can also work from the article URL or the title.
    checkSciHub(doi, titleLink.href, serviceSpans.scihubSpan);
    checkSciNet(doi, titleLink.href, serviceSpans.scinetSpan);
    checkLibgen(titleLink.textContent, doi, serviceSpans.libgenSpan);
    checkSemanticScholar(titleLink.textContent, serviceSpans.semanticSpan, doi);

    // These services are only queried when a DOI is available.
    const doiOnlyServices = [
        ['Anna', 'anna', checkAnna],
        ['Unpaywall', 'unpaywall', checkUnpaywall],
        ['Core', 'core', checkCore],
        ['EuropePMC', 'europepmc', checkEuropePMC],
    ];
    for (const [label, key, check] of doiOnlyServices) {
        const placeholder = serviceSpans[`${key}Span`];
        if (doi) {
            check(doi, placeholder);
        } else {
            updateLink(placeholder, `[No] ${label}`, '#', true);
        }
    }
}

/**
 * Processes every search result currently on the page, one after another.
 *
 * Each entry is awaited before the next one starts.
 *
 * @returns {Promise<void>}
 */
async function addButtons() {
    const entries = Array.from(document.querySelectorAll('#gs_res_ccl_mid .gs_r.gs_or.gs_scl'));
    for (const entry of entries) {
        await processEntry(entry);
    }
}

// Decorate the results already on the page, then run again on every DOM
// change under <body>; processEntry skips containers it has already marked.
addButtons();
const resultsObserver = new MutationObserver(addButtons);
resultsObserver.observe(document.body, { childList: true, subtree: true });