ITデジタル就職展 公司批量下载

Scrapes company data from the paginated company listing pages and saves it as a CSV file when a button is clicked, logging progress to the console for debugging.

// ==UserScript==
// @name         ITデジタル就職展 公司批量下载
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  Scrape company data from pages and save as CSV on button click with debugging
// @author       cheerchen37
// @match        https://digital-career-fair.com/*
// @grant        GM_xmlhttpRequest
// @grant        GM_download
// @license MIT
// ==/UserScript==

(function() {
    'use strict';

    // Prefix of the listing pages; fetchPage requests `${baseUrl}<page>/`.
    const baseUrl = "https://digital-career-fair.com/company/page/";
    // Number of listing pages fetchPage walks before saving (hard-coded).
    const totalPages = 4;
    // Rows ({ title, url }) collected by fetchPage and written out by saveDataAsCSV.
    // The array is only ever mutated in place, so the binding itself is constant.
    const posts = [];

    /**
     * Fetch one listing page, append its company cards to `posts`, then
     * continue with the next page until `totalPages` has been processed.
     * After the last page, `saveDataAsCSV()` is called if any rows were found.
     *
     * Calling this with page 1 starts a fresh run: rows collected by an
     * earlier run are discarded so repeated button clicks do not produce
     * duplicate CSV entries.
     *
     * A non-200 response or a network error is logged and stops the chain.
     *
     * @param {number} pageNumber - Page index appended to `baseUrl`.
     */
    function fetchPage(pageNumber) {
        const pageUrl = `${baseUrl}${pageNumber}/`;

        // Resolve `href` against `base`; returns null when it cannot be parsed.
        const toAbsolute = (href, base) => {
            try {
                return new URL(href, base).href;
            } catch (err) {
                return null;
            }
        };

        // Page 1 marks the start of a new run; drop rows left over from a
        // previous click so the CSV does not contain duplicates.
        if (pageNumber === 1) {
            posts.length = 0;
        }

        console.log(`Fetching page ${pageNumber}`);
        GM_xmlhttpRequest({
            method: "GET",
            url: pageUrl,
            headers: {
                // NOTE(review): this is a placeholder value — presumably meant to be
                // replaced with a real cookie string; confirm whether it is needed.
                "Cookie": "your-cookie-string-here"
            },
            onload: function(response) {
                if (response.status !== 200) {
                    console.error(`Failed to fetch page ${pageNumber}: ${response.statusText}`);
                    return;
                }

                const parser = new DOMParser();
                const doc = parser.parseFromString(response.responseText, "text/html");
                const items = doc.querySelectorAll(".company-article-cards__item");

                if (items.length === 0) {
                    console.warn(`No items found on page ${pageNumber}`);
                }

                // A document produced by DOMParser takes its URL from the page that
                // runs this script, so `a.href` would resolve relative links against
                // the wrong location. Resolve them against the fetched page instead,
                // honouring a <base href> in the fetched markup when there is one.
                const fetchedUrl = response.finalUrl || pageUrl;
                const baseHref = doc.querySelector("base[href]")?.getAttribute("href");
                const linkBase = (baseHref && toAbsolute(baseHref, fetchedUrl)) || fetchedUrl;

                items.forEach(item => {
                    const title = item.querySelector(".company-article-cards__name")?.textContent.trim();
                    const rawHref = item.querySelector("a")?.getAttribute("href");
                    if (!title || !rawHref) {
                        return;
                    }

                    const url = toAbsolute(rawHref, linkBase);
                    if (!url) {
                        console.warn(`Skipping "${title}": unparseable link ${rawHref}`);
                        return;
                    }

                    posts.push({ title, url });
                    console.log(`Found post: ${title} - ${url}`);
                });

                if (pageNumber < totalPages) {
                    fetchPage(pageNumber + 1);
                } else {
                    console.log("All pages fetched. Total posts:", posts.length);
                    if (posts.length > 0) {
                        saveDataAsCSV();
                    } else {
                        console.error("No posts to save.");
                    }
                }
            },
            onerror: function(response) {
                console.error(`Error fetching page ${pageNumber}: ${response.statusText}`);
            }
        });
    }

    /**
     * Serialize `posts` as a two-column CSV ("Title,URL" header, CRLF line
     * endings) and hand it to `triggerDownload` as a `data:` URI named
     * "company_data.csv".
     *
     * Every field is wrapped in double quotes, with embedded quotes doubled.
     * The CSV text is percent-encoded with `encodeURIComponent`, because
     * `encodeURI` leaves `#` untouched and the browser would then treat the
     * rest of the data URI as a fragment, truncating the file.
     */
    function saveDataAsCSV() {
        console.log("Saving data as CSV...");

        // Quote a single CSV field, doubling any embedded double quotes.
        const toField = (value) => `"${String(value).replace(/"/g, '""')}"`;

        const lines = ["Title,URL"];
        posts.forEach(({ title, url }) => {
            lines.push(`${toField(title)},${toField(url)}`);
        });
        const csvText = lines.map((line) => `${line}\r\n`).join("");

        console.log("CSV Content prepared:");
        console.log(csvText);

        const dataUri = `data:text/csv;charset=utf-8,${encodeURIComponent(csvText)}`;
        triggerDownload(dataUri, "company_data.csv");
    }

    /**
     * Start a download by clicking a temporary anchor element.
     *
     * The anchor is attached to the document body, clicked programmatically
     * and removed again straight away.
     *
     * @param {string} dataUri - Value assigned to the anchor's `href`.
     * @param {string} fileName - Value assigned to the anchor's `download` attribute.
     */
    function triggerDownload(dataUri, fileName) {
        const anchor = Object.assign(document.createElement('a'), {
            href: dataUri,
            download: fileName,
        });
        const { body } = document;

        body.appendChild(anchor);
        anchor.click();
        body.removeChild(anchor);
    }

    /**
     * Append a "Download Company Data" button to the element matching
     * `.company-search__title`. Clicking the button starts the scrape at
     * page 1. If no such element exists, an error is logged and nothing
     * is added.
     */
    function addButton() {
        const host = document.querySelector('.company-search__title');
        if (!host) {
            console.error("Target element '.company-search__title' not found.");
            return;
        }

        const downloadButton = document.createElement("button");
        downloadButton.textContent = "Download Company Data";
        Object.assign(downloadButton.style, {
            marginLeft: "20px",
            padding: "5px 10px",
            fontSize: "12px",
            cursor: "pointer",
        });

        downloadButton.addEventListener("click", () => {
            console.log("Button clicked. Starting data fetch...");
            fetchPage(1);
        });

        host.appendChild(downloadButton);
        console.log("Button added to the page.");
    }

    // Add the button only once the page has fully loaded, and only on URLs
    // containing "/company".
    function addButtonOnCompanyPages() {
        if (window.location.href.includes("/company")) {
            addButton();
        }
    }

    // Depending on when the userscript manager injects this script, the window
    // "load" event may already have fired; a listener registered afterwards
    // would never run, so in that case add the button immediately.
    if (document.readyState === 'complete') {
        addButtonOnCompanyPages();
    } else {
        window.addEventListener('load', addButtonOnCompanyPages);
    }
})();