您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Scrapes reviews from Google Maps with improved date handling and photo extraction
// ==UserScript==
// @name         Google Maps Reviews Scraper & Exporter (Enhanced)
// @namespace    http://tampermonkey.net/
// @version      0.16
// @description  Scrapes reviews from Google Maps with improved date handling and photo extraction
// @author       sharmanhall
// @match        https://www.google.com/maps/place/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=google.com
// @grant        GM_addStyle
// @license      MIT
// ==/UserScript==

(function() {
    'use strict';

    // ---------------------------------------------------------------------
    // Constants
    // ---------------------------------------------------------------------

    /** Upper bound on scroll iterations so a page that keeps growing cannot loop forever. */
    const MAX_SCROLL_ATTEMPTS = 50;
    /** Pause after each scroll before the scroll height is measured again. */
    const SCROLL_SETTLE_MS = 2000;
    /** Pause after clicking a "More" button before clicking the next one. */
    const EXPAND_SETTLE_MS = 300;
    /** Delay before the CSV object URL is released, leaving time for the download to start. */
    const OBJECT_URL_REVOKE_DELAY_MS = 1000;

    /** Matches "<count> <unit>(s) ago"; the count is digits or an article ("a"/"an"). */
    const RELATIVE_DATE_PATTERN = /\b(\d+|an?)\s+(second|minute|hour|day|week|month|year)s?\s+ago/i;
    /** Matches a bare ISO calendar date such as "2024-01-31". */
    const ISO_DATE_ONLY_PATTERN = /^\d{4}-\d{2}-\d{2}$/;
    /** Captures the URL inside a CSS `url(...)` value, with or without quotes. */
    const BACKGROUND_URL_PATTERN = /url\(["']?(.*?)["']?\)/;
    /** Matches the "=w<width>-h<height>..." size suffix of a photo URL. */
    const PHOTO_SIZE_PATTERN = /=w\d+-h\d+([^=]*)?/g;

    /** All selectors that may identify a "More" button, combined so each element is visited once. */
    const MORE_BUTTON_SELECTOR = [
        'button.w8nwRe.kyuRq',
        "button[aria-label*='more']",
        'button.review-more-link',
        '.review-full-text button',
    ].join(', ');

    /** Column order of the CSV export; each entry is a key of a scraped review object. */
    const CSV_HEADERS = [
        'reviewer_name',
        'profile_img_url',
        'review_date_raw',
        'review_date_estimated',
        'star_rating',
        'review_url',
        'review_content',
        'review_photos',
        'review_id',
        'scraped_at',
    ];

    // ---------------------------------------------------------------------
    // Floating panel
    // ---------------------------------------------------------------------

    // Add styles for floating panel
    GM_addStyle(`
        #review-scraper-panel {
            position: fixed;
            bottom: 20px;
            right: 20px;
            background-color: #fff;
            border: 1px solid #ccc;
            border-radius: 8px;
            padding: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            z-index: 1000;
            font-family: Arial, sans-serif;
            min-width: 200px;
        }
        #review-scraper-panel button {
            background-color: #4285f4;
            color: white;
            border: none;
            padding: 8px 12px;
            margin: 5px;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
            width: 100%;
        }
        #review-scraper-panel button:hover {
            background-color: #3367d6;
        }
        #scraper-status {
            margin-top: 10px;
            font-size: 12px;
            color: #666;
        }
        .scraper-progress {
            background-color: #e8f0fe;
            border-radius: 4px;
            padding: 5px;
            margin-top: 5px;
        }
    `);

    /**
     * Creates one panel button.
     * @param {string} id - Element id (the stylesheet and external tooling may rely on it).
     * @param {string} label - Visible button text.
     * @returns {HTMLButtonElement} The new, unattached button.
     */
    const createButton = (id, label) => {
        const button = document.createElement('button');
        button.id = id;
        button.textContent = label;
        return button;
    };

    // The panel is assembled from DOM nodes rather than an HTML string so that
    // no text is ever parsed as markup.
    const panel = document.createElement('div');
    panel.id = 'review-scraper-panel';
    const scrapeButton = createButton('scrape-reviews', 'Scrape Reviews');
    const copyButton = createButton('copy-to-clipboard', 'Copy to Clipboard');
    const exportButton = createButton('export-csv', 'Export as CSV');
    const statusDiv = document.createElement('div');
    statusDiv.id = 'scraper-status';
    panel.append(scrapeButton, copyButton, exportButton, statusDiv);
    document.body.appendChild(panel);

    /**
     * Replaces the status area content with a single progress message.
     * @param {string} message - Plain text to show; it is not interpreted as HTML.
     */
    const setStatus = (message) => {
        const progress = document.createElement('div');
        progress.className = 'scraper-progress';
        progress.textContent = message;
        statusDiv.replaceChildren(progress);
    };

    // ---------------------------------------------------------------------
    // Small utilities
    // ---------------------------------------------------------------------

    /**
     * Resolves after the given delay.
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>}
     */
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    /**
     * Formats a date as YYYY-MM-DD using the local calendar.
     * `toISOString()` is avoided here because it reports the UTC day, which can
     * differ from the local day by one.
     * @param {Date} date - A valid date.
     * @returns {string} The local calendar date.
     */
    const formatLocalDate = (date) => {
        const year = String(date.getFullYear()).padStart(4, '0');
        const month = String(date.getMonth() + 1).padStart(2, '0');
        const day = String(date.getDate()).padStart(2, '0');
        return `${year}-${month}-${day}`;
    };

    /**
     * Moves a date back by whole months, in place, clamping the day of month to
     * the length of the target month (so the 31st never spills into the next month).
     * @param {Date} date - Date to modify.
     * @param {number} months - Number of months to subtract.
     */
    const subtractMonths = (date, months) => {
        const originalDay = date.getDate();
        // Park on the 1st so changing the month cannot overflow.
        date.setDate(1);
        date.setMonth(date.getMonth() - months);
        // Day 0 of the following month is the last day of the target month.
        const lastDayOfMonth = new Date(date.getFullYear(), date.getMonth() + 1, 0).getDate();
        date.setDate(Math.min(originalDay, lastDayOfMonth));
    };

    /** Maps each unit accepted by RELATIVE_DATE_PATTERN to an in-place "go back n units" operation. */
    const RELATIVE_UNIT_SHIFTS = {
        second: (date, n) => date.setSeconds(date.getSeconds() - n),
        minute: (date, n) => date.setMinutes(date.getMinutes() - n),
        hour: (date, n) => date.setHours(date.getHours() - n),
        day: (date, n) => date.setDate(date.getDate() - n),
        week: (date, n) => date.setDate(date.getDate() - (n * 7)),
        month: (date, n) => subtractMonths(date, n),
        year: (date, n) => subtractMonths(date, n * 12),
    };

    // ---------------------------------------------------------------------
    // Date parsing
    // ---------------------------------------------------------------------

    /**
     * Turns the date label of a review into a raw string plus an estimated calendar date.
     *
     * Labels containing "ago" are treated as relative to the current time; the count may
     * be digits ("3 weeks ago") or an article ("a week ago", "an hour ago"), which counts
     * as 1. Any other label is handed to the `Date` constructor.
     *
     * @param {string|null|undefined} dateText - Text of the date element, if any.
     * @returns {{raw_date: string, estimated_date: string}} `estimated_date` is YYYY-MM-DD,
     *     `'unknown'` when the label could not be interpreted, or `''` when no label was given.
     */
    const parseReviewDate = (dateText) => {
        if (!dateText) return { raw_date: '', estimated_date: '' };

        const rawDate = dateText.trim();
        let estimatedDate = '';

        try {
            if (rawDate.includes('ago')) {
                // Handle relative dates
                const match = rawDate.match(RELATIVE_DATE_PATTERN);
                if (match) {
                    const [, amount, unit] = match;
                    const count = /^\d+$/.test(amount) ? Number.parseInt(amount, 10) : 1;
                    const date = new Date();
                    RELATIVE_UNIT_SHIFTS[unit.toLowerCase()](date, count);
                    // An absurdly large count pushes the date out of range; report it as unknown.
                    if (!Number.isNaN(date.getTime())) {
                        estimatedDate = formatLocalDate(date);
                    }
                }
            } else {
                // Try to parse absolute dates
                const parsed = new Date(rawDate);
                if (!Number.isNaN(parsed.getTime())) {
                    // Date-only ISO strings are parsed as UTC midnight, so their calendar day
                    // must be read back in UTC; every other form is read in local time.
                    estimatedDate = ISO_DATE_ONLY_PATTERN.test(rawDate)
                        ? parsed.toISOString().split('T')[0]
                        : formatLocalDate(parsed);
                }
            }
        } catch (e) {
            // Best effort: a label that cannot be parsed is reported as 'unknown'.
            console.warn('Date parsing error:', e);
        }

        return {
            raw_date: rawDate,
            estimated_date: estimatedDate || 'unknown',
        };
    };

    // ---------------------------------------------------------------------
    // Photo extraction
    // ---------------------------------------------------------------------

    /**
     * Reads the URL out of an element's inline `background-image` style.
     * @param {HTMLElement} element - Element to inspect.
     * @returns {string} The URL, or `''` when there is no inline background image.
     */
    const getBackgroundImageUrl = (element) => {
        const backgroundImage = element.style.backgroundImage;
        if (!backgroundImage) return '';
        const match = backgroundImage.match(BACKGROUND_URL_PATTERN);
        return match?.[1] || '';
    };

    /**
     * Tells whether a URL looks like a profile picture path ("/a-/" or "/a/").
     * @param {string} url - Candidate image URL.
     * @returns {boolean}
     */
    const isProfilePictureUrl = (url) => url.includes('/a-/') || url.includes('/a/');

    /**
     * Rewrites a "=w<W>-h<H>..." size suffix to request a 1920x1080 rendition.
     * URLs without such a suffix are returned unchanged.
     * @param {string} url - Photo URL.
     * @returns {string} The possibly rewritten URL.
     */
    const upgradePhotoResolution = (url) => {
        if (url.includes('=w') && url.includes('-h')) {
            return url.replace(PHOTO_SIZE_PATTERN, '=w1920-h1080-k-no');
        }
        return url;
    };

    /**
     * Collects the photo URLs attached to one review.
     *
     * Two passes feed the same de-duplicated, insertion-ordered result:
     * 1. photo buttons inside the `.KtCyie` container;
     * 2. a backup pass over any element whose inline style mentions googleusercontent.com,
     *    with extra filters against profile images.
     *
     * @param {Element} reviewDiv - Root element of a single review.
     * @returns {string[]} Unique photo URLs in discovery order.
     */
    const extractReviewPhotos = (reviewDiv) => {
        const photos = new Set();

        // Look for the specific photo container class from Google Maps
        const photoContainer = reviewDiv.querySelector('.KtCyie');
        if (photoContainer) {
            const photoButtons = photoContainer.querySelectorAll('button.Tya61d[style*="background-image"]');
            for (const button of photoButtons) {
                const url = getBackgroundImageUrl(button);
                if (url && !isProfilePictureUrl(url)) {
                    photos.add(upgradePhotoResolution(url));
                }
            }
        }

        // Backup: any other element with a background image hosted on googleusercontent
        const elementsWithBg = reviewDiv.querySelectorAll('[style*="googleusercontent.com"]');
        for (const element of elementsWithBg) {
            const url = getBackgroundImageUrl(element);
            const isReviewPhoto = Boolean(url)
                && url.includes('googleusercontent.com')
                && !isProfilePictureUrl(url)
                && !url.includes('avatar')
                && !url.includes('NBa7we'); // NBa7we is the profile image class
            if (isReviewPhoto) {
                photos.add(upgradePhotoResolution(url));
            }
        }

        return [...photos];
    };

    // ---------------------------------------------------------------------
    // Page preparation
    // ---------------------------------------------------------------------

    /**
     * Scrolls to the bottom repeatedly until the scroll height stops growing or the
     * attempt cap is exceeded, then scrolls back to the top.
     * @param {() => number} getHeight - Returns the current scroll height.
     * @param {(y: number) => void} scrollToY - Scrolls to the given vertical offset.
     * @returns {Promise<void>}
     */
    const scrollUntilStable = async (getHeight, scrollToY) => {
        let attempts = 0;
        let previousHeight;
        let currentHeight = getHeight();

        do {
            previousHeight = currentHeight;
            scrollToY(currentHeight);
            await sleep(SCROLL_SETTLE_MS);
            currentHeight = getHeight();
            attempts++;
            setStatus(`Loading reviews... (attempt ${attempts})`);
            // Prevent infinite loops
            if (attempts > MAX_SCROLL_ATTEMPTS) break;
        } while (currentHeight > previousHeight);

        scrollToY(0);
    };

    /**
     * Scrolls the reviews container (or the window when no container is found)
     * so that lazily loaded reviews get a chance to load.
     * @returns {Promise<void>}
     */
    const autoScroll = async () => {
        setStatus('Auto-scrolling to load all reviews...');

        // Try to find and scroll within the reviews container
        const reviewsContainer = document.querySelector('.m6QErb[data-value="Sort"]')?.parentElement?.parentElement;

        if (reviewsContainer) {
            await scrollUntilStable(
                () => reviewsContainer.scrollHeight,
                (y) => reviewsContainer.scrollTo(0, y),
            );
        } else {
            // Fallback to window scrolling (same attempt cap as the container path)
            await scrollUntilStable(
                () => document.documentElement.scrollHeight,
                (y) => window.scrollTo(0, y),
            );
        }
    };

    /**
     * Case-insensitive check for the word fragment "more".
     * @param {string|null|undefined} text - Text to inspect.
     * @returns {boolean}
     */
    const mentionsMore = (text) => Boolean(text?.toLowerCase().includes('more'));

    /**
     * Clicks every candidate "More" button whose text or aria-label mentions "more",
     * pausing briefly after each click. Each element is clicked at most once even
     * if it matches several selectors.
     * @returns {Promise<void>}
     */
    const expandReviews = async () => {
        setStatus('Expanding truncated reviews...');

        for (const button of document.querySelectorAll(MORE_BUTTON_SELECTOR)) {
            if (mentionsMore(button.textContent) || mentionsMore(button.getAttribute('aria-label'))) {
                button.click();
                await sleep(EXPAND_SETTLE_MS);
            }
        }
    };

    // ---------------------------------------------------------------------
    // Scraping
    // ---------------------------------------------------------------------

    /**
     * Builds the exported record for one review element.
     * @param {Element} reviewDiv - Element carrying a `data-review-id` attribute.
     * @param {string|null} reviewId - Value of that attribute.
     * @returns {object} Record whose keys match CSV_HEADERS.
     */
    const buildReview = (reviewDiv, reviewId) => {
        const dateInfo = parseReviewDate(reviewDiv.querySelector('span.rsqaWe')?.textContent);

        // The rating is the first run of digits in the star element's aria-label.
        let starRating = 0;
        const starElement = reviewDiv.querySelector("span.kvMYJc[role='img']");
        if (starElement) {
            const ratingMatch = starElement.getAttribute('aria-label')?.match(/(\d+)/);
            starRating = ratingMatch ? Number.parseInt(ratingMatch[0], 10) : 0;
        }

        // Get profile image with higher resolution
        const profileImg = reviewDiv.querySelector('img.NBa7we');
        const profileImgUrl = profileImg?.src
            ? profileImg.src.replace('=w36-h36-p-rp-mo-br100', '=s200')
            : '';

        return {
            reviewer_name: reviewDiv.querySelector('div.d4r55')?.textContent.trim() || '',
            profile_img_url: profileImgUrl,
            review_date_raw: dateInfo.raw_date,
            review_date_estimated: dateInfo.estimated_date,
            star_rating: starRating,
            review_url: reviewDiv.querySelector('button[data-href]')?.getAttribute('data-href') || '',
            review_content: reviewDiv.querySelector('span.wiI7pd')?.textContent.trim() || '',
            review_photos: extractReviewPhotos(reviewDiv),
            review_id: reviewId,
            scraped_at: new Date().toISOString(),
        };
    };

    /**
     * Loads, expands and extracts every review on the page.
     * The result is also stored on `window.scrapedReviews` for the copy/export actions.
     * @returns {Promise<object[]>} One record per distinct `data-review-id`.
     */
    const scrapeReviews = async () => {
        await autoScroll();
        await expandReviews();

        setStatus('Extracting review data...');

        const reviewDivs = document.querySelectorAll('div[data-review-id]');
        const reviews = [];
        const scrapedReviewIds = new Set();

        for (const [index, reviewDiv] of [...reviewDivs].entries()) {
            const reviewId = reviewDiv.getAttribute('data-review-id');
            // The same id can appear on more than one element; keep the first.
            if (scrapedReviewIds.has(reviewId)) continue;

            setStatus(`Processing review ${index + 1} of ${reviewDivs.length}...`);

            scrapedReviewIds.add(reviewId);
            reviews.push(buildReview(reviewDiv, reviewId));
        }

        setStatus(`✅ Successfully scraped ${reviews.length} reviews with enhanced data!`);
        window.scrapedReviews = reviews; // Store globally for other functions
        return reviews;
    };

    /**
     * Returns the reviews from the last scrape, scraping first when there are none yet.
     * @returns {Promise<object[]>}
     */
    const getReviews = async () => {
        if (window.scrapedReviews) return window.scrapedReviews;
        return scrapeReviews();
    };

    // ---------------------------------------------------------------------
    // Export actions
    // ---------------------------------------------------------------------

    /**
     * Copies the reviews to the clipboard as pretty-printed JSON.
     * @returns {Promise<void>}
     */
    const copyToClipboard = async () => {
        const reviews = await getReviews();
        const contentToCopy = JSON.stringify(reviews, null, 2);

        try {
            await navigator.clipboard.writeText(contentToCopy);
            setStatus(`✅ ${reviews.length} reviews copied to clipboard as JSON!`);
        } catch (err) {
            setStatus('❌ Error copying to clipboard. Check console.');
            console.error("Could not copy content to clipboard: ", err);
        }
    };

    /**
     * Serialises one value as a CSV field.
     * Arrays are joined with "; ", missing values become empty, embedded quotes are
     * doubled, and the field is quoted when it contains a comma, quote or line break.
     * @param {*} value - Field value.
     * @returns {string} The escaped field.
     */
    const toCsvField = (value) => {
        const text = Array.isArray(value) ? value.join('; ') : String(value ?? '');
        const escaped = text.replace(/"/g, '""');
        return /[",\r\n]/.test(text) ? `"${escaped}"` : escaped;
    };

    /**
     * Downloads the reviews as a CSV file named after today's local date.
     * @returns {Promise<void>}
     */
    const exportAsCSV = async () => {
        const reviews = await getReviews();

        // Convert to CSV
        const rows = reviews.map((review) => CSV_HEADERS.map((header) => toCsvField(review[header])).join(','));
        const csvContent = [CSV_HEADERS.join(','), ...rows].join('\n');

        // Download CSV file through a temporary, hidden link
        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.setAttribute('href', url);
        link.setAttribute('download', `google_maps_reviews_${formatLocalDate(new Date())}.csv`);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        try {
            link.click();
        } finally {
            document.body.removeChild(link);
            // Release the blob once the download has had time to start.
            setTimeout(() => URL.revokeObjectURL(url), OBJECT_URL_REVOKE_DELAY_MS);
        }

        setStatus(`✅ CSV file downloaded with ${reviews.length} reviews!`);
    };

    // ---------------------------------------------------------------------
    // Wiring
    // ---------------------------------------------------------------------

    /**
     * Wraps an async action as a click handler that reports failures in the status
     * area instead of leaving an unhandled promise rejection.
     * @param {() => Promise<*>} action - Action to run on click.
     * @returns {() => Promise<void>} Event handler.
     */
    const withErrorStatus = (action) => async () => {
        try {
            await action();
        } catch (err) {
            setStatus('❌ Something went wrong. Check console.');
            console.error('Review scraper error:', err);
        }
    };

    // Event listeners
    scrapeButton.addEventListener('click', withErrorStatus(scrapeReviews));
    copyButton.addEventListener('click', withErrorStatus(copyToClipboard));
    exportButton.addEventListener('click', withErrorStatus(exportAsCSV));

    // Initialize
    setStatus('Ready to scrape reviews! 🚀');
})();