您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Scrape emails across multiple pages and save to CSV
// ==UserScript== // @name Email Scraper // @namespace http://tampermonkey.net/ // @version 0.2 // @description Scrape emails across multiple pages and save to CSV // @match https://solicitors.lawsociety.org.uk/* // @grant GM_setValue // @grant GM_getValue // @run-at document-end // @license MIT // ==/UserScript== // Install this in Tampermonkey by clicking [https://gist.githubusercontent.com/MarwanShehata/34acd0b3d2a4431cf8a93c9d3cdf2d41/raw/3050c898be3f8fc651eaeaca3e982cee1a84642f/Email-Scraper-gig.js] // or copying the code from [https://gist.github.com/MarwanShehata/34acd0b3d2a4431cf8a93c9d3cdf2d41]. // https://solicitors.lawsociety.org.uk/search/results?Pro=False&Page=1 (function() { 'use strict'; const DELAY_BETWEEN_PAGES = 6000; const MAX_PAGES = 80; // Upper limit to prevent infinite loops // Helper functions function getEmails() { const dataEmailsArray = document.querySelectorAll("a[data-email]"); return [...dataEmailsArray].map(element => element.getAttribute('data-email')); } function getStoredEmails() { return GM_getValue('emails', []); } function setStoredEmails(emails) { GM_setValue('emails', emails); } function getCurrentPage() { return GM_getValue('currentPage', 1); } function setCurrentPage(page) { GM_setValue('currentPage', page); } function saveToCSV(emails) { const csvContent = "data:text/csv;charset=utf-8," + emails.map(email => `"${email}"`).join("\n"); const encodedUri = encodeURI(csvContent); const link = document.createElement("a"); link.setAttribute("href", encodedUri); link.setAttribute("download", "extracted_emails.csv"); const button = document.createElement("div"); button.innerHTML = "Download Extracted Emails"; button.style.position = "fixed"; button.style.top = "10px"; button.style.left = "50%"; button.style.transform = "translateX(-50%)"; button.style.backgroundColor = "green"; button.style.color = "white"; button.style.padding = "10px 20px"; button.style.zIndex = "9999"; button.style.cursor = "pointer"; button.addEventListener("click", () => { document.body.appendChild(link); link.click(); document.body.removeChild(link); document.body.removeChild(button); // Reset storage after download GM_setValue('emails', []); GM_setValue('currentPage', 1); }); document.body.appendChild(button); console.log(`Total emails extracted: ${emails.length}`); } // Main logic function scrapePage() { let currentPage = getCurrentPage(); let allEmails = getStoredEmails(); if (currentPage > MAX_PAGES) { console.log('Reached max pages, stopping.'); saveToCSV(allEmails); return; } // Scrape current page const newEmails = getEmails(); allEmails = [...new Set([...allEmails, ...newEmails])]; // Remove duplicates setStoredEmails(allEmails); console.log(`Page ${currentPage}: Extracted ${newEmails.length} emails, Total: ${allEmails.length}`); // Check for next page const nextPage = document.querySelector('li.next>a'); if (nextPage && currentPage < MAX_PAGES) { setCurrentPage(currentPage + 1); setTimeout(() => nextPage.click(), DELAY_BETWEEN_PAGES); } else { console.log('No more pages or max reached, finishing.'); saveToCSV(allEmails); } } // Start scraping scrapePage(); })();