// ==UserScript==
// @name Avito Scraping Assistant
// @namespace https://danielfragomeli.com/
// @version 1.2
// @description Assistente per navigare e scaricare dati da Avito con gestione manuale dei CAPTCHA e supporto corretto per caratteri cirillici
// @author dan098
// @match *://*.avito.ru/*
// @grant GM_setValue
// @grant GM_getValue
// @grant GM_deleteValue
// @grant GM_listValues
// @grant GM_setClipboard
// @grant GM_xmlhttpRequest
// @grant unsafeWindow
// @license MIT
// @require https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/PapaParse/5.3.2/papaparse.min.js
// ==/UserScript==
(function() {
'use strict';
// Stili CSS per l'interfaccia
const css = `
#avito-scraper-panel {
position: fixed;
bottom: 10px;
right: 10px;
background-color: #fff;
border: 2px solid #0078d7;
border-radius: 5px;
padding: 10px;
width: 400px;
z-index: 10000;
font-family: Arial, sans-serif;
box-shadow: 0 0 10px rgba(0,0,0,0.2);
}
#avito-scraper-panel h3 {
margin-top: 0;
margin-bottom: 10px;
color: #0078d7;
border-bottom: 1px solid #eee;
padding-bottom: 5px;
}
#avito-urls {
width: 100%;
height: 100px;
margin-bottom: 10px;
resize: vertical;
}
#avito-status {
margin: 10px 0;
padding: 5px;
border-radius: 3px;
background-color: #f0f0f0;
}
.avito-button {
background-color: #0078d7;
color: white;
border: none;
padding: 8px 12px;
margin: 5px 5px 5px 0;
border-radius: 3px;
cursor: pointer;
}
.avito-button:hover {
background-color: #005a9e;
}
.avito-button:disabled {
background-color: #cccccc;
cursor: not-allowed;
}
.progress-bar {
height: 15px;
background-color: #e0e0e0;
border-radius: 5px;
margin: 10px 0;
}
.progress-fill {
height: 100%;
background-color: #0078d7;
border-radius: 5px;
width: 0%;
transition: width 0.3s;
}
.captcha-alert {
color: #d61e00;
font-weight: bold;
margin-top: 5px;
display: none;
}
.controls-row {
display: flex;
justify-content: space-between;
align-items: center;
}
.pause-button {
background-color: #ff9800;
}
.pause-button:hover {
background-color: #e68a00;
}
.loading-indicator {
color: blue;
font-style: italic;
}
#current-url-container {
margin: 10px 0;
border: 1px solid #ddd;
padding: 5px;
border-radius: 3px;
word-break: break-all;
font-size: 12px;
}
#extracted-preview {
margin: 10px 0;
max-height: 100px;
overflow-y: auto;
border: 1px solid #ddd;
padding: 5px;
font-size: 12px;
background-color: #f9f9f9;
}
`;
// Aggiungi stili alla pagina
const styleEl = document.createElement('style');
styleEl.textContent = css;
document.head.appendChild(styleEl);
// Crea e aggiungi il pannello di controllo alla pagina
const panel = document.createElement('div');
panel.id = 'avito-scraper-panel';
panel.innerHTML = `
<h3>Avito Scraping Assistant</h3>
<div>
<textarea id="avito-urls" placeholder="Inserisci gli URL da visitare, uno per riga"></textarea>
<div class="controls-row">
<button id="load-urls" class="avito-button">Carica URLs</button>
<button id="load-file" class="avito-button">Carica da File</button>
<input type="file" id="url-file" style="display: none;" accept=".txt,.csv">
</div>
</div>
<div id="current-url-container" style="display: none;">
<strong>URL Corrente:</strong> <span id="current-url"></span>
</div>
<div class="progress-bar">
<div class="progress-fill" id="progress-fill"></div>
</div>
<div id="avito-status">Stato: Pronto</div>
<div class="captcha-alert" id="captcha-alert">⚠️ CAPTCHA rilevato! Risolvilo manualmente, poi clicca "Continua"</div>
<div class="loading-indicator" id="loading-indicator" style="display: none;">Caricamento pagina in corso...</div>
<div id="extracted-preview" style="display: none;"></div>
<div>
<button id="start-scraping" class="avito-button" disabled>Inizia</button>
<button id="extract-data" class="avito-button" disabled>Estrai Dati</button>
<button id="next-url" class="avito-button" disabled>Prossimo</button>
<button id="pause-resume" class="avito-button pause-button" disabled>Pausa</button>
</div>
<div>
<button id="export-csv" class="avito-button" disabled>Esporta CSV</button>
<button id="clear-data" class="avito-button" disabled>Cancella Dati</button>
<button id="view-data" class="avito-button" disabled>Visualizza Dati</button>
</div>
`;
document.body.appendChild(panel);
// Funzioni helper per gestire lo stato dello scraping
class ScrapingState {
constructor() {
this.urls = [];
this.currentIndex = 0;
this.extractedData = [];
this.isPaused = false;
this.isLoading = false;
this.loadState();
}
loadState() {
const savedUrls = GM_getValue('avitoUrls', null);
const savedIndex = GM_getValue('avitoCurrentIndex', 0);
const savedData = GM_getValue('avitoExtractedData', null);
if (savedUrls) {
this.urls = JSON.parse(savedUrls);
document.getElementById('avito-urls').value = this.urls.join('\n');
}
this.currentIndex = savedIndex;
if (savedData) {
this.extractedData = JSON.parse(savedData);
}
this.updateButtonStates();
this.updateProgress();
if (this.urls.length > 0) {
document.getElementById('start-scraping').disabled = false;
if (this.extractedData.length > 0) {
document.getElementById('export-csv').disabled = false;
document.getElementById('clear-data').disabled = false;
document.getElementById('view-data').disabled = false;
}
}
}
saveState() {
GM_setValue('avitoUrls', JSON.stringify(this.urls));
GM_setValue('avitoCurrentIndex', this.currentIndex);
GM_setValue('avitoExtractedData', JSON.stringify(this.extractedData));
}
setUrls(urlArray) {
this.urls = urlArray.filter(url => url.trim() !== '');
this.saveState();
this.updateButtonStates();
this.updateProgress();
}
getCurrentUrl() {
if (this.currentIndex < this.urls.length) {
return this.urls[this.currentIndex];
}
return null;
}
moveToNext() {
if (this.currentIndex < this.urls.length - 1) {
this.currentIndex++;
this.saveState();
this.updateProgress();
return true;
}
return false;
}
addExtractedData(data) {
this.extractedData.push(data);
this.saveState();
document.getElementById('export-csv').disabled = false;
document.getElementById('clear-data').disabled = false;
document.getElementById('view-data').disabled = false;
// Mostra un'anteprima dei dati estratti
this.showDataPreview(data);
}
showDataPreview(data) {
const previewEl = document.getElementById('extracted-preview');
previewEl.style.display = 'block';
// Seleziona alcuni campi chiave da mostrare
const preview = `
<strong>Dati estratti:</strong><br>
Venditore: ${data['seller name']}<br>
Città: ${data['Seller City']}<br>
Prezzo: ${data.Price}<br>
<small>(Dati salvati correttamente)</small>
`;
previewEl.innerHTML = preview;
}
updateProgress() {
const progressElement = document.getElementById('progress-fill');
const percent = this.urls.length > 0 ? (this.currentIndex / this.urls.length) * 100 : 0;
progressElement.style.width = `${percent}%`;
const statusElement = document.getElementById('avito-status');
statusElement.textContent = `Stato: ${this.currentIndex}/${this.urls.length} URL processati`;
const currentUrlContainer = document.getElementById('current-url-container');
const currentUrlSpan = document.getElementById('current-url');
if (this.getCurrentUrl()) {
currentUrlContainer.style.display = 'block';
currentUrlSpan.textContent = this.getCurrentUrl();
} else {
currentUrlContainer.style.display = 'none';
}
}
updateButtonStates() {
const startButton = document.getElementById('start-scraping');
const extractButton = document.getElementById('extract-data');
const nextButton = document.getElementById('next-url');
const pauseButton = document.getElementById('pause-resume');
const viewDataButton = document.getElementById('view-data');
startButton.disabled = this.urls.length === 0;
const isCurrentPage = window.location.href.includes(this.getCurrentUrl());
extractButton.disabled = !isCurrentPage || this.isLoading;
nextButton.disabled = this.currentIndex >= this.urls.length - 1 || this.isLoading;
pauseButton.disabled = this.urls.length === 0;
viewDataButton.disabled = this.extractedData.length === 0;
}
setLoading(isLoading) {
this.isLoading = isLoading;
const loadingIndicator = document.getElementById('loading-indicator');
loadingIndicator.style.display = isLoading ? 'block' : 'none';
this.updateButtonStates();
}
clearData() {
this.extractedData = [];
this.saveState();
document.getElementById('export-csv').disabled = true;
document.getElementById('clear-data').disabled = true;
document.getElementById('view-data').disabled = true;
document.getElementById('extracted-preview').style.display = 'none';
}
}
// Istanza dello stato dello scraping
const state = new ScrapingState();
// Funzione per estrarre i dati dalla pagina attuale
function extractDataFromPage() {
const data = {
link: window.location.href,
'seller name': 'N/A',
'shop link': 'N/A',
marketplace: 'Avito',
product: 'Jewelry',
image: 'N/A',
description: 'N/A',
'Seller City': 'N/A',
Price: 'N/A',
'Data scaricamento': new Date().toISOString().split('T')[0],
month: '2025-03',
Brand: 'Roberto Coin',
Country: 'Russia',
'Type of platform': 'Marketplace'
};
// Gli XPath forniti
const xpaths = {
'seller name': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[2]/div/div/div/div[3]/div[2]/div/div/div/div[1]/div/div[1]/div/div[1]/div/div/div/h3/a",
'shop link': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[2]/div/div/div/div[3]/div[2]/div/div/div/div[1]/div/div[1]/div/div[1]/div/div/div/h3/a",
'image': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[1]/div[2]/div[1]/div[1]/div/div/div/div/div[3]/img",
'description': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[1]/div[2]/div[2]/div/div/div/p[1]",
'Seller City': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div[1]/div[1]/div/p[1]/span",
'Price': "/html/body/div[1]/div/div[3]/div[1]/div/div[2]/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]/div/div/div/span/span/span[1]"
};
// Estrai i dati utilizzando gli XPath
for (const [field, xpath] of Object.entries(xpaths)) {
try {
const element = document.evaluate(
xpath,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
).singleNodeValue;
if (element) {
if (field === 'shop link') {
data[field] = element.getAttribute('href') || 'N/A';
// Aggiunge dominio se il link è relativo
if (data[field] && !data[field].startsWith('http')) {
data[field] = `https://www.avito.ru${data[field]}`;
}
} else if (field === 'image') {
data[field] = element.getAttribute('src') || 'N/A';
} else {
data[field] = element.textContent.trim() || 'N/A';
}
}
} catch (e) {
console.error(`Errore nell'estrazione di ${field}:`, e);
}
}
state.addExtractedData(data);
// Aggiorna lo stato e mostra un messaggio di successo
const statusElement = document.getElementById('avito-status');
statusElement.textContent = `Stato: Dati estratti da ${state.currentIndex + 1}/${state.urls.length} URL`;
// Abilita il pulsante "Next" se ci sono ancora URL da processare
document.getElementById('next-url').disabled = state.currentIndex >= state.urls.length - 1;
return data;
}
// Funzione per rilevare CAPTCHA
function detectCaptcha() {
// Questa è una semplificazione: adatta i selettori in base al reale CAPTCHA di Avito
const captchaSelectors = [
'iframe[src*="captcha"]',
'div[class*="captcha"]',
'div[id*="captcha"]',
'input[name*="captcha"]',
'img[src*="captcha"]',
'form[action*="captcha"]',
'div.firewall-container' // Aggiunto per il firewall di Avito
];
for (const selector of captchaSelectors) {
if (document.querySelector(selector)) {
return true;
}
}
// Cerca anche per testo di CAPTCHA o blocco
const bodyText = document.body.innerText.toLowerCase();
const captchaTexts = ['captcha', 'проверка', 'безопасность', 'подтвердите', 'robot', 'человек'];
for (const text of captchaTexts) {
if (bodyText.includes(text)) {
return true;
}
}
return false;
}
// Gestori degli eventi per i pulsanti
document.getElementById('load-urls').addEventListener('click', () => {
const urlText = document.getElementById('avito-urls').value;
const urlArray = urlText.split('\n').map(url => url.trim()).filter(url => url !== '');
state.setUrls(urlArray);
document.getElementById('start-scraping').disabled = urlArray.length === 0;
});
document.getElementById('load-file').addEventListener('click', () => {
document.getElementById('url-file').click();
});
document.getElementById('url-file').addEventListener('change', (e) => {
const file = e.target.files[0];
if (file) {
const reader = new FileReader();
reader.onload = (event) => {
const content = event.target.result;
document.getElementById('avito-urls').value = content;
const urlArray = content.split('\n').map(url => url.trim()).filter(url => url !== '');
state.setUrls(urlArray);
document.getElementById('start-scraping').disabled = urlArray.length === 0;
};
reader.readAsText(file);
}
});
document.getElementById('start-scraping').addEventListener('click', () => {
const currentUrl = state.getCurrentUrl();
if (currentUrl) {
state.setLoading(true);
window.location.href = currentUrl;
}
});
document.getElementById('extract-data').addEventListener('click', () => {
const data = extractDataFromPage();
const previewEl = document.getElementById('extracted-preview');
previewEl.style.display = 'block';
});
document.getElementById('next-url').addEventListener('click', () => {
if (state.moveToNext()) {
const nextUrl = state.getCurrentUrl();
if (nextUrl) {
state.setLoading(true);
window.location.href = nextUrl;
}
} else {
alert('Hai completato tutti gli URL!');
}
});
document.getElementById('pause-resume').addEventListener('click', () => {
state.isPaused = !state.isPaused;
const pauseButton = document.getElementById('pause-resume');
pauseButton.textContent = state.isPaused ? 'Riprendi' : 'Pausa';
});
// Funzione per gestire correttamente l'encoding del CSV
function exportCSV() {
if (state.extractedData.length === 0) {
alert('Nessun dato da esportare.');
return;
}
// Utilizza PapaParse con configurazione per UTF-8
const csvConfig = {
quotes: true, // Usa sempre le virgolette per proteggere i valori con caratteri speciali
quoteChar: '"',
escapeChar: '"',
delimiter: ",",
header: true,
newline: "\r\n",
skipEmptyLines: false
};
const csv = Papa.unparse(state.extractedData, csvConfig);
// Aggiunge BOM (Byte Order Mark) per far riconoscere correttamente UTF-8 a Excel e altri programmi
const BOM = "\uFEFF";
const csvWithBOM = BOM + csv;
// Crea un blob con encoding esplicito UTF-8
const blob = new Blob([csvWithBOM], { type: 'text/csv;charset=utf-8;' });
const url = URL.createObjectURL(blob);
// Crea un link per il download
const link = document.createElement('a');
link.href = url;
link.setAttribute('download', `avito_data_${new Date().toISOString().split('T')[0]}.csv`);
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
document.getElementById('export-csv').addEventListener('click', exportCSV);
document.getElementById('clear-data').addEventListener('click', () => {
if (confirm('Sei sicuro di voler cancellare tutti i dati estratti?')) {
state.clearData();
const statusElement = document.getElementById('avito-status');
statusElement.textContent = 'Stato: Dati cancellati';
}
});
document.getElementById('view-data').addEventListener('click', () => {
if (state.extractedData.length === 0) {
alert('Nessun dato da visualizzare.');
return;
}
// Crea una nuova finestra per visualizzare i dati
const dataWindow = window.open('', 'Dati Estratti', 'width=800,height=600');
dataWindow.document.write(`
<html>
<head>
<title>Dati Estratti da Avito</title>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
tr:nth-child(even) { background-color: #f9f9f9; }
.container { max-width: 100%; overflow-x: auto; }
</style>
</head>
<body>
<h2>Dati Estratti da Avito (${state.extractedData.length} record)</h2>
<div class="container">
<table>
<thead>
<tr>
${Object.keys(state.extractedData[0]).map(key => `<th>${key}</th>`).join('')}
</tr>
</thead>
<tbody>
${state.extractedData.map(record => `
<tr>
${Object.values(record).map(value => `<td>${value}</td>`).join('')}
</tr>
`).join('')}
</tbody>
</table>
</div>
</body>
</html>
`);
});
// Controllo periodico di CAPTCHA e caricamento pagina
function checkPageStatus() {
const captchaAlert = document.getElementById('captcha-alert');
if (detectCaptcha()) {
captchaAlert.style.display = 'block';
state.setLoading(false);
} else {
captchaAlert.style.display = 'none';
// Controlla se la pagina è completamente caricata
if (document.readyState === 'complete') {
state.setLoading(false);
// Se l'URL corrente corrisponde all'URL che stiamo processando, abilita il pulsante di estrazione
const currentUrl = state.getCurrentUrl();
if (currentUrl && window.location.href.includes(currentUrl)) {
document.getElementById('extract-data').disabled = false;
}
}
}
}
// Controlla lo stato della pagina ogni secondo
setInterval(checkPageStatus, 1000);
// Evento quando la pagina è completamente caricata
window.addEventListener('load', () => {
// Aggiungiamo un piccolo ritardo per assicurarci che tutti gli elementi siano caricati
setTimeout(() => {
state.setLoading(false);
state.updateButtonStates();
// Verifica se l'URL corrente corrisponde a uno degli URL nella lista
const currentUrl = state.getCurrentUrl();
if (currentUrl && window.location.href.includes(currentUrl)) {
document.getElementById('extract-data').disabled = false;
}
}, 1500);
});
console.log('Avito Scraping Assistant avviato con supporto UTF-8');
})();