// Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation
// ==UserScript==
// @name Amazon Review Scraper
// @namespace http://tampermonkey.net/
// @version 1.48
// @description Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation
// @match *://www.amazon.com/*
// @grant none
// @license MIT
// @author Lightning
// ==/UserScript==
//==================================================================================================
//==================================================================================================
//==================================================================================================
// CONFIGURATION ===================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================
const SCROLL_TO_BOTTOM = true; // When true, clicking a "Show 10 more reviews" button scrolls the page toward the bottom shortly afterwards
// Button alignment: placement and size (all values in pixels) of the fixed-position buttons
let LEFT = 0; // Offset of the buttons from the left edge of the viewport
let BOTTOM = 20; // Offset of the download button from the bottom edge of the viewport
let MIN_WIDTH = 250; // Minimum width of the download button; exact width of the restyled Amazon links
let HEIGHT = 50; // Height of the buttons; restyled Amazon links sit BOTTOM + HEIGHT above the bottom edge
//==================================================================================================
//==================================================================================================
//==================================================================================================
// SELECTORS =======================================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================
const reviewSelector = '[data-hook="review"], div[id^="customer_review-"]'; // Matches every element that is treated as a single review container
const SEE_MORE_TAG = '[data-hook="see-all-reviews-link-foot"]'; // The link for seeing all reviews (or searching them)
const SHOW_MORE_REVIEWS_TAG = '[data-hook="show-more-button"]'; // Candidate "Show 10 more reviews" buttons; they are additionally filtered by their text before use
const RATING_COUNT_TAG = '[data-hook="cr-filter-info-review-rating-count"]'; // The element whose text tells us how many reviews we are dealing with
//==================================================================================================
//==================================================================================================
//==================================================================================================
// ACTUAL SCRAPING CODE ============================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================
/**
 * Collects every review currently present in the DOM, de-duplicated by review ID.
 *
 * The ID of a review element is resolved in this order:
 *   1. its `id` attribute, with a leading `customer_review-` prefix removed,
 *   2. its `data-review-id` attribute,
 *   3. the first 100 characters of its text with all whitespace stripped.
 * Only the first element seen for a given ID is kept.
 *
 * Side effect: shows an alert when no element matches `reviewSelector`.
 *
 * @returns {Map<string, {title: string, stars: string, description: string, imgs: string}>}
 *   Reviews keyed by ID in DOM order. The Map is empty (never undefined) when
 *   nothing was found, so callers can always read `.size` or iterate.
 */
const getReviewsAsMap = () => {
    // Use a Map to store unique reviews.
    const uniqueReviews = new Map();
    const reviewElements = document.querySelectorAll(reviewSelector);
    if (reviewElements.length === 0) {
        alert("No reviews found! Please scroll down until the reviews are visible on your screen.");
        // Return the empty Map instead of undefined: callers dereference the result directly.
        return uniqueReviews;
    }
    for (const el of reviewElements) {
        let reviewId = el.getAttribute('id') || "";
        if (reviewId.startsWith('customer_review-')) {
            // Nested containers carry the same review ID behind this prefix; strip it so both map to one key.
            reviewId = reviewId.replace('customer_review-', '');
        }
        // If we still don't have a solid ID, check the data attribute, then fall back to a text fingerprint.
        if (!reviewId) {
            reviewId = el.getAttribute('data-review-id') || el.innerText.substring(0, 100).replace(/\s/g, '');
        }
        // Only process IDs we haven't seen during this call.
        if (!reviewId || uniqueReviews.has(reviewId)) {
            continue;
        }
        // 1. Title, with the leading "5.0 out of 5 stars" text stripped.
        const titleEl = el.querySelector('[data-hook="review-title"], .review-title');
        const rawTitle = titleEl ? titleEl.innerText.trim() : "N/A";
        const title = rawTitle.replace(/^\d\.\d out of \d stars\s+/, '');
        // 2. Stars: the first space-separated token of the rating text.
        const starsEl = el.querySelector('[data-hook="review-star-rating"], .review-rating');
        const stars = starsEl ? starsEl.innerText.split(' ')[0] : "N/A";
        // 3. Description: prefer the inner span to avoid metadata around the body.
        const bodyEl = el.querySelector('[data-hook="review-body"] span.a-size-base') ||
            el.querySelector('.review-text-content span') ||
            el.querySelector('[data-hook="review-body"]');
        let description = "N/A";
        if (bodyEl) {
            description = bodyEl.innerText
                .replace(/Read more/gi, '')
                .replace(/\s+/g, ' ')
                .trim();
        }
        // 4. Image URLs, joined by a pipe.
        const imgs = Array.from(el.querySelectorAll('img.review-image-tile'))
            .map((img) => img.src)
            .join(' | ');
        // Skip containers that yielded neither a title nor a description (malformed/duplicate wrappers).
        if (description !== "N/A" || title !== "N/A") {
            uniqueReviews.set(reviewId, { title, stars, description, imgs });
        }
    }
    return uniqueReviews;
};
/**
 * Exports the reviews returned by getReviewsAsMap() as a CSV download.
 *
 * Columns: Title, Stars, Description, ImageURL. Every cell is wrapped in
 * double quotes and embedded quotes are doubled. The file is named
 * `amazon_reviews_<epoch-ms>.csv`.
 *
 * Does nothing (beyond a console warning) when there are no reviews to
 * export, including when getReviewsAsMap() yields no Map at all.
 */
const saveReviewsToCSV = () => {
    const uniqueReviews = getReviewsAsMap();
    // Guard first: without it a missing result throws below and an empty one downloads a header-only file.
    if (!uniqueReviews || uniqueReviews.size === 0) {
        console.warn("No reviews to export; skipping CSV download.");
        return;
    }
    // Convert the Map to CSV rows, header first.
    const csvRows = [['Title', 'Stars', 'Description', 'ImageURL']];
    for (const { title, stars, description, imgs } of uniqueReviews.values()) {
        csvRows.push([title, stars, description, imgs]);
    }
    // Quote every cell and double any embedded quote.
    const escapeCell = (cell) => `"${String(cell).replace(/"/g, '""')}"`;
    const csvContent = csvRows
        .map((row) => row.map(escapeCell).join(","))
        .join("\n");
    // Trigger the download through a temporary anchor element.
    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.setAttribute("href", url);
    link.setAttribute("download", `amazon_reviews_${Date.now()}.csv`);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    // Release the blob once the browser has had time to start the download;
    // otherwise every export keeps its data alive until the page unloads.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
    console.log(`Scraped ${uniqueReviews.size} unique reviews.`);
};
//==================================================================================================
//==================================================================================================
//==================================================================================================
// OUR OTHER STUFF =================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================
/**
 * Userscript entry point. On product (`/dp/`) and review (`/product-reviews/`)
 * pages it:
 *   - adds a fixed info bar and a fixed "download as CSV" button,
 *   - restyles Amazon's "see all reviews" link and "Show 10 more reviews"
 *     buttons into fixed-position buttons,
 *   - keeps the visible-review counter up to date via a MutationObserver
 *     that is throttled to one refresh per second.
 */
(function() {
    'use strict';
    // Prevent duplicate buttons and only show on product/review pages
    const href = window.location.href;
    const isSupportedPage = href.includes('/dp/') || href.includes('/product-reviews/');
    if (document.getElementById('scrp-btn') || !isSupportedPage) {
        console.log("We cannot scrape reviews on this page");
        return;
    }

    // GM_info is supplied by the userscript manager; fall back gracefully if it is missing.
    const version = (typeof GM_info !== 'undefined' && GM_info && GM_info.script)
        ? GM_info.script.version
        : 'unknown';
    console.log(`Review scraper v${version} is enabled!`);

    // --- Info bar -------------------------------------------------------------------------------
    const info = document.createElement('div');
    info.id = 'scrp-info';
    info.textContent = `Amazon Review Scraper ${version}`;
    info.style.cssText = `
position: fixed;
left: 0px;
bottom: 0px;
z-index: 999999;
padding: 10px;
width: 100%;
background: #eeeeee;
border: 1px solid #a88734;
color: black;
text-align: center;
user-select:none;
cursor:pointer;
`;
    info.setAttribute('title', 'Click to refresh counters');
    document.body.appendChild(info);

    // The value may contain text read from the page, so it is written as text, never parsed as HTML.
    const updateInfo = (val) => {
        info.textContent = val;
    };

    let visibleReviews = 0;
    let totalReviewsStr = "";

    // --- Download button ------------------------------------------------------------------------
    const btn = document.createElement('button');
    btn.id = 'scrp-btn';
    btn.textContent = 'DOWNLOAD REVIEWS AS CSV';
    btn.style.cssText = `
position: fixed;
left: ${LEFT}px;
bottom: ${BOTTOM}px;
z-index: 999999;
padding: 15px;
min-width: ${MIN_WIDTH}px;
height: ${HEIGHT}px;
background: #ff9900;
border: 1px solid #a88734;
color: black;
border-radius: 8px;
cursor: pointer;
font-weight: bold;
box-shadow: 0 4px 10px rgba(0,0,0,0.5);
`;
    btn.onclick = saveReviewsToCSV;
    document.body.appendChild(btn);

    // Recounts the reviews in the DOM and refreshes the button label and info bar.
    const updateVisibleReviewCount = () => {
        // getReviewsAsMap() shows an alert when nothing matches; probe first so that
        // background refreshes on pages without reviews stay silent and cannot throw.
        const hasReviews = document.querySelectorAll(reviewSelector).length > 0;
        const reviews = hasReviews ? getReviewsAsMap() : null;
        visibleReviews = reviews ? reviews.size : 0;
        const ratingCountElement = document.querySelector(RATING_COUNT_TAG);
        if (ratingCountElement) {
            totalReviewsStr = ratingCountElement.innerText.trim();
        }
        btn.textContent = `📥 DOWNLOAD ${visibleReviews} REVIEWS AS CSV`;
        updateInfo(`${visibleReviews} Visible reviews; ${totalReviewsStr}`);
    };

    // Restyles one of Amazon's own links/buttons into a fixed button stacked above the download button.
    const mutateTag = (el) => {
        Object.assign(el.style, {
            position: 'fixed',
            left: `${LEFT}px`,
            bottom: `${BOTTOM + HEIGHT}px`,
            zIndex: '10000',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
            padding: '10px 15px',
            width: `${MIN_WIDTH}px`,
            height: `${HEIGHT}px`,
            backgroundColor: '#fff',
            color: '#111',
            border: '2px solid #e77600',
            borderRadius: '8px',
            boxShadow: '0px 2px 10px rgba(0,0,0,0.3)',
            fontWeight: 'bold',
            textDecoration: 'none',
            cursor: 'pointer',
        });
    };

    const scrollToFooter = () => {
        if (SCROLL_TO_BOTTOM) {
            window.scrollTo({ top: document.body.scrollHeight - 1000, behavior: 'smooth' });
        }
    };

    // Restyles the "See all reviews" link and every "Show 10 more reviews" button.
    const transformButtons = () => {
        const seeMoreLink = document.querySelector(SEE_MORE_TAG);
        if (seeMoreLink) {
            mutateTag(seeMoreLink);
        }
        document.querySelectorAll(SHOW_MORE_REVIEWS_TAG).forEach((button) => {
            if (!button.textContent.trim().includes("Show 10 more reviews")) {
                return;
            }
            mutateTag(button);
            // The data attribute marks buttons that already have the listener, so it is attached only once.
            if (!button.dataset.hasScrollListener) {
                button.addEventListener('click', () => {
                    setTimeout(scrollToFooter, 500);
                });
                button.dataset.hasScrollListener = 'true';
            }
        });
    };

    // Just to be safe, transform the buttons every second
    setInterval(transformButtons, 1000);

    // --- DOM change handling --------------------------------------------------------------------
    // While true, observer callbacks are ignored (simple one-second throttle).
    let isRunning = false;
    const runUpdates = () => {
        // Lock here so that the DOM writes performed below do not re-trigger the observer.
        isRunning = true;
        try {
            transformButtons();
            updateVisibleReviewCount();
        } finally {
            // Always schedule the unlock, even if an update throws; otherwise the
            // observer would stay locked for the rest of the page's lifetime.
            setTimeout(() => {
                isRunning = false;
            }, 1000);
        }
    };
    info.onclick = runUpdates;

    const observer = new MutationObserver(() => {
        // If we are currently in the cooldown, do nothing
        if (isRunning) return;
        runUpdates();
    });
    // Start observing
    observer.observe(document.body, {
        childList: true,
        subtree: true
    });
    // Initial run
    runUpdates();
})();