Amazon Review Scraper

Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation

Du musst eine Erweiterung wie Tampermonkey, Greasemonkey oder Violentmonkey installieren, um dieses Skript zu installieren.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

Sie müssen eine Skript-Manager-Erweiterung installieren, damit Sie dieses Skript installieren können.

(Ich habe schon einen Skript-Manager, lass mich es installieren!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name         Amazon Review Scraper
// @namespace    http://tampermonkey.net/
// @version      1.48
// @description  Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation
// @match        *://www.amazon.com/*
// @grant        none
// @license MIT
// @author Lightning
// ==/UserScript==

//==================================================================================================
//==================================================================================================
//==================================================================================================
// CONFIGURATION ===================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================

// When true, clicking a restyled "Show 10 more reviews" button smooth-scrolls
// the window towards the bottom of the page shortly after the click.
const SCROLL_TO_BOTTOM = true; // set to false to leave the scroll position untouched

// Geometry of the floating controls, all in CSS pixels.
// LEFT / BOTTOM: offset of the download button from the viewport's left and bottom edges.
// MIN_WIDTH:     minimum width of the download button; also the fixed width of restyled page links.
// HEIGHT:        height of the download button and of restyled page links; restyled links are
//                placed at BOTTOM + HEIGHT from the viewport bottom.
let LEFT = 0;
let BOTTOM = 20;
let MIN_WIDTH = 250;
let HEIGHT = 50;

//==================================================================================================
//==================================================================================================
//==================================================================================================
// SELECTORS =======================================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================

// CSS selectors used to locate Amazon page elements. Each one is passed verbatim to
// querySelector / querySelectorAll, so it must remain a valid selector list.
const reviewSelector = '[data-hook="review"], div[id^="customer_review-"]'; // every element treated as one review container
const SEE_MORE_TAG = '[data-hook="see-all-reviews-link-foot"]'; // the "see all reviews" link, restyled as a floating button
const SHOW_MORE_REVIEWS_TAG = '[data-hook="show-more-button"]'; // candidate "show more" buttons; only those whose text contains "Show 10 more reviews" are restyled
const RATING_COUNT_TAG = '[data-hook="cr-filter-info-review-rating-count"]'; // element whose text is shown in the info bar as the total review count

//==================================================================================================
//==================================================================================================
//==================================================================================================
// ACTUAL SCRAPING CODE ============================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================

/**
 * Derives the de-duplication key for one review container.
 *
 * Resolution order:
 *   1. the element's `id`, with a leading "customer_review-" removed;
 *   2. the `data-review-id` attribute;
 *   3. the first 100 characters of the element's text with all whitespace removed.
 *
 * @param {Element} el - A review container element.
 * @returns {string} The key, or an empty string when nothing usable was found.
 */
const resolveReviewId = (el) => {
    const domId = (el.getAttribute('id') || '').replace(/^customer_review-/, '');
    if (domId) {
        return domId;
    }
    return el.getAttribute('data-review-id') ||
        (el.innerText || '').substring(0, 100).replace(/\s/g, '');
};

/**
 * Extracts the exported fields from one review container.
 *
 * @param {Element} el - A review container element.
 * @returns {{title: string, stars: string, description: string, imgs: string}}
 *          Missing title/stars/description are reported as "N/A"; `imgs` is the
 *          " | "-joined list of image URLs (empty string when there are none).
 */
const extractReviewFields = (el) => {
    // Title: drop the leading "5.0 out of 5 stars" text that is rendered inside the title node.
    const titleEl = el.querySelector('[data-hook="review-title"], .review-title');
    const title = titleEl
        ? titleEl.innerText.trim().replace(/^\d\.\d out of \d stars\s+/, '')
        : "N/A";

    // Stars: the first space-separated token of the rating text (e.g. "5.0").
    const starsEl = el.querySelector('[data-hook="review-star-rating"], .review-rating');
    const stars = starsEl ? starsEl.innerText.split(' ')[0] : "N/A";

    // Description: prefer the inner span so surrounding metadata is not captured.
    const bodyEl = el.querySelector('[data-hook="review-body"] span.a-size-base') ||
        el.querySelector('.review-text-content span') ||
        el.querySelector('[data-hook="review-body"]');
    const description = bodyEl
        ? bodyEl.innerText.replace(/Read more/gi, '').replace(/\s+/g, ' ').trim()
        : "N/A";

    // Images: every review image tile, joined by a pipe.
    const imgs = Array.from(el.querySelectorAll('img.review-image-tile'))
        .map((img) => img.src)
        .join(' | ');

    return { title, stars, description, imgs };
};

/**
 * Scrapes every review container currently in the DOM into a Map keyed by a
 * unique review id, so the same review is never recorded twice.
 *
 * The function always returns a Map. When no review container exists it logs a
 * console warning and returns an empty Map; it does not open a modal dialog,
 * because it is also called for passive counter refreshes, not only on a click.
 *
 * @returns {Map<string, {title: string, stars: string, description: string, imgs: string}>}
 *          Unique reviews in document order; empty when nothing was found.
 */
const getReviewsAsMap = () => {
    const uniqueReviews = new Map();
    const reviewElements = document.querySelectorAll(reviewSelector);

    if (reviewElements.length === 0) {
        console.warn("No reviews found! Please scroll down until the reviews are visible on your screen.");
        return uniqueReviews;
    }

    reviewElements.forEach((el) => {
        const reviewId = resolveReviewId(el);

        // Skip containers without a key and containers already seen in this pass.
        if (!reviewId || uniqueReviews.has(reviewId)) {
            return;
        }

        const review = extractReviewFields(el);

        // A container with neither a title nor a body is a malformed/duplicate wrapper: skip it.
        if (review.description === "N/A" && review.title === "N/A") {
            return;
        }

        uniqueReviews.set(reviewId, review);
    });

    return uniqueReviews;
};


/**
 * Quotes one value as a CSV cell: wraps it in double quotes and doubles any
 * embedded double quote. null/undefined become an empty cell.
 *
 * @param {*} cell - The raw cell value.
 * @returns {string} The quoted cell.
 */
const escapeCsvCell = (cell) => {
    const text = cell == null ? '' : String(cell);
    return `"${text.replace(/"/g, '""')}"`;
};

/**
 * Scrapes the currently visible reviews and downloads them as a CSV file named
 * `amazon_reviews_<timestamp>.csv` with the columns Title, Stars, Description
 * and ImageURL.
 *
 * Nothing is downloaded when there is nothing to export.
 *
 * @returns {void}
 */
const saveReviewsToCSV = () => {
    const uniqueReviews = getReviewsAsMap();

    // Defensive: tolerate a scraper that returns nothing instead of a Map.
    if (!uniqueReviews) {
        return;
    }

    // A header-only file is useless; tell the user why nothing was downloaded.
    if (uniqueReviews.size === 0) {
        alert("No reviews found! Please scroll down until the reviews are visible on your screen.");
        return;
    }

    // Header row followed by one row per unique review.
    const csvRows = [['Title', 'Stars', 'Description', 'ImageURL']];
    uniqueReviews.forEach((review) => {
        csvRows.push([review.title, review.stars, review.description, review.imgs]);
    });

    const csvContent = csvRows
        .map((row) => row.map(escapeCsvCell).join(","))
        .join("\n");

    // Trigger the download through a temporary, programmatically clicked link.
    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.setAttribute("href", url);
    link.setAttribute("download", `amazon_reviews_${Date.now()}.csv`);
    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the Blob once the browser has had time to start the download;
        // an object URL that is never revoked keeps the data alive until the page unloads.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    }

    console.log(`Scraped ${uniqueReviews.size} unique reviews.`);
};

//==================================================================================================
//==================================================================================================
//==================================================================================================
// OUR OTHER STUFF =================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================

/**
 * Userscript entry point.
 *
 * On product (`/dp/`) and review (`/product-reviews/`) pages it injects a
 * fixed info bar and a "download as CSV" button, restyles Amazon's own
 * "see all reviews" / "Show 10 more reviews" controls as floating buttons,
 * and keeps the visible-review counter up to date while the page changes.
 */
(function() {
    'use strict';

    // Minimum time between two automatic refreshes, and period of the restyle timer (ms).
    const UPDATE_COOLDOWN_MS = 1000;
    // Delay between a "Show 10 more reviews" click and the follow-up scroll (ms).
    const SCROLL_DELAY_MS = 500;

    // Prevent duplicate buttons and only show on product/review pages.
    const pageUrl = window.location.href;
    const isScrapablePage = pageUrl.includes('/dp/') || pageUrl.includes('/product-reviews/');
    if (document.getElementById('scrp-btn') || !isScrapablePage) {
        console.log("We cannot scrape reviews on this page");
        return;
    }

    // GM_info is provided by the userscript manager; do not crash if it is missing.
    const version = (typeof GM_info !== 'undefined' && GM_info.script)
        ? GM_info.script.version
        : 'unknown';
    console.log(`Review scraper v${version} is enabled!`);

    // --- Info bar (full-width strip at the bottom; click to refresh counters) ---
    const info = document.createElement('div');
    info.id = 'scrp-info';
    info.textContent = `Amazon Review Scraper ${version}`;
    info.style.cssText = `
        position: fixed;
        left: 0px;
        bottom: 0px;
        z-index: 999999;
        padding: 10px;
        width: 100%;
        background: #eeeeee;
        border: 1px solid #a88734;
        color: black;
        text-align: center;
        user-select:none;
        cursor:pointer;
    `;
    info.setAttribute('title', 'Click to refresh counters');
    document.body.appendChild(info);

    // The text partly comes from the page, so it is written as text, never parsed as HTML.
    const updateInfo = (val) => {
        info.textContent = val;
    };

    let visibleReviews = 0;
    let totalReviewsStr = "";

    // --- Download button ---
    const btn = document.createElement('button');
    btn.id = 'scrp-btn';
    btn.textContent = 'DOWNLOAD REVIEWS AS CSV';
    btn.style.cssText = `
        position: fixed;
        left: ${LEFT}px;
        bottom: ${BOTTOM}px;
        z-index: 999999;
        padding: 15px;
        min-width: ${MIN_WIDTH}px;
        height: ${HEIGHT}px;
        background: #ff9900;
        border: 1px solid #a88734;
        color: black;
        border-radius: 8px;
        cursor: pointer;
        font-weight: bold;
        box-shadow: 0 4px 10px rgba(0,0,0,0.5);
    `;
    btn.onclick = saveReviewsToCSV;
    document.body.appendChild(btn);

    // Recounts the reviews in the DOM and refreshes the button label and the info bar.
    const updateVisibleReviewCount = () => {
        // Skip the scrape entirely while no review container exists, so this passive
        // refresh never runs the scraper's empty-page handling; also tolerate a
        // scraper that returns nothing instead of a Map.
        const hasReviews = document.querySelector(reviewSelector) !== null;
        const reviews = hasReviews ? getReviewsAsMap() : null;
        visibleReviews = reviews ? reviews.size : 0;

        const ratingCountElement = document.querySelector(RATING_COUNT_TAG);
        if (ratingCountElement) {
            totalReviewsStr = ratingCountElement.innerText.trim();
        }
        btn.textContent = `📥 DOWNLOAD ${visibleReviews} REVIEWS AS CSV`;
        updateInfo(`${visibleReviews} Visible reviews; ${totalReviewsStr}`);
    };

    // Restyles one of Amazon's own controls as a floating button stacked above the download button.
    const mutateTag = (seeMoreLink) => {
        Object.assign(seeMoreLink.style, {
            position: 'fixed',
            left: `${LEFT}px`,
            bottom: `${BOTTOM + HEIGHT}px`,
            zIndex: '10000',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
            padding: '10px 15px',
            width: `${MIN_WIDTH}px`,
            height: `${HEIGHT}px`,
            backgroundColor: '#fff',
            color: '#111',
            border: '2px solid #e77600',
            borderRadius: '8px',
            boxShadow: '0px 2px 10px rgba(0,0,0,0.3)',
            fontWeight: 'bold',
            textDecoration: 'none',
            cursor: 'pointer',
        });
    };

    // Smooth-scrolls to 1000px above the end of the document, if enabled in the configuration.
    const scrollToFooter = () => {
        if (SCROLL_TO_BOTTOM) {
            window.scrollTo({ top: document.body.scrollHeight - 1000, behavior: 'smooth' });
        }
    };

    // Finds Amazon's "see all reviews" link and "Show 10 more reviews" buttons and restyles them.
    const transformButtons = () => {
        const seeMoreLink = document.querySelector(SEE_MORE_TAG);
        if (seeMoreLink) {
            mutateTag(seeMoreLink);
        }

        document.querySelectorAll(SHOW_MORE_REVIEWS_TAG).forEach((button) => {
            if (!button.textContent.trim().includes("Show 10 more reviews")) {
                return;
            }
            mutateTag(button);

            // Attach the scroll-after-click listener only once per element.
            if (!button.dataset.hasScrollListener) {
                button.addEventListener('click', () => {
                    setTimeout(scrollToFooter, SCROLL_DELAY_MS);
                });
                button.dataset.hasScrollListener = 'true';
            }
        });
    };

    // Just to be safe, transform the buttons every second.
    setInterval(transformButtons, UPDATE_COOLDOWN_MS);

    // --- Refresh on DOM changes, at most once per cooldown ---
    let cooldownTimer = null;
    let isRunning = false;        // true while a refresh is running or cooling down
    let hasPendingUpdate = false; // a DOM change arrived during the cooldown

    const runUpdates = () => {
        isRunning = true;
        try {
            transformButtons();
            updateVisibleReviewCount();
        } finally {
            // Drop the mutation records caused by our own label updates; otherwise
            // every refresh would schedule another one.
            observer.takeRecords();

            // Always release the lock, even when a refresh throws, and keep a single timer.
            clearTimeout(cooldownTimer);
            cooldownTimer = setTimeout(() => {
                isRunning = false;
                // Catch up on changes that happened while we were cooling down.
                if (hasPendingUpdate) {
                    hasPendingUpdate = false;
                    runUpdates();
                }
            }, UPDATE_COOLDOWN_MS);
        }
    };

    const observer = new MutationObserver(() => {
        // During the cooldown only remember that something changed.
        if (isRunning) {
            hasPendingUpdate = true;
            return;
        }
        runUpdates();
    });

    info.onclick = runUpdates;

    // Start observing.
    observer.observe(document.body, {
        childList: true,
        subtree: true
    });

    // Initial run.
    runUpdates();

})();