Amazon Review Scraper

Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation

이 스크립트를 설치하려면 Tampermonkey, Greasemonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램을 설치해야 합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Userscripts와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 유저 스크립트 관리자 확장 프로그램이 필요합니다.

(이미 유저 스크립트 관리자가 설치되어 있습니다. 설치를 진행합니다!)

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

(이미 유저 스타일 관리자가 설치되어 있습니다. 설치를 진행합니다!)

// ==UserScript==
// @name         Amazon Review Scraper
// @namespace    http://tampermonkey.net/
// @version      1.48
// @description  Aggressively cleans reviews and prevents duplicate entries using a Map and unique ID validation
// @match        *://www.amazon.com/*
// @grant        none
// @license MIT
// @author Lightning
// ==/UserScript==

//==================================================================================================
//==================================================================================================
//==================================================================================================
// CONFIGURATION ===================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================

// When true, clicking the "Show 10 more reviews" button also scrolls the window
// towards the bottom of the page shortly afterwards.
const SCROLL_TO_BOTTOM = true;

// Floating button geometry, in CSS pixels. These are fixed configuration values
// (never reassigned at runtime), so they are declared as constants.
const LEFT = 0;         // distance from the left edge of the viewport
const BOTTOM = 20;      // distance from the bottom edge of the viewport
const MIN_WIDTH = 250;  // minimum width of the download button; width of the restyled page buttons
const HEIGHT = 50;      // height of the download button and of the restyled page buttons

//==================================================================================================
//==================================================================================================
//==================================================================================================
// SELECTORS =======================================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================

// CSS selectors used to locate Amazon page elements.
// reviewSelector: matches every element treated as a review container (queried with querySelectorAll in getReviewsAsMap).
// SEE_MORE_TAG: the link for seeing/searching all reviews; restyled into a floating button by transformButtons.
// SHOW_MORE_REVIEWS_TAG: candidate "show more" buttons; only those whose text includes "Show 10 more reviews" are restyled.
// RATING_COUNT_TAG: element whose text reports how many reviews/ratings exist; its trimmed text is shown in the info bar.
const reviewSelector = '[data-hook="review"], div[id^="customer_review-"]'; //For each element that counts as a review div or container
const SEE_MORE_TAG = '[data-hook="see-all-reviews-link-foot"]'; //For seeing all reviews or searching them
const SHOW_MORE_REVIEWS_TAG = '[data-hook="show-more-button"]'; //For the "show 10 more" tag
const RATING_COUNT_TAG = '[data-hook="cr-filter-info-review-rating-count"]'; //The tag that tells us how many reviews we are dealing with

//==================================================================================================
//==================================================================================================
//==================================================================================================
// ACTUAL SCRAPING CODE ============================================================================
// This may have to change as amazon updates their website =========================================
//==================================================================================================
//==================================================================================================

/**
 * Derive the deduplication key for one review element.
 *
 * Order of preference:
 *   1. the element id, with a leading "customer_review-" removed;
 *   2. the `data-review-id` attribute;
 *   3. the first 100 characters of the element text with all whitespace removed.
 *
 * @param {Element} el - A review container element.
 * @returns {string} The key, or an empty string when nothing usable was found.
 */
const getReviewKey = (el) => {
    const ID_PREFIX = 'customer_review-';

    let key = el.getAttribute('id') || "";
    if (key.startsWith(ID_PREFIX)) {
        key = key.slice(ID_PREFIX.length);
    }

    if (!key) {
        key = el.getAttribute('data-review-id') || el.innerText.substring(0, 100).replace(/\s/g, '');
    }
    return key;
};

/**
 * Read title, star rating, body text and image URLs out of one review element.
 *
 * @param {Element} el - A review container element.
 * @returns {{title: string, stars: string, description: string, imgs: string}|null}
 *   The extracted fields ("N/A" for a missing title/stars/description), or null when
 *   neither a title nor a body was found (treated as a malformed/duplicate container).
 */
const extractReview = (el) => {
    // Title, with a leading "5.0 out of 5 stars" prefix stripped.
    const titleEl = el.querySelector('[data-hook="review-title"], .review-title');
    const title = (titleEl ? titleEl.innerText.trim() : "N/A")
        .replace(/^\d\.\d out of \d stars\s+/, '');

    // Stars: the first space-separated token of the rating text.
    const starsEl = el.querySelector('[data-hook="review-star-rating"], .review-rating');
    const stars = starsEl ? starsEl.innerText.split(' ')[0] : "N/A";

    // Body: prefer the inner span to avoid metadata, then fall back to wider containers.
    const bodyEl = el.querySelector('[data-hook="review-body"] span.a-size-base') ||
          el.querySelector('.review-text-content span') ||
          el.querySelector('[data-hook="review-body"]');
    const description = bodyEl
        ? bodyEl.innerText.replace(/Read more/gi, '').replace(/\s+/g, ' ').trim()
        : "N/A";

    // Image URLs, joined by " | ".
    const imgs = Array.from(el.querySelectorAll('img.review-image-tile'))
        .map(img => img.src)
        .join(' | ');

    if (description === "N/A" && title === "N/A") {
        return null;
    }
    return { title, stars, description, imgs };
};

/**
 * Collect every review currently in the DOM, keyed by a unique review id.
 *
 * Always returns a Map (empty when nothing matched) and never prompts the user,
 * so it is safe to call from automatic counter refreshes as well as from the
 * download button.
 *
 * @returns {Map<string, {title: string, stars: string, description: string, imgs: string}>}
 */
const getReviewsAsMap = () => {
    const uniqueReviews = new Map();

    document.querySelectorAll(reviewSelector).forEach(el => {
        const reviewId = getReviewKey(el);

        // Skip elements without a key and reviews already seen in this pass.
        if (!reviewId || uniqueReviews.has(reviewId)) {
            return;
        }

        const review = extractReview(el);
        if (review) {
            uniqueReviews.set(reviewId, review);
        }
    });

    return uniqueReviews;
};

/**
 * Serialise reviews as CSV text: a header row followed by one row per review.
 * Every cell is wrapped in double quotes and embedded quotes are doubled.
 *
 * @param {Map<string, {title: string, stars: string, description: string, imgs: string}>} reviews
 * @returns {string} CSV content with rows separated by "\n".
 */
const buildReviewsCsv = (reviews) => {
    const csvRows = [['Title', 'Stars', 'Description', 'ImageURL']];
    reviews.forEach(val => {
        csvRows.push([val.title, val.stars, val.description, val.imgs]);
    });

    return csvRows
        .map(row => row.map(cell => `"${String(cell).replace(/"/g, '""')}"`).join(","))
        .join("\n");
};

/**
 * Scrape the reviews currently on the page and download them as a CSV file.
 * Alerts the user (and downloads nothing) when no review could be collected.
 */
const saveReviewsToCSV = () => {
    const uniqueReviews = getReviewsAsMap();

    if (uniqueReviews.size === 0) {
        alert("No reviews found! Please scroll down until the reviews are visible on your screen.");
        return;
    }

    const csvContent = buildReviewsCsv(uniqueReviews);

    // Download trigger: a temporary <a download> pointing at an in-memory blob.
    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.setAttribute("href", url);
    link.setAttribute("download", `amazon_reviews_${new Date().getTime()}.csv`);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the blob once the browser has had time to start the download;
    // otherwise every click keeps another copy of the CSV alive until the page unloads.
    setTimeout(() => URL.revokeObjectURL(url), 1000);

    console.log(`Scraped ${uniqueReviews.size} unique reviews.`);
};

//==================================================================================================
//==================================================================================================
//==================================================================================================
// OUR OTHER STUFF =================================================================================
//==================================================================================================
//==================================================================================================
//==================================================================================================

/**
 * Userscript entry point.
 *
 * On product ("/dp/") and review ("/product-reviews/") pages this adds a fixed
 * info bar and a "download as CSV" button, restyles Amazon's "see all reviews" /
 * "Show 10 more reviews" controls into floating buttons, and keeps the visible
 * review counter up to date as the DOM changes.
 */
(function() {
    'use strict';

    // Prevent duplicate buttons and only show on product/review pages
    const pageUrl = window.location.href;
    const isScrapablePage = pageUrl.includes('/dp/') || pageUrl.includes('/product-reviews/');
    if (document.getElementById('scrp-btn') || !isScrapablePage) {
        console.log("We cannot scrape reviews on this page");
        return;
    }

    const version = GM_info.script.version;
    console.log(`Review scraper v${version} is enabled!`);

    /**
     * Set an element's text only when it actually changes. Writing text replaces
     * child nodes, which the subtree observer below would otherwise pick up as a
     * DOM change caused by the script itself.
     */
    const setText = (element, text) => {
        if (element.textContent !== text) {
            element.textContent = text;
        }
    };

    // Info bar pinned to the bottom edge of the viewport.
    const info = document.createElement('div');
    info.id = 'scrp-info';
    info.textContent = `Amazon Review Scraper ${version}`;
    info.style.cssText = `
    position: fixed;
    left: 0px;
    bottom: 0px;
    z-index: 999999;
    padding: 10px;
    width: 100%;
    background: #eeeeee;
    border: 1px solid #a88734;
    color: black;
    text-align: center;
    user-select:none;
    cursor:pointer;
`;
    info.setAttribute('title', 'Click to refresh counters');
    document.body.appendChild(info);

    // The value includes text read from the page, so it is written as plain text, not HTML.
    const updateInfo = (val) => {
        setText(info, val);
    };

    let visibleReviews = 0;
    let totalReviewsStr = "";

    // Download button.
    const btn = document.createElement('button');
    btn.id = 'scrp-btn';
    btn.textContent = 'DOWNLOAD REVIEWS AS CSV';
    btn.style.cssText = `
    position: fixed;
    left: ${LEFT}px;
    bottom: ${BOTTOM}px;
    z-index: 999999;
    padding: 15px;
    min-width: ${MIN_WIDTH}px;
    height: ${HEIGHT}px;
    background: #ff9900;
    border: 1px solid #a88734;
    color: black;
    border-radius: 8px;
    cursor: pointer;
    font-weight: bold;
    box-shadow: 0 4px 10px rgba(0,0,0,0.5);
`;

    btn.onclick = saveReviewsToCSV;
    document.body.appendChild(btn);

    /** Recount the reviews currently in the DOM and refresh the button and info bar labels. */
    const updateVisibleReviewCount = () => {
        // Skip the scrape entirely when no review container is in the DOM and tolerate a
        // missing result, so a review-less page neither raises a prompt on every refresh
        // nor throws when reading `.size`.
        const hasReviewElements = document.querySelector(reviewSelector) !== null;
        const reviews = hasReviewElements ? getReviewsAsMap() : undefined;
        visibleReviews = reviews ? reviews.size : 0;

        const ratingCountElement = document.querySelector(RATING_COUNT_TAG);
        if (ratingCountElement) {
            totalReviewsStr = ratingCountElement.innerText.trim();
        }
        setText(btn, `📥 DOWNLOAD ${visibleReviews} REVIEWS AS CSV`);
        updateInfo(`${visibleReviews} Visible reviews; ${totalReviewsStr}`);
    };

    /** Restyle a page link/button into a floating button stacked directly above the download button. */
    const mutateTag = (seeMoreLink) => {
        Object.assign(seeMoreLink.style, {
            position: 'fixed',
            left: `${LEFT}px`,
            bottom: `${BOTTOM + HEIGHT}px`,
            zIndex: '10000',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
            padding: '10px 15px',
            width: `${MIN_WIDTH}px`,
            height: `${HEIGHT}px`,
            backgroundColor: '#fff',
            color: '#111',
            border: '2px solid #e77600',
            borderRadius: '8px',
            boxShadow: '0px 2px 10px rgba(0,0,0,0.3)',
            fontWeight: 'bold',
            textDecoration: 'none',
            cursor: 'pointer',
        });
    };

    /** Smooth-scroll to 1000px above the end of the document, if enabled in the configuration. */
    const scrollToFooter = () => {
        if (SCROLL_TO_BOTTOM) {
            window.scrollTo({ top: document.body.scrollHeight - 1000, behavior: 'smooth' });
        }
    };

    /** Find the "see all reviews" link and the "Show 10 more reviews" buttons and turn them into floating buttons. */
    const transformButtons = () => {
        const seeMoreLink = document.querySelector(SEE_MORE_TAG);
        if (seeMoreLink) {
            // Restyle only: the click handler previously attached here scheduled an empty callback.
            mutateTag(seeMoreLink);
        }

        document.querySelectorAll(SHOW_MORE_REVIEWS_TAG).forEach(button => {
            if (!button.textContent.trim().includes("Show 10 more reviews")) {
                return;
            }
            mutateTag(button);

            // Attach the scroll-after-click listener once per button.
            if (!button.dataset.hasScrollListener) {
                button.addEventListener('click', () => {
                    setTimeout(() => scrollToFooter(), 500);
                });
                button.dataset.hasScrollListener = 'true';
            }
        });
    };

    // Just to be safe, transform the buttons every second
    setInterval(transformButtons, 1000);

    // Set up our logic for handling DOM changes
    let isRunning = false;

    /** Run one refresh pass and release the observer lock after a one-second cooldown. */
    const runUpdates = () => {
        try {
            transformButtons();
            updateVisibleReviewCount();
        } finally {
            // Always schedule the release, even if the pass threw; otherwise the
            // observer would stay locked out for the rest of the page's lifetime.
            setTimeout(() => {
                isRunning = false;
            }, 1000);
        }
    };
    info.onclick = runUpdates;

    const observer = new MutationObserver(() => {
        // If we are currently in the cooldown, do nothing
        if (isRunning) {
            return;
        }

        // Otherwise, lock it and run
        isRunning = true;
        runUpdates();
    });

    // Start observing
    observer.observe(document.body, {
        childList: true,
        subtree: true
    });

    // Initial run
    runUpdates();

})();