DuckDuckGo URL Collector (Rewritten)

Collects URLs from DuckDuckGo with optional site: filtering and rate limiting, updated for modern DuckDuckGo

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey,之後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts,之後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name        DuckDuckGo URL Collector (Rewritten)
// @namespace   http://tampermonkey.net/
// @version     1.5
// @description Collects URLs from DuckDuckGo with optional site: filtering and rate limiting, updated for modern DuckDuckGo
// @author      Ghosty-Tongue
// @match       *://duckduckgo.com/*
// @grant       GM_notification
// @license     MIT
// ==/UserScript==

(function() {
    'use strict';

    // Shared state for one page load.
    // collectedUrls: every result URL gathered so far (a Set, so duplicates collapse).
    const collectedUrls = new Set();
    // isProcessing: true while a collection run is active; clearing it asks the run to stop.
    let isProcessing = false;
    // startTime / timerInterval: bookkeeping for the elapsed-time display.
    let startTime;
    let timerInterval;
    // targetSite: lower-cased value of a "site:" filter in the query, or null when there is none.
    let targetSite = null;

    // Locate the search box: first by the 'search_form_input' id, then by a
    // text input inside the search form.
    const searchFormInput = document.getElementById('search_form_input') ||
                            document.querySelector('form[data-testid="search-form"] input[type="text"]');

    if (searchFormInput) {
        // The operator must start a term. Without the (?:^|\s) anchor the pattern
        // also matched inside "-site:example.com" (an exclusion) and inside words
        // such as "website:", which turned the excluded domain into the only
        // domain that would be collected.
        const match = searchFormInput.value.match(/(?:^|\s)site:(\S+)/);
        if (match) {
            targetSite = match[1].toLowerCase();
        }
    }

    // --- UI Elements ---
    // Status banner shown below the timer; hidden until a run has something to report.
    const banner = document.createElement('div');
    Object.assign(banner.style, {
        position: 'fixed',
        top: '90px',
        right: '10px',
        zIndex: '10001',
        backgroundColor: 'rgba(255, 165, 0, 0.9)',
        color: 'white',
        padding: '10px',
        borderRadius: '5px',
        display: 'none'
    });
    document.body.appendChild(banner);

    // Stylesheet holding the gradient keyframes and the "processing" variant of
    // the control button's animation.
    const style = document.createElement('style');
    style.textContent = `
        @keyframes rgbFlow {
            0% { background-position: 0% 50%; }
            100% { background-position: 100% 50%; }
        }
        .ddg-url-collector-btn.processing {
            /* The button sets "animation" as an inline style, which outranks a
               normal stylesheet rule, so this faster alternating glow needs
               !important to take effect while a run is in progress. */
            animation: rgbFlow 1s linear infinite alternate !important;
        }
    `;
    document.head.appendChild(style);

    // Small elapsed-time readout placed between the control button and the banner.
    const timerDisplay = document.createElement('div');
    Object.assign(timerDisplay.style, {
        position: 'fixed',
        top: '50px',
        right: '10px',
        zIndex: '10000',
        color: 'white',
        backgroundColor: 'rgba(0,0,0,0.7)',
        padding: '5px 10px',
        borderRadius: '5px',
        fontFamily: 'Arial, sans-serif',
        fontSize: '14px'
    });
    document.body.appendChild(timerDisplay);

    // --- Timer Functions ---
    /**
     * Starts (or restarts) the elapsed-time readout.
     *
     * Cancels any interval left from an earlier run, records the current time
     * as the new start, shows "0s", and schedules `updateTimer` once per second.
     */
    function startTimer() {
        if (timerInterval) {
            clearInterval(timerInterval);
        }
        timerDisplay.textContent = '0s';
        startTime = Date.now();
        timerInterval = setInterval(updateTimer, 1000);
    }

    /**
     * Refreshes the timer readout with the whole seconds elapsed since
     * `startTime`, formatted as "<n>s".
     */
    function updateTimer() {
        const elapsedMs = Date.now() - startTime;
        timerDisplay.textContent = `${Math.floor(elapsedMs / 1000)}s`;
    }

    /**
     * Cancels the once-per-second ticker and freezes the readout at the whole
     * seconds elapsed since `startTime`, formatted as "<n>s (stopped)".
     */
    function stopTimer() {
        clearInterval(timerInterval);
        const elapsedMs = Date.now() - startTime;
        const wholeSeconds = Math.floor(elapsedMs / 1000);
        timerDisplay.textContent = `${wholeSeconds}s (stopped)`;
    }

    // --- URL Extraction ---
    /**
     * Scans the result articles currently in the page and adds every result
     * URL not seen before to `collectedUrls`.
     *
     * For each article the title link is preferred, with the "extras" URL link
     * as a fallback. When `targetSite` is set, a URL is kept only if its
     * hostname is that site or one of its subdomains. The previous substring
     * test also accepted look-alike hosts such as "notexample.com" or
     * "example.com.evil.org" for "site:example.com".
     *
     * Links whose href cannot be parsed as a URL are logged and skipped.
     *
     * @returns {number} How many URLs this call added to `collectedUrls`.
     */
    function extractUrls() {
        // Reduce the filter to a bare host: drop a leading "*." or "." (so a
        // filter like "site:.edu" still matches by suffix) and any path part,
        // since a hostname never contains one.
        const siteFilter = targetSite ? targetSite.replace(/^[*.]+/, '').split('/')[0] : '';
        let newUrlsCount = 0;

        for (const result of document.querySelectorAll('article[data-testid="result"]')) {
            const link = result.querySelector('a[data-testid="result-title-a"]') ||
                         result.querySelector('a[data-testid="result-extras-url-link"]');
            if (!link || !link.href) {
                continue;
            }

            const url = link.href;
            let hostname;
            try {
                hostname = new URL(url).hostname.toLowerCase();
            } catch (e) {
                console.warn('DuckDuckGo URL Collector: Invalid URL found:', url, e);
                continue;
            }

            const isOnTargetSite = !siteFilter ||
                                   hostname === siteFilter ||
                                   hostname.endsWith(`.${siteFilter}`);
            if (!isOnTargetSite) {
                continue;
            }

            if (!collectedUrls.has(url)) {
                collectedUrls.add(url);
                newUrlsCount++;
            }
        }
        return newUrlsCount;
    }

    // --- Automation Logic ---
    /**
     * Drives one collection run: harvests the results already on the page, then
     * keeps clicking DuckDuckGo's "More results" button and harvesting again
     * until the button is gone or the run is stopped by clearing `isProcessing`.
     *
     * A run that finishes on its own downloads the URLs through `saveUrls()`.
     * A run that was stopped does not save here: the stop branch of the button's
     * click handler already saves, and saving again once the loop unwound
     * produced a second download a few seconds later.
     *
     * NOTE(review): stopping and restarting within the page-load delay lets the
     * old loop see `isProcessing === true` again and keep running beside the new
     * one; confirm whether a per-run token is needed.
     *
     * @returns {Promise<void>} Settles once the run has ended and been reported.
     */
    async function clickMoreResults() {
        const SCROLL_SETTLE_MS = 500;   // let the smooth scroll finish before clicking
        const PAGE_LOAD_MS = 3000;      // time allowed for the next page of results to render
        const URLS_PER_PAUSE = 420;     // new URLs collected between rate-limit breaks
        const PAUSE_MS = 15000;         // length of a rate-limit break

        const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

        // The button is looked up by id first, then by its visible label.
        const findMoreResultsButton = () =>
            document.getElementById('more-results') ||
            Array.from(document.querySelectorAll('button, a')).find(
                (el) => el.textContent.includes('More results') && el.offsetHeight > 0
            );

        isProcessing = true;
        btn.classList.add('processing');

        // Harvest what is already rendered, so a search with a single page of
        // results (no "More results" button at all) is not saved as an empty file.
        let batchCount = extractUrls();

        while (isProcessing) {
            const moreResultsButton = findMoreResultsButton();
            if (!moreResultsButton) {
                break; // no further pages: the run is complete
            }

            moreResultsButton.scrollIntoView({ behavior: 'smooth', block: 'center' });
            await sleep(SCROLL_SETTLE_MS);
            if (!isProcessing) {
                break; // stopped while scrolling
            }

            moreResultsButton.click();
            await sleep(PAGE_LOAD_MS);
            if (!isProcessing) {
                // Stopped while loading. Do not extract: the stop handler has
                // already saved, and the reported count should match that file.
                break;
            }

            batchCount += extractUrls();
            banner.textContent = `Collecting... Found ${collectedUrls.size} URLs so far.`;
            banner.style.display = 'block';

            if (batchCount >= URLS_PER_PAUSE) {
                banner.textContent = 'Taking 15s break to avoid limits...';
                await sleep(PAUSE_MS);
                banner.textContent = `Resuming collection. Found ${collectedUrls.size} URLs.`;
                batchCount = 0;
            }
        }

        // Must be read before the flag is reset below; reading it afterwards
        // made the notification always say "complete".
        const wasInterrupted = !isProcessing;

        isProcessing = false;
        btn.classList.remove('processing');
        banner.style.display = 'none';

        GM_notification({
            title: 'DuckDuckGo URL Collector',
            text: `Collection ${wasInterrupted ? 'interrupted' : 'complete'}. Saved ${collectedUrls.size} URLs.`,
            timeout: 5000
        });

        if (!wasInterrupted) {
            saveUrls();
        }
    }

    // --- Save Function ---
    /**
     * Downloads the collected URLs as a plain-text file, one URL per line,
     * named "duckduckgo_urls_<YYYY-MM-DD>.txt" (date taken from the ISO/UTC
     * timestamp), then stops the timer.
     *
     * The download is triggered by clicking a temporary anchor that points at
     * an object URL for the text blob; the anchor is removed and the object URL
     * revoked straight afterwards.
     */
    function saveUrls() {
        const fileContents = [...collectedUrls].join('\n');
        const textBlob = new Blob([fileContents], { type: 'text/plain' });
        const objectUrl = URL.createObjectURL(textBlob);

        const downloadLink = document.createElement('a');
        const dateStamp = new Date().toISOString().slice(0, 10);
        downloadLink.href = objectUrl;
        downloadLink.download = `duckduckgo_urls_${dateStamp}.txt`;

        // The anchor only needs to be in the document for the click itself.
        document.body.appendChild(downloadLink);
        downloadLink.click();
        document.body.removeChild(downloadLink);

        URL.revokeObjectURL(objectUrl);
        stopTimer();
    }

    // --- Control Button ---
    // Floating control button: the first click starts a collection run, and a
    // click while a run is active stops it and saves what was gathered.
    const btn = document.createElement('button');
    btn.textContent = '🦆';
    btn.classList.add('ddg-url-collector-btn'); // hook for the ".processing" stylesheet rule
    Object.assign(btn.style, {
        position: 'fixed',
        top: '10px',
        right: '10px',
        zIndex: '10000',
        padding: '12px 24px',
        background: 'linear-gradient(90deg, #ff0000, #00ff00, #0000ff, #ff0000)',
        backgroundSize: '300% 100%',
        animation: 'rgbFlow 5s linear infinite',
        color: 'white',
        border: 'none',
        borderRadius: '25px',
        cursor: 'pointer',
        fontFamily: 'Arial, sans-serif',
        fontWeight: 'bold',
        boxShadow: '0 4px 15px rgba(0,0,0,0.2)',
        transition: 'transform 0.2s, box-shadow 0.2s'
    });

    // Hover feedback: grow slightly and deepen the shadow, then restore.
    btn.addEventListener('mouseover', () => {
        btn.style.transform = 'scale(1.05)';
        btn.style.boxShadow = '0 6px 20px rgba(0,0,0,0.25)';
    });

    btn.addEventListener('mouseout', () => {
        btn.style.transform = 'scale(1)';
        btn.style.boxShadow = '0 4px 15px rgba(0,0,0,0.2)';
    });

    btn.addEventListener('click', () => {
        if (isProcessing) {
            // A run is active: clearing the flag asks it to stop, and what has
            // been gathered so far is saved immediately.
            isProcessing = false;
            btn.classList.remove('processing');
            banner.style.display = 'none';
            stopTimer();
            saveUrls();
            return;
        }

        // Start a fresh run, discarding URLs from any previous one.
        collectedUrls.clear();
        startTimer();
        // The run is asynchronous. Without a rejection handler an error inside
        // it went unreported and left isProcessing true, so the button stayed
        // in its processing state.
        clickMoreResults().catch((error) => {
            console.error('DuckDuckGo URL Collector: collection failed:', error);
            isProcessing = false;
            btn.classList.remove('processing');
            banner.style.display = 'none';
            stopTimer();
        });
    });

    document.body.appendChild(btn);

    // Initial extraction in case there are results on the first page already
    extractUrls();
})();