DuckDuckGo URL Collector (Rewritten)

Collects URLs from DuckDuckGo with optional site: filtering and rate limiting, updated for modern DuckDuckGo

Vous devrez installer une extension telle que Tampermonkey, Greasemonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Userscripts pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey pour installer ce script.

Vous devrez installer une extension de gestionnaire de script utilisateur pour installer ce script.

(J'ai déjà un gestionnaire de scripts utilisateur, laissez-moi l'installer !)

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

(J'ai déjà un gestionnaire de style utilisateur, laissez-moi l'installer !)

// ==UserScript==
// @name        DuckDuckGo URL Collector (Rewritten)
// @namespace   http://tampermonkey.net/
// @version     1.5
// @description Collects URLs from DuckDuckGo with optional site: filtering and rate limiting, updated for modern DuckDuckGo
// @author      Ghosty-Tongue
// @match       *://duckduckgo.com/*
// @grant       GM_notification
// @license     MIT
// ==/UserScript==

(function() {
    'use strict';

    // Shared state, closed over by every function in this script.
    const collectedUrls = new Set(); // unique result URLs gathered so far
    let isProcessing = false;        // true while the "More results" loop is running
    let startTime;                   // Date.now() value recorded by startTimer()
    let timerInterval;               // setInterval handle driving the elapsed-time display
    let targetSite = null;           // lower-cased value of the query's site: operator, or null

    /**
     * Extracts the value of the first positive `site:` operator from a search query.
     *
     * The operator must start the query or follow whitespace, so the exclusion form
     * `-site:example.com` and words that merely contain "site:" (e.g. `website:foo`)
     * are not mistaken for a filter.
     *
     * @param {?string} query - Raw search query text; null/undefined is treated as empty.
     * @returns {?string} The lower-cased operator value, or null when there is none.
     */
    function parseSiteFilter(query) {
        const match = /(?:^|\s)site:(\S+)/.exec(query || '');
        return match ? match[1].toLowerCase() : null;
    }

    // Look for the search box under both the legacy id and the newer form markup.
    const searchFormInput = document.getElementById('search_form_input') ||
                            document.querySelector('form[data-testid="search-form"] input[type="text"]');

    // Prefer the live input value. When the input is missing or still empty, fall back
    // to the `q` parameter of the page URL so the site filter is not silently dropped.
    const searchQuery = (searchFormInput && searchFormInput.value) ||
                        new URLSearchParams(window.location.search).get('q') ||
                        '';

    targetSite = parseSiteFilter(searchQuery);

    // --- UI Elements ---
    // Status banner: a fixed orange box near the top-right corner. It starts hidden
    // (display: none) and is only shown once a run has progress to report.
    const banner = document.createElement('div');
    const bannerCss = {
        position: 'fixed',
        top: '90px',
        right: '10px',
        zIndex: '10001',
        backgroundColor: 'rgba(255, 165, 0, 0.9)',
        color: 'white',
        padding: '10px',
        borderRadius: '5px',
        display: 'none'
    };
    for (const [property, value] of Object.entries(bannerCss)) {
        banner.style[property] = value;
    }
    document.body.appendChild(banner);

    // Stylesheet for the control button: the shared gradient keyframes plus the
    // faster animation used while a collection run is in progress.
    const style = document.createElement('style');
    style.textContent = `
        @keyframes rgbFlow {
            0% { background-position: 0% 50%; }
            100% { background-position: 100% 50%; }
        }
        .ddg-url-collector-btn.processing {
            /* The button sets its idle animation as an inline style, which outranks any
               class selector; !important is needed for this rule to take effect. */
            animation: rgbFlow 1s linear infinite alternate !important; /* Faster, alternating glow */
        }
    `;
    document.head.appendChild(style);

    // Elapsed-time readout: a small dark pill fixed near the top-right corner.
    // Its text is written by the timer functions.
    const timerDisplay = document.createElement('div');
    const timerCss = {
        position: 'fixed',
        top: '50px',
        right: '10px',
        zIndex: '10000',
        color: 'white',
        backgroundColor: 'rgba(0,0,0,0.7)',
        padding: '5px 10px',
        borderRadius: '5px',
        fontFamily: 'Arial, sans-serif',
        fontSize: '14px'
    };
    Object.keys(timerCss).forEach((property) => {
        timerDisplay.style[property] = timerCss[property];
    });
    document.body.appendChild(timerDisplay);

    // --- Timer Functions ---
    /**
     * Restarts the elapsed-time readout from zero and schedules a refresh every second.
     */
    function startTimer() {
        // Cancel a ticker left over from an earlier run so only one is ever active.
        const previousTicker = timerInterval;
        if (previousTicker) {
            clearInterval(previousTicker);
        }

        timerDisplay.textContent = '0s';
        startTime = Date.now();
        timerInterval = setInterval(updateTimer, 1000);
    }

    /**
     * Writes the whole seconds elapsed since `startTime` into the timer readout.
     */
    function updateTimer() {
        const elapsedMs = Date.now() - startTime;
        const wholeSeconds = Math.floor(elapsedMs / 1000);
        timerDisplay.textContent = `${wholeSeconds}s`;
    }

    /**
     * Stops the periodic refresh and freezes the readout at the final elapsed time.
     */
    function stopTimer() {
        const stoppedAt = Date.now();
        clearInterval(timerInterval);

        const wholeSeconds = Math.floor((stoppedAt - startTime) / 1000);
        timerDisplay.textContent = `${wholeSeconds}s (stopped)`;
    }

    // --- URL Extraction ---
    /**
     * Scans the result articles currently in the page and adds every URL that has not
     * been seen yet to `collectedUrls`.
     *
     * When `targetSite` is set, a URL is kept only if its hostname equals that site or
     * is a subdomain of it. Plain substring matching is deliberately avoided: it would
     * let `notexample.com` or `example.com.evil.org` pass a `site:example.com` filter.
     *
     * @returns {number} How many URLs this call newly added.
     */
    function extractUrls() {
        // Reduce the raw site: token to a bare hostname: drop any scheme, anything from
        // the first "/", "?" or "#" onwards, and leading "*." / "." so that a filter
        // such as "example.com/docs" or ".edu" can still be compared with a hostname.
        const siteHost = targetSite
            ? String(targetSite)
                .toLowerCase()
                .replace(/^[a-z][a-z0-9+.-]*:\/\//, '')
                .split(/[/?#]/)[0]
                .replace(/^(?:\*\.|\.)+/, '')
            : '';

        const hostMatchesSite = (host) =>
            host === siteHost || host.endsWith(`.${siteHost}`);

        let newUrlsCount = 0;

        for (const result of document.querySelectorAll('article[data-testid="result"]')) {
            // Prefer the title link; fall back to the "extras" URL link.
            const link = result.querySelector('a[data-testid="result-title-a"]') ||
                         result.querySelector('a[data-testid="result-extras-url-link"]');
            if (!link || !link.href) continue;

            const url = link.href;
            let host;
            try {
                host = new URL(url).hostname.toLowerCase();
            } catch (e) {
                console.warn('DuckDuckGo URL Collector: Invalid URL found:', url, e);
                continue;
            }

            // An empty siteHost means "no usable filter": keep everything.
            if (siteHost && !hostMatchesSite(host)) continue;

            if (!collectedUrls.has(url)) {
                collectedUrls.add(url);
                newUrlsCount++;
            }
        }

        return newUrlsCount;
    }

    // --- Automation Logic ---
    // Id of the most recently started run. A loop that was stopped and then superseded
    // by a new click compares its own id with this one to learn that it no longer owns
    // the shared state.
    let activeRunId = 0;

    /**
     * Locates the control that loads the next batch of results.
     *
     * @returns {?Element} The element with id "more-results", otherwise the first visible
     *     button/link whose text contains "More results", otherwise null.
     */
    function findMoreResultsButton() {
        const byId = document.getElementById('more-results');
        if (byId) return byId;

        const byText = Array.from(document.querySelectorAll('button, a')).find(el =>
            el.textContent.includes('More results') && el.offsetHeight > 0 // Ensure visible
        );
        return byText || null;
    }

    /**
     * Repeatedly clicks "More results" and harvests URLs until no such button is left
     * or the run is stopped (the click handler clears `isProcessing`).
     *
     * On normal completion the collected URLs are saved here. When the user stopped the
     * run, the click handler has already saved them, so this function only reports the
     * outcome and does not trigger a second download.
     *
     * @returns {Promise<void>} Resolves once the run has finished; errors raised inside
     *     the loop are logged and end the run instead of rejecting.
     */
    async function clickMoreResults() {
        const SCROLL_SETTLE_MS = 500;   // let the smooth scroll finish before clicking
        const LOAD_WAIT_MS = 3000;      // time allowed for the next batch to render
        const BATCH_LIMIT = 420;        // new URLs collected before taking a break
        const BREAK_MS = 15000;         // length of that break

        const runId = ++activeRunId;
        const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
        // False as soon as the user stops this run or a newer run has been started.
        const isCurrentRun = () => isProcessing && runId === activeRunId;

        isProcessing = true;
        btn.classList.add('processing');

        let failed = false;
        try {
            // Harvest what is already on the page, so a result page without a
            // "More results" button still yields its URLs.
            let batchCount = extractUrls();

            while (isCurrentRun()) {
                const moreResultsButton = findMoreResultsButton();
                if (!moreResultsButton) break; // nothing left to load

                // Scroll the button into view to ensure the click registers.
                moreResultsButton.scrollIntoView({ behavior: 'smooth', block: 'center' });
                await pause(SCROLL_SETTLE_MS);
                if (!isCurrentRun()) break;

                moreResultsButton.click();
                await pause(LOAD_WAIT_MS);
                if (!isCurrentRun()) break;

                batchCount += extractUrls();

                banner.textContent = `Collecting... Found ${collectedUrls.size} URLs so far.`;
                banner.style.display = 'block';

                if (batchCount >= BATCH_LIMIT) {
                    banner.textContent = 'Taking 15s break to avoid limits...';
                    await pause(BREAK_MS);
                    if (!isCurrentRun()) break;
                    banner.textContent = `Resuming collection. Found ${collectedUrls.size} URLs.`;
                    batchCount = 0; // Reset batch count after the break
                }
            }
        } catch (err) {
            failed = true;
            console.error('DuckDuckGo URL Collector: collection aborted by an error:', err);
        }

        // A newer run has taken over the shared state; leave everything to it.
        if (runId !== activeRunId) return;

        // Must be read before the flag is reset: only a user stop clears it mid-run.
        const stoppedByUser = !isProcessing;

        isProcessing = false;
        btn.classList.remove('processing');
        banner.style.display = 'none'; // Hide banner when done or stopped

        // Guarded so a missing GM_notification cannot prevent the save below.
        if (typeof GM_notification === 'function') {
            GM_notification({
                title: 'DuckDuckGo URL Collector',
                text: `Collection ${stoppedByUser || failed ? 'interrupted' : 'complete'}. Saved ${collectedUrls.size} URLs.`,
                timeout: 5000
            });
        }

        if (!stoppedByUser) {
            saveUrls();
        }
    }

    // --- Save Function ---
    /**
     * Downloads the collected URLs as a newline-separated text file named
     * `duckduckgo_urls_<YYYY-MM-DD>.txt` (date taken from the ISO/UTC timestamp),
     * then stops the timer.
     */
    function saveUrls() {
        const REVOKE_DELAY_MS = 10000;

        const blob = new Blob([Array.from(collectedUrls).join('\n')], {type: 'text/plain'});
        const url = URL.createObjectURL(blob);

        // A temporary, attached anchor is used to trigger the download.
        const a = document.createElement('a');
        a.href = url;
        a.download = `duckduckgo_urls_${new Date().toISOString().slice(0,10)}.txt`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);

        // Revoke later rather than immediately: some browsers start the download
        // asynchronously and abort it if the blob URL is already gone.
        setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);

        stopTimer();
    }

    // --- Control Button ---
    // Floating control button: click once to start a collection run, click again
    // while it is running to stop it and download what has been gathered.
    const btn = document.createElement('button');
    btn.textContent = '🦆';
    btn.classList.add('ddg-url-collector-btn'); // Class hook for the injected stylesheet
    Object.assign(btn.style, {
        position: 'fixed',
        top: '10px',
        right: '10px',
        zIndex: '10000',
        padding: '12px 24px',
        background: 'linear-gradient(90deg, #ff0000, #00ff00, #0000ff, #ff0000)',
        backgroundSize: '300% 100%',
        animation: 'rgbFlow 5s linear infinite',
        color: 'white',
        border: 'none',
        borderRadius: '25px',
        cursor: 'pointer',
        fontFamily: 'Arial, sans-serif',
        fontWeight: 'bold',
        boxShadow: '0 4px 15px rgba(0,0,0,0.2)',
        transition: 'transform 0.2s, box-shadow 0.2s'
    });

    // Hover feedback: grow slightly and deepen the shadow.
    btn.addEventListener('mouseover', () => {
        btn.style.transform = 'scale(1.05)';
        btn.style.boxShadow = '0 6px 20px rgba(0,0,0,0.25)';
    });

    btn.addEventListener('mouseout', () => {
        btn.style.transform = 'scale(1)';
        btn.style.boxShadow = '0 4px 15px rgba(0,0,0,0.2)';
    });

    /**
     * Returns the UI to its idle state and downloads whatever has been collected.
     * Shared by the manual stop and by the failure path of a run.
     */
    function haltAndSave() {
        isProcessing = false;
        btn.classList.remove('processing');
        banner.style.display = 'none';
        stopTimer();
        saveUrls();
    }

    btn.addEventListener('click', () => {
        if (isProcessing) {
            // A run is active: this click stops it and saves immediately.
            haltAndSave();
            return;
        }

        collectedUrls.clear(); // Start every run from an empty set
        startTimer();
        // Do not leave the promise floating: if the run rejects, log the error, reset
        // the UI and still save the partial results instead of losing them.
        clickMoreResults().catch((err) => {
            console.error('DuckDuckGo URL Collector: collection failed:', err);
            haltAndSave();
        });
    });

    document.body.appendChild(btn);

    // Initial extraction in case there are results on the first page already.
    // Note: starting a run clears `collectedUrls`, so these entries are discarded then.
    extractUrls();
})();