LinkedIn Saved Posts Extractor

Extracts and exports URLs, author names, and a summary from LinkedIn saved posts.

// ==UserScript==
// @name         LinkedIn Saved Posts Extractor
// @namespace    http://tampermonkey.net/
// @version      1.2
// @description  Extracts and exports URLs, author names, and a summary from LinkedIn saved posts.
// @author       Gemini assisted by @ProtoPioneer
// @match        https://www.linkedin.com/my-items/saved-posts/
// @icon         https://www.google.com/s2/favicons?sz=64&domain=linkedin.com
// @grant        GM_addStyle
// @grant        GM_setClipboard
// @grant        window.scrollTo
// @license      MIT

// ==/UserScript==

(function() {
    'use strict';

    // --- Constants and Selectors ---

    // Pathname the controls are restricted to (compared with window.location.pathname).
    const TARGET_PATHNAME = '/my-items/saved-posts/';
    // Element whose presence is awaited before the controls are injected.
    const PAGE_LOAD_SELECTOR = '.scaffold-finite-scroll__content';
    // Sidebar section that receives the controls container.
    const SIDEBAR_CONTAINER_SELECTOR = 'section.artdeco-card';
    // NOTE(review): the long random-looking class names below look build-generated;
    // presumably they change between LinkedIn deployments - verify when extraction stops matching.
    const POST_ITEM_SELECTOR = 'li.spJBWGJxucbfKXdVfkzNWoylvzJUXm';
    // Div carrying the post URN in its data-chameleon-result-urn attribute.
    const POST_URN_DIV_SELECTOR = 'div.SBfgMVhxquNmLvmeshQNzKpcIpEmfwZtXvYS[data-chameleon-result-urn^="urn:li:activity:"]';
    // Anchor(s) holding the author name(s) inside a post item.
    const AUTHOR_NAME_SELECTOR = 'span.MNolfTcmIbAkuMjITKBbzJsHKqYdGLzxXw a';
    // Button that loads the next batch of results.
    const SHOW_MORE_BUTTON_SELECTOR = 'button.scaffold-finite-scroll__load-button';
    // Paragraph holding the post summary text.
    const SUMMARY_SELECTOR = 'div.TGfdAPUhdpGykMAFqWBkfRYuKjJcskwZw p.entity-result__content-summary';
    // Pause after scrolling/clicking so that newly requested content can load.
    const SCROLL_DELAY_MS = 2000;
    // Upper bound for the automatic scroll mode: 5 minutes.
    const MAX_AUTO_SCROLL_DURATION_MS = 5 * 60 * 1000;

    // Extracted posts keyed by post URL so that each post is stored only once.
    // The binding is never reassigned (the Map is cleared in place), hence const.
    const extractedPosts = new Map();
    // True while an extraction run is active; guards against re-entry.
    let extractionInProgress = false;
    // References to the UI controls; assigned when the controls are built.
    let extractionButton = null;
    let scrollModeDropdown = null;
    let numPagesInput = null;
    let numPagesLabel = null;
    let outputFormatDropdown = null;

    // --- Utility Functions ---

    /**
     * Strips control characters from a string so that they cannot cause
     * encoding problems in the exported data.
     * Removes U+0000-U+001F (which includes tab, CR and LF) and U+007F-U+009F;
     * every other character is kept as is.
     * @param {string} str - Text to clean.
     * @returns {string} The cleaned text, or an empty string when `str` is not a string.
     */
    function sanitizeString(str) {
        const CONTROL_CHARACTERS = /[\u0000-\u001F\u007F-\u009F]/g;
        return typeof str === 'string' ? str.replace(CONTROL_CHARACTERS, '') : '';
    }

    /**
     * Polls the document until an element matching `selector` exists.
     * The first lookup happens one polling interval after the call.
     * @param {string} selector - CSS selector of the element to wait for.
     * @param {number} timeout - Maximum time to wait in milliseconds.
     * @returns {Promise<HTMLElement>} Resolves with the matched element; rejects with an
     *     Error once more than `timeout` milliseconds have elapsed without a match.
     */
    function waitForElement(selector, timeout = 10000) {
        const POLL_INTERVAL_MS = 500;

        return new Promise((resolve, reject) => {
            const startedAt = Date.now();

            const timerId = setInterval(() => {
                const match = document.querySelector(selector);

                // A match wins even when the deadline has also passed.
                if (match) {
                    clearInterval(timerId);
                    resolve(match);
                    return;
                }

                const elapsed = Date.now() - startedAt;
                if (elapsed > timeout) {
                    clearInterval(timerId);
                    reject(new Error(`Timeout waiting for element: ${selector}`));
                }
            }, POLL_INTERVAL_MS);
        });
    }

    /**
     * Extracts data from every saved-post element currently in the document and
     * stores new posts in `extractedPosts`, keyed by post URL.
     *
     * Each stored entry has the shape `{ author, url, summary }`:
     *  - `author`: author names joined with ';', control characters removed;
     *  - `url`: built from the URN found in the post's data-chameleon-result-urn attribute;
     *  - `summary`: single-line text with double quotes doubled ("" for "), i.e. already
     *    escaped for being wrapped in a quoted CSV field.
     *
     * Posts lacking a URL or an author are logged and skipped. An error raised while
     * processing one post is logged and does not stop the remaining posts.
     */
    function extractVisiblePosts() {
        const postElements = document.querySelectorAll(POST_ITEM_SELECTOR);
        postElements.forEach((postElement, index) => {
            console.log(`Processing post element at index: ${index}`);
            try {
                // Build the post URL from the URN, when the URN div is present.
                const urnDivElement = postElement.querySelector(POST_URN_DIV_SELECTOR);
                const urn = urnDivElement ? urnDivElement.dataset.chameleonResultUrn : null;
                const postUrl = urn ? `https://www.linkedin.com/feed/update/${urn}/` : null;

                // Only the first line of each author anchor's text is used as the name.
                const authorNameElements = postElement.querySelectorAll(AUTHOR_NAME_SELECTOR);
                const authorNames = sanitizeString(
                    Array.from(authorNameElements)
                        .map(el => el.innerText.split('\n')[0].trim())
                        .filter(name => name)
                        .join(';')
                );

                const summaryElement = postElement.querySelector(SUMMARY_SELECTOR);
                // Trim BEFORE stripping the trailing "…ver mais" marker: the pattern is
                // anchored at the end of the string, so trailing whitespace in textContent
                // would otherwise keep it from matching.
                let summary = summaryElement
                    ? summaryElement.textContent.trim().replace(/…ver mais$/, '').trim()
                    : '';
                // Turn line breaks and tabs into spaces. sanitizeString would otherwise
                // delete them outright and glue the neighbouring words together.
                summary = summary.replace(/(\r\n|\n|\r|\t)/g, ' ');
                summary = sanitizeString(summary);

                if (summary.length === 0) {
                    console.warn('%cWarning: Summary is empty for this post.', 'color: orange;', summaryElement);
                } else {
                    // Double the quotes so the summary can be wrapped in a quoted CSV field.
                    summary = summary.replace(/"/g, '""');
                }

                if (postUrl && authorNames) {
                    // First occurrence wins; later sightings of the same URL are ignored.
                    if (!extractedPosts.has(postUrl)) {
                        extractedPosts.set(postUrl, { author: authorNames, url: postUrl, summary: summary });
                        console.info(`Extracted: Author(s) - "${authorNames}", URL - "${postUrl}", Summary - "${summary}"`);
                    }
                } else {
                    console.warn('%cError: Could not extract full data for a post.', 'color: yellow;');
                    console.warn('%cPost Element:', 'color: yellow;', postElement);
                    console.warn('%cExtracted urnDivElement: ', 'color: yellow;', urnDivElement);
                    console.warn('%cExtracted URL:', 'color: yellow;', postUrl);
                    console.warn('%cExtracted Author(s):', 'color: yellow;', authorNames);
                    console.warn('%cExtracted Summary:', 'color: yellow;', summary);
                }
            } catch (error) {
                console.warn(`%cError processing a post element: ${error.message}`, 'color: yellow;');
                console.warn('%cProblematic element:', 'color: yellow;', postElement);
            }
        });
    }

    /**
     * Scrolls the window vertically to the current height of the document body,
     * i.e. to the bottom of the page as rendered so far.
     */
    function scrollToBottom() {
        const pageBottom = document.body.scrollHeight;
        window.scrollTo(0, pageBottom);
    }

    /**
     * Clicks the "Show more results" button when it is present and not disabled.
     * @returns {boolean} True if the button was clicked, false when it is missing or disabled.
     */
    function clickShowMoreButton() {
        const loadButton = document.querySelector(SHOW_MORE_BUTTON_SELECTOR);

        // Nothing to click: button absent or currently disabled.
        if (!loadButton || loadButton.disabled) {
            return false;
        }

        loadButton.click();
        return true;
    }

    /**
     * Drives page scrolling and incremental extraction for the selected mode.
     *
     * Mode 1 (manual) runs at most `numPagesToScroll` rounds of: scroll to bottom,
     * short pause, click "Show more", longer pause, extract. It stops early when no
     * button could be clicked on any round except the last one.
     *
     * Mode 2 (automatic) repeats scroll / pause / click / pause / extract until either
     * MAX_AUTO_SCROLL_DURATION_MS has elapsed or three consecutive rounds produced neither
     * a clicked button nor a change in document height.
     *
     * Any other mode does nothing.
     *
     * @param {number} scrollMode - 1 for manual pages, 2 for auto until end/timeout.
     * @param {number} numPagesToScroll - Number of pages to scroll for manual mode.
     * @returns {Promise<void>}
     */
    async function handleScrolling(scrollMode, numPagesToScroll) {
        // Resolves after `ms` milliseconds.
        const pause = (ms) => new Promise((done) => setTimeout(done, ms));

        if (scrollMode === 1) {
            let page = 0;
            while (page < numPagesToScroll) {
                scrollToBottom();
                await pause(SCROLL_DELAY_MS / 2); // let the scroll settle

                const isLastPage = page >= numPagesToScroll - 1;
                // The click attempt always happens; only its failure on a
                // non-final page ends the run early.
                if (!clickShowMoreButton() && !isLastPage) {
                    console.log('No more "Show more results" button found, stopping manual scroll early.');
                    break;
                }

                await pause(SCROLL_DELAY_MS); // let the new content load
                extractVisiblePosts();
                page += 1;
            }
            return;
        }

        if (scrollMode !== 2) {
            return;
        }

        const IDLE_ROUNDS_BEFORE_STOP = 3;
        const deadline = Date.now() + MAX_AUTO_SCROLL_DURATION_MS;
        let idleRounds = 0;

        while (Date.now() < deadline) {
            const heightBefore = document.body.scrollHeight;

            scrollToBottom();
            await pause(SCROLL_DELAY_MS); // scroll plus any content it triggers

            const clicked = clickShowMoreButton();
            await pause(SCROLL_DELAY_MS); // content requested by the click, if any
            extractVisiblePosts();

            const heightAfter = document.body.scrollHeight;
            const madeProgress = clicked || heightAfter !== heightBefore;

            if (madeProgress) {
                idleRounds = 0;
                continue;
            }

            idleRounds += 1;
            console.log(`No new content or button. No new content count: ${idleRounds}`);
            if (idleRounds >= IDLE_ROUNDS_BEFORE_STOP) {
                console.log('Page height not changing and no clickable "Show more results" button. Assuming end of content.');
                break;
            }
        }

        console.log('Automatic scrolling finished (either reached end or timed out).');
    }

    /**
     * Formats one value as a CSV field: values containing a comma, a double quote
     * or a line break are wrapped in double quotes with embedded quotes doubled;
     * all other values are returned unchanged.
     * @param {*} value - Raw (unescaped) field value.
     * @returns {string} The value ready to be placed between CSV separators.
     */
    function escapeCsvField(value) {
        const text = String(value);
        return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    }

    /**
     * Runs one full extraction: reads the options from the UI controls, validates
     * them, scrolls the page according to the selected mode while collecting posts,
     * and finally delivers the result in the selected output format
     * ('1' clipboard URLs, '2' CSV download, '3' JSON download).
     *
     * Does nothing when an extraction is already running. Invalid options are reported
     * with an alert and abort the run before any state is changed. While running, the
     * button is disabled and relabelled; it is always restored afterwards, including
     * when any step of the run throws.
     *
     * @returns {Promise<void>}
     */
    async function startExtraction() {
        if (extractionInProgress) {
            console.log('Extraction already in progress.');
            return;
        }

        // Read and validate the options first so that a rejected run leaves the UI untouched.
        const scrollMode = parseInt(scrollModeDropdown.value, 10);
        const numPagesToScroll = scrollMode === 1 ? parseInt(numPagesInput.value, 10) : 0;
        const outputFormat = outputFormatDropdown.value;

        // NaN fails both comparisons, so a non-numeric mode is rejected here as well.
        if (scrollMode !== 1 && scrollMode !== 2) {
            alert('Invalid scroll mode selected.');
            return;
        }

        if (scrollMode === 1 && (Number.isNaN(numPagesToScroll) || numPagesToScroll <= 0)) {
            alert('Invalid number of pages for manual scroll. Please enter a positive number.');
            return;
        }

        if (!['1', '2', '3'].includes(outputFormat)) {
            alert('Invalid output format selected.');
            return;
        }

        extractionInProgress = true;
        extractionButton.textContent = 'Extracting...';
        extractionButton.disabled = true;

        try {
            // Kept inside the try block: a failure here must not leave the button
            // disabled and the in-progress flag set.
            extractedPosts.clear(); // Drop the results of any previous run
            extractVisiblePosts(); // Posts visible before any scrolling

            await handleScrolling(scrollMode, numPagesToScroll);
            extractVisiblePosts(); // Final pass after all scrolling is done

            const postsArray = Array.from(extractedPosts.values());

            switch (outputFormat) {
                case '1': { // Clipboard (URLs only)
                    GM_setClipboard(postsArray.map(post => post.url).join('\n'));
                    alert(`Copied ${postsArray.length} URLs to clipboard!`);
                    break;
                }
                case '2': { // CSV
                    // Author and URL are stored raw and therefore escaped here. The summary
                    // is stored with its quotes already doubled, so it is only wrapped.
                    const rows = postsArray.map(post =>
                        `${escapeCsvField(post.author)},${escapeCsvField(post.url)},"${post.summary}"`
                    );
                    const csv = 'Author(s),Post URL,Summary\n' + rows.join('\n');
                    openNewTabWithContent('text/csv', csv, 'linkedin_saved_posts.csv');
                    alert(`Generated CSV for ${postsArray.length} posts. Check new tab.`);
                    break;
                }
                case '3': { // JSON Array
                    // Undo the CSV quote-doubling applied to stored summaries; JSON.stringify
                    // does its own escaping, so the doubled quotes would otherwise leak into
                    // the exported text. Halving pairs is exact because doubling leaves every
                    // run of quotes with an even length.
                    const plainPosts = postsArray.map(post => ({
                        author: post.author,
                        url: post.url,
                        summary: post.summary.replace(/""/g, '"'),
                    }));
                    const json = JSON.stringify(plainPosts, null, 2);
                    openNewTabWithContent('application/json', json, 'linkedin_saved_posts.json');
                    alert(`Generated JSON for ${postsArray.length} posts. Check new tab.`);
                    break;
                }
            }
        } catch (error) {
            console.error('An error occurred during extraction:', error);
            alert('An error occurred during extraction. Check console for details.');
        } finally {
            extractionInProgress = false;
            extractionButton.textContent = 'Extract LinkedIn Posts';
            extractionButton.disabled = false;
        }
    }

    /**
     * Hands the given content to the browser through a temporary object URL.
     * A hidden anchor pointing at a Blob of the content is created, clicked
     * programmatically and removed again. The anchor carries both a `download`
     * file name and `target="_blank"`.
     * NOTE(review): with `download` set, browsers typically save the file instead of
     * displaying it in a new tab - confirm the intended behavior.
     *
     * @param {string} mimeType - The MIME type of the content (e.g., 'text/csv', 'application/json').
     * @param {string} content - The content string.
     * @param {string} filename - The suggested filename for download.
     */
    function openNewTabWithContent(mimeType, content, filename) {
        // Delay before the object URL is released again.
        const REVOKE_DELAY_MS = 10000;

        const blob = new Blob([content], { type: mimeType });
        const url = URL.createObjectURL(blob);

        const a = document.createElement('a');
        a.href = url;
        a.download = filename;
        a.target = '_blank';
        document.body.appendChild(a); // Append to body to make it clickable
        a.click();
        document.body.removeChild(a); // Clean up

        // Release the object URL later rather than right after click(): the browser may
        // not have started reading the blob yet, and revoking it synchronously can cancel
        // the download or leave the new tab empty.
        setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
    }

    // --- UI Setup ---

    /**
     * Builds the extractor controls (output format, scroll mode, page count and the
     * start button) and appends them to the sidebar section.
     *
     * Nothing is added when the current pathname differs from TARGET_PATHNAME or when
     * the sidebar section cannot be found; both cases are logged. The created controls
     * are stored in the shared references (`outputFormatDropdown`, `scrollModeDropdown`,
     * `numPagesLabel`, `numPagesInput`, `extractionButton`) that the extraction run reads.
     */
    function addExtractionButton() {
        if (window.location.pathname !== TARGET_PATHNAME) {
            console.log(`Current path "${window.location.pathname}" does not match target path "${TARGET_PATHNAME}". Button and controls not added.`);
        } else {
            GM_addStyle(`
                .linkedin-extractor-controls {
                    padding: 10px;
                    border-top: 1px solid #e0e0e0;
                    margin-top: 10px;
                    display: flex;
                    flex-direction: column;
                    gap: 10px;
                }
                .linkedin-extractor-controls label {
                    font-size: 14px;
                    font-weight: bold;
                    color: #333;
                    margin-bottom: 5px;
                    display: block;
                }
                .linkedin-extractor-controls select,
                .linkedin-extractor-controls input[type="number"] {
                    width: calc(100% - 20px); /* Adjust width to fit sidebar with padding */
                    padding: 6px 8px;
                    border: 1px solid #ccc;
                    border-radius: 4px;
                    font-size: 14px;
                    box-sizing: border-box; /* Include padding and border in the element's total width and height */
                    position: relative; /* Ensure z-index works */
                    z-index: 10001; /* Higher than other elements */
                    background-color: white; /* Ensure background is white for visibility */
                }
                .linkedin-extractor-button {
                    width: calc(100% - 20px); /* Adjust width to fit sidebar with padding */
                    margin: 10px auto 0; /* Center button and add vertical margin */
                    display: block; /* Make it a block element for margin:auto to work */
                    background-color: #0073b1; /* LinkedIn blue */
                    color: white;
                    padding: 10px 15px;
                    border: none;
                    border-radius: 8px;
                    cursor: pointer;
                    font-size: 16px;
                    z-index: 10000;
                    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
                    transition: background-color 0.3s ease, transform 0.2s ease;
                    font-family: "Inter", sans-serif;
                }
                .linkedin-extractor-button:hover:not(:disabled) {
                    background-color: #005f91;
                    transform: translateY(-2px);
                }
                .linkedin-extractor-button:disabled {
                    background-color: #cccccc;
                    cursor: not-allowed;
                }
            `);

            const sidebar = document.querySelector(SIDEBAR_CONTAINER_SELECTOR);
            if (!sidebar) {
                console.error('Could not find the sidebar container to add the button and controls.');
                return;
            }

            // Creates a <label> with the given caption.
            const makeLabel = (caption) => {
                const label = document.createElement('label');
                label.textContent = caption;
                return label;
            };

            // Creates a <select> with the given id and [value, caption] options, in order.
            const makeSelect = (id, options) => {
                const select = document.createElement('select');
                select.id = id;
                options.forEach(([value, caption]) => {
                    const option = document.createElement('option');
                    option.value = value;
                    option.textContent = caption;
                    select.appendChild(option);
                });
                return select;
            };

            // Shows or hides the page-count label and input together.
            const setPagesFieldDisplay = (display) => {
                numPagesLabel.style.display = display;
                numPagesInput.style.display = display;
            };

            // Wrapper that holds every control.
            const controls = document.createElement('div');
            controls.className = 'linkedin-extractor-controls';

            // --- Output format ---
            outputFormatDropdown = makeSelect('linkedin-output-format', [
                ['1', 'Clipboard (URLs only)'],
                ['2', 'CSV (Author,URL,Summary)'],
                ['3', 'JSON Array'],
            ]);
            controls.appendChild(makeLabel('Output Format:'));
            controls.appendChild(outputFormatDropdown);

            // --- Scroll mode (automatic is listed first) ---
            scrollModeDropdown = makeSelect('linkedin-scroll-mode', [
                ['2', 'Automatic (until end or 5 min)'],
                ['1', 'Manual (specify pages)'],
            ]);
            controls.appendChild(makeLabel('Scroll Mode:'));
            controls.appendChild(scrollModeDropdown);

            // --- Page count: hidden until manual mode is chosen ---
            numPagesLabel = makeLabel('Number of Pages:');
            numPagesInput = document.createElement('input');
            numPagesInput.type = 'number';
            numPagesInput.id = 'linkedin-num-pages';
            numPagesInput.min = '1';
            numPagesInput.value = '5'; // Default page count
            setPagesFieldDisplay('none');
            controls.appendChild(numPagesLabel);
            controls.appendChild(numPagesInput);

            scrollModeDropdown.addEventListener('change', () => {
                console.log('Scroll Mode changed to:', scrollModeDropdown.value);
                const isManual = scrollModeDropdown.value === '1';
                setPagesFieldDisplay(isManual ? 'block' : 'none');
            });

            // --- Start button ---
            extractionButton = document.createElement('button');
            extractionButton.className = 'linkedin-extractor-button';
            extractionButton.textContent = 'Extract LinkedIn Posts';
            extractionButton.addEventListener('click', startExtraction);
            controls.appendChild(extractionButton);

            sidebar.appendChild(controls);
            console.log('Extraction button and controls added to the sidebar.');
        }
    }

    // --- Main Execution ---

    /**
     * Waits for the saved-posts list container to exist, then injects the controls.
     * A timeout while waiting, or an error raised while building the controls, is
     * logged to the console and not rethrown.
     */
    async function initializeExtractor() {
        try {
            await waitForElement(PAGE_LOAD_SELECTOR);
            console.log('LinkedIn saved posts page loaded. Attempting to add extraction button and controls.');
            addExtractionButton();
        } catch (error) {
            console.error('Failed to load LinkedIn saved posts page or element not found:', error);
        }
    }

    // Every failure is handled inside the function, so the returned promise never rejects.
    initializeExtractor();

})();