MagicScraper

Scrapes and displays data from the web page based on rules.

Bu script direkt olarak kurulamaz. Başka scriptler için bir kütüphanedir ve meta yönergeleri içerir // @require https://update.greasyfork.org/scripts/471264/1222840/MagicScraper.js

Bu betiği kurabilmeniz için Tampermonkey, Greasemonkey ya da Violentmonkey gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

You will need to install an extension such as Tampermonkey to install this script.

Bu betiği kurabilmeniz için Tampermonkey ya da Violentmonkey gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

Bu betiği kurabilmeniz için Tampermonkey ya da Userscripts gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

You will need to install an extension such as Tampermonkey to install this script.

Bu komut dosyasını yüklemek için bir kullanıcı komut dosyası yöneticisi uzantısı yüklemeniz gerekecek.

(Zaten bir kullanıcı komut dosyası yöneticim var, kurmama izin verin!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(Zateb bir user-style yöneticim var, yükleyeyim!)

// ==UserScript==
// @name         MagicScraper
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  Scrapes and displays data from the web page based on rules.
// @author       aolko
// @match        *://*/*
// @grant        GM_addStyle
// ==/UserScript==

function magicScraper(rules, options = {}) {
    
  let scrapedData = {}; // Variable to store the scraped data
    
  function createDOMFromScrapedData(data, element, keepChildren) {
    for (const key in data) {
      if (Array.isArray(data[key])) {
        data[key].forEach(item => {
          const newElement = document.createElement(element.tagName);
          newElement.innerHTML = item;

          if (keepChildren && element.children.length > 0) {
            Array.from(element.children).forEach(child => {
              newElement.appendChild(child);
            });
          }

          element.appendChild(newElement);
        });
      } else if (typeof data[key] === 'string') {
        const newElement = document.createElement(element.tagName);
        newElement.innerHTML = data[key];

        if (keepChildren && element.children.length > 0) {
          Array.from(element.children).forEach(child => {
            newElement.appendChild(child);
          });
        }

        element.appendChild(newElement);
      } else if (typeof data[key] === 'object') {
        const newElement = document.createElement(element.tagName);
        element.appendChild(newElement);
        createDOMFromScrapedData(data[key], newElement, keepChildren);
      }
    }
  }

  function matchPageOrDomain(pattern, current) {
    const escapedPattern = pattern.replace(/\./g, '\\.');
    const regex = new RegExp(`^${escapedPattern.replace('*', '.*')}$`, 'i');
    return regex.test(current);
  }

  function scrapeDataByRules(rulesObj, currentDomain, currentPage) {
    const domainKeys = Object.keys(rulesObj);
    let domainData = {};
    let pageData = {};

    for (const domainPattern of domainKeys) {
      if (matchPageOrDomain(domainPattern, currentDomain)) {
        domainData = rulesObj[domainPattern];
        break;
      }
    }

    if (currentPage && domainData.pages) {
      pageData = domainData.pages[currentPage] || {};
    }

    return Object.assign({}, domainData, pageData);
  }

  function loadExternalRules(externalRulesURL, currentDomain, currentPage, callback) {
    fetch(externalRulesURL)
      .then(response => response.json())
      .then(data => {
        const rulesObj = data.rules || {};
        const scrapedData = scrapeDataByRules(rulesObj, currentDomain, currentPage);
        callback(scrapedData);
      })
      .catch(err => {
        console.error('Error loading external rules:', err);
        callback({});
      });
  }

  function runScraping() {
    const currentDomain = window.location.hostname;
    const currentPage = window.location.pathname;

    let pageRules;

    if (typeof rules === 'string') {
      // Load external rules if the rules parameter is a URL string
      loadExternalRules(rules, currentDomain, currentPage, scrapedData => {
        pageRules = scrapedData;
        handleRules(pageRules);
      });
    } else {
      pageRules = scrapeDataByRules(rules, currentDomain, currentPage);
      handleRules(pageRules);
    }
  }

  function handleRules(pageRules) {
    if (Object.keys(pageRules).length === 0) {
      console.warn('No rules found for the current domain and page.');
      return;
    }

    const fragment = document.createDocumentFragment();
    const temporaryData = {};

    // Helper function to process nested rules
    function processNestedRules(rules, currentElement) {
      for (const key in rules) {
        const selector = rules[key];

        if (typeof selector === 'string') {
          // Handle single selector
          const elements = currentElement.querySelectorAll(selector);
          temporaryData[key] = Array.from(elements).map(element => {
            return {
              text: element.textContent,
              html: element.innerHTML,
            };
          });
        } else if (typeof selector === 'object') {
          // Handle nested rules recursively
          temporaryData[key] = [];
          const nestedElements = currentElement.querySelectorAll(key);
          nestedElements.forEach(nestedElement => {
            temporaryData[key].push({});
            processNestedRules(selector, nestedElement);
          });
        }
      }
    }

    // Process the top-level rules
    processNestedRules(pageRules, document);

    createDOMFromScrapedData(temporaryData, fragment, options.keepChildren);

    if (options.replaceBody) {
      document.body.innerHTML = '';
      document.body.appendChild(fragment);
    }

    // Store the scraped data in the variable
    scrapedData = temporaryData; // Update the correct variable with the scraped data
  }
  
  runScraping();
  
  // Expose the scraped data object for further use
  return scrapedData;
}