Greasy Fork is available in English.

Discussions » Creation Requests

Extract links from frame

§
Posted: 2022-08-02

Testing site: http://konjo.sub.jp/o-kami/img/main/gij/girl/00.htm
This code only extracts links from a portion of what is seen on the page (This is the edited version of the twitter link extractor, to extract links from general sites):

// ==UserScript==
// @name         Extract any links from any site
// @namespace    any site
// @version      0.2
// @description  try to take over the world!
// @include      *
// @exclude      https://twitter.com/*
// @exclude      https://www.smwcentral.net/*
// @exclude      https://github.com/*
// @grant        none
// ==/UserScript==


(function() {
    'use strict';
    const all = window.allLink = new Set();
    
    /**
     * Scans the current document for link-like values and prints every one that has
     * not been printed before.
     *
     * Three sources are inspected:
     *   - the `href` property of every element carrying an `href` attribute,
     *   - the `src` property of every element carrying a `src` attribute,
     *   - every `url(...)` token in the inline `background-image` of elements with a
     *     `style` attribute.
     *
     * Each new URL is stored in the shared `all` set (exposed as `window.allLink`) and
     * written to the console with the leading "h" of "http" removed, so the browser
     * console does not shorten it when the output is copied.
     *
     * Takes no arguments (it is also used as a scroll listener, whose event object is
     * ignored) and returns nothing.
     */
    function getLink() {
        // Stores `url` and prints it, unless it is empty or was already seen.
        // De-duplication is keyed on the same text that is printed (before the "h" is
        // dropped), so two elements pointing at the same URL yield a single line.
        const record = url => {
            if (url === "" || all.has(url)) {
                return;
            }
            all.add(url);
            console.log(url.replace(/^http/, "ttp"));
        };

        // href/src values are printed without their #fragment, so the fragment is
        // removed before the duplicate check as well. Non-string values (for example
        // the SVGAnimatedString returned by an SVG <a>.href, or `undefined` on an
        // element whose `src` attribute is not reflected as a property) are skipped
        // because calling .replace() on them would throw and abort the whole scan.
        const recordAttribute = value => {
            if (typeof value === "string") {
                record(value.replace(/#.*$/, ""));
            }
        };

        Array.from(document.querySelectorAll('[href]')).forEach(node => { //"a href" links
            recordAttribute(node.href);
        });
        Array.from(document.querySelectorAll('[src]')).forEach(node => { //Images and other src-bearing elements
            recordAttribute(node.src);
        });
        Array.from(document.querySelectorAll('[style]')).forEach(node => { //Background images
            const backgroundImage = node.style ? node.style.backgroundImage : "";
            if (!backgroundImage) {
                return;
            }
            // Only url(...) tokens are links; "none" and gradients contain none.
            // The regex is created per element because a /g regex keeps state in lastIndex.
            const urlToken = /url\(\s*(["']?)(.*?)\1\s*\)/g;
            let match;
            while ((match = urlToken.exec(backgroundImage)) !== null) {
                record(match[2]);
            }
        });
    }
    getLink();
    window.addEventListener('scroll',getLink);
})();

If you inspect the page, you'll notice it uses a <frameset> tag and actually loads multiple HTML files together. Could someone write a script based on this one that extracts links not just from the main HTML file but also from all — and I mean ALL — of the other HTML files that were loaded?

§
Posted: 2022-08-02
Edited: 2022-08-02

You'll notice that this removes the "h" in http/https. That is because Firefox's console log and browser log truncate a URL's text when it is copied and/or saved as a text file, replacing the middle portion of the string with an ellipsis, which invalidates the URL.

§
Posted: 2022-08-13

Nevermind. Use window.frames:

(function() {
	'use strict';
	const all = window.allLink = new Set();
	/**
	 * Collects links from one document and prints every one that has not been printed yet.
	 *
	 * Sources inspected:
	 *   - `href` of every <a> element,
	 *   - `src` of every <img> element,
	 *   - every `url(...)` token in the inline `background-image` of any element.
	 *
	 * Each candidate is passed through FormatURL; valid, unseen results are stored in the
	 * shared `all` set and logged with the leading "h" of "http" removed.
	 *
	 * @param {Document} PageDocument - The document (main window or a frame) to scan.
	 */
	function getLink(PageDocument) {
		// Validates one candidate, de-duplicates it and logs it.
		// `stripFragment` mirrors the printed form: anchors and images are shown without
		// their #fragment, so the fragment is removed before the duplicate check too;
		// otherwise "page#a" and "page#b" would both print the same line.
		const record = (candidate, stripFragment) => {
			if (typeof candidate !== "string") {
				// e.g. an SVG <a> exposes href as an SVGAnimatedString object, which has
				// no .replace() and would make the string handling throw.
				return;
			}
			const URLString = FormatURL(candidate);
			if (!URLString[1]) {
				return;
			}
			const url = stripFragment ? URLString[0].replace(/#.*$/, "") : URLString[0];
			if (all.has(url)) {
				return;
			}
			all.add(url);
			console.log(url.replace(/^http/, "ttp"));
		};

		// Inline styles keep URLs as written, so relative ones are resolved against the
		// document's base URL. If that fails (no usable base), the raw text is used as is.
		const resolve = raw => {
			try {
				return new URL(raw, PageDocument.baseURI).href;
			} catch (error) {
				return raw;
			}
		};

		Array.from(PageDocument.getElementsByTagName('a')).forEach(link => { //"a href" links
			record(link.href, true);
		});
		Array.from(PageDocument.getElementsByTagName('img')).forEach(link => { //Images
			record(link.src, true);
		});
		Array.from(PageDocument.getElementsByTagName('*')).forEach(link => { //Background images
			const backgroundImage = link.style ? link.style.backgroundImage : "";
			if (!backgroundImage) {
				return;
			}
			// Pull out each url(...) token instead of slicing fixed offsets: the value may
			// be "none", a gradient, or a comma-separated list of several images.
			// The regex is created per element because a /g regex keeps state in lastIndex.
			const urlToken = /url\(\s*(["']?)(.*?)\1\s*\)/g;
			let match;
			while ((match = urlToken.exec(backgroundImage)) !== null) {
				if (match[2] !== "") {
					record(resolve(match[2]), false);
				}
			}
		});
	}
	
	/**
	 * Validates a candidate URL string and normalises it to carry an explicit scheme.
	 *
	 * Rules, in order:
	 *   - non-strings, `javascript:` URLs, "none" and "" are rejected and returned untouched;
	 *   - a value starting with one or more slashes has them replaced by "https://"
	 *     (this suits protocol-relative "//host/path"; a root-relative "/path" cannot be
	 *     resolved here because no base URL is available, so callers should resolve
	 *     relative URLs before calling);
	 *   - a value that already has a scheme (http:, https:, mailto:, data:, ftp:, ...) is
	 *     kept as is, so it is not corrupted into e.g. "https://mailto:...";
	 *   - anything else is treated as scheme-less and gets "https://" prepended.
	 *
	 * @param {string} rawURL - Candidate URL text.
	 * @returns {Array} A two-element array `[url, isValid]`: the (possibly normalised)
	 *     URL and whether it should be treated as a real link.
	 */
	function FormatURL(rawURL) {
		// Values without string methods (e.g. the SVGAnimatedString an SVG <a> exposes as
		// href) cannot be normalised; report them as invalid instead of throwing.
		if (typeof rawURL !== "string") {
			return [rawURL, false];
		}
		// No trailing ".*$": without it, javascript: URLs containing a newline are caught too.
		if (/^\s*javascript:/i.test(rawURL) || rawURL === "none" || rawURL === "") {
			return [rawURL, false];
		}
		if (/^\/+/.test(rawURL)) {
			return [rawURL.replace(/^\/+/, "https://"), true];
		}
		// RFC 3986 scheme syntax: a letter followed by letters, digits, "+", "." or "-", then ":".
		if (/^[a-z][a-z0-9+.-]*:/i.test(rawURL)) {
			return [rawURL, true];
		}
		return [`https://${rawURL}`, true];
	}
	
	//Code that executes when the MAIN WINDOW loads the page
	//Please note that this does not reflect the loading of subwindows when you open links in a way that does not reload the main window
	//Since this executes ONCE when the main window loads.

	window.addEventListener('load',getLink.bind(null, document)); //Get links on the main window when page finishes loading
	window.addEventListener('load', (event) => {
		let CurrentDocument = document
		window.addEventListener('scroll',getLink.bind(null, CurrentDocument)); //Get links on the main window when scrolling (when page loads as you scroll; infinute scroll)
		if (window.frames.length) { //Loop through every window and extract their links too (NOTE: will not extract recursively)
			for (let i=0;i
§
Posted: 2022-08-13

However, is this safe? As in, I wrote this, and I am worried that there may be a vulnerability in there. Can someone vet this?

§
Posted: 2022-08-18

Your whole code isn't there, but it looks good.

No one would really try to hack your script haha, especially if you don't publish it.

Post reply

Sign in to post a reply.