Discussions » Creation Requests
Extract links from frame
You'll notice that this removes the "h" in http/https. That is because Firefox's console log and browser log truncate a URL's text when it is copied and/or saved as a text file, replacing the middle portion of the string with an ellipsis, which invalidates the URL.
Never mind. Use window.frames:
(function() { 'use strict'; const all = window.allLink = new Set(); function getLink(PageDocument) { Array.from(PageDocument.getElementsByTagName('a')).forEach(link=>{ //"a href" links let URLString = FormatURL(link.href) if(!all.has(URLString[0])&&URLString[1]) { all.add(URLString[0]); console.log((URLString[0]).replace(/^http/, "ttp").replace(/#.*$/, "")); } }); Array.from(PageDocument.getElementsByTagName('img')).forEach(link=>{ //Images let URLString = FormatURL(link.src) if(!all.has(URLString[0])&&URLString[1]) { all.add(URLString[0]); console.log((URLString[0]).replace(/^http/, "ttp").replace(/#.*$/, "")); } }); Array.from(PageDocument.getElementsByTagName('*')).forEach(link=>{ //Background images let URLString = FormatURL(link.style.backgroundImage.slice(5, -2)) if(!all.has(URLString[0])&&URLString[1]) { all.add(URLString[0]); console.log((URLString[0]).replace(/^http/, "ttp")); } }); } function FormatURL(String) { let IsStringValid = true if ((/^\s*javascript:.*$/.test(String))||String=="none"||String=="") { IsStringValid = false } if (IsStringValid) { if (/^\/+/.test(String)) { String = String.replace(/^\/+/, "https://") } else if (/^(?!http(s)?:\/\/)/.test(String)) { String = String.replace(/^/, "https://") } } return [String, IsStringValid] }; //Code that executes when the MAIN WINDOW loads the page //Please note that this does not reflect the loading of subwindows when you open links in a way that does not reload the main window //Since this executes ONCE when the main window loads. 
window.addEventListener('load',getLink.bind(null, document)); //Get links on the main window when page finishes loading window.addEventListener('load', (event) => { let CurrentDocument = document window.addEventListener('scroll',getLink.bind(null, CurrentDocument)); //Get links on the main window when scrolling (when page loads as you scroll; infinute scroll) if (window.frames.length) { //Loop through every window and extract their links too (NOTE: will not extract recursively) for (let i=0;i
However, is this safe? As in, I wrote this, and I am worried that there may be a vulnerability in there. Can someone vet this?
Your whole code isn't there, but it looks good.
No one would really try to hack your script haha, especially if you don't publish it.
Testing site: http://konjo.sub.jp/o-kami/img/main/gij/girl/00.htm
This code only extracts links from a portion of what is seen on the page (this is an edited version of the Twitter link extractor, adapted to extract links from general sites):
If you inspect the page, you'll notice that it uses a <frameset> tag and actually loads multiple HTML files together. So can someone write code based on this that extracts links not just from the main HTML file but also from all (and I mean ALL) of the other HTML files that were loaded?