Wayback Machine Small Bug Fixes

Fixes encoded ampersands on Wayback Machine's captures graph and problems that arise when trailing slashes are missing in an URL and other small issues universally present in all crawled sites

12.06.2016 itibariyledir. En son verisyonu görün.

Bu betiği kurabilmeniz için Tampermonkey, Greasemonkey ya da Violentmonkey gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

Bu betiği kurabilmeniz için Tampermonkey ya da Violentmonkey gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

Bu betiği kurabilmeniz için Tampermonkey ya da Violentmonkey gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

Bu betiği kurabilmeniz için Tampermonkey ya da Userscripts gibi bir kullanıcı betiği eklentisini kurmanız gerekmektedir.

You will need to install an extension such as Tampermonkey to install this script.

Bu komut dosyasını yüklemek için bir kullanıcı komut dosyası yöneticisi uzantısı yüklemeniz gerekecek.

(Zaten bir kullanıcı komut dosyası yöneticim var, kurmama izin verin!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(Zateb bir user-style yöneticim var, yükleyeyim!)

// ==UserScript==
// @name          Wayback Machine Small Bug Fixes
// @namespace     DoomTay
// @description   Fixes encoded ampersands on Wayback Machine's captures graph and problems that arise when trailing slashes are missing in an URL and other small issues universally present in all crawled sites
// @version       1.3.2
// @include       http://web.archive.org/web/*
// @include       http://wayback.archive.org/web/*
// @include       https://web.archive.org/web/*
// @include       https://wayback.archive.org/web/*
// @run-at        document-start
// @exclude       /\*/
// @grant         none

// ==/UserScript==

var lastFolder = window.location.href.substring(window.location.href.lastIndexOf("/") + 1);
var pics = document.images;
var backgrounds = document.querySelectorAll("[background]");
var shouldHaveTrailingSlash = (window.location.href.lastIndexOf(".") < window.location.href.lastIndexOf("/") || window.location.href.substring(window.location.href.lastIndexOf("//") + 2) == lastFolder) && !lastFolder.includes("?");
var hasTrailingSlash = window.location.href.endsWith("/");
var domain = window.location.href.substring(0,window.location.href.indexOf("/",window.location.href.lastIndexOf("//") + 2));
var isInDomain = window.location.href == domain || window.location.href == domain + "/";
var timestamp = /web\/(\d{1,14})/.exec(window.location.href)[1];

if(!document.getElementsByTagName("base")[0])
{
	var base = document.createElement("base");
	if(shouldHaveTrailingSlash && !hasTrailingSlash) base.href = window.location.href + "/";
	else if((!hasTrailingSlash && !shouldHaveTrailingSlash) || hasTrailingSlash) base.href = document.baseURI;
	else base.href = domain + "/";
	document.head.appendChild(base);
}

function relativeToAbsolute(bgURL)
{
	var img = new Image();
	img.src = bgURL;
	return img.src;
}

function fixURL(URL)
{
	var fixedURL;
	if(URL.includes("archive.org")) fixedURL = URL.substring(URL.indexOf(document.domain) + document.domain.length);
	if(URL.startsWith("/")&& !URL.startsWith("/web/")) fixedURL = domain + URL;
	else if (URL.startsWith("../") && isInDomain) fixedURL = URL.substring(3);
	else fixedURL = domain.substring(0,domain.indexOf("/http") + 1) + URL.substring(URL.indexOf("/http") + 1);
	return fixedURL;
}

function fixImage(pic)
{
	var oldSrc = pic.getAttribute("src");
	if(!oldSrc) return;
	pic.setAttribute("src",fixURL(oldSrc));
}

function fixBackground(node)
{
	if(node.background) node.background = fixURL(node.background);
	else if(node.getAttribute("background")) node.setAttribute("background",fixURL(node.getAttribute("background")));
}

var observer = new MutationObserver(function(mutations) {
	mutations.forEach(function(mutation) {
		if(mutation.target.nodeName != "HEAD" && mutation.target.nodeName != "BODY") checkMutations(mutation.target);
		for(var i = 0; i < mutation.addedNodes.length; i++) 
		{
			if(mutation.addedNodes[i].nodeName == "BASE")
			{
				if(base) document.head.removeChild(base);
				Array.prototype.forEach.call(pics, function(pic) {
					pic.src = pic.src;
				});
				Array.prototype.forEach.call(backgrounds, function(background) {
					background.setAttribute("background",background.getAttribute("background"));
				});
			}
			else if(mutation.addedNodes[i].nodeName == "SCRIPT" && mutation.addedNodes[i].innerHTML.includes("__wm.bt()"))
			{
				//As far as we're concerned, it's guaranteed that the script we're trying to find here is immediately two nodes before this one, but I'm not sure how else to go about finding it 
				var toolbarScript = Array.prototype.find.call(document.scripts,node => node.innerHTML.includes("wbCurrentUrl") && node.innerHTML.includes("&amp;"));
				if(toolbarScript)
				{
					var replacement = document.createElement("script");
					replacement.type = toolbarScript.type;
					var currentURL = toolbarScript.innerHTML.match(/wbCurrentUrl = "(.+)";/)[1];
					replacement.innerHTML = toolbarScript.innerHTML.replace(currentURL,decodeHTML(currentURL));
					toolbarScript.parentNode.replaceChild(replacement,toolbarScript);
				}
			}
			else
			{
				checkMutations(mutation.addedNodes[i]);
				if(mutation.addedNodes[i].nodeType == 1)
				{
					var children = mutation.addedNodes[i].getElementsByTagName("*");
					for(var c = 0; c < children.length; c++)
					{
						checkMutations(children[c]);
					}
				}
			}
		}
	});
});

function decodeHTML(text) {
    var textarea = document.createElement("textarea");
    textarea.innerHTML = text;
    return textarea.value;
}

function checkMutations(node)
{
	if(document.getElementById("wm-ipp") && document.getElementById("wm-ipp").contains(node)) return;
	if(node.nodeType == 1)
	{
		if(node.nodeName == "IMG" && !node.src.includes("/web/") && !node.src.includes("data:"))
		{
			observer.disconnect();
			fixImage(node);
			observer.observe(document.body || document, config);
		}
		var bg = node.background || node.getAttribute("background");
		if(bg && !relativeToAbsolute(bg).includes(document.domain + "/web"))
		{
			observer.disconnect();
			fixBackground(node);
			observer.observe(document.body || document, config);
		}
		if(node.nodeName == "A" || node.nodeName == "AREA")
		{
			if(node.href.includes("/http://web.archive.org/web/")) node.href = node.href.substring(node.href.indexOf("/http://web.archive.org/web/") + 1);
			else if(node.getAttribute("href").startsWith("/")) node.setAttribute("href",fixURL(node.getAttribute("href")));
		}
	}
}

var config = { attributes: true, childList: true, subtree: true };
observer.observe(document.body || document, config);