General URL Cleaner

Cleans URLs on various popular sites and makes sure those sites are loaded over HTTPS.

As of 2015-07-02. See the latest version.

You will need to install an extension such as Tampermonkey, Greasemonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @run-at document-start
// @name           General URL Cleaner
// @namespace      
// @description    Cleans URL's from various popular sites. Also, makes sure the sites are using HTTPS.
// @include        /^https?://[a-z]*.google.(com|ca|co.uk|com.au|co.nz|ac|ad|ae|com.af|com.ag|com.ai|al|am|co.ao|com.ar|as)\/.*$/
// @include        /^https?://[a-z]*.google.(us|at|az|ba|com.bd|be|bf|bg|com.bh|bi|bj|com.bn|com.bo|com.br|bs|bt|co.bw|by)\/.*$/
// @include        /^https?://[a-z]*.google.(com.bz|com.kh|cc|cd|cf|cat|cg|ch|ci|co.ck|cl|cm|cn|.cn|com.co|co.cr|com.cu)\/.*$/
// @include        /^https?://[a-z]*.google.(cv|com.cy|cz|de|dj|dk|dm|com.do|dz|com.ec|ee|com.eg|es|com.et|fi|com.fj|fm|fr)\/.*$/
// @include        /^https?://[a-z]*.google.(ga|ge|gf|gg|com.gh|com.gi|gl|gm|gp|gr|com.gt|gy|com.hk|hn|hr|ht|hu|co.id|ir)\/.*$/
// @include        /^https?://[a-z]*.google.(iq|ie|co.il|im|co.in|io|is|it|je|com.jm|jo|co.jp|co.ke|ki|kg|co.kr|com.kw|kz)\/.*$/
// @include        /^https?://[a-z]*.google.(la|com.lb|com.lc|li|lk|co.ls|lt|lu|lv|com.ly|co.ma|md|me|mg|mk|ml|com.mm|mn)\/.*$/
// @include        /^https?://[a-z]*.google.(ms|com.mt|mu|mv|mw|com.mx|com.my|co.mz|com.na|ne|com.nf|com.ng|com.ni|nl|no)\/.*$/
// @include        /^https?://[a-z]*.google.(com.np|nr|nu|com.om|com.pa|com.pe|com.ph|com.pk|pl|com.pg|pn|com.pr|ps|com.py)\/.*$/
// @include        /^https?://[a-z]*.google.(vg|pt|com.qa|ro|rs|ru|rw|com.sa|com.sb|sc|se|com.sg|sh|si|sk|com.sl|sn|sm|so)\/.*$/
// @include        /^https?://[a-z]*.google.(st|com.sv|td|tg|co.th|com.tj|tk|tl|tm|to|tn|com.tn|com.tr|tt|com.tw|co.tz)\/.*$/
// @include        /^https?://[a-z]*.google.(com.ua|co.ug|com.uy|co.uz|com.vc|co.ve|co.vi|com.vn|vu|ws|co.za|co.zm|co.zw)\/.*$/
// @include        /^https?://[a-z]*.amazon.(cn|in|co.jp|fr|de|it|nl|es|co.uk|ca|com.mx|com|com.au|com.br)\/.*$/
// @include        /^https?://[a-z]*.newegg.(com|ca|cn)\/.*$/
// @include        /^https?://[a-z]*.ebay.(com.au|at|be|ca|fr|de|com.hk|in|ie|co.il|it|com.my|nl|co.za|ph|pl|com.sg|co.za|es|ch|co.th|co.uk|com|vn)\/.*$/
// @include        /^https?://[a-z]*.bing.com\/.*$/
// @include        /^https?://[a-z]*.youtube.com\/.*$/
// @include        /^https?://[a-z]*.dealtime.com\/.*$/
// @exclude        https://apis.google.com/*
// @exclude        https://www.google.com/recaptcha/api2/*
// @version        1.9.0.10
// @license        GPL version 3 or any later version; http://www.gnu.org/copyleft/gpl.html
// ==/UserScript==

// Site-detection patterns. They are created once, when the script loads, so
// that cleanUrl() can reuse the same RegExp objects for every link it inspects.
var bing = /^https?:\/\/www\.bing\.(.+?)\/search\?/;                                  // Bing search results
var google = /^https?:\/\/[a-z]*\.google\.(.+?)\/[a-z]*\?/;                           // any Google page that carries a query string
var googleImageRedirect = /^https?:\/\/www\.google\.(.+?)\/url\?/;                    // Google "/url?" redirect links
var youtube = /^https?:\/\/www\.youtube\.com\/watch/;                                 // YouTube watch pages
var ebay = /^https?:\/\/www\.ebay\.(.+?)\/itm/;                                       // eBay item pages
var ebaySearch = /^https?:\/\/www\.ebay\.(.+?)\/sch\//;                               // eBay search results
var amazon = /^https?:\/\/www\.amazon\..*\/dp\//;                                     // Amazon product pages
var newegg = /^http:\/\/www\.newegg\.(com|ca)\/Product\/Product\.aspx/;               // Newegg product pages (plain http only)
var dealtime = /http:\/\/stat\.dealtime\.com\/DealFrame\/DealFrame\.cmp\?/;           // DealTime redirect frames (not anchored to the start)

// Clean the current page URL. If cleaning changed anything, navigate to the
// cleaned address; location.replace() keeps the dirty URL out of the history.
var newPageUrl = cleanUrl(document.URL);
if (newPageUrl !== document.URL) location.replace(newPageUrl);

// Cleans links on the page
var links = document.links; // live collection, so it is filled in as the document is parsed
var excludeLinks = new RegExp(/(^$|^javascript\:|^mailto\:|^data\:)/); // don't do anything with links that are blank, javascript, email addresses, data

// Google result pages need the extra "onmousedown" redirect removal; every
// other site only needs the standard link cleaning.
var isGooglePage = google.test(newPageUrl);
var linkCleaner = isGooglePage ? cleanGooglePageLinks : cleanPageLinks;

if (document.readyState === 'loading') {
    // Normal case for "@run-at document-start": wait until the links exist.
    document.addEventListener('DOMContentLoaded', linkCleaner, false);
}
else {
    // Some script managers inject late, after DOMContentLoaded has already
    // fired; a listener added now would never run, so clean right away.
    // The cleaners call this.removeEventListener(), hence the explicit
    // `document` receiver.
    linkCleaner.call(document);
}

if (isGooglePage) {
    // addEventListener (rather than assigning window.onhashchange) so that
    // neither this script nor the page can overwrite the other's handler.
    window.addEventListener('hashchange', googleInstant, false);
}

// Standard link cleaning function
/**
 * Standard link cleaning: runs every link in the shared `links` collection
 * through cleanUrl(), skipping those matched by `excludeLinks`.
 *
 * Written to be used as a DOMContentLoaded listener: once finished it
 * unregisters itself from `this` (the object the listener was attached to).
 */
function cleanPageLinks() {
    // Walk from the last link down to the first.
    var index = links.length;
    while (index > 0) {
        index -= 1;
        var anchor = links[index];
        if (!excludeLinks.test(anchor.href)) {
            anchor.href = cleanUrl(anchor.href);
        }
    }
    // One pass is enough, so drop the listener again.
    this.removeEventListener('DOMContentLoaded', cleanPageLinks, false);
}

// Google search results link cleaning function
/**
 * Link cleaning for Google result pages. Same as the standard cleaning, but
 * each processed link first loses its inline "onmousedown" attribute, which
 * is what rewrites a result link into a redirect when it is clicked.
 *
 * Written to be used as a DOMContentLoaded listener: once finished it
 * unregisters itself from `this` (the object the listener was attached to).
 */
function cleanGooglePageLinks() {
    // Walk from the last link down to the first.
    var index = links.length;
    while (index > 0) {
        index -= 1;
        var anchor = links[index];
        if (!excludeLinks.test(anchor.href)) {
            anchor.removeAttribute('onmousedown'); // stop the click-time redirect rewrite
            anchor.href = cleanUrl(anchor.href);   // then clean the target like any other link
        }
    }
    // One pass is enough, so drop the listener again.
    this.removeEventListener('DOMContentLoaded', cleanGooglePageLinks, false);
}

// Google Instant document URL cleaning - if the search terms change, remove the extra stuff.
/**
 * Google Instant document URL cleaning: when the search terms change (Google
 * puts the new query in the URL hash), rebuild the page URL so that it holds
 * only the new terms, and navigate to it.
 *
 * Nothing is rewritten when
 *  - the hash is an image selection ("#imgrc=") in image searches,
 *  - the URL has no hash at all (e.g. navigating back to a hash-less URL), or
 *  - the hash is empty, since there are no new terms to search for.
 */
function googleInstant() {
    var currentUrl = document.URL;
    if (currentUrl.includes('#imgrc=')) return; // Don't rewrite anything if an image is clicked in image searches

    var hashAt = currentUrl.indexOf('#');
    if (hashAt === -1) return; // no hash: nothing to turn into a query

    // The string after the hash, containing the new search terms
    var newSearchString = currentUrl.slice(hashAt + 1);
    if (newSearchString === '') return; // bare "#": keep the current query instead of wiping it

    // Remake the full URL with only the new search terms. A replacer function
    // is used so that "$&", "$'" etc. inside the search terms are inserted
    // literally instead of being expanded as replacement patterns.
    var newSearchUrl = currentUrl.replace(/search\?.*/, function () {
        return 'search?' + newSearchString;
    });
    location.replace(newSearchUrl);
}

// Main function for cleaning the url's
/**
 * Percent-decodes `encoded`.
 *
 * @param {string} encoded - a percent-encoded URL component
 * @returns {?string} the decoded text, or null when `encoded` contains a
 *     malformed escape sequence (decodeURIComponent would throw URIError)
 */
function decodeOrNull(encoded) {
    try {
        return decodeURIComponent(encoded);
    } catch (err) {
        if (err instanceof URIError) return null; // malformed input: let the caller keep the original URL
        throw err;
    }
}

/**
 * Returns "<scheme>//<host>" of an absolute URL, e.g. "https://www.ebay.com".
 * Plain splitting is used instead of the URL class so nothing gets normalised.
 *
 * @param {string} url - absolute URL
 * @returns {string}
 */
function urlOrigin(url) {
    var parts = url.split('/'); // ["https:", "", "host", ...]
    return parts[0] + '//' + parts[2];
}

/**
 * Removes the utm_source / utm_medium / utm_campaign parameters from a URL.
 *
 * - A literal "&amp;" (an HTML-escaped separator that leaked into the URL) is
 *   treated as "&".
 * - Only whole parameters are removed; a parameter whose name merely ends in
 *   "utm_source" is kept.
 * - The "#fragment" is never consumed.
 * - If the removed parameters were the only ones, the now useless "?" goes too.
 *
 * @param {string} url
 * @returns {string}
 */
function stripUtmParams(url) {
    return url
        .replace(/&amp;/g, '&')
        .replace('?', '?&') // temporarily put an "&" after the "?" so every parameter starts with "&"
        .replace(/&utm_(?:source|medium|campaign)=[^&#]*/g, '')
        // Undo the temporary "&". If it is gone, every parameter was removed
        // and the "?" is now followed by "#" or the end: drop the "?" as well.
        .replace(/\?(&?)/, function (match, marker) { return marker ? '?' : ''; });
}

/**
 * Main function for cleaning the url's.
 *
 * Picks the first site pattern (see the precompiled regexes at the top of the
 * script) that matches `oldurl` and applies that site's cleaning rule, then
 * strips utm_* tracking parameters from whatever came out.
 *
 * Whenever the expected piece (video id, item number, ASIN, redirect target,
 * ...) cannot be found in the URL, or a redirect target cannot be decoded,
 * the URL is left as it was rather than being rebuilt from a failed match.
 *
 * @param {string} oldurl - absolute URL of a link or of the current page
 * @returns {string} the cleaned URL; equal to `oldurl` when nothing applied
 */
function cleanUrl(oldurl) {
    var newurl = oldurl;
    var found;
    var decoded;
    switch (true) {
        case googleImageRedirect.test(oldurl):
            // The real destination is the percent-encoded "url" parameter;
            // everything after it (psig, ust, ved, ...) is tracking.
            found = /[?&]url=([^&#]*)/.exec(oldurl);
            decoded = found ? decodeOrNull(found[1]) : null;
            if (decoded) newurl = decoded; // also skips an empty "url="
            break;
        case google.test(oldurl):
            newurl = oldurl.replace('?', '?&') // temporarily put an "&" after the "?" so that the regex below will always match
                           .replace(/&(aqs|es_sm|channel|tab|num|hl|safe|tbo|sclient|sourceid|spell|client|complete|as_qdr|um|sa|authuser|rlz|cad|rct|ved|usg|site|source|oe|oq|ei|ie|dpr|gs_l|tbas|sei|biw|bih|gpsrc|gfe_rd|gws_rd)=[^&#]*/g, '')
                           .replace('?&', '?')
                           .replace(/^http:/, 'https:'); // always use https
            break;
        case bing.test(oldurl):
            newurl = oldurl.replace('?', '?&')
                           .replace(/&(go|qs|form|FORM|filt|pq|sc|sp|sk|qpvt)=[^&#]*/g, '')
                           .replace('?&', '?')
                           .replace(/^http:/, 'https:');
            break;
        case youtube.test(oldurl):
            // Require a real "v" parameter: "pov=1" must not be mistaken for
            // it, and pages like /watch_videos have none at all.
            found = /[?&](v=[^&]*)/.exec(oldurl);
            if (found) newurl = 'https://www.youtube.com/watch?' + found[1];
            break;
        case ebay.test(oldurl):
            // The item number is the path segment made of 11-14 digits.
            // The scheme is kept as it came in: forcing "http://" on a site
            // that redirects to https makes the page reload forever.
            found = /\/[0-9]{11,14}(?=[#?&\/]|$)/.exec(oldurl);
            if (found) newurl = urlOrigin(oldurl) + '/itm' + found[0];
            break;
        case ebaySearch.test(oldurl):
            newurl = oldurl.replace('?', '?&') // temporarily put an "&" after the "?" so that the regex below will always match
                           .replace(/&(_osacat|_odkw|_from|rt|_trksid|_sacat)=[^&#]*/g, '')
                           .replace('?&', '?');
            break;
        case amazon.test(oldurl):
            found = /\/dp\/[A-Z0-9]{10}/.exec(oldurl);
            if (found) newurl = 'https://' + oldurl.split('/')[2] + found[0];
            break;
        case newegg.test(oldurl):
            found = /\/Product\/Product\.aspx\?Item=[^&]*/.exec(oldurl);
            if (found) newurl = urlOrigin(oldurl) + found[0];
            break;
        case dealtime.test(oldurl):
            // Destination is the encoded "url" parameter, up to "linkin_id".
            decoded = decodeOrNull(oldurl.replace(/.*&url=/, '').replace(/(%26|)&linkin_id=.*$/, ''));
            if (decoded !== null) newurl = decoded.replace(/&(url|partner)=[^&]*/g, '');
            break;
        default:
            break;
    }
    return stripUtmParams(newurl);
}