OverDrive Transcriber

Transcribes books you read on OverDrive for offline reading

You will need to install an extension such as Tampermonkey, Greasemonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name OverDrive Transcriber
// @description Transcribes books you read on OverDrive for offline reading
// @namespace Violentmonkey Scripts
// @match *://*.overdrive.com/*
// @match *://*.greasyfork.org/*
// @grant GM_setValue
// @grant GM_getValue
// @grant GM_listValues
// @grant GM_deleteValue
// @grant GM_xmlhttpRequest
// @run-at document-start
// @version 0.1.3
// @author qsniyg
// ==/UserScript==

(function() {
    // Matches request paths of book content pages: an .htm/.html/.xhtm/.xhtml
    // file name immediately followed by a "?cmpt=" query.
    var content_html_regex = /\.x?html?\?cmpt=/;
    // Captures the file name (without directory or extension) of a content page.
    var content_html_match = /^(?:.*\/)?([^/.]*)\.x?html?/;

    // Number of image downloads that have been started and not yet completed.
    var img_requests = 0;

    // Every user-configurable setting, with its display name and default value.
    var default_options = {
        images: {
            name: "Include images",
            default: true
        }
    };

    // Effective settings: the stored value (saved as JSON under
    // "SETTINGS:<name>") when there is one, otherwise the default.
    var options = {};
    for (var option_name in default_options) {
        var stored = GM_getValue("SETTINGS:" + option_name);
        options[option_name] = (stored === undefined)
            ? default_options[option_name].default
            : JSON.parse(stored);
    }
    
    /**
     * Runs f once the DOM is available: right away when the document is
     * already "interactive" or "complete", otherwise on DOMContentLoaded.
     *
     * @param {Function} f  Callback to run.
     */
    function onload(f) {
        var state = document.readyState;
        var dom_ready = (state === "interactive" || state === "complete");

        if (!dom_ready) {
            document.addEventListener("DOMContentLoaded", f, false);
            return;
        }

        f();
    }
    
    /**
     * Applies the script's button look to an element via inline styles.
     *
     * @param {Element} el  Element to style (its .style is written to).
     * @param {string}  bg  CSS background value for the button.
     */
    function makebutton(el, bg) {
        var declarations = [
            ["padding", ".5em 1em"],
            ["background", bg],
            ["color", "white"],
            ["textDecoration", "none"],
            ["display", "inline-block"],
            ["margin", ".3em .5em"]
        ];

        for (var n = 0; n < declarations.length; n++) {
            el.style[declarations[n][0]] = declarations[n][1];
        }
    }
    
    /**
     * Resolves x against url. Thin alias for urljoin(url, x).
     *
     * @param {string} url  Base URL.
     * @param {string} x    Reference to resolve.
     * @returns {string} Whatever urljoin returns for the pair.
     */
    function fullurl(url, x) {
        var resolved = urljoin(url, x);
        return resolved;
    }
    
    /**
     * Resolves the reference b against the absolute base URL a.
     *
     * Handled forms of b:
     *   ""                 -> a itself
     *   "scheme://..."     -> b unchanged (already absolute)
     *   "//host/path"      -> b with a's scheme
     *   "/path"            -> b on a's scheme and host
     *   anything else      -> b relative to the directory of a's path
     *
     * "." and ".." segments are not collapsed.
     *
     * @param {string} a  Absolute base URL ("scheme://host[/path][?query][#fragment]").
     * @param {string} b  Reference to resolve.
     * @returns {string} The resolved URL.
     */
    function urljoin(a, b) {
        if (b.length === 0)
            return a;

        // Only a scheme at the very start makes b absolute; a "://" further in
        // (e.g. inside a query string) must not.
        if (/^[a-z][a-z0-9+.-]*:\/\//i.test(b))
            return b;

        // The query string and fragment are not part of the base path, and may
        // themselves contain slashes that would confuse the directory lookup.
        var base = a.replace(/[?#].*$/, "");

        var protocol_split = base.split("://");
        var protocol = protocol_split[0];
        var domain = protocol_split[1].split("/")[0];
        var start = protocol + "://" + domain;

        if (b.slice(0, 2) === "//")
            return protocol + ":" + b;
        if (b.slice(0, 1) === "/")
            return start + b;

        // A base without any path ("http://host") resolves against its root
        if (base === start)
            return start + "/" + b;

        // Replace everything after the last slash of the base path with b
        return base.replace(/\/[^/]*$/, "/") + b;
    }

    
    // OverDrive section
    function overdrive() {
        var transcribe_btn;
        var transcribed = false;
        
        /**
         * Extracts the content of one book page and saves it, together with
         * the page title, the book metadata and (when the "images" option is
         * on) the images it references, into userscript storage.
         *
         * Storage keys written:
         *   TITLE:<book id>                  markup of the page's <title>
         *   INFO:<book id>                   JSON of unsafeWindow.bData
         *   CONTENTS:<book id>:<page id>     page markup
         *   IMAGE:<book id>:<image src>      image bytes as a binary string
         *
         * @param {string} url      URL the page was requested from; its file
         *                          name (without extension) becomes the page id.
         * @param {string} content  Markup returned for that URL.
         */
        function parse(url, content) {
            var url_match = url.match(content_html_match);
            if (!url_match)
                // shouldn't happen
                return;

            // Read the book data through unsafeWindow, like the rest of the script
            var book_info = unsafeWindow.bData;
            if (!book_info)
                return;
            var book_id = book_info["-odread-buid"];
            var var_id = url_match[1];

            var html = document.createElement("html");
            html.innerHTML = content;

            var titleel = html.getElementsByTagName("title")[0];
            if (!titleel)
                return;

            GM_setValue("TITLE:" + book_id, titleel.innerHTML.toString());
            GM_setValue("INFO:" + book_id, JSON.stringify(book_info));

            // Some pages carry their content as a base64 string passed to a
            // parent.* call inside a <script>; when several scripts match, the
            // last one wins.
            var contents;
            var scripts = html.getElementsByTagName("script");
            var regex = /^ *parent\.[^;(]*\(.*?['"](.*?)['"]/;
            for (var i = 0; i < scripts.length; i++) {
                var matchobj = scripts[i].innerHTML.match(regex);
                if (matchobj) {
                    contents = unsafeWindow.atob(matchobj[1]);
                }
            }

            // Otherwise the page body itself is the content
            if (contents === undefined) {
                var body = html.getElementsByTagName("body")[0];
                if (!body)
                    return;
                body = body.cloneNode(true);
                body.removeAttribute("xmlns");
                body.removeAttribute("onload");
                contents = body.outerHTML;
            }

            GM_setValue("CONTENTS:" + book_id + ":" + var_id, contents);

            if (!options.images)
                return;

            var full_url = fullurl(document.location.href, url);
            var origin = document.location.href.replace(/^([a-z]+:\/\/[^/]*).*?$/, "$1");

            // Downloads one image and stores its bytes. img_requests is
            // decremented exactly once however the request ends, so a failed
            // download cannot keep the button from ever reaching "Done".
            function fetch_image(src) {
                var finished = false;
                function finish() {
                    if (finished)
                        return;
                    finished = true;
                    img_requests--;
                    if (img_requests === 0 && transcribed) {
                        transcribe_btn.innerHTML = "Done";
                    }
                }

                img_requests++;
                GM_xmlhttpRequest({
                    method: 'GET',
                    url: fullurl(full_url, src),
                    // Keeps the response bytes intact in the low byte of each character
                    overrideMimeType: 'text/plain; charset=x-user-defined',
                    headers: {
                        "Origin": origin,
                        "Referer": full_url
                    },
                    onload: function (resp) {
                        try {
                            if (resp.status !== 200 && resp.status !== 304) {
                                console.dir(resp);
                                return;
                            }

                            console.log(src);
                            var raw = resp.responseText;
                            var bytes = "";
                            for (var k = 0; k < raw.length; k++) {
                                bytes += String.fromCharCode(raw.charCodeAt(k) & 0xff);
                            }
                            // The key is the src with any "scheme://host/" prefix removed
                            GM_setValue("IMAGE:" + book_id + ":" + src.replace(/.*?:\/\/[^/]*\/*/, ""), bytes);
                        } finally {
                            finish();
                        }
                    },
                    onerror: function (resp) {
                        console.dir(resp);
                        finish();
                    },
                    onabort: finish,
                    ontimeout: finish
                });
            }

            // Parse in a detached document so the images are not loaded by the page
            var doc = document.implementation.createHTMLDocument("preview");
            var newhtml = doc.createElement("html");
            newhtml.innerHTML = contents;

            var images = newhtml.getElementsByTagName("img");
            for (var j = 0; j < images.length; j++) {
                var src = images[j].getAttribute("src");
                // Nothing to download for a missing src or already-inlined data
                if (!src || /^data:/i.test(src))
                    continue;
                fetch_image(src);
            }
        }

        // Wrap XMLHttpRequest.prototype.open so that every completed request
        // for a book content page is handed to parse(). The wrapper only
        // observes: the native open() is always called with the original
        // arguments and its outcome (return value or exception) is passed on.
        var original_open = window.XMLHttpRequest.prototype.open;
        window.XMLHttpRequest.prototype.open = function(method, url) {
            // url may be missing or a URL object; use a string for matching only
            var url_string = (url === undefined || url === null) ? "" : String(url);

            if (content_html_regex.test(url_string)) {
                this.addEventListener("readystatechange", function() {
                    if (this.readyState !== 4)
                        return;
                    // responseText throws unless responseType is "" or "text"
                    if (this.responseType !== "" && this.responseType !== "text")
                        return;
                    parse(url_string, this.responseText);
                });
            }

            return original_open.apply(this, arguments);
        };

        /**
         * Feeds the current markup of an iframe's document to parse(), using
         * the iframe's src as the page URL.
         *
         * @param {HTMLIFrameElement} iframe  Frame whose document is read.
         */
        function run_iframe(iframe) {
            var frame_doc = iframe.contentDocument;
            if (!frame_doc) {
                frame_doc = iframe.contentWindow.document;
            }

            var frame_url = iframe.src;
            var markup = frame_doc.documentElement.innerHTML;
            parse(frame_url, markup);
        }

        /**
         * Would hand every content-page iframe to run_iframe(), both
         * immediately and on each later load. The unconditional return on the
         * first line switches this off, so the function currently does
         * nothing; the rest is kept for when scanning is re-enabled.
         */
        function find_iframes() {
            return;

            var frames = document.getElementsByTagName("iframe");
            Array.prototype.forEach.call(frames, function(frame, index) {
                if (!frame.src.match(content_html_regex))
                    return;

                // Looked up by position at load time, as the collection is live
                frames[index].onload = function() {
                    run_iframe(frames[index]);
                };

                run_iframe(frames[index]);
            });
        }
        
        /**
         * Requests every content page listed in unsafeWindow.bData.spine, one
         * after another. The requests go through XMLHttpRequest, so the open()
         * hook installed by this script passes each response to parse().
         *
         * el shows the progress: "Transcribing..." while running, "Done" once
         * all pages were requested and no image download is pending.
         *
         * @param {Element} el  Element whose innerHTML is used as status text.
         */
        function transcribe(el) {
            el.innerHTML = "Transcribing...";

            var info = unsafeWindow.bData;

            var i = 0;
            function do_request() {
                // Walk forward to the next content page; non-content entries
                // are skipped in the loop rather than by recursing.
                while (i < info.spine.length) {
                    var path = info.spine[i].path;
                    i++;
                    console.log(path);
                    if (!path.match(content_html_regex)) {
                        console.log("Skipping: " + path);
                        continue;
                    }

                    var oReq = new XMLHttpRequest();
                    // "loadend" fires once per request whether it succeeded,
                    // failed, timed out or was aborted, so a single bad page
                    // cannot stall the rest of the book.
                    oReq.addEventListener("loadend", do_request);
                    oReq.open("GET", path);
                    oReq.send();
                    return;
                }

                console.log("Done text");
                transcribed = true;

                if (img_requests === 0) {
                    el.innerHTML = "Done";
                }
            }

            do_request();
        }

        /**
         * Page setup for OverDrive: watches the root element for attribute and
         * child-list changes (calling find_iframes on each), runs find_iframes
         * once, and, when the page exposes bData, adds the "Transcribe" button
         * to the end of the body.
         */
        function start() {
            var observer = new MutationObserver(find_iframes);
            observer.observe(document.documentElement, {
                attributes: true,
                childList: true
            });
            find_iframes();

            // Without book data there is nothing to transcribe
            if (!unsafeWindow.bData)
                return;

            transcribe_btn = document.createElement("a");
            transcribe_btn.innerHTML = "Transcribe";
            transcribe_btn.href = "javascript:void(0)";
            transcribe_btn.onclick = function() {
                transcribe(transcribe_btn);
            };
            transcribe_btn.style.zIndex = 999999999;
            transcribe_btn.style.position = "absolute";
            makebutton(transcribe_btn, "#1070a0");

            var outer_div = document.createElement("div");
            outer_div.style.width = "100%";
            outer_div.appendChild(transcribe_btn);

            document.body.appendChild(outer_div);
        }

        onload(start);
    }
    
    // GreasyFork section
    /**
     * Renders the library manager into the script's Greasy Fork page: one row
     * per transcribed book (with preview, download and delete actions),
     * followed by the script's settings.
     *
     * Reads the TITLE:/INFO:/CONTENTS:/IMAGE: values from userscript storage
     * and writes SETTINGS:<name> values when a setting is toggled.
     */
    function greasyfork() {
        var META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />';

        // Returns what follows prefix in key, or null when key does not start with prefix.
        function strip_prefix(key, prefix) {
            if (key.indexOf(prefix) !== 0)
                return null;
            return key.slice(prefix.length);
        }

        // Own-property test, so names such as "constructor" are not found on the prototype.
        function has_own(obj, name) {
            return Object.prototype.hasOwnProperty.call(obj, name);
        }

        // Reads a book's stored metadata; null when it is missing or not valid JSON.
        function read_info(id) {
            try {
                return JSON.parse(GM_getValue("INFO:" + id));
            } catch (e) {
                console.log("Unreadable INFO for " + id, e);
                return null;
            }
        }

        // Derives a MIME type from the file extension found in src.
        function image_mime(src) {
            var ext = src.replace(/.*\.([a-zA-Z]*).*?$/, "$1").toLowerCase();
            if (ext === "jpg")
                return "image/jpeg";
            if (ext === "svg")
                return "image/svg+xml";
            return "image/" + ext;
        }

        // Builds (or rebuilds) the whole table inside #overdrive-insert.
        function start() {
            var insert = document.getElementById("overdrive-insert");

            if (!insert) {
                var addto = document.querySelector(".script-author-description");
                if (!addto) {
                    console.log("OverDrive Transcriber: .script-author-description not found");
                    return;
                }

                // Insert a node instead of rewriting addto.innerHTML, which
                // would re-create every existing element of the description.
                insert = document.createElement("div");
                insert.id = "overdrive-insert";
                addto.insertBefore(insert, addto.firstChild);
            }

            insert.innerHTML = "";

            var table = document.createElement("table");
            table.style.border = "1px solid black";
            table.style.background = "white";
            table.style.width = "100%";

            // Hidden first row that hosts the preview iframe
            var preview_iframe = document.createElement("iframe");
            preview_iframe.style.width = "100%";
            preview_iframe.style.display = "block";
            preview_iframe.style.border = "0";
            var preview_tr = document.createElement("tr");
            preview_tr.style.padding = 0;
            preview_tr.style.margin = 0;
            var preview_td = document.createElement("td");
            preview_td.style.padding = 0;
            preview_td.style.margin = 0;
            preview_td.style.borderBottom = "1px solid black";
            preview_td.style.display = "none";
            preview_td.setAttribute("colspan", 10);
            preview_td.appendChild(preview_iframe);
            preview_tr.appendChild(preview_td);
            table.appendChild(preview_tr);

            var keys = GM_listValues();
            // Storage also holds SETTINGS:/INFO:/... values; only TITLE: keys are books
            var title_keys = keys.filter(function(key) {
                return key.indexOf("TITLE:") === 0;
            });

            if (title_keys.length === 0) {
                var empty_tr = document.createElement("tr");
                var empty_td = document.createElement("td");
                empty_td.innerHTML = "<i>No books yet</i>";
                empty_tr.appendChild(empty_td);
                table.appendChild(empty_tr);
            }

            // Adds the table row for the book stored under the given TITLE: key.
            function add_book_row(key) {
                var title = GM_getValue(key);
                var id = key.slice("TITLE:".length);
                var info = read_info(id);
                var spine = (info && info.spine) || [];

                var tr = document.createElement("tr");
                tr.style.border = "1px solid black";

                // title is the stored innerHTML of the page's <title>, i.e. markup
                var name_td = document.createElement("td");
                name_td.innerHTML = "<b>" + title + "</b>";

                var contents_prefix = "CONTENTS:" + id + ":";
                var image_prefix = "IMAGE:" + id + ":";

                var size = 0;
                var items = {};
                var images = {};
                keys.forEach(function(stored_key) {
                    var image_name = strip_prefix(stored_key, image_prefix);
                    var item_name = strip_prefix(stored_key, contents_prefix);
                    if (image_name === null && item_name === null)
                        return;

                    var contents = GM_getValue(stored_key);
                    size += contents.length;

                    if (image_name !== null) {
                        images[image_name] = contents;
                        return;
                    }

                    // Parse the stored page and keep only what ends up in its body
                    var element = document.createElement("html");
                    element.innerHTML = '<head>' + META_TAG + '</head>';
                    element.innerHTML += contents;

                    items[item_name] = element.getElementsByTagName("body")[0].innerHTML;
                });

                // Concatenate the stored pages in spine (reading) order
                var html = '<html><head><title>' + title + '</title>' + META_TAG + '</head><body>';
                var have = 0;
                var total = 0;
                for (var s = 0; s < spine.length; s++) {
                    if (!spine[s].path.match(content_html_regex))
                        continue;
                    total++;
                    var spinematch = spine[s].path.match(content_html_match);
                    if (!spinematch)
                        continue;
                    var spineid = spinematch[1];
                    if (has_own(items, spineid)) {
                        html += items[spineid];
                        have++;
                    } else {
                        console.log("Missing " + spineid);
                    }
                }
                html += "</body></html>";

                // Replace image references by data: URIs built from the stored bytes
                if (Object.keys(images).length > 0) {
                    var doc = document.implementation.createHTMLDocument("preview");
                    var htmlel = doc.createElement("html");
                    htmlel.innerHTML = html;
                    var img_els = htmlel.getElementsByTagName("img");
                    for (var n = 0; n < img_els.length; n++) {
                        var src = img_els[n].getAttribute("src");
                        if (src === null)
                            continue;
                        // Images are stored under their src minus any
                        // "scheme://host/" prefix; look them up the same way
                        var image_key = src.replace(/.*?:\/\/[^/]*\/*/, "");
                        if (has_own(images, image_key)) {
                            img_els[n].setAttribute("src", "data:" + image_mime(src) + ";base64," + btoa(images[image_key]));
                        }
                    }
                    html = htmlel.innerHTML;
                }

                var link = "data:text/html," + encodeURIComponent(html);
                name_td.innerHTML += "  (" + have + "/" + total + ")";

                var preview = document.createElement("a");
                preview.onclick = function() {
                    preview_iframe.src = link;
                    preview_td.style.display = "table-cell";
                };
                preview.href = "javascript:void(0)";
                preview.innerHTML = "Preview";
                makebutton(preview, "#105210");

                var download = document.createElement("a");
                download.href = link;
                download.setAttribute("download", title + ".html");
                download.innerHTML = "Download (" + (html.length / 1024).toFixed(1) + "KB)";
                makebutton(download, "#1070a0");

                var del = document.createElement("a");
                del.href = "javascript:void(0)";
                del.onclick = function() {
                    if (!confirm("Delete '" + title + "'?"))
                        return;

                    keys.forEach(function(stored_key) {
                        if (stored_key === ("TITLE:" + id) ||
                            stored_key === ("INFO:" + id) ||
                            stored_key.indexOf(contents_prefix) === 0 ||
                            stored_key.indexOf(image_prefix) === 0) {
                            console.log(stored_key);
                            GM_deleteValue(stored_key);
                        }
                    });

                    // Re-render from what is left in storage
                    start();
                };
                del.innerHTML = "Delete (" + (size / 1024).toFixed(1) + "KB)";
                del.style.float = "right";
                makebutton(del, "#a02010");

                var actions_td = document.createElement("td");
                actions_td.appendChild(preview);
                actions_td.appendChild(download);
                actions_td.appendChild(del);

                tr.appendChild(name_td);
                tr.appendChild(actions_td);

                table.appendChild(tr);
            }

            title_keys.forEach(function(key) {
                add_book_row(key);
            });

            // Separator between the book list and the settings
            var hr_tr = document.createElement("tr");
            var hr_td = document.createElement("td");
            hr_td.setAttribute("colspan", 10);
            hr_td.style.borderBottom = "1px solid black";
            hr_tr.appendChild(hr_td);
            table.appendChild(hr_tr);

            for (var option in default_options) {
                (function(option) {
                    var tr = document.createElement("tr");
                    var name_td = document.createElement("td");
                    name_td.textContent = default_options[option].name;
                    tr.appendChild(name_td);

                    var value_td = document.createElement("td");
                    var value_input = document.createElement("input");
                    value_input.type = "checkbox";
                    value_input.checked = !!options[option];

                    value_input.onclick = function() {
                        var val = !!value_input.checked;
                        // Keep the in-memory copy in sync so that a re-render
                        // (e.g. after deleting a book) shows the new state
                        options[option] = val;
                        GM_setValue("SETTINGS:" + option, JSON.stringify(val));
                    };

                    value_td.appendChild(value_input);
                    tr.appendChild(value_td);
                    table.appendChild(tr);
                })(option);
            }

            insert.appendChild(table);
        }

        onload(start);
    }
    
    // Decide which half of the script applies to the current page: the
    // transcriber on *.overdrive.com, the library manager on this script's
    // own Greasy Fork page (path ending in "/41000-overdrive-transcriber").
    var on_overdrive = document.location.href.match(/:\/\/[^/]*\.overdrive\.com\//);
    if (on_overdrive) {
        overdrive();
    } else {
        var on_script_page = document.location.href.match(/\/41000-overdrive-transcriber(?:\/?[?#].*)?$/);
        if (on_script_page) {
            greasyfork();
        }
    }
})();