AO3: Sample character tag scraper!

Grabs every single char tag used by fics that you page through!

// ==UserScript==
// @name         AO3: Sample character tag scraper!
// @description  Grabs every single char tag used by fics that you page through!
// @version      1.0.0

// @author       owlwinter
// @namespace    N/A
// @license      MIT license

// @match        *://*.archiveofourown.org/*TwoSet*
// @grant        GM_setValue
// @grant        GM_getValue
// ==/UserScript==

(function() {
    'use strict';

    //I needed to grab a list of every single character tag used by works in the twoset violin
    //fandom, which had just recently been split off as its own fandom from Vlogging.
    //So I wrote this!
    //Every time I page through a page of twoset fics (see the @match pattern in the header), this will
    //grab all the char tags on that page of results and save the unique ones
    //Then, on the last page of twoset fics, it pops up the resulting tag list!
    //So all I had to do was click through the 100ish pages of TwoSet works and then this script
    //did all the rest!

    //A couple of notes:
    //1) This is not a script I'd want always-on; after scraping the data, be sure to disable the script again!
    //2) The data saved from page to page is not "reset" after it's displayed on the last page -
    //   In order to reset that, click the "Storage" tab at the top of this script in Tampermonkey
    //   (next to the Editor and Settings tabs), and manually change it to be {}
    //3) If you want to make your own version of this script from scratch, be sure
    //   to include the two @grant lines from the header (GM_setValue and GM_getValue) -
    //   they're easy to overlook, but this script won't work without them!
    //4) Also don't forget to change the @match statement to your needs!!
    //5) Feel free to ping me if you have questions, @owlwinter8 :)

    //Copies an array-like collection (such as the NodeList returned by
    //document.querySelectorAll()) into a real Array.
    const array = function (collection) {
        return [].slice.call(collection);
    };

    //Storage key under which the collected tags are kept between page loads.
    const key = "twoset_chardict";

    //Rebuilds the set of tags gathered on previously visited pages.
    //This script saves the tags as a JSON array of strings. Anything else
    //(nothing stored yet, a hand-edited "null" or "{}", unparsable text) is
    //treated as "no data", so the script starts fresh instead of throwing
    //while trying to build a Set from a non-iterable value.
    const chartags = new Set();
    const raw = GM_getValue(key, null);
    if (raw != null) {
        let stored = null;
        try {
            //Only strings need decoding; a value stored as a real array is used as-is.
            stored = typeof raw === "string" ? JSON.parse(raw) : raw;
        } catch (err) {
            console.warn("Ignoring unreadable saved tag data for " + key, err);
        }

        if (Array.isArray(stored)) {
            stored.forEach((tag) => chartags.add(tag));
        } else if (stored != null) {
            console.warn("Ignoring saved tag data for " + key + " because it is not an array");
        }
    }

    //Walks every work listed on the current page and records each of its
    //character tags. chartags is a Set, so a tag that was already collected
    //(on this page or an earlier one) is simply not added a second time.
    //The same approach could gather other tag types into their own sets.
    const worksOnPage = array(document.querySelectorAll("li.work"));
    worksOnPage.forEach((work) => {
        const characterItems = array(work.querySelectorAll(".characters"));
        characterItems.forEach((item) => {
            chartags.add(item.innerText);
        });
    });

    //Persists the collected tags so the next page load can pick them up.
    //A Set cannot be JSON-encoded directly, so it is flattened to an array first.
    //GM_setValue reference: https://wiki.greasespot.net/GM.setValue
    //In Tampermonkey, the saved value can be inspected under the script's "Storage" tab.
    const arrayeddata = [...chartags];
    GM_setValue(key, JSON.stringify(arrayeddata));

    //If on the last page of results, show them off~!!
    //(The last page is detected by a .disabled element inside .next.)
    if (document.querySelector(".next .disabled") != null) {
        //AO3 swaps these characters for *x* tokens in tag URLs.
        const tagUrlTokens = { "/": "*s*", "&": "*a*", ".": "*d*", "#": "*h*", "?": "*q*" };

        //Builds the site-relative link for a tag name.
        //A global regex is used because String.replace() with a string pattern
        //only swaps the FIRST occurrence, which broke tags such as "J.R.R. Tolkien".
        //The result is percent-encoded so "%" and other reserved characters survive.
        const tagPath = (tag) => {
            const tokenised = tag.replace(/[\/&.#?]/g, (ch) => tagUrlTokens[ch]);
            return "/tags/" + encodeURIComponent(tokenised);
        };

        //A .flash element seems to always be present on the page, even when it
        //has no content - but create one rather than crash if it is ever missing.
        let flash = document.getElementsByClassName("flash")[0];
        if (flash == null) {
            flash = document.createElement("div");
            flash.classList.add("flash");
            document.body.insertBefore(flash, document.body.firstChild);
        }
        flash.textContent = "";
        flash.classList.add("notice");

        flash.appendChild(document.createTextNode("The following tags were collected: "));
        arrayeddata.forEach((tag, index) => {
            //Every link is preceded by a newline; all but the first also get ", ".
            const separator = index === 0 ? "" : ", ";
            flash.appendChild(document.createTextNode(separator + "\u000a"));

            const link = document.createElement("a");
            link.href = tagPath(tag);
            link.target = "_blank";
            link.textContent = tag;
            flash.appendChild(link);
        });
    }

})();