Double-click hyphenated word selection

When a word is selected by double-click, this script ignores the break that a hyphen character (-) would normally cause, so that the hyphenated word is selected as one word.

As of 2024-02-15. See the latest version.

// ==UserScript==
// @name         Double-click hyphenated word selection
// @namespace    https://greasyfork.org/en/users/1261421-colemeg
// @version      v1.0
// @description  In selecting a word by double click, it remove break separation by hyphen character (-). So that it will be considered as one word.
// @author       colemeg, based on code by @lexogram (https://github.com/lexogram) - (https://github.com/lexogram/select-with-hyphens/blob/main/selectWordsWithHyphens.js)
// @match        *://*/*
// @icon         https://avatars.githubusercontent.com/u/81081327?s=400&u=d2f5b0584025947c76a338aa837a8dd5201f09b1&v=4
// @grant        none
// @license      UNLICENSE - For more information, please refer to <https://unlicense.org> 

// ==/UserScript==

"use strict";

// Credit goes to @lexogram (https://github.com/lexogram) 
// Based on repo "selectWordsWithHyphens" (https://github.com/lexogram/select-with-hyphens/blob/main/selectWordsWithHyphens.js)
//
// Tweak to make a double-click select words with hyphens or
// apostrophes.
//
// As of 2023-04-12, none of the major browsers selects whole
// words with hyphens, like "ad-lib". Only the text before or
// after the hyphen, or the hyphen on its own, will be selected.
// This tweak fixes the hyphen issue.
//
// Note: Firefox (at least until version 111.0.1) also doesn't
// automatically select whole words with apostrophes like the word
// "doesn't".
//
// In Safari (at least until version 16.3), a double-click that
// lands precisely on an apostrophe will select only the
// apostrophe. However, a double-click on any *letter* in a word
// that contains an apostrophe will select the entire word,
// including the apostrophe.
//
// This tweak also treats these issues.

// On Windows, a double-click on a word will also select the space
// after the word, if there is one. On MacOS and Ubuntu, only the
// word itself is selected. This tweak respects these
// platform-specific differences.

// In the comments below, I'll use the word "join" to mean any of
// the following hyphen and apostrophe characters:
//
//  * - (hyphen-minus: &#45;)
//  * ‑ (non‑breaking hyphen: &#8209;)
//  * &shy; (soft hyphen, which only appears at a line break)
//  * ' (apostrophe: &#39;)
//  * ’ (right single quotation mark: &#8217;).
//
// NOTE 1: It is not trivial to distinguish between a final
// apostrophe that is an integral part of a word (one that is used
// to indicate possession)...
//
//   She said, "Those books are Jodi's, but these are my kids'".
//
// ... from a closing single quote:
//
//   He said, "She said, 'Meet Jo and Di. These are my kids'".
//
// For simplicity, this script ignores both cases. As of 2023-04-12,
// all major browsers behave in exactly the same way.
//
// NOTE 2: Two hyphens can be used to indicate a dash—a character
// which indicates a secondary thought—and some writers leave no
// spaces around a dash. However, a dash is never used to make
// compound words. "Two consecutive hyphens should be ignored--at
// least I think they should."

(function selectWholeWordsWithHyphens() {
    // Value of Node.nodeType for Text nodes. Range offsets are
    // character indexes only when the boundary container is a Text
    // node; for element containers they are child indexes.
    var TEXT_NODE = 3;

    // A single word character, as defined by \w (ASCII letters,
    // digits and underscore).
    var wordCharRegex = /\w/;

    // A single "join" character: soft hyphen, non-breaking hyphen,
    // apostrophe, right single quotation mark or hyphen-minus.
    var joinCharRegex = /[\u00AD‑'’-]/;

    // Detects a selection that is just a series of two or more join
    // characters (for example the "--" used as a dash).
    var ignoreRegex = /^[\u00AD‑'’-]{2,}$/;

    // Finds the word parts (each optionally preceded by one join
    // character) that directly follow the selected text.
    // Examples: |ad|-lib  |seven|-o'clock
    var endRegex = /^([\u00AD‑'’-]?\w+)+/;

    // Matches if the selection contains at least one word character
    // or join character. If it does not, the selection is left alone.
    var edgeRegex = /\w|-|‑|'|’|\u00AD/;

    // addEventListener (rather than assigning document.body.ondblclick)
    // leaves any double-click handler installed by the page in place,
    // and cannot be silently replaced by the page later on.
    document.addEventListener("dblclick", selectHyphenatedWords);

    // Double-click handler. Reads the selection the browser has just
    // made, either inside a form control (<input>, <textarea>) or in
    // the document, and widens it so that word parts linked by a
    // single join character are selected together.
    //
    // event: the dblclick event; only event.target is used.
    function selectHyphenatedWords(event) {
        var target = event.target;
        var tagName = target.tagName;
        var isInput = tagName === "INPUT" || tagName === "TEXTAREA";

        // In browsers on Windows, a double-click on a word also
        // selects the space character that immediately follows it.
        // 1 if such a space was stripped from the end offset, else 0.
        var trailingSpace = 0;

        var selection, range, container;
        var string, startOffset, endOffset;
        var chunk, newStart, newEnd;

        if (isInput) {
            startOffset = target.selectionStart;
            endOffset = target.selectionEnd;

            if (startOffset === null || endOffset === null) {
                // Input types without a text selection (number, email,
                // checkbox, ...) report null; nothing to extend.
                return;
            }

            string = target.value;

            if (string.charAt(endOffset - 1) === " ") {
                trailingSpace = 1;
                endOffset -= 1;
            }
        } else {
            selection = window.getSelection();

            if (!selection || !selection.rangeCount) {
                return;
            }

            range = selection.getRangeAt(0);

            // If the selection is at the boundary of a tag – for example:
            // <p>The selection word is one of <em>these-words</em></p> –
            // then range.startContainer and range.endContainer will be
            // different.
            container = range.endContainer;
            endOffset = range.endOffset;

            if (
                container.nodeType === TEXT_NODE &&
                container.data.charAt(endOffset - 1) === " "
            ) {
                // Leave the trailing space out of the text that is
                // analysed. The range itself is not shortened.
                endOffset -= 1;
            }

            if (endOffset === 0) {
                // The selection extends to the end of the startContainer
                // and ends at char index 0 in the endContainer. Use the
                // startContainer instead.
                container = range.startContainer;
                endOffset = container.length;
            }

            if (container.nodeType !== TEXT_NODE) {
                // The offsets would be child indexes, not character
                // indexes, so they cannot be applied to the text.
                return;
            }

            string = container.data;

            // When the range starts in an earlier node, the selected
            // text inside this container begins at char index 0.
            startOffset = (container === range.startContainer)
                ? range.startOffset
                : 0;
        }

        chunk = string.substring(startOffset, endOffset);

        if (ignoreRegex.test(chunk) || !edgeRegex.test(chunk)) {
            // The selection contains neither word nor join characters,
            // or is nothing but a series of join characters.
            return;
        }

        newStart = findWordStart(string, startOffset);
        newEnd = findWordEnd(string, endOffset);

        if (newStart === startOffset && newEnd === endOffset) {
            // Nothing to extend: keep the browser's own selection.
            return;
        }

        if (isInput) {
            // Put back the trailing space that was stripped above.
            target.setSelectionRange(newStart, newEnd + trailingSpace);
        } else {
            // Only move a boundary that actually changed, so that a
            // range starting in another node keeps its original start.
            if (newStart < startOffset) {
                range.setStart(container, newStart);
            }
            if (newEnd > endOffset) {
                range.setEnd(container, newEnd);
            }
            selection.removeAllRanges();
            selection.addRange(range);
        }
    }

    // Returns the index at which the word ending at `offset` starts,
    // walking backwards over word characters and single join
    // characters. Examples: ad-|lib| → 0,  seven-o'|clock| → 0.
    // Returns `offset` unchanged when there is nothing to extend.
    //
    // The scan stops at the first character that is neither a word
    // nor a join character, or at a second consecutive join character
    // (so "--" is never bridged). It visits each character at most
    // once, so a long run of word characters before the selection
    // cannot cause regular-expression backtracking.
    function findWordStart(string, offset) {
        var wordStart = offset;
        var previousWasJoin = false;
        var i, ch;

        for (i = offset - 1; i >= 0; i -= 1) {
            ch = string.charAt(i);

            if (wordCharRegex.test(ch)) {
                // The extended selection must start on a word
                // character, so only these positions are recorded.
                wordStart = i;
                previousWasJoin = false;
            } else if (!previousWasJoin && joinCharRegex.test(ch)) {
                previousWasJoin = true;
            } else {
                break;
            }
        }

        return wordStart;
    }

    // Returns the index just past the word parts that directly follow
    // `offset`. Examples: |ad|-lib → 6,  |seven|-o'clock → 13.
    // Returns `offset` unchanged when the text after it does not
    // continue the word.
    function findWordEnd(string, offset) {
        var match = endRegex.exec(string.substring(offset));

        return match ? offset + match[0].length : offset;
    }
})();