Gutencount

Accurate word counter for Gutenberg texts

当前为 2023-04-15 提交的版本,查看 最新版本

// ==UserScript==
// @name         Gutencount
// @namespace    https://vox.quartertone.net/
// @version      1.5.1
// @description  Accurate word counter for Gutenberg texts
// @author       quartertone
// @match        *://*.gutenberg.org/*
// @grant        none
// @license      gpl-3.0
// ==/UserScript==

(function() {
    'use strict';
 // Tampermonkey flag: while true, action() skips its generic "webpage" count
 // (the `!monkey` branch), so only selections and Gutenberg pages are handled.
 let monkey = true; //tampermonkey flag
  /**
   * Count the words of a Project Gutenberg ebook, ignoring everything outside
   * the "*** START ... ***" and "*** END ... ***" marker lines.
   *
   * Accuracy still depends on extraneous text between the markers, such as
   * transcriber's notes, book summaries or page numbers.
   *
   * @param {string} t - Full ebook text, including the Gutenberg header/footer.
   * @param {string} x - Label handed to wc() for the result message.
   * @param {*} [r=""] - Handed to wc(); when truthy, wc() returns the count
   *   instead of displaying it.
   * @returns {number|undefined} Whatever wc() returns for the extracted text.
   * @throws {Error} If no START/END marker pair is found in `t`.
   */
  function wx(t, x, r = "") {
    // Lookbehind/lookahead keep the marker lines themselves out of the match;
    // the `s` flag lets `.` span line breaks.
    const body = t.match(/(?<=\*{3} START.*?\*{3}).*(?=\*{3} END.*?\*{3})/s);
    if (body === null) {
      // Fail with a readable message instead of a null-dereference TypeError.
      throw new Error("Gutencount: no '*** START ... ***' / '*** END ... ***' markers found");
    }
    return wc(body[0].trim(), x, r);
  }
  /**
   * Count the words in a piece of text.
   *
   * Words are separated by any run of whitespace, asterisks, em dashes (—)
   * or double hyphens (--). Empty tokens are not counted, so empty input
   * yields 0 and "a -- b" yields 2.
   *
   * @param {string} t - Text to count.
   * @param {string} x - Label describing the text; used in the displayed message.
   * @param {*} [r=""] - When truthy, return the count instead of displaying it.
   * @returns {number|undefined} The word count when `r` is truthy; otherwise
   *   nothing (the result is shown via cornerbox()).
   */
  function wc(t, x, r = "") {
    // Treat a whole run of separators as one split point, then drop the empty
    // tokens produced by leading/trailing separators or empty input.
    const words = t.split(/(?:--|[\s*—])+/).filter(Boolean);
    const w = words.length;
    if (r) return w; // caller wants the number; skip the on-page message
    cornerbox(`${w} words, ${t.length} chars\nin ${x}`);
  }
  /**
   * Fetch an ebook's text and word-count it with wx().
   *
   * @param {string} l - URL of the ebook text to fetch.
   * @param {Element|null} a - Element to append "wc: <count>" to. It is also
   *   passed to wx() as the "return the number" flag, so when it is null/falsy
   *   the count is displayed by wc() instead.
   * @returns {Promise<void>} Settles once the count has been handled; failures
   *   are logged as warnings, never rejected.
   */
  function fwx(l, a) {
    return fetch(l)
      .then((r) => {
        // An HTTP error page is not the book text; do not try to count it.
        if (!r.ok) {
          throw new Error(`HTTP ${r.status} for ${l}`);
        }
        return r.text();
      })
      .then((t) => {
        const c = wx(t, "ebook", a);
        if (a) {
          a.innerHTML += `wc: ${c}`;
        }
      })
      .catch((e) => {
        // Best effort: one failed book must not interrupt a batch, but leave a trace.
        console.warn(`Gutencount: word count failed for ${l}`, e);
      });
  }
  /**
   * Decide what to count based on the current selection and page URL:
   *  - a non-empty selection: count the selection;
   *  - a Gutenberg text page (/files/N or /cache/epub/N): count the ebook and
   *    install the click handler;
   *  - a Gutenberg ebook landing page (/ebooks/N): fetch the plain text and
   *    append the count to the "#cover" element;
   *  - a bookshelf/subject/search listing: after confirmation, fetch and
   *    count every listed book;
   *  - anything else: count the whole page, only when `monkey` is false.
   */
  function action() {
    const loc = document.location.toString();
    const sel = String(window.getSelection());

    if (sel.length > 0) {
      wc(sel, "selection");
      return;
    }

    if (/gutenberg\.org\/(files|cache\/epub)\/\d+/.test(loc)) {
      // gutenberg text
      wx(document.body.innerText, "ebook");
      // The click handler is installed only here and in the general-webpage
      // branch so that a repeated action() is not triggered on other pages.
      // Assigning onclick (rather than adding a listener) keeps it idempotent.
      document.onclick = () => clicklisten();
      return;
    }

    const bookPage = loc.match(/gutenberg\.org\/ebooks\/(\d+)/);
    if (bookPage) {
      // gutenberg main ebook page. Build the text URL from the book id, not
      // from the raw location, so a query string, hash or suffix does not break it.
      const textUrl = new URL(`/ebooks/${bookPage[1]}.txt.utf-8`, loc).href;
      fwx(textUrl, document.querySelector("#cover"));
      return;
    }

    if (/gutenberg\.org\/ebooks\/(bookshelf|subject|search)\//.test(loc)) {
      // gutenberg search or subject page
      if (confirm("Perform batch word count?")) {
        for (const a of document.querySelectorAll(".booklink>a>span:nth-child(2)")) {
          // append word counts to each book section;
          // the second child of the <a> is the "content" span
          fwx(a.parentElement.href + ".txt.utf-8", a);
        }
      }
      return;
    }

    if (!monkey) {
      // general webpage
      wc(document.body.innerText, "webpage");
      document.onclick = () => clicklisten();
    }
  }
  // Run the counter once as soon as the script is evaluated on the page.
  action();

  /**
   * Show a message in a small fixed box in the top-right corner of the page.
   *
   * The box (element id "xcount") is reused when it already exists, otherwise
   * created, and is (re-)appended to <body> on every call. Double-clicking
   * the box removes it.
   *
   * @param {*} wcount - Content assigned to the box's innerHTML.
   */
  function cornerbox(wcount) {
    const boxId = "xcount";
    let box = document.getElementById(boxId);
    if (!box) {
      box = document.createElement("div");
    }
    // Properties are assigned in this order: id, style, innerHTML, ondblclick.
    Object.assign(box, {
      id: boxId,
      style: "position:fixed;top:0;right:0;width:10em;height:2em;background:#333c;color:#fff;z-index:10000;padding:0.5em;text-align:right;",
      innerHTML: "",
      ondblclick() {
        this.remove();
      },
    });
    document.body.appendChild(box);
    box.innerHTML = wcount;
  }

  /**
   * Click handler: when the click landed on a heading (H1-H6, or text inside
   * an element that is itself inside a heading, e.g. H2 > A), select the
   * content that follows the heading up to the next heading or SECTION, then
   * re-run action() so the selection is counted.
   *
   * Clicks elsewhere, and headings with nothing after them, just re-run action().
   */
  function clicklisten() {
    const sel = document.getSelection();
    if (sel) {
      const isHeading = (el) => Boolean(el && el.tagName && /^H\d/.test(el.tagName));
      const parent = sel.anchorNode ? sel.anchorNode.parentNode : null;
      const grandparent = parent ? parent.parentNode : null;

      // Prefer the outer element so headings wrapped as H_ > A are handled too.
      let heading = null;
      if (isHeading(grandparent)) {
        heading = grandparent;
      } else if (isHeading(parent)) {
        heading = parent;
      }

      // The element right after the heading is always included; then extend
      // until the following sibling is another heading or a SECTION.
      let last = heading ? heading.nextElementSibling : null;
      if (last) {
        while (
          last.nextElementSibling &&
          !/^(H\d|SECTION)/.test(last.nextElementSibling.tagName)
        ) {
          last = last.nextElementSibling;
        }

        const range = document.createRange();
        range.setStartAfter(heading);
        range.setEndAfter(last);
        // The click left a collapsed range in the selection. Browsers other
        // than Firefox ignore addRange() while a range is present, so clear it first.
        sel.removeAllRanges();
        sel.addRange(range);
      }
    }
    action();
  }



})();