// Accurate word counter for Gutenberg texts
// (truncated fragment in the original: "当前为", i.e. "Currently is ...")
// ==UserScript==
// @name Gutencount
// @namespace https://vox.quartertone.net/
// @version 1.6.2
// @description Accurate word counter for Gutenberg texts
// @author quartertone
// @match *://*.gutenberg.org/*
// @grant none
// @license gpl-3.0
// ==/UserScript==
(function() {
'use strict';
// Count the words of a Project Gutenberg ebook, ignoring the licence boilerplate.
//   t - full text of the ebook, including the "*** START ... ***" / "*** END ... ***" lines
//   x - label describing what is being counted (forwarded to wc)
//   r - when truthy, wc returns the count instead of displaying it
// Returns whatever wc returns for the extracted text.
// Throws a TypeError when the markers are not found (match() yields null).
function wx(t, x, r = "") {
  // Everything after the closing "***" of the START line, up to the opening "***" of the END line.
  const betweenMarkers = /(?<=\*{3} START.*?\*{3}).*(?=\*{3} END.*?\*{3})/s;
  const found = t.match(betweenMarkers);
  const bookText = found[0].trim();
  return wc(bookText, x, r);
}
// Count the words in a piece of text.
//   t - text to count
//   x - label describing what was counted (shown in the corner box)
//   r - when truthy, return the count as a number and skip the corner box
// Returns the word count when r is truthy, otherwise undefined.
function wc(t, x, r = "") {
  // Words are separated by whitespace, asterisks, em dashes (—) or an ASCII "--".
  // split() yields empty strings for empty input, for leading/trailing separators and
  // between adjacent separators (e.g. " -- "); those are not words, so drop them.
  const w = t
    .trim()
    .split(/--|[\s\*—]+/)
    .filter(Boolean).length;
  if (r) return w; // return the word count as a number if requested; this skips the corner box
  // The character count is taken from the untrimmed text.
  cornerbox(`${w} words, ${t.length} chars\nin ${x}`);
}
// Fetch the plain-text edition of an ebook and count its words.
//   l - ebook URL without extension; ".txt.utf-8" is appended to download the text
//   a - optional element; when given, the count and html/epub links are appended to its
//       parent. When falsy, wx/wc display the count in the corner box instead.
// Returns a promise that settles once the work is done. It never rejects: failures are
// logged and otherwise ignored so one bad book does not interrupt a batch.
function fwx(l, a) {
  return fetch(l + ".txt.utf-8")
    .then((r) => {
      // fetch() only rejects on network errors; reject HTTP error pages explicitly.
      if (!r.ok) throw new Error(`HTTP ${r.status} for ${r.url}`);
      return r.text();
    })
    .then((t) => {
      const c = wx(t, "ebook", a);
      if (a) {
        // insertAdjacentHTML appends without re-parsing (and thereby replacing) the
        // parent's existing children, which `innerHTML +=` would do.
        a.parentElement.insertAdjacentHTML(
          "beforeend",
          `wc: ${c} | <a href="${l}.html.images">html</a> | <a href="${l}.epub3.images">epub</a>`
        );
      }
    })
    .catch((e) => {
      // Best effort: report in the console rather than interrupting the user.
      console.warn("Gutencount: could not count " + l, e);
    });
}
// Decide what to count based on the current selection and page URL, then count it.
// Priority: a non-empty text selection, an ebook text/html file, a single ebook landing
// page, a book-list page (after confirmation), and finally a generic web page.
function action() {
  const loc = document.location.toString();
  const sel = window.getSelection() + "";
  if (sel.length > 0) {
    // selection
    wc(sel, "selection");
    return;
  }
  if (/gutenberg\.org\/(files|cache\/epub)\/\d+/.test(loc)) {
    // gutenberg text
    wx(document.body.innerText, "ebook");
    // The click listener is installed only here and in the general webpage branch
    // so that a repeated action() is not triggered in the other scenarios.
    document.onclick = () => clicklisten();
    return;
  }
  const ebook = loc.match(/gutenberg\.org\/ebooks\/(\d+)/);
  if (ebook) {
    // gutenberg main ebook page
    // Rebuild the URL from the book id: the raw location may carry a query string,
    // a fragment or a trailing slash, which would corrupt the ".txt.utf-8" download URL.
    fwx(new URL(`/ebooks/${ebook[1]}`, loc).href, document.querySelector("#cover"));
    return;
  }
  if (/gutenberg\.org\/ebooks\/(bookshelf|subject|search|author)\/[\?\d]/.test(loc)) {
    // gutenberg [list of books] page
    if (confirm("Gutencount: Perform batch word count?")) {
      for (const a of document.querySelectorAll(".booklink>a>span:nth-child(2)")) {
        // append word counts to each book section
        // second CHILD of the <a> is the "content" span
        fwx(a.parentElement.href, a);
      }
    }
    return;
  }
  // general webpage
  // `monkey` is not declared anywhere in this file, so reading it directly throws a
  // ReferenceError. Treat an undeclared flag as "skip this branch", which is what the
  // crash amounted to. NOTE(review): presumably other builds define `monkey` — confirm.
  if (typeof monkey !== "undefined" && !monkey) {
    wc(document.body.innerText, "webpage");
    document.onclick = () => clicklisten();
  }
}
// Run once as soon as the script loads. cornerbox() and clicklisten() are declared
// further down, but function declarations are hoisted, so they are already callable here.
action();
// Show a message in a fixed box in the top-right corner of the page.
//   wcount - text to display
// The box (id "xcount") is created on first use and reused afterwards; double-clicking
// it removes it from the page.
function cornerbox(wcount) {
  const xcount = document.getElementById("xcount") || document.createElement("div");
  xcount.id = "xcount";
  xcount.style.cssText =
    "font-size:min(20pt,5vw);position:fixed;top:0;right:0;width:10em;height:2em;background:#333c;color:#fff;z-index:10000;padding:0.5em;text-align:right;";
  xcount.ondblclick = () => xcount.remove();
  // The message is plain text, so set it as text rather than parsing it as HTML.
  xcount.textContent = wcount;
  // Appending an element that is already in the document just moves it to the end of <body>.
  document.body.appendChild(xcount);
}
// Click handler: when a chapter heading is clicked with nothing selected, select the
// chapter body (everything after the heading up to the next heading or <section>),
// then re-run action() so the new selection is counted.
function clicklisten() {
  const sel = document.getSelection();
  if (sel.toString().length === 0) {
    // True for H1..H6-style elements; tolerates null and nodes without a tagName.
    const isHeading = (node) =>
      Boolean(node && typeof node.tagName === "string" && /^H\d/.test(node.tagName));
    const parent = sel.anchorNode ? sel.anchorNode.parentNode : null;
    const grandparent = parent ? parent.parentNode : null;
    // "ph1" matches pseudo chapters headlined with a P tag
    const ischap = isHeading(parent) || Boolean(parent && parent.className === "ph1");
    // matches headings that are wrapped in both A and H_ tags
    const isachap = isHeading(grandparent);
    if (ischap || isachap) {
      // start node depends on whether the click landed in H_ or in H_>A
      const a = isachap ? grandparent : parent;
      let b = a.nextElementSibling;
      // A heading that is the last element has nothing to select; leave the selection
      // alone instead of failing on a null sibling.
      if (b) {
        // Extend to the last element before the next heading, <section> or pseudo chapter.
        while (b.nextElementSibling) {
          const next = b.nextElementSibling;
          if (/^(H\d|SECTION)/.test(next.tagName) || next.className === "ph1") break;
          b = next;
        }
        const range = document.createRange();
        sel.removeAllRanges();
        range.setStartAfter(a);
        range.setEndAfter(b);
        sel.addRange(range);
      }
    }
  }
  action();
}
})();