AO3: Get Current Chapter Word Count

Counts and displays the number of words in the current chapter

您需要先安裝使用者腳本管理器擴展,如 TampermonkeyGreasemonkeyViolentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 TampermonkeyViolentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 TampermonkeyUserscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name           AO3: Get Current Chapter Word Count
// @namespace      https://github.com/w4tchdoge
// @version        1.2.1-20240705_232304
// @description    Counts and displays the number of words in the current chapter
// @author         w4tchdoge
// @homepage       https://github.com/w4tchdoge/MISC-UserScripts
// @match          *://archiveofourown.org/*chapters/*
// @match          *://archiveofourown.org/*works/*
// @exclude        *://archiveofourown.org/*works/*/bookmarks
// @exclude        *://archiveofourown.org/*works/*/navigate
// @icon           https://archiveofourown.org/favicon.ico
// @license        AGPL-3.0-or-later
// @history        1.2.1 — Prevent script from running on multi-chapter works which only have 1 chapter published.
// @history        1.2.0 — Replace \w with [\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}] in the regular expession as that is the proper JavaScript equivalent to Ruby's [[:word:]]. Add support for most Unicode scripts supported in regular expressions. Use Array.from() instead of the spread syntax to convert the RegExpStringIterator into a countable array. Add *://archiveofourown.org/*chapters/* as a @match rule so that the script can work on URLs such as https://archiveofourown.org/chapters/141182779. Add *://archiveofourown.org/*works/*/navigate as an @exclude rule so the script does not run on the index page.
// @history        1.1.3 — Get rid of the element containing the words "Chapter Text" using removeChild() so I don't have to use RegEx to get rid of it. Also some miscellaneous cleanup
// @history        1.1.2 — Switch to using Intl.NumberFormat for making the word count thousands separated
// @history        1.1.1 — Modify the match rule so that it matches collections/*/works URLs as well; Add an exlude role so it doesn't work on works/*/bookmarks pages as it isn't designed to
// @history        1.1.0 — Implement a counting method that uses an attempted conversion of the Ruby regex code used by AO3 to JavaScript
// ==/UserScript==

(function () {
	`use strict`;

	// Get the current chapter count as a integer number
	const curr_chp_cnt = parseInt(document.querySelector(`dd.stats dd.chapters`).textContent.split(`/`).at(0));

	// Execute script only on multi-chapter works which have more than one chapter published and only when a single chapter is being viewed
	if (window.location.pathname.toLowerCase().includes(`chapters`) && curr_chp_cnt > 1) {

		// Get the Chapter Text
		const chapter_text = (function () {
			// Get the HTML element containing the chapter's text content
			let elm_parent = document.querySelector(`[role="article"]:has(> #work)`).cloneNode(true);
			// Remove the child element with the text "Chapter Text"
			elm_parent.removeChild(elm_parent.querySelector(`#work`));

			// Return only the textContent of the HTML element
			return elm_parent.textContent.trim();
		})();

		// Couting and formatting the number of words
		const word_count = (function () {

			// Attempted conversion of the Ruby regex code AO3 uses to JavaScript by looking at:
			// https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/config/config.yml#L453
			// https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L26
			// https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C9-L31C95
			// Has not been tested on non-English works, feedback would be appreciated
			// const word_count_regex = /\p{Script=Han}|\p{Script=Hiragana}|\p{Script=Katakana}|\p{Script=Thai}|((?!\p{Script=Han}|\p{Script=Hiragana}|\p{Script=Katakana}|\p{Script=Thai})[\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}])+/gu;

			// Add support for most Unicode scripts supported in Regular Expressions
			// Vanilla AO3 compliant script_list:
			// const script_list = [`Han`, `Hiragana`, `Katakana`, `Thai`];
			// Full script_list:
			const script_list = [`Arabic`, `Armenian`, `Balinese`, `Bengali`, `Bopomofo`, `Braille`, `Buginese`, `Buhid`, `Canadian_Aboriginal`, `Carian`, `Cham`, `Cherokee`, `Common`, `Coptic`, `Cuneiform`, `Cypriot`, `Cyrillic`, `Deseret`, `Devanagari`, `Ethiopic`, `Georgian`, `Glagolitic`, `Gothic`, `Greek`, `Gujarati`, `Gurmukhi`, `Han`, `Hangul`, `Hanunoo`, `Hebrew`, `Hiragana`, `Inherited`, `Kannada`, `Katakana`, `Kayah_Li`, `Kharoshthi`, `Khmer`, `Lao`, `Latin`, `Lepcha`, `Limbu`, `Linear_B`, `Lycian`, `Lydian`, `Malayalam`, `Mongolian`, `Myanmar`, `New_Tai_Lue`, `Nko`, `Ogham`, `Ol_Chiki`, `Old_Italic`, `Old_Persian`, `Oriya`, `Osmanya`, `Phags_Pa`, `Phoenician`, `Rejang`, `Runic`, `Saurashtra`, `Shavian`, `Sinhala`, `Sundanese`, `Syloti_Nagri`, `Syriac`, `Tagalog`, `Tagbanwa`, `Tai_Le`, `Tamil`, `Telugu`, `Thaana`, `Thai`, `Tibetan`, `Tifinagh`, `Ugaritic`, `Vai`, `Yi`];
			// Excludes the Unicode scripts "Common" and "Latin" because that messes with the counting somehow
			// Exclude "Inherited" just to be safe
			const script_exclude_list = [`Common`, `Latin`, `Inherited`];
			const word_count_regex = new RegExp((function () {
				// Switch from using alternations in a group (e.g. (a|b|c)) to a character class (e.g. [abc]) for performance reasons (https://stackoverflow.com/a/27791811/11750206)
				const regex_scripts = script_list.filter((elm) => !script_exclude_list.includes(elm)).map((elm) => `\\p{Script=${elm}}`).join(``);
				const full_regex_str = `[${regex_scripts}]|((?![${regex_scripts}])[\\p{Letter}\\p{Mark}\\p{Number}\\p{Connector_Punctuation}])+`;
				return full_regex_str;
			})(), `gv`);

			// Count the number of words
			// Counting method from: https://stackoverflow.com/a/76673564/11750206, https://stackoverflow.com/a/69486719/11750206, and https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/matchAll
			// Regex substitutions from: https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C33-L30C68
			const word_count_arr = Array.from(chapter_text.replaceAll(/--/g, `—`).replaceAll(/['’‘-]/g, ``).matchAll(word_count_regex), (m) => m[0]);
			const word_count_int = word_count_arr.length;

			// Format the integer number to a thousands separated string (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat)
			const word_count_str = new Intl.NumberFormat({ style: `decimal` }).format(word_count_int);

			return word_count_str;
		})();

		console.log(`Word Count: ${word_count} words`);

		// Create element with the text "Words in Chapter"
		const chap_word_count_text = Object.assign(document.createElement(`dt`), {
			id: `chapter_words_label`,
			className: `chapter_words`,
			textContent: `Words in Chapter:`
		});

		// Create element with the word count of the chapter
		const chap_word_count_num = Object.assign(document.createElement(`dd`), {
			id: `chapter_words_number`,
			className: `chapter_words`,
			textContent: word_count
		});

		// Get the element where the stats are displayed
		const stats_elem = document.querySelector(`#main dl.work.meta.group dl.stats`);

		// Append the created elements after the element containing the total word count of the fic
		stats_elem.querySelector(`dd.words`).after(chap_word_count_text, chap_word_count_num);
	}
})();