// ==UserScript==
// @name AO3: Get Current Chapter Word Count
// @namespace https://github.com/w4tchdoge
// @version 1.2.1-20240705_232304
// @description Counts and displays the number of words in the current chapter
// @author w4tchdoge
// @homepage https://github.com/w4tchdoge/MISC-UserScripts
// @match *://archiveofourown.org/*chapters/*
// @match *://archiveofourown.org/*works/*
// @exclude *://archiveofourown.org/*works/*/bookmarks
// @exclude *://archiveofourown.org/*works/*/navigate
// @icon https://archiveofourown.org/favicon.ico
// @license AGPL-3.0-or-later
// @history 1.2.1 — Prevent script from running on multi-chapter works which only have 1 chapter published.
// @history 1.2.0 — Replace \w with [\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}] in the regular expression as that is the proper JavaScript equivalent to Ruby's [[:word:]]. Add support for most Unicode scripts supported in regular expressions. Use Array.from() instead of the spread syntax to convert the RegExpStringIterator into a countable array. Add *://archiveofourown.org/*chapters/* as a @match rule so that the script can work on URLs such as https://archiveofourown.org/chapters/141182779. Add *://archiveofourown.org/*works/*/navigate as an @exclude rule so the script does not run on the index page.
// @history 1.1.3 — Get rid of the element containing the words "Chapter Text" using removeChild() so I don't have to use RegEx to get rid of it. Also some miscellaneous cleanup
// @history 1.1.2 — Switch to using Intl.NumberFormat for making the word count thousands separated
// @history 1.1.1 — Modify the match rule so that it matches collections/*/works URLs as well; Add an exclude rule so it doesn't work on works/*/bookmarks pages as it isn't designed to
// @history 1.1.0 — Implement a counting method that uses an attempted conversion of the Ruby regex code used by AO3 to JavaScript
// ==/UserScript==

/**
 * Userscript entry point.
 *
 * When a single chapter of a work with more than one published chapter is
 * being viewed, counts the words in that chapter and inserts a
 * "Words in Chapter:" entry into the work's stats block, right after the
 * total word count. Does nothing (and does not throw) on any other page.
 */
(function () {
	// Must be an ordinary string literal: a template literal is not recognised
	// as a directive, so `use strict` in backticks would NOT enable strict mode.
	'use strict';

	/**
	 * Read the number of published chapters from the work's stats block.
	 *
	 * The element's text is split on "/" and the part before it is parsed.
	 *
	 * @returns {number} The published chapter count, or NaN when the stats
	 *                   element is absent or holds no digits.
	 */
	function get_published_chapter_count() {
		const chapters_elem = document.querySelector(`dd.stats dd.chapters`);
		if (chapters_elem === null) {
			return Number.NaN;
		}

		// Keep digits only, in case the site renders the number with a thousands
		// separator (parseInt would otherwise stop at the first non-digit)
		const published_str = chapters_elem.textContent.split(`/`).at(0).replaceAll(/\D/g, ``);
		return Number.parseInt(published_str, 10);
	}

	/**
	 * Extract the text of the chapter currently being displayed.
	 *
	 * @returns {string|null} The trimmed chapter text, or null when the chapter
	 *                        container cannot be found.
	 */
	function get_chapter_text() {
		// Get the HTML element containing the chapter's text content
		const article_elem = document.querySelector(`[role="article"]:has(> #work)`);
		if (article_elem === null) {
			return null;
		}

		// Work on a detached deep copy so the page itself is left untouched
		const article_clone = article_elem.cloneNode(true);
		// Remove the child element with the text "Chapter Text"
		// (the :has(> #work) selector above guarantees it exists)
		article_clone.querySelector(`#work`).remove();

		// Return only the textContent of the HTML element
		return article_clone.textContent.trim();
	}

	/**
	 * Build the regular expression used to find "words".
	 *
	 * Attempted conversion of the Ruby regex code AO3 uses to JavaScript by looking at:
	 * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/config/config.yml#L453
	 * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L26
	 * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C9-L31C95
	 * Has not been tested on non-English works, feedback would be appreciated
	 *
	 * A match is either one single character from a "character counted" script,
	 * or a run of word characters (letters, marks, numbers, connector
	 * punctuation) that do not belong to any of those scripts.
	 *
	 * @returns {RegExp} A global regular expression using the `v` (unicodeSets) flag.
	 */
	function build_word_count_regex() {
		// Add support for most Unicode scripts supported in Regular Expressions
		// Vanilla AO3 compliant script_list would be: [`Han`, `Hiragana`, `Katakana`, `Thai`]
		// Full script_list:
		const script_list = [`Arabic`, `Armenian`, `Balinese`, `Bengali`, `Bopomofo`, `Braille`, `Buginese`, `Buhid`, `Canadian_Aboriginal`, `Carian`, `Cham`, `Cherokee`, `Common`, `Coptic`, `Cuneiform`, `Cypriot`, `Cyrillic`, `Deseret`, `Devanagari`, `Ethiopic`, `Georgian`, `Glagolitic`, `Gothic`, `Greek`, `Gujarati`, `Gurmukhi`, `Han`, `Hangul`, `Hanunoo`, `Hebrew`, `Hiragana`, `Inherited`, `Kannada`, `Katakana`, `Kayah_Li`, `Kharoshthi`, `Khmer`, `Lao`, `Latin`, `Lepcha`, `Limbu`, `Linear_B`, `Lycian`, `Lydian`, `Malayalam`, `Mongolian`, `Myanmar`, `New_Tai_Lue`, `Nko`, `Ogham`, `Ol_Chiki`, `Old_Italic`, `Old_Persian`, `Oriya`, `Osmanya`, `Phags_Pa`, `Phoenician`, `Rejang`, `Runic`, `Saurashtra`, `Shavian`, `Sinhala`, `Sundanese`, `Syloti_Nagri`, `Syriac`, `Tagalog`, `Tagbanwa`, `Tai_Le`, `Tamil`, `Telugu`, `Thaana`, `Thai`, `Tibetan`, `Tifinagh`, `Ugaritic`, `Vai`, `Yi`];
		// Excludes the Unicode scripts "Common" and "Latin" because that messes with the counting somehow
		// Exclude "Inherited" just to be safe
		const script_exclude_list = [`Common`, `Latin`, `Inherited`];

		// Use a character class (e.g. [abc]) rather than alternations in a group (e.g. (a|b|c))
		// for performance reasons (https://stackoverflow.com/a/27791811/11750206)
		const regex_scripts = script_list
			.filter((script) => !script_exclude_list.includes(script))
			.map((script) => `\\p{Script=${script}}`)
			.join(``);
		const full_regex_str = `[${regex_scripts}]|((?![${regex_scripts}])[\\p{Letter}\\p{Mark}\\p{Number}\\p{Connector_Punctuation}])+`;

		return new RegExp(full_regex_str, `gv`);
	}

	/**
	 * Count the words in a piece of text.
	 *
	 * Counting method from: https://stackoverflow.com/a/76673564/11750206, https://stackoverflow.com/a/69486719/11750206, and https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/matchAll
	 * Regex substitutions from: https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C33-L30C68
	 *
	 * @param {string} text - The text to count words in.
	 * @returns {number} The number of matches of the word regex in the normalised text.
	 */
	function count_words(text) {
		// Double hyphens become an em dash (which separates words), while
		// apostrophes and single hyphens are dropped so they join words
		const normalised_text = text.replaceAll(`--`, `—`).replaceAll(/['’‘-]/g, ``);
		return Array.from(normalised_text.matchAll(build_word_count_regex())).length;
	}

	// Execute script only when a single chapter is being viewed ...
	if (!window.location.pathname.toLowerCase().includes(`chapters`)) {
		return;
	}

	// ... and only on works which have more than one chapter published
	// (a NaN count, i.e. no usable stats element, also fails this comparison)
	const curr_chp_cnt = get_published_chapter_count();
	if (!(curr_chp_cnt > 1)) {
		return;
	}

	// Get the Chapter Text
	const chapter_text = get_chapter_text();
	if (chapter_text === null) {
		return;
	}

	// Counting and formatting the number of words
	// Format the integer number to a thousands separated string (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat)
	// The first argument is the locale list; `undefined` selects the default locale.
	// The options object has to be passed as the SECOND argument.
	const word_count = new Intl.NumberFormat(undefined, { style: `decimal` }).format(count_words(chapter_text));

	console.log(`Word Count: ${word_count} words`);

	// Get the element where the stats are displayed, then the total word count entry inside it
	const stats_elem = document.querySelector(`#main dl.work.meta.group dl.stats`);
	const total_words_elem = stats_elem?.querySelector(`dd.words`);
	if (!total_words_elem) {
		// Nowhere to put the result; the count has still been logged above
		return;
	}

	// Create element with the text "Words in Chapter"
	const chap_word_count_text = Object.assign(document.createElement(`dt`), {
		id: `chapter_words_label`,
		className: `chapter_words`,
		textContent: `Words in Chapter:`
	});

	// Create element with the word count of the chapter
	const chap_word_count_num = Object.assign(document.createElement(`dd`), {
		id: `chapter_words_number`,
		className: `chapter_words`,
		textContent: word_count
	});

	// Append the created elements after the element containing the total word count of the fic
	total_words_elem.after(chap_word_count_text, chap_word_count_num);
})();