AO3: Get Current Chapter Word Count

Counts and displays the number of words in the current chapter

  1. // ==UserScript==
  2. // @name AO3: Get Current Chapter Word Count
  3. // @namespace https://github.com/w4tchdoge
  4. // @version 1.2.1-20240705_232304
  5. // @description Counts and displays the number of words in the current chapter
  6. // @author w4tchdoge
  7. // @homepage https://github.com/w4tchdoge/MISC-UserScripts
  8. // @match *://archiveofourown.org/*chapters/*
  9. // @match *://archiveofourown.org/*works/*
  10. // @exclude *://archiveofourown.org/*works/*/bookmarks
  11. // @exclude *://archiveofourown.org/*works/*/navigate
  12. // @icon https://archiveofourown.org/favicon.ico
  13. // @license AGPL-3.0-or-later
  14. // @history 1.2.1 — Prevent script from running on multi-chapter works which only have 1 chapter published.
  15. // @history 1.2.0 — Replace \w with [\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}] in the regular expression as that is the proper JavaScript equivalent to Ruby's [[:word:]]. Add support for most Unicode scripts supported in regular expressions. Use Array.from() instead of the spread syntax to convert the RegExpStringIterator into a countable array. Add *://archiveofourown.org/*chapters/* as a @match rule so that the script can work on URLs such as https://archiveofourown.org/chapters/141182779. Add *://archiveofourown.org/*works/*/navigate as an @exclude rule so the script does not run on the index page.
  16. // @history 1.1.3 — Get rid of the element containing the words "Chapter Text" using removeChild() so I don't have to use RegEx to get rid of it. Also some miscellaneous cleanup
  17. // @history 1.1.2 — Switch to using Intl.NumberFormat for making the word count thousands separated
  18. // @history 1.1.1 — Modify the match rule so that it matches collections/*/works URLs as well; Add an exclude rule so it doesn't run on works/*/bookmarks pages, as it isn't designed to
  19. // @history 1.1.0 — Implement a counting method that uses an attempted conversion of the Ruby regex code used by AO3 to JavaScript
  20. // ==/UserScript==
  21.  
  /**
   * Entry point of the userscript.
   *
   * When a single chapter of a work with more than one published chapter is being viewed,
   * counts the words of the displayed chapter and inserts a "Words in Chapter:" entry into
   * the work's stats block, directly after the total word count.
   *
   * Returns quietly (instead of throwing) when the page lacks any element the script relies on.
   */
  (function () {
    // Must be an ordinary string literal: a template literal (`use strict`) is only an
    // expression statement and does NOT switch the function into strict mode.
    'use strict';

    /**
     * Read the number of published chapters from the work's stats.
     *
     * @returns {number} the published chapter count, or NaN when it cannot be determined
     */
    function get_published_chapter_count() {
      // Element whose text is "<published>/<total>", e.g. "3/10" or "3/?"
      const chapters_elem = document.querySelector(`dd.stats dd.chapters`);
      if (chapters_elem === null) {
        return Number.NaN;
      }

      // Keep only the digits of the part before the slash so that a count rendered with
      // grouping separators (e.g. "1,234") is not cut short at the separator by parseInt
      const published_digits = chapters_elem.textContent.split(`/`)[0].replace(/\D/g, ``);

      return Number.parseInt(published_digits, 10);
    }

    /**
     * Extract the text of the chapter currently displayed.
     *
     * @returns {string|null} the trimmed chapter text, or null when the chapter container is not on the page
     */
    function get_chapter_text() {
      // Get the HTML element containing the chapter's text content
      const chapter_elem = document.querySelector(`[role="article"]:has(> #work)`);
      if (chapter_elem === null) {
        return null;
      }

      // Work on a deep clone so the live page is left untouched
      const chapter_clone = chapter_elem.cloneNode(true);

      // Remove the child element with the text "Chapter Text" so it is not counted.
      // The selector above guarantees that `#work` is a direct child of the container.
      chapter_clone.querySelector(`:scope > #work`).remove();

      // Only the textContent is of interest
      return chapter_clone.textContent.trim();
    }

    /**
     * Build the regular expression whose matches are the "words" of a text.
     *
     * Attempted conversion of the Ruby regex code AO3 uses to JavaScript by looking at:
     * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/config/config.yml#L453
     * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L26
     * https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C9-L31C95
     * Has not been tested on non-English works, feedback would be appreciated
     *
     * A match is either ONE character belonging to any of the per-character scripts, or a run of
     * letter/mark/number/connector-punctuation characters that belong to none of those scripts.
     *
     * @returns {RegExp} a global regular expression suitable for String.prototype.matchAll()
     */
    function build_word_count_regex() {
      // Add support for most Unicode scripts supported in Regular Expressions
      // Vanilla AO3 compliant script_list:
      // const script_list = [`Han`, `Hiragana`, `Katakana`, `Thai`];
      // Full script_list:
      // NOTE(review): every script that survives the exclusion below is counted one character per
      // word, so e.g. Cyrillic or Greek text is counted per letter — confirm this is intended.
      const script_list = [`Arabic`, `Armenian`, `Balinese`, `Bengali`, `Bopomofo`, `Braille`, `Buginese`, `Buhid`, `Canadian_Aboriginal`, `Carian`, `Cham`, `Cherokee`, `Common`, `Coptic`, `Cuneiform`, `Cypriot`, `Cyrillic`, `Deseret`, `Devanagari`, `Ethiopic`, `Georgian`, `Glagolitic`, `Gothic`, `Greek`, `Gujarati`, `Gurmukhi`, `Han`, `Hangul`, `Hanunoo`, `Hebrew`, `Hiragana`, `Inherited`, `Kannada`, `Katakana`, `Kayah_Li`, `Kharoshthi`, `Khmer`, `Lao`, `Latin`, `Lepcha`, `Limbu`, `Linear_B`, `Lycian`, `Lydian`, `Malayalam`, `Mongolian`, `Myanmar`, `New_Tai_Lue`, `Nko`, `Ogham`, `Ol_Chiki`, `Old_Italic`, `Old_Persian`, `Oriya`, `Osmanya`, `Phags_Pa`, `Phoenician`, `Rejang`, `Runic`, `Saurashtra`, `Shavian`, `Sinhala`, `Sundanese`, `Syloti_Nagri`, `Syriac`, `Tagalog`, `Tagbanwa`, `Tai_Le`, `Tamil`, `Telugu`, `Thaana`, `Thai`, `Tibetan`, `Tifinagh`, `Ugaritic`, `Vai`, `Yi`];
      // Excludes the Unicode scripts "Common" and "Latin" because that messes with the counting somehow
      // Exclude "Inherited" just to be safe
      const script_exclude_list = [`Common`, `Latin`, `Inherited`];

      // Use a character class (e.g. [abc]) rather than alternations in a group (e.g. (a|b|c))
      // for performance reasons (https://stackoverflow.com/a/27791811/11750206)
      const regex_scripts = script_list
        .filter((script) => !script_exclude_list.includes(script))
        .map((script) => `\\p{Script=${script}}`)
        .join(``);

      return new RegExp(`[${regex_scripts}]|((?![${regex_scripts}])[\\p{Letter}\\p{Mark}\\p{Number}\\p{Connector_Punctuation}])+`, `gv`);
    }

    /**
     * Count the words of a text.
     *
     * Counting method from: https://stackoverflow.com/a/76673564/11750206, https://stackoverflow.com/a/69486719/11750206,
     * and https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/matchAll
     * Substitutions from: https://github.com/otwcode/otwarchive/blob/943f585818005be8df269d84ca454af478150e75/lib/word_counter.rb#L30C33-L30C68
     *
     * @param {string} text - the text to count the words of
     * @returns {number} the number of matches of the word-count regex in the normalised text
     */
    function count_words(text) {
      // A double hyphen becomes an em dash (which separates words); apostrophes and single
      // hyphens are dropped so that e.g. "don't" and "well-known" each count as one word
      const normalised_text = text.replaceAll(`--`, `—`).replaceAll(/['’‘-]/g, ``);

      // Only the number of matches matters, so the match objects are not unpacked
      return Array.from(normalised_text.matchAll(build_word_count_regex())).length;
    }

    // Execute script only when a single chapter is being viewed; checked first as it needs no DOM access
    if (!window.location.pathname.toLowerCase().includes(`chapters`)) {
      return;
    }

    // ... and only on multi-chapter works which have more than one chapter published
    // (written as a negated `>` so that a NaN count also bails out)
    const curr_chp_cnt = get_published_chapter_count();
    if (!(curr_chp_cnt > 1)) {
      return;
    }

    // Get the Chapter Text
    const chapter_text = get_chapter_text();
    if (chapter_text === null) {
      return;
    }

    // Counting the number of words and formatting the result as a thousands separated string.
    // `locales` is the FIRST parameter of Intl.NumberFormat, so pass undefined to use the default
    // locale and hand the options over in second position
    // (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat)
    const word_count = new Intl.NumberFormat(undefined, { style: `decimal` }).format(count_words(chapter_text));

    console.log(`Word Count: ${word_count} words`);

    // Get the element where the stats are displayed, and within it the total word count of the fic
    const stats_elem = document.querySelector(`#main dl.work.meta.group dl.stats`);
    const total_words_elem = stats_elem?.querySelector(`dd.words`) ?? null;
    if (total_words_elem === null) {
      // Nowhere to display the result
      return;
    }

    // Create element with the text "Words in Chapter"
    const chap_word_count_text = Object.assign(document.createElement(`dt`), {
      id: `chapter_words_label`,
      className: `chapter_words`,
      textContent: `Words in Chapter:`
    });

    // Create element with the word count of the chapter
    const chap_word_count_num = Object.assign(document.createElement(`dd`), {
      id: `chapter_words_number`,
      className: `chapter_words`,
      textContent: word_count
    });

    // Append the created elements after the element containing the total word count of the fic
    total_words_elem.after(chap_word_count_text, chap_word_count_num);
  })();