Google Docs - Word Count with Options

Adds a word counter with options to Google Docs. (NOTE: Unfortunately, this is currently broken. Fortunately, Google has basically implemented this feature itself: in the top menu, click Tools > Word count and check "Display word count while typing". This UserScript never worked that well, especially with longer documents, and Google has changed their code again in a way that makes this even more difficult to fix (aggressive lazy-loading of pages). I likely won't fix this, but I'm leaving it up for now.)

  1. // ==UserScript==
  2. // @name Google Docs - Word Count with Options
  3. // @namespace https://zachhardesty.com
  4. // @author Zach Hardesty <zachhardesty7@users.noreply.github.com> (https://github.com/zachhardesty7)
  5. // @description adds a word counter with options to Google Docs (NOTE: Unfortunately, this is currently broken. Fortunately, they basically implemented this: in the top menu click Tools > Word count > check "Display word count while typing". This UserScript never worked that well, especially with longer document and Google has changed their code again to make this even more difficult to fix (aggressive lazy-loading pages). I likely won't fix this, but I'm leaving it up for now.)
  6. // @copyright 2019, Zach Hardesty (https://zachhardesty.com/)
  7. // @license GPL-3.0-only; http://www.gnu.org/licenses/gpl-3.0.txt
  8. // @version 1.0.1
  9.  
  10. // @homepageURL https://github.com/zachhardesty7/tamper-monkey-scripts-collection/raw/master/google-docs-word-count.user.js
  11. // @homepageURL https://openuserjs.org/scripts/zachhardesty7/Google_Docs_-_Word_Count_(With_Options)
  12. // @supportURL https://github.com/zachhardesty7/tamper-monkey-scripts-collection/issues
  13.  
  14.  
  15. // @match https://docs.google.com/document/*
  16. // ==/UserScript==
  17. // heavy inspiration from:
  18. // https://greasyfork.org/en/scripts/22057-google-docs-wordcount/code
  19. // https://stackoverflow.com/questions/951021/what-is-the-javascript-version-of-sleep
  20.  
  21. // strikingly complex (uses DOM bounding boxes) to get currently selected text:
  22. // may implement only necessary functions to save space, library size: (15.4 KB)
  23. // https://github.com/JensPLarsen/ChromeExtension-GoogleDocsUtil
  24.  
  /**
   * Adds a fixed status bar to the bottom of the page and refreshes it once per
   * second with the document's word count and page count.
   *
   * Text matched by the enabled exclusion patterns (brackets, parentheses,
   * curly quotes, works cited / unit titles) is not counted. When text is
   * selected and `SELECTED` is on, the bar shows "<selected> of <total>".
   */
  const displayCount = () => {
    // words are not counted between these when true
    const BRACKETS = true
    const PARENTHESIS = true
    const QUOTES = true
    const MISC = true // skips works cited, personal titles

    const SELECTED = true // if selected text present, word count only counts it

    // Build the exclusion patterns from the settings (\'s must be escaped in JS
    // strings). In standard regex form the full set is:
    // /(“(.(?!“))+”)|(\((.(?!\())+\)|\[(.(?!\[))+\])
    // |Works Cited(\n.*)*|(Unit \d (Primary Source Analysis|Exam: Part \d - #\d+))/g
    const sources = []
    if (BRACKETS) sources.push("\\[(.(?!\\[))+\\]")
    if (PARENTHESIS) sources.push("\\((.(?!\\())+\\)")
    // MISC owns the works cited / unit title pattern and QUOTES owns the curly
    // quote pattern; the two flags used to be wired to each other's pattern
    if (MISC)
      sources.push(
        "Works Cited(.|\\n.*)*|(Unit \\d (Primary Source Analysis|Exam: Part \\d( - #\\d+)*))"
      )
    if (QUOTES) sources.push("(“(.(?!“))+”)")

    // compiled once here instead of on every refresh; String.prototype.replace
    // resets lastIndex, so sharing the global regexes between calls is safe
    const exclusions = sources.map((source) => new RegExp(source, "g"))

    /**
     * @param {string} text - raw document or selection text
     * @returns {number} number of words left after removing excluded spans
     */
    const countWords = (text) => {
      let remaining = text.replace(/\u00A0/g, " ")
      for (const exclusion of exclusions) {
        remaining = remaining.replace(exclusion, " ")
      }
      // dropping empty tokens makes blank text count as 0 words rather than 1
      return remaining.split(/\s+/).filter((word) => word !== "").length
    }

    const display = document.createElement("div")
    display.id = "zh-display"
    display.setAttribute(
      "style",
      `
      position: fixed;
      width: 100%;
      left: 0px;
      bottom: 0px;
      color: rgba(0,0,0,.7);
      height: 15px;
      background-color: #ededee;
      z-index: 100;
      font-family: Arial;
      font-size: 12px;
      padding-top: 5px;
      padding-left: 5px;
      border-top: 1px solid #d9d9d9;
      `
    )
    document.querySelector("body").append(display)

    /**
     * update the word count
     */
    function setCount() {
      const editor = document.querySelector(".kix-paginateddocumentplugin")
      const pageHost = editor && editor.children[1]
      // the editor markup is not there (yet); try again on the next tick
      if (!pageHost) return

      const pages = pageHost.children
      let body = ""
      let hasUnloadedPage = false
      for (const page of pages) {
        // pages that are unloaded appear to have no text; remember that in a
        // flag (not an in-band marker) so a real "~~" in the text is harmless
        if (page.textContent === "") hasUnloadedPage = true
        body += page.textContent
      }

      if (hasUnloadedPage) {
        // empty or unloaded pages present
        display.textContent = `Word Count: (scroll to bottom & remove empty pages) | Pages: ${pages.length}`
        return
      }

      const total = countWords(body)
      // reading the selection measures the DOM, so only do it when it is used
      const selectedText = SELECTED ? getGoogleDocument().selectedText : ""

      display.textContent =
        selectedText.length > 0
          ? `Word Count: ${countWords(selectedText)} of ${total} (selected) | Pages: ${pages.length}`
          : `Word Count: ${total} | Pages: ${pages.length}`
    }

    setInterval(setCount, 1000)
  }
  132.  
  133. // #region - Google Docs Utils
  134. // - - - - - - - - - - - - - - - - - - - -
  135. // General
  136. // - - - - - - - - - - - - - - - - - - - -
  137.  
  /**
   * CSS selectors (all `.kix-*` classes) used to locate parts of the Google
   * Docs editor DOM. Frozen so that no code can accidentally repoint a selector
   * that every lookup in this file shares.
   */
  const classNames = Object.freeze({
    paragraph: ".kix-paragraphrenderer",
    line: ".kix-lineview",
    selectionOverlay: ".kix-selection-overlay",
    wordNode: ".kix-wordhtmlgenerator-word-node",
    cursor: ".kix-cursor",
    cursorName: ".kix-cursor-name",
    cursorCaret: ".kix-cursor-caret",
  })
  147.  
  /**
   * Normalizes text read from the Google Docs DOM. Google Docs adds \u200B,
   * \u200C (&zwnj) and non-breaking spaces so the browser renders the text
   * correctly; this removes the two zero-width characters and turns every
   * non-breaking space (\u00A0) into a regular space.
   *
   * @param {string} text - raw text taken from the document
   * @returns {string} clean text
   */
  function cleanDocumentText(text) {
    const withoutZeroWidth = text.replace(/[\u200B\u200C]/g, "")
    return withoutZeroWidth.replace(/\u00A0/g, " ")
  }
  163.  
  164. // - - - - - - - - - - - - - - - - - - - -
  165. // Get Google Document
  166. // - - - - - - - - - - - - - - - - - - - -
  167.  
  /**
   * @typedef {object} GoogleDocNode
   * @property {number} index - offset of the node's text within the whole document text
   * @property {number} line - index of the line the node belongs to
   * @property {number} lineIndex - offset of the node's text within its line
   * @property {Element} node - the word node element
   * @property {Element} lineElement - the line element containing the node
   * @property {string} text - cleaned text of the node
   */

  /**
   * @typedef {object} GoogleDoc
   * @property {GoogleDocNode[]} nodes - every word node, in document order
   * @property {string[]} text - cleaned text of every line
   * @property {string} selectedText - text covered by the selection overlays
   * @property {{index: number, lineIndex: number, line: number}} caret - caret
   * position (all zeros when the user's caret was not found over any word)
   * @property {DOMRect | undefined} selectionRect - rectangle of the last
   * selection overlay seen on a line that has word nodes, if any
   */

  /**
   * Walks every paragraph, line and word node of the rendered document and
   * collects the text, the caret position and the currently selected text.
   *
   * Positions are found geometrically: a word holds the caret or part of the
   * selection when its bounding box overlaps the caret or a selection overlay.
   *
   * @returns {GoogleDoc} google docs document
   */
  function getGoogleDocument() {
    let caretRect
    let caretIndex = 0
    let caretLineIndex = 0
    let caretLine = 0
    const text = []
    const nodes = []
    let lineCount = 0
    let globalIndex = 0
    let selectedText = ""
    let exportedSelectionRect

    if (containsUserCaretDom()) {
      caretRect = getUserCaretDom().getBoundingClientRect()
    }

    for (const paragraph of document.querySelectorAll(classNames.paragraph)) {
      for (const lineView of paragraph.querySelectorAll(classNames.line)) {
        let lineText = ""
        const wordNodes = lineView.querySelectorAll(classNames.wordNode)

        // Measure each overlay once per line rather than twice per word node.
        // getLocalCaretIndex removes its temporary element before returning,
        // so the overlay geometry cannot change while the line is processed.
        const selectionRects = Array.from(
          lineView.querySelectorAll(classNames.selectionOverlay),
          (overlay) => overlay.getBoundingClientRect()
        )
        if (wordNodes.length > 0 && selectionRects.length > 0) {
          exportedSelectionRect = selectionRects[selectionRects.length - 1]
        }

        for (const wordNode of wordNodes) {
          const wordRect = wordNode.getBoundingClientRect()

          if (caretRect && doesRectsOverlap(wordRect, caretRect)) {
            const localCaretIndex = getLocalCaretIndex(
              caretRect.left - wordRect.left,
              wordNode,
              lineView
            )
            caretIndex = globalIndex + localCaretIndex
            caretLineIndex = lineText.length + localCaretIndex
            caretLine = lineCount
          }

          const nodeText = cleanDocumentText(wordNode.textContent)
          nodes.push({
            index: globalIndex,
            line: lineCount,
            lineIndex: lineText.length,
            node: wordNode,
            lineElement: lineView,
            text: nodeText,
          })

          for (const selectionRect of selectionRects) {
            if (!doesRectsOverlap(wordRect, selectionRect)) continue

            // translate the overlay's left and right edges into text offsets
            const selectionStartIndex = getLocalCaretIndex(
              selectionRect.left - wordRect.left,
              wordNode,
              lineView
            )
            const selectionEndIndex = getLocalCaretIndex(
              selectionRect.left + selectionRect.width - wordRect.left,
              wordNode,
              lineView
            )
            selectedText += nodeText.slice(selectionStartIndex, selectionEndIndex)
          }

          globalIndex += nodeText.length
          lineText += nodeText
        }

        text.push(lineText)
        lineCount += 1
      }
    }

    return {
      nodes,
      text,
      selectedText,
      caret: {
        index: caretIndex,
        lineIndex: caretLineIndex,
        line: caretLine,
      },
      selectionRect: exportedSelectionRect,
    }
  }
  280.  
  // Based on:
  // http://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
  /**
   * Tests whether two rectangles intersect. Every comparison is inclusive, so
   * rectangles that only share an edge or a corner count as overlapping.
   *
   * @param {DOMRect} RectA - first rectangle
   * @param {DOMRect} RectB - second rectangle
   * @returns {boolean} overlapping?
   */
  function doesRectsOverlap(RectA, RectB) {
    const sharesHorizontalSpan =
      RectA.left <= RectB.right && RectA.right >= RectB.left
    const sharesVerticalSpan =
      RectA.top <= RectB.bottom && RectA.bottom >= RectB.top

    return sharesHorizontalSpan && sharesVerticalSpan
  }
  295.  
  // The kix-cursor contains a kix-cursor-name dom, whose text is only set when
  // it is not the user's cursor
  /**
   * Reports whether any cursor on the page has an empty name label, which is
   * how the user's own caret is told apart from collaborators' cursors.
   *
   * A cursor without a name element cannot be classified and is skipped
   * instead of raising a TypeError.
   *
   * @returns {boolean} true when a cursor with an empty name label exists
   */
  function containsUserCaretDom() {
    const carets = document.querySelectorAll(classNames.cursor)

    return Array.from(carets).some((caret) => {
      const nameElement = caret.querySelector(classNames.cursorName)
      return Boolean(nameElement) && !nameElement.textContent
    })
  }
  311.  
  // The kix-cursor contains a kix-cursor-name dom, whose text is only set when
  // it is not the user's cursor
  /**
   * Finds the caret element of the first cursor whose name label is empty.
   *
   * Cursors that lack a name element or a caret element are skipped, so the
   * function either returns a real element or throws the error below; it no
   * longer fails with a TypeError or returns `undefined`.
   *
   * @returns {Element} user caret
   * @throws {Error} when no cursor with an empty name label and a caret exists
   */
  function getUserCaretDom() {
    const carets = document.querySelectorAll(classNames.cursor)

    for (const caret of carets) {
      const nameElement = caret.querySelector(classNames.cursorName)
      if (!nameElement || nameElement.textContent) continue

      const caretElement = caret.querySelector(classNames.cursorCaret)
      if (caretElement) return caretElement
    }

    throw new Error("Could not find the users cursor")
  }
  326.  
  /**
   * Converts a horizontal pixel offset inside a word node into a text offset.
   *
   * The node's text is re-rendered as one span per character inside a
   * temporary container, and the character edge closest to `caretX` wins. The
   * result is expressed in UTF-16 code units so it can be passed directly to
   * `String.prototype.slice`, even when the text contains characters outside
   * the Basic Multilingual Plane (e.g. emoji).
   *
   * @param {number} caretX - x coordinate of the caret, relative to the left edge of the element
   * @param {Element} element - the element containing the text the caret is in
   * @param {Element} simulateElement - parent for the temporary measuring DOM; the container is appended to it and removed again before returning
   * @returns {number} caret index into the cleaned text of the element
   */
  function getLocalCaretIndex(caretX, element, simulateElement) {
    const text = cleanDocumentText(element.textContent)
    const container = document.createElement("div")
    container.style.whiteSpace = "nowrap"

    // One span per character. for...of walks code points, so the UTF-16 offsets
    // on either side of each character are tracked explicitly.
    const letters = []
    let offset = 0
    for (const ch of text) {
      const span = document.createElement("span")
      span.textContent = ch
      span.style.cssText = element.style.cssText
      // "pre" = if there are multiple white spaces, they will all be rendered.
      // Default behavior is for them to be collapsed
      span.style.whiteSpace = "pre"
      container.append(span)
      letters.push({ span, start: offset, end: offset + ch.length })
      offset += ch.length
    }

    simulateElement.append(container)
    try {
      // The caret is usually at the edge of a letter; find the closest edge.
      // On ties the later edge wins.
      const containerLeft = container.getBoundingClientRect().left
      let index = 0
      let closest = Number.POSITIVE_INFINITY

      for (const { span, start, end } of letters) {
        const rect = span.getBoundingClientRect()
        const left = rect.left - containerLeft
        const right = left + rect.width
        const leftDistance = Math.abs(caretX - left)
        const rightDistance = Math.abs(caretX - right)

        if (leftDistance <= closest) {
          index = start
          closest = leftDistance
        }

        if (rightDistance <= closest) {
          index = end
          closest = rightDistance
        }
      }

      return index
    } finally {
      // always take the measuring DOM out again, even if measuring threw
      container.remove()
    }
  }
  380.  
  // Entry point: create the status bar and schedule the once-per-second refresh.
  381. displayCount()