Google Docs - Word Count with Options

adds a word counter with options to Google Docs

目前为 2021-06-28 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name Google Docs - Word Count with Options
  3. // @namespace https://zachhardesty.com
  4. // @author Zach Hardesty <zachhardesty7@users.noreply.github.com> (https://github.com/zachhardesty7)
  5. // @description adds a word counter with options to Google Docs
  6. // @copyright 2019, Zach Hardesty (https://zachhardesty.com/)
  7. // @license GPL-3.0-only; http://www.gnu.org/licenses/gpl-3.0.txt
  8. // @version 1.0.1
  9.  
  10. // @homepageURL https://github.com/zachhardesty7/tamper-monkey-scripts-collection/raw/master/google-docs-word-count.user.js
  11. // @homepageURL https://openuserjs.org/scripts/zachhardesty7/Google_Docs_-_Word_Count_(With_Options)
  12. // @supportURL https://github.com/zachhardesty7/tamper-monkey-scripts-collection/issues
  13.  
  14.  
  15. // @match https://docs.google.com/document/*
  16. // ==/UserScript==
  17. // heavy inspiration from:
  18. // https://greasyfork.org/en/scripts/22057-google-docs-wordcount/code
  19. // https://stackoverflow.com/questions/951021/what-is-the-javascript-version-of-sleep
  20.  
  21. // strikingly complex (uses DOM bounding boxes) to get currently selected text:
  22. // may implement only necessary functions to save space, library size: (15.4 KB)
  23. // https://github.com/JensPLarsen/ChromeExtension-GoogleDocsUtil
  24.  
  /**
   * Adds a fixed status bar to the bottom of the page and refreshes it every
   * second with the document's word count and page count.
   *
   * Text matched by the enabled exclusion patterns (brackets, parentheses,
   * curly quotes, works cited / assignment titles) is not counted. When text is
   * selected and SELECTED is enabled, the bar also shows the selection's count.
   */
  const displayCount = () => {
    // words not counted between these when true
    const BRACKETS = true
    const PARENTHESIS = true
    const QUOTES = true
    const MISC = true // skips works cited, personal titles

    const SELECTED = true // if selected text present, word count only counts it

    const display = document.createElement("div")
    display.id = "zh-display"
    display.setAttribute(
      "style",
      `
      position: fixed;
      width: 100%;
      left: 0px;
      bottom: 0px;
      color: rgba(0,0,0,.7);
      height: 15px;
      background-color: #ededee;
      z-index: 100;
      font-family: Arial;
      font-size: 12px;
      padding-top: 5px;
      padding-left: 5px;
      border-top: 1px solid #d9d9d9;
      `
    )
    document.body.append(display)

    // generate regex sources from settings (built once, they never change)
    // must escape \'s in JS
    // in standard regex form:
    // /(“(.(?!“))+”)|(\((.(?!\())+\)|\[(.(?!\[))+\])
    // |Works Cited(\n.*)*|(Unit \d (Primary Source Analysis|Exam: Part \d - #\d+))/g
    const exclusions = []
    if (BRACKETS) exclusions.push("\\[(.(?!\\[))+\\]")
    if (PARENTHESIS) exclusions.push("\\((.(?!\\())+\\)")
    // each flag now guards its own pattern (QUOTES and MISC used to be swapped)
    if (QUOTES) exclusions.push("(“(.(?!“))+”)")
    if (MISC)
      exclusions.push(
        "Works Cited(.|\\n.*)*|(Unit \\d (Primary Source Analysis|Exam: Part \\d( - #\\d+)*))"
      )

    /**
     * @param {string} text - text to filter
     * @returns {string} text with every excluded span replaced by a space
     */
    const stripExcluded = (text) => {
      // normalize non-breaking spaces first so the literal spaces in the
      // patterns above can match them
      let result = text.replace(/\u00A0/g, " ")
      for (const source of exclusions) {
        result = result.replace(new RegExp(source, "g"), " ")
      }
      return result
    }

    /**
     * @param {string} text - filtered text
     * @returns {string[]} the words of the text
     */
    const toWords = (text) =>
      // split on any whitespace run (spaces, tabs, line breaks) and drop the
      // empty strings that leading/trailing whitespace or empty input produce,
      // so empty text counts as 0 words rather than 1
      text.split(/\s+/).filter((word) => word !== "")

    /**
     * update the word count
     */
    function setCount() {
      const paginated = document.querySelector(".kix-paginateddocumentplugin")
      const pageContainer = paginated && paginated.children[1]
      if (!pageContainer) {
        // the expected editor DOM is not present (yet); report it instead of
        // throwing on every tick
        display.textContent = "Word Count: (document text not found)"
        return
      }

      const pages = pageContainer.children
      let body = ""
      for (const page of pages) {
        // pages that are unloaded will appear to have no text
        // add a marker to the cumulative body to indicate that
        // a word count should not be displayed
        if (page.textContent === "") body += " ~~ "
        body += page.textContent
      }

      const words = toWords(stripExcluded(body))
      if (words.includes("~~")) {
        // empty or unloaded pages present
        display.textContent = `Word Count: (scroll to bottom & remove empty pages) | Pages: ${pages.length}`
        return
      }

      // the selection is reconstructed from DOM geometry, so only compute it
      // when it is actually going to be displayed
      const selected = SELECTED ? getGoogleDocument().selectedText : ""
      if (selected.length > 0) {
        const selectedWords = toWords(stripExcluded(selected))
        display.textContent = `Word Count: ${selectedWords.length} of ${words.length} (selected) | Pages: ${pages.length}`
      } else {
        display.textContent = `Word Count: ${words.length} | Pages: ${pages.length}`
      }
    }

    setInterval(setCount, 1000)
  }
  132.  
  133. // #region - Google Docs Utils
  134. // - - - - - - - - - - - - - - - - - - - -
  135. // General
  136. // - - - - - - - - - - - - - - - - - - - -
  137.  
  /**
   * CSS selectors for the Google Docs ("kix") editor elements this script reads.
   * Frozen because it is a shared lookup table that is never meant to change.
   */
  const classNames = Object.freeze({
    paragraph: ".kix-paragraphrenderer",
    line: ".kix-lineview",
    selectionOverlay: ".kix-selection-overlay",
    wordNode: ".kix-wordhtmlgenerator-word-node",
    cursor: ".kix-cursor",
    cursorName: ".kix-cursor-name",
    cursorCaret: ".kix-cursor-caret",
  })
  147.  
  /**
   * Normalizes text read from the Google Docs DOM: removes the zero-width
   * characters \u200B and \u200C and turns every non-breaking space (\u00A0)
   * into a regular space.
   *
   * @param {string} text - raw text taken from a document element
   * @returns {string} clean text
   */
  function cleanDocumentText(text) {
    const zeroWidthCharacters = /[\u200B\u200C]/g
    const nonBreakingSpace = /\u00A0/g
    return text.replace(zeroWidthCharacters, "").replace(nonBreakingSpace, " ")
  }
  163.  
  164. // - - - - - - - - - - - - - - - - - - - -
  165. // Get Google Document
  166. // - - - - - - - - - - - - - - - - - - - -
  167.  
  /**
   * Walks every rendered paragraph, line and word node of the document and
   * collects the line texts, the word nodes, the caret position and the
   * currently selected text.
   *
   * The selection is reconstructed geometrically: every word node whose
   * bounding box overlaps a selection overlay of its line contributes the part
   * of its text that lies under that overlay.
   *
   * @returns {GoogleDoc} google docs document
   */
  function getGoogleDocument() {
    let caretRect
    let caretIndex = 0
    let caretLineIndex = 0
    let caretLine = 0
    const text = []
    const nodes = []
    let lineCount = 0
    let globalIndex = 0
    let selectedText = ""
    let exportedSelectionRect

    if (containsUserCaretDom()) {
      caretRect = getUserCaretDom().getBoundingClientRect()
    }

    for (const paragraphRenderer of document.querySelectorAll(
      classNames.paragraph
    )) {
      for (const lineView of paragraphRenderer.querySelectorAll(
        classNames.line
      )) {
        let lineText = ""
        const wordNodes = lineView.querySelectorAll(classNames.wordNode)
        // measure each overlay once per line instead of twice per word node;
        // the rectangles do not change while the line's words are processed
        const selectionRects = Array.from(
          lineView.querySelectorAll(classNames.selectionOverlay),
          (selectionOverlay) => selectionOverlay.getBoundingClientRect()
        )

        for (const wordNode of wordNodes) {
          const wordRect = wordNode.getBoundingClientRect()

          if (caretRect && doesRectsOverlap(wordRect, caretRect)) {
            const localCaretIndex = getLocalCaretIndex(
              caretRect.left - wordRect.left,
              wordNode,
              lineView
            )
            caretIndex = globalIndex + localCaretIndex
            caretLineIndex = lineText.length + localCaretIndex
            caretLine = lineCount
          }

          const nodeText = cleanDocumentText(wordNode.textContent)
          nodes.push({
            index: globalIndex,
            line: lineCount,
            lineIndex: lineText.length,
            node: wordNode,
            lineElement: lineView,
            text: nodeText,
          })

          for (const selectionRect of selectionRects) {
            // the last overlay seen on a line that has word nodes is exported
            exportedSelectionRect = selectionRect

            if (!doesRectsOverlap(wordRect, selectionRect)) continue

            // both edges of the overlay, relative to the word node's left edge
            const selectionStartIndex = getLocalCaretIndex(
              selectionRect.left - wordRect.left,
              wordNode,
              lineView
            )
            const selectionEndIndex = getLocalCaretIndex(
              selectionRect.left + selectionRect.width - wordRect.left,
              wordNode,
              lineView
            )
            selectedText += nodeText.slice(
              selectionStartIndex,
              selectionEndIndex
            )
          }

          globalIndex += nodeText.length
          lineText += nodeText
        }

        text.push(lineText)
        lineCount += 1
      }
    }

    return {
      nodes,
      text,
      selectedText,
      caret: {
        index: caretIndex,
        lineIndex: caretLineIndex,
        line: caretLine,
      },
      selectionRect: exportedSelectionRect,
    }
  }
  280.  
  // http://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
  /**
   * Tells whether two rectangles intersect. Rectangles that only touch along
   * an edge or corner count as overlapping because the comparisons are
   * inclusive.
   *
   * @param {DOMRect} RectA - first rectangle
   * @param {DOMRect} RectB - second rectangle
   * @returns {boolean} overlapping?
   */
  function doesRectsOverlap(RectA, RectB) {
    const sharesHorizontalSpan =
      RectA.left <= RectB.right && RectA.right >= RectB.left
    const sharesVerticalSpan =
      RectA.top <= RectB.bottom && RectA.bottom >= RectB.top

    return sharesHorizontalSpan && sharesVerticalSpan
  }
  295.  
  // The kix-cursor contain a kix-cursor-name dom, which is only set when it is not the users cursor
  /**
   * Checks whether the page contains the user's own caret, i.e. a cursor
   * element whose name label is missing or empty.
   *
   * @returns {boolean} true when a cursor without a name is present
   */
  function containsUserCaretDom() {
    const cursors = document.querySelectorAll(classNames.cursor)

    for (const cursor of cursors) {
      // querySelector yields null when the cursor has no name element at all;
      // treat that like an empty name instead of dereferencing undefined
      const nameElement = cursor.querySelector(classNames.cursorName)
      if (!nameElement || !nameElement.textContent) return true
    }

    return false
  }
  311.  
  // The kix-cursor contain a kix-cursor-name dom, which is only set when it is not the users cursor
  /**
   * Finds the caret element of the user's own cursor, i.e. the first cursor
   * whose name label is missing or empty and that contains a caret element.
   *
   * @returns {Element} user caret
   * @throws {Error} when no such cursor is found
   */
  function getUserCaretDom() {
    const cursors = document.querySelectorAll(classNames.cursor)

    for (const cursor of cursors) {
      // a non-empty name marks another user's cursor; a missing name element
      // is treated like an empty name instead of dereferencing undefined
      const nameElement = cursor.querySelector(classNames.cursorName)
      if (nameElement && nameElement.textContent) continue

      // only return an actual element so callers never receive undefined
      const caretElement = cursor.querySelector(classNames.cursorCaret)
      if (caretElement) return caretElement
    }

    throw new Error("Could not find the users cursor")
  }
  326.  
  /**
   * Converts a horizontal pixel offset inside a text element into a character
   * index by rendering every character in a temporary span and picking the
   * character edge that is closest to the offset.
   *
   * The returned index is expressed in UTF-16 code units, so it can be passed
   * directly to `String.prototype.slice` on the element's cleaned text even
   * when the text contains characters outside the BMP (e.g. emoji).
   *
   * @param {number} caretX - The x coordinate of the caret, relative to the left edge of the element
   * @param {Element} element - The element that contains the text in which the caret is positioned
   * @param {Element} simulateElement - The element the temporary measuring DOM is appended to (and removed from again)
   * @returns {number} caret index on the cleaned text of the element
   */
  function getLocalCaretIndex(caretX, element, simulateElement) {
    // Creates a span DOM for each letter and remembers the string offsets
    // (in UTF-16 code units) of its left and right edge
    const text = cleanDocumentText(element.textContent)
    const container = document.createElement("div")
    const letters = []
    let offset = 0
    for (const ch of text) {
      const span = document.createElement("span")
      span.textContent = ch
      span.style.cssText = element.style.cssText
      // "pre" = if there are multiple white spaces, they will all be rendered. Default behavior is for them to be collapsed
      span.style.whiteSpace = "pre"
      letters.push({ span, start: offset, end: offset + ch.length })
      offset += ch.length
      container.append(span)
    }
    container.style.whiteSpace = "nowrap"
    simulateElement.append(container)

    try {
      // The caret is usually at the edge of the letter, we find the edge we are closest to.
      let index = 0
      let closestDistance = Infinity
      const containerLeft = container.getBoundingClientRect().left

      for (const { span, start, end } of letters) {
        const rect = span.getBoundingClientRect()
        const left = rect.left - containerLeft
        const right = left + rect.width
        const leftDistance = Math.abs(caretX - left)
        const rightDistance = Math.abs(caretX - right)

        // "<=" lets later edges win ties, matching a left-to-right scan
        if (leftDistance <= closestDistance) {
          index = start
          closestDistance = leftDistance
        }

        if (rightDistance <= closestDistance) {
          index = end
          closestDistance = rightDistance
        }
      }

      return index
    } finally {
      // Clean up, even when measuring fails, so no helper DOM is left behind
      container.remove()
    }
  }
  380.  
  381. displayCount()