Google Docs - Word Count with Options

adds a word counter with options to Google Docs

当前为 2021-01-04 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Google Docs - Word Count with Options
  3. // @namespace https://zachhardesty.com
  4. // @author Zach Hardesty <zachhardesty7@users.noreply.github.com> (https://github.com/zachhardesty7)
  5. // @description adds a word counter with options to Google Docs
  6. // @copyright 2019, Zach Hardesty (https://zachhardesty.com/)
  7. // @license GPL-3.0-only; http://www.gnu.org/licenses/gpl-3.0.txt
  8. // @version 1.0.0
  9.  
  10. // @homepageURL https://github.com/zachhardesty7/tamper-monkey-scripts-collection/raw/master/google-docs-word-count.user.js
  11. // @homepageURL https://openuserjs.org/scripts/zachhardesty7/Google_Docs_-_Word_Count_(With_Options)
  12. // @supportURL https://openuserjs.org/scripts/zachhardesty7/Google_Docs_-_Word_Count_(With_Options)/issues
  13.  
  14.  
  15. // @match https://docs.google.com/document/*
  16. // ==/UserScript==
  17. // heavy inspiration from:
  18. // https://greasyfork.org/en/scripts/22057-google-docs-wordcount/code
  19. // https://stackoverflow.com/questions/951021/what-is-the-javascript-version-of-sleep
  20.  
  21. // strikingly complex (uses DOM bounding boxes) to get currently selected text:
  22. // may implement only necessary functions to save space, library size: (15.4 KB)
  23. // https://github.com/JensPLarsen/ChromeExtension-GoogleDocsUtil
  24.  
  25. const displayCount = () => {
  26. // words not counted between these when true
  27. const BRACKETS = true
  28. const PARENTHESIS = true
  29. const QUOTES = true
  30. const MISC = true // skips works cited, personal titles
  31.  
  32. const SELECTED = true // if selected text present, word count only counts it
  33.  
  34. const display = document.createElement("div")
  35. display.id = "zh-display"
  36. display.setAttribute(
  37. "style",
  38. `
  39. position: fixed;
  40. width: 100%;
  41. left: 0px;
  42. bottom: 0px;
  43. color: rgba(0,0,0,.7);
  44. height: 15px;
  45. background-color: #ededee;
  46. z-index: 100;
  47. font-family: Arial;
  48. font-size: 12px;
  49. padding-top: 5px;
  50. padding-left: 5px;
  51. border-top: 1px solid #d9d9d9;
  52. `
  53. )
  54. document.querySelector("body").append(display)
  55.  
  56. /**
  57. * update the word count
  58. */
  59. async function setCount() {
  60. const doc = getGoogleDocument()
  61. let selected = doc.selectedText
  62. console.log("selected", selected)
  63.  
  64. const pages = document.querySelector(".kix-paginateddocumentplugin")
  65. .children[1].children
  66. let body = ""
  67. for (const page of pages) {
  68. // pages that are unloaded will appear to have no text
  69. // add a marker to the cumulative body to indicate that
  70. // a word count should not be displayed
  71. if (page.textContent === "") body += " ~~ "
  72. body += page.textContent
  73. }
  74.  
  75. // clean extra spaces
  76. body = body.replace(/\u00A0/g, " ").trim()
  77.  
  78. // generate regex from settings
  79. // must escape \'s in JS
  80. // in standard regex form:
  81. // /(“(.(?!“))+”)|(\((.(?!\())+\)|\[(.(?!\[))+\])
  82. // |Works Cited(\n.*)*|(Unit \d (Primary Source Analysis|Exam: Part \d - #\d+))/g
  83. const regex = []
  84. if (BRACKETS) regex.push("\\[(.(?!\\[))+\\]")
  85. if (PARENTHESIS) regex.push("\\((.(?!\\())+\\)")
  86. if (QUOTES)
  87. regex.push(
  88. "Works Cited(.|\\n.*)*|(Unit \\d (Primary Source Analysis|Exam: Part \\d( - #\\d+)*))"
  89. )
  90. if (MISC) regex.push("(“(.(?!“))+”)")
  91.  
  92. // apply regex filtering to body
  93. regex.forEach((reg) => {
  94. selected = selected.replace(new RegExp(reg, "g"), " ")
  95. })
  96.  
  97. // apply regex filtering to selected text if necessary
  98. let filtered = body
  99. regex.forEach((reg) => {
  100. filtered = filtered.replace(new RegExp(reg, "g"), " ")
  101. })
  102.  
  103. // remove extra spaces and line breaks and get counts
  104. const words = filtered
  105. .trim()
  106. .replace(/\u00A0/g, " ")
  107. .replace(/ {2,}/g, " ")
  108. .split(" ")
  109. if (words.includes("~~")) {
  110. // empty or unloaded pages present
  111. document.querySelector(
  112. "#zh-display"
  113. ).textContent = `Word Count: (scroll to bottom & remove empty pages) | Pages: ${pages.length}`
  114. } else if (selected.length > 0 && SELECTED) {
  115. selected = selected
  116. .trim()
  117. .replace(/\u00A0/g, " ")
  118. .replace(/ {2,}/g, " ")
  119. console.log("selected", selected)
  120. document.querySelector("#zh-display").textContent = `Word Count: ${
  121. selected.split(" ").length
  122. } of ${words.length} (selected) | Pages: ${pages.length}`
  123. } else {
  124. document.querySelector(
  125. "#zh-display"
  126. ).textContent = `Word Count: ${words.length} | Pages: ${pages.length}`
  127. }
  128. }
  129.  
  130. setInterval(setCount, 1000)
  131. }
  132.  
  133. // #region - Google Docs Utils
  134. // - - - - - - - - - - - - - - - - - - - -
  135. // General
  136. // - - - - - - - - - - - - - - - - - - - -
  137.  
// CSS selectors (each already prefixed with ".") for the Google Docs editor
// elements that the utilities below look up.
const classNames = {
  paragraph: ".kix-paragraphrenderer",
  line: ".kix-lineview",
  selectionOverlay: ".kix-selection-overlay",
  wordNode: ".kix-wordhtmlgenerator-word-node",
  cursor: ".kix-cursor",
  cursorName: ".kix-cursor-name",
  cursorCaret: ".kix-cursor-caret",
}
  147.  
  148. /**
  149. * Google Docs like to add \u200B, \u200C (&zwnj) and non breaking spaces to make sure
  150. * the browser shows the text correct. When getting the text, we would prefer to get
  151. * clean text.
  152. *
  153. * @param {string} text - ?
  154. * @returns {string} clean text
  155. */
  156. function cleanDocumentText(text) {
  157. let cleanedText = text.replace(/[\u200B\u200C]/g, "")
  158. const nonBreakingSpaces = String.fromCharCode(160)
  159. const regex = new RegExp(nonBreakingSpaces, "g")
  160. cleanedText = cleanedText.replace(regex, " ")
  161. return cleanedText
  162. }
  163.  
  164. // - - - - - - - - - - - - - - - - - - - -
  165. // Get Google Document
  166. // - - - - - - - - - - - - - - - - - - - -
  167.  
  168. /**
  169. * Finds all the text and the caret position in the .
  170. *
  171. * @returns {GoogleDoc} google docs document
  172. */
  173. function getGoogleDocument() {
  174. let caret, caretRect
  175. let caretIndex = 0
  176. let caretLineIndex = 0
  177. let caretLine = 0
  178. const text = []
  179. const nodes = []
  180. let lineCount = 0
  181. let globalIndex = 0
  182. let selectedText = ""
  183. let exportedSelectionRect
  184. const paragraphRenderers = document.querySelectorAll(classNames.paragraph)
  185.  
  186. if (containsUserCaretDom()) {
  187. caret = getUserCaretDom()
  188. caretRect = caret.getBoundingClientRect()
  189. }
  190.  
  191. for (const paragraphRenderer of paragraphRenderers) {
  192. const lineViews = paragraphRenderer.querySelectorAll(classNames.line)
  193. for (const lineView of lineViews) {
  194. let lineText = ""
  195. const selectionOverlays = lineView.querySelectorAll(
  196. classNames.selectionOverlay
  197. )
  198. const wordhtmlgeneratorWordNodes = lineView.querySelectorAll(
  199. classNames.wordNode
  200. )
  201. for (const wordhtmlgeneratorWordNode of wordhtmlgeneratorWordNodes) {
  202. const wordhtmlgeneratorWordNodeRect = wordhtmlgeneratorWordNode.getBoundingClientRect()
  203. if (
  204. caretRect &&
  205. doesRectsOverlap(wordhtmlgeneratorWordNodeRect, caretRect)
  206. ) {
  207. const caretXStart =
  208. caretRect.left - wordhtmlgeneratorWordNodeRect.left
  209. const localCaretIndex = getLocalCaretIndex(
  210. caretXStart,
  211. wordhtmlgeneratorWordNode,
  212. lineView
  213. )
  214. caretIndex = globalIndex + localCaretIndex
  215. caretLineIndex = lineText.length + localCaretIndex
  216. caretLine = lineCount
  217. }
  218. const nodeText = cleanDocumentText(
  219. wordhtmlgeneratorWordNode.textContent
  220. )
  221. nodes.push({
  222. index: globalIndex,
  223. line: lineCount,
  224. lineIndex: lineText.length,
  225. node: wordhtmlgeneratorWordNode,
  226. lineElement: lineView,
  227. text: nodeText,
  228. })
  229.  
  230. for (const selectionOverlay of selectionOverlays) {
  231. const selectionRect = selectionOverlay.getBoundingClientRect()
  232.  
  233. if (selectionRect) exportedSelectionRect = selectionRect
  234.  
  235. if (
  236. doesRectsOverlap(
  237. wordhtmlgeneratorWordNodeRect,
  238. selectionOverlay.getBoundingClientRect()
  239. )
  240. ) {
  241. const selectionStartIndex = getLocalCaretIndex(
  242. selectionRect.left - wordhtmlgeneratorWordNodeRect.left,
  243. wordhtmlgeneratorWordNode,
  244. lineView
  245. )
  246. const selectionEndIndex = getLocalCaretIndex(
  247. selectionRect.left +
  248. selectionRect.width -
  249. wordhtmlgeneratorWordNodeRect.left,
  250. wordhtmlgeneratorWordNode,
  251. lineView
  252. )
  253. selectedText += nodeText.slice(
  254. selectionStartIndex,
  255. selectionEndIndex
  256. )
  257. }
  258. }
  259.  
  260. globalIndex += nodeText.length
  261. lineText += nodeText
  262. }
  263. text.push(lineText)
  264. lineCount += 1
  265. }
  266. }
  267. return {
  268. nodes,
  269. text,
  270. selectedText,
  271. caret: {
  272. index: caretIndex,
  273. lineIndex: caretLineIndex,
  274. line: caretLine,
  275. },
  276. selectionRect: exportedSelectionRect,
  277. }
  278. }
  279.  
  280. // http://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
  281. /**
  282. * @param {DOMRect} RectA - ?
  283. * @param {DOMRect} RectB - ?
  284. * @returns {boolean} overlapping?
  285. */
  286. function doesRectsOverlap(RectA, RectB) {
  287. return (
  288. RectA.left <= RectB.right &&
  289. RectA.right >= RectB.left &&
  290. RectA.top <= RectB.bottom &&
  291. RectA.bottom >= RectB.top
  292. )
  293. }
  294.  
  295. // The kix-cursor contain a kix-cursor-name dom, which is only set when it is not the users cursor
  296. /**
  297. * @returns {boolean} does the kix-cursor contain a kix-cursor-name dom
  298. */
  299. function containsUserCaretDom() {
  300. const carets = document.querySelectorAll(classNames.cursor)
  301.  
  302. for (const caret of carets) {
  303. const nameDom = caret.querySelectorAll(classNames.cursorName)
  304. const name = nameDom[0].textContent
  305. if (!name) return true
  306. }
  307.  
  308. return false
  309. }
  310.  
  311. // The kix-cursor contain a kix-cursor-name dom, which is only set when it is not the users cursor
  312. /**
  313. * @returns {Element} user caret
  314. */
  315. function getUserCaretDom() {
  316. const carets = document.querySelectorAll(classNames.cursor)
  317. for (const caret of carets) {
  318. const nameDom = caret.querySelectorAll(classNames.cursorName)
  319. const name = nameDom[0].textContent
  320. if (!name) return caret.querySelectorAll(classNames.cursorCaret)[0]
  321. }
  322.  
  323. throw new Error("Could not find the users cursor")
  324. }
  325.  
  326. /**
  327. * @param {number} caretX - The x coordinate on where the element the caret is located
  328. * @param {Element} element - The element on which contains the text where in the caret position is
  329. * @param {Element} simulateElement - ?Doing the calculation of the caret position, we need to create a temporary DOM, the DOM will be created as a child to the simulatedElement.
  330. * @returns {number} caret index on the innerText of the element
  331. */
  332. function getLocalCaretIndex(caretX, element, simulateElement) {
  333. // Creates a span DOM for each letter
  334. const text = cleanDocumentText(element.textContent)
  335. const container = document.createElement("div")
  336. const letterSpans = []
  337. for (const ch of text) {
  338. const textNode = document.createElement("span")
  339. textNode.textContent = ch
  340. textNode.style.cssText = element.style.cssText
  341. // "pre" = if there are multiple white spaces, they will all be rendered. Default behavior is for them to be collapsed
  342. textNode.style.whiteSpace = "pre"
  343. letterSpans.push(textNode)
  344. container.append(textNode)
  345. }
  346. container.style.whiteSpace = "nowrap"
  347. simulateElement.append(container)
  348.  
  349. // The caret is usually at the edge of the letter, we find the edge we are closest to.
  350. let index = 0
  351. let currentMinimumDistance = -1
  352. const containerRect = container.getBoundingClientRect()
  353.  
  354. letterSpans.forEach((letterSpan, i) => {
  355. const rect = letterSpan.getBoundingClientRect()
  356. const left = rect.left - containerRect.left
  357. const right = left + rect.width
  358. if (currentMinimumDistance === -1) {
  359. currentMinimumDistance = Math.abs(caretX - left)
  360. }
  361. const leftDistance = Math.abs(caretX - left)
  362. const rightDistance = Math.abs(caretX - right)
  363.  
  364. if (leftDistance <= currentMinimumDistance) {
  365. index = i
  366. currentMinimumDistance = leftDistance
  367. }
  368.  
  369. if (rightDistance <= currentMinimumDistance) {
  370. index = i + 1
  371. currentMinimumDistance = rightDistance
  372. }
  373. })
  374.  
  375. // Clean up
  376. container.remove()
  377. return index
  378. }
  379.  
// entry point: create the status bar and start the periodic word count
displayCount()