Anti-Zalgo (Fork)

Strips from web pages certain ranges of Unicode characters (mostly combining marks) that can be used to create Zalgo text.

  1. // ==UserScript==
  2. // @name Anti-Zalgo (Fork)
  3. // @namespace https://greasyfork.org/en/users/2205
  4. // @version 1.1
  5. // @description Strip certain ranges of text that can be used to create Zalgo text from web pages
  6. // @license The Unlicense
  7. // @author NRGLG (original) Rudokhvist(fork)
  8. // @run-at document-end
  9. // @include http://*/*
  10. // @include https://*/*
  11. // @grant none
  12. // ==/UserScript==
  13. /*
  14. This is free and unencumbered software released into the public domain.
  15.  
  16. Anyone is free to copy, modify, publish, use, compile, sell, or
  17. distribute this software, either in source code form or as a compiled
  18. binary, for any purpose, commercial or non-commercial, and by any
  19. means.
  20.  
  21. In jurisdictions that recognize copyright laws, the author or authors
  22. of this software dedicate any and all copyright interest in the
  23. software to the public domain. We make this dedication for the benefit
  24. of the public at large and to the detriment of our heirs and
  25. successors. We intend this dedication to be an overt act of
  26. relinquishment in perpetuity of all present and future rights to this
  27. software under copyright law.
  28.  
  29. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  30. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  31. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  32. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  33. OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  34. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  35. OTHER DEALINGS IN THE SOFTWARE.
  36.  
  37. For more information, please refer to <http://unlicense.org/>
  38. */
  39.  
  40. (function()
  41. {
  /**
   * Collect the text nodes located at or below a DOM node.
   *
   * @param {Node} el Root of the subtree to scan; may itself be a text node.
   * @returns {Node[]} The text nodes in document order; empty when `el` is
   *   null/undefined or the subtree holds no text nodes.
   */
  function textNodesUnder(el) {
    const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE

    if (!el) {
      return [];
    }

    // TreeWalker.nextNode() never yields its own root, so a text node handed
    // in directly (for instance the target of a characterData mutation) must
    // be returned explicitly or it would be skipped.
    if (el.nodeType === TEXT_NODE) {
      return [el];
    }

    const found = [];
    const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
    for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
      found.push(node);
    }
    return found;
  }
  // Table of blocked character ranges, grouped following the Unicode script
  // categorization. Each entry is [first, last], both ends inclusive.
  // The entries are listed by script here, not in numeric order; the start-up
  // code further down sorts the table by range start before it is used.
  // This list is incomplete at the moment.
  const blockedRanges = [
    ////////////////////////////////////////////////
    // European Scripts
    [0xFB1E, 0xFB1E], // Alphabetic Presentation Forms

    [0x0483, 0x0489], // Cyrillic

    [0x2DE0, 0x2DFF], // Cyrillic Extended-A

    [0xA66F, 0xA672], // Cyrillic Extended-B
    [0xA674, 0xA67D], // Cyrillic Extended-B
    [0xA69E, 0xA69F], // Cyrillic Extended-B

    [0x10376, 0x1037A], // Old Permic

    [0x101FD, 0x101FD], // Phaistos Disc

    ////////////////////////////////////////////////
    // Combining Marks
    [0x0300, 0x036F], // Combining Diacritical Marks
    // Whole block rather than just U+1AB0..U+1ABE: later Unicode versions
    // assign further combining marks in this block (U+1ABF onward).
    [0x1AB0, 0x1AFF], // Combining Diacritical Marks Extended
    [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
    [0x20D0, 0x20F0], // Combining Diacritical Marks for Symbols
    [0xFE20, 0xFE2F], // Combining Half Marks

    ////////////////////////////////////////////////
    // African Scripts
    [0xA6F0, 0xA6F1], // Bamum

    [0x16AF0, 0x16AF4], // Bassa Vah

    [0x2CEF, 0x2CF1], // Coptic

    [0x102E0, 0x102E0], // Coptic Epact Numbers

    [0x1E8D0, 0x1E8D6], // Mende Kikakui

    [0x07EB, 0x07F3], // N'Ko

    [0x2D7F, 0x2D7F], // Tifinagh

    ////////////////////////////////////////////////
    // Middle Eastern Scripts
    [0x0610, 0x061A], // Arabic
    [0x064B, 0x065F], // Arabic
    [0x0670, 0x0670], // Arabic +
    [0x0674, 0x0674], // Arabic +
    [0x06D6, 0x06DC], // Arabic
    [0x06DF, 0x06E4], // Arabic
    [0x06E7, 0x06E8], // Arabic
    [0x06EA, 0x06ED], // Arabic

    [0x08E4, 0x08FF], // Arabic Extended-A

    [0x0591, 0x05BD], // Hebrew
    [0x05BF, 0x05BF], // Hebrew
    [0x05C1, 0x05C2], // Hebrew
    [0x05C4, 0x05C5], // Hebrew
    [0x05C7, 0x05C7], // Hebrew

    [0x0859, 0x085B], // Mandaic

    [0x0816, 0x0819], // Samaritan
    [0x081B, 0x0823], // Samaritan
    [0x0825, 0x0827], // Samaritan
    [0x0829, 0x082D], // Samaritan

    [0x0711, 0x0711], // Syriac
    [0x0730, 0x074A], // Syriac

    ////////////////////////////////////////////////
    // Central Asian Scripts
    [0x10AE5, 0x10AE6], // Manichaean

    [0x18A9, 0x18A9], // Mongolian

    [0x0F00, 0x0FDA], // Tibetan (I'm going to block this whole language)

    ////////////////////////////////////////////////
    // South Asian Scripts
    [0x1171D, 0x1172B], // Ahom

    [0x0980, 0x0983], // Bengali
    [0x09BC, 0x09BC], // Bengali
    [0x09BE, 0x09C4], // Bengali
    [0x09C7, 0x09C8], // Bengali
    [0x09CB, 0x09CE], // Bengali
    [0x09D7, 0x09D7], // Bengali
    [0x09E2, 0x09E3], // Bengali

    [0x11000, 0x11002], // Brahmi
    [0x11038, 0x11046], // Brahmi

    [0x11100, 0x11102], // Chakma
    [0x11127, 0x11134], // Chakma

    [0x0900, 0x0903], // Devanagari
    [0x093A, 0x094F], // Devanagari
    [0x0951, 0x0957], // Devanagari
    [0x0962, 0x0963], // Devanagari

    [0xA8E0, 0xA8F0], // Devanagari Extended

    [0x11300, 0x11303], // Grantha
    [0x1133C, 0x1133C], // Grantha
    [0x1133E, 0x1134D], // Grantha
    [0x11357, 0x11357], // Grantha
    [0x11362, 0x11374], // Grantha

    [0x0A81, 0x0A83], // Gujarati
    [0x0ABC, 0x0ABC], // Gujarati
    [0x0ABE, 0x0ACD], // Gujarati
    [0x0AE2, 0x0AE3], // Gujarati

    [0x0A01, 0x0A03], // Gurmukhi
    [0x0A3C, 0x0A51], // Gurmukhi
    [0x0A70, 0x0A71], // Gurmukhi
    [0x0A75, 0x0A75], // Gurmukhi

    [0x11080, 0x11082], // Kaithi
    [0x110B0, 0x110BA], // Kaithi

    [0x0C81, 0x0C83], // Kannada
    [0x0CBC, 0x0CBC], // Kannada
    [0x0CBE, 0x0CD6], // Kannada
    [0x0CE2, 0x0CE3], // Kannada

    // assorted
    [0x1920, 0x193B], // Limbu

    [0x0D01, 0x0D03], // Malayalam
    [0x0D3E, 0x0D57], // Malayalam
    [0x0D62, 0x0D63], // Malayalam

    [0x0D82, 0x0D83], // Sinhala
    [0x0DCA, 0x0DDF], // Sinhala
    [0x0DF2, 0x0DF3], // Sinhala

    [0x0C00, 0x0C03], // Telugu
    [0x0C3E, 0x0C56], // Telugu
    [0x0C62, 0x0C63], // Telugu

    [0x1CD0, 0x1CD2], // Vedic Extensions
    [0x1CD4, 0x1CE8], // Vedic Extensions
    [0x1CED, 0x1CED], // Vedic Extensions
    [0x1CF2, 0x1CF4], // Vedic Extensions
    [0x1CF8, 0x1CF9], // Vedic Extensions

    ////////////////////////////////////////////////
    // Southeast Asian Scripts

    [0x0E31, 0x0E31], // Thai
    [0x0E34, 0x0E3A], // Thai
    [0x0E47, 0x0E4F] // Thai

    ////////////////////////////////////////////////
    // Indonesia & Oceania Scripts

    ////////////////////////////////////////////////
    // East Asian Scripts

    ////////////////////////////////////////////////
    // American Scripts

    ////////////////////////////////////////////////
    // Other

  ];
  218.  
  /**
   * Report whether a code point lies inside one of the blocked ranges.
   *
   * Standard binary search over `blockedRanges`; it relies on the table being
   * sorted by range start and free of overlapping entries when called. Works
   * for any table length, including an empty table.
   *
   * @param {number} codePoint Unicode code point to look up.
   * @returns {boolean} true when some [first, last] entry contains codePoint.
   */
  function isBlockedCodePoint(codePoint) {
    let low = 0;
    let high = blockedRanges.length - 1;

    while (low <= high) {
      const middle = (low + high) >>> 1;
      const range = blockedRanges[middle];

      if (codePoint < range[0]) {
        high = middle - 1;
      } else if (codePoint > range[1]) {
        low = middle + 1;
      } else {
        return true;
      }
    }
    return false;
  }

  /**
   * Remove every blocked character from a node's text, in place.
   *
   * The text is scanned by Unicode code point (not UTF-16 code unit), so table
   * entries above U+FFFF are matched as well and surrogate pairs of characters
   * that are kept are never split. `textContent` is only written when at least
   * one character was removed.
   *
   * @param {Node} value Node whose `textContent` is filtered.
   * @param {number} [index] Unused; present so the function can be handed
   *   straight to `Array.prototype.forEach`.
   * @returns {void}
   */
  function clearZalgo(value, index) {
    const text = value.textContent;
    let cleaned = '';
    let dirty = false;
    let position = 0;

    while (position < text.length) {
      const codePoint = text.codePointAt(position);
      // Code points above U+FFFF occupy two UTF-16 code units.
      const width = codePoint > 0xFFFF ? 2 : 1;

      // Fast path: anything below 256 (ASCII / Latin-1) skips the lookup,
      // which keeps ordinary text cheap to scan.
      if (codePoint >= 256 && isBlockedCodePoint(codePoint)) {
        if (!dirty) {
          // First hit: everything before it is known to be clean.
          dirty = true;
          cleaned = text.slice(0, position);
        }
      } else if (dirty) {
        cleaned += text.slice(position, position + width);
      }

      position += width;
    }

    if (dirty) {
      value.textContent = cleaned;
    }
  }
  282.  
  // Keep the number of table entries even: when it is odd, prepend a [0, 0]
  // filler range (a NUL character would not show up anyway).
  if (blockedRanges.length % 2 !== 0) {
    blockedRanges.unshift([0, 0]);
  }

  // Order the table by the first code of each range, ascending.
  blockedRanges.sort(function (left, right) {
    return left[0] - right[0];
  });

  // Run clearZalgo over every non-blank text node found under `root`.
  function cleanTextUnder(root) {
    textNodesUnder(root)
      .filter(function (node) {
        return node.textContent.trim() !== '';
      })
      .forEach(clearZalgo);
  }

  // Clear static text: one pass over what is already in the page body.
  cleanTextUnder(document.body);

  // Clear dynamic text: re-scan the target of every reported mutation.
  const mutationObserver = new MutationObserver(function (mutations) {
    for (const mutation of mutations) {
      cleanTextUnder(mutation.target);
    }
  });

  mutationObserver.observe(document.documentElement, {
    childList: true,
    characterData: true,
    subtree: true
  });
  311. })();