“搞定”CJK!

中文字体和标点设定及修正脚本

目前为 2016-06-17 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name FixCJK!
  3. // @name:zh-CN “搞定”CJK!
  4. // @namespace https://github.com/stecue/fixcjk
  5. // @version 0.14.3
  6. // @description 1) Use real bold to replace synthetic SimSun bold; 2) Regular SimSun/中易宋体 can also be substituted; 3) Reassign font fallback list (Latin AND CJK). Browser serif/sans settings are overridden; 4) Use Latin fonts for Latin part in Latin/CJK mixed texts; 5) Fix fonts and letter-spacing for CJK punctuation marks.
  7. // @description:zh-cn 中文字体和标点设定及修正脚本
  8. // @author stecue@gmail.com
  9. // @license GPLv3
  10. // @match http://*/*
  11. // @match https://*/*
  12. // @match file:///*
  13. // @exclude https://*jsfiddle.net*/*
  14. // @grant GM_addStyle
  15. // ==/UserScript==
  16. (function () {
  17. 'use strict';
  18. // You can change the following fonts/settings until the "var FixPunct=" line.
  19. var CJKdefault = '"Microsoft YaHei",SimSun,"WenQuanYi Zen Hei Sharp","WenQuanYi Micro Hei"'; //The default CJK font if no sans or serif is specified. Regular weight.
  20. var CJKSimSun= '"Microsoft YaHei","WenQuanYi Micro Hei"'; //Fonts to replace SimSun;
  21. var CJKserif = '"Microsoft YaHei","WenQuanYi Micro Hei"'; //Default serif fonts for CJK. Although It is intended for regular weight but some element with bold weight still use the font here. Therefore "SimSun" itself is not a good choice because it does not have a real bold font.
  22. var CJKsans = '"Microsoft YaHei","Noto Sans CJK SC"'; //Sans-serif fonts for CJK. Regular weight.
  23. var CJKBold = '"Microsoft YaHei","WenQuanYi Micro Hei"'; //The "good CJK font" to replace SimSun bold. Note that some elements still use font in CJKserif defined above such as the menus on JD.com.
  24. var CJKPunct = 'Noto Sans CJK SC,"WenQuanYi Micro Hei",SimHei,SimSun'; //The font to use for CJK quotation marks.
  25. var LatinInSimSun = 'Ubuntu Mono'; //The Latin font in a paragraph whose font was specified to "SimSun" only.
  26. var LatinSans = 'Lato,"Open Sans",Arial'; //Sans-serif fonts for Latin script. It will be overridden by a non-virtual font in the CSS font list if present.
  27. var LatinSerif = 'Constantia,"Liberation Serif","Times New Roman"'; //Serif fonts for Latin script. It will be overridden by a non-virtual font in the CSS font list if present.
  28. var LatinMono = 'Consolas,"DejaVu Sans Mono"'; //Monospace fonts for Latin script. It will be overridden by a non-virtual font in the CSS font list if present.
  29. var FixRegular = true; //Also fix regular fonts. You need to keep this true if you want to use "LatinInSimSun" in Latin/CJK mixed context.
  30. var FixMore = true; //Appendent CJK fonts to all elements. No side effects found so far.
  31. var FixPunct = true; //If Latin punctions in CJK paragraph need to be fixed. Usually one needs full-width punctions in CJK context. Turn it off if the script runs too slow or HTML strings are adding to your editing area.
  32. var useJustify = true; //Make justify as the default alignment.
  33. ///=== "Safe" Zone Ends Here.Do not change following code unless you know the results! ===///
  34. var timeOut=3000; //allow maximum 3.0 seconds to run this script.
  35. var maxlength = 1100200; //maximum length of the page HTML to check for CJK punctuations.
  36. var maxNumElements = 81024; // maximum number of elements to process.
  37. var CJKOnlyThreshold = 11024; // Only CJK if the number of elements reaches this threshold.
  38. var loopThreshold = 8192;
  39. var noBonusLength = 11024; //no bonus functions such as fixing "reversed" pairs.
  40. var noBonusTimeout = 20; //Longest time (in ms) to run bonus functions for each element.
  41. var sqz_timeout=50; // 50ms per element seems long enough.
  42. var invForLimit=6; //the time limit factor (actual limit is timeOut/invForLimit) for the "for loop" in Round 2 & 3.
  43. var processedAll=true;
  44. var ifRound1=true;
  45. var ifRound2=true;
  46. var ifRound3=true;
  47. var debug_verbose = false; //show/hide more information on console.
  48. var debug_00 = false; //debug codes before Rounds 1/2/3/4.
  49. var debug_01 = false; //Turn on colors for Round 1.
  50. var debug_02 = false;
  51. var debug_03 = false;
  52. var debug_04 = false;
  53. var debug_re_to_check = false; //"true" might slow down a lot!
  54. var debug_spaces = false;
  55. var re_to_check = /^\uEEEE/; //use ^\uEEEE for placeholder. Avoid using the "m" or "g" modifier for long document, but the difference seems small?
  56. ///=== The following variables should be strictly for internal use only.====///
  57. var SkippedTagsForFonts=/^(TITLE|HEAD|BODY|SCRIPT|noscript|META|STYLE|AUDIO|video|source|AREA|BASE|canvas|figure|map|object|textarea)$/i;
  58. var SkippedTagsForMarks=/^(TITLE|HEAD|SCRIPT|noscript|META|STYLE|AUDIO|video|source|AREA|BASE|canvas|figure|map|object|textarea|input|code|pre|tt|BUTTON|select|option|label|fieldset|datalist|keygen|output)$/i;
  59. var SkippedTags=SkippedTagsForFonts;
  60. var SafeTags=/^(A|ABBR|UL|LI|SUB|SUP|P|I|B|STRONG|EM|FONT|H[123456]|U|VAR|WBR)$/i; //Safe tags as subelements. They do not need to meet the "no class && no tag" criterion.
  61. if (document.body.classList.contains("mediawiki")) {
  62. SafeTags=/^(A|ABBR|UL|LI|SUB|SUP|P|I|B|STRONG|EM|FONT|H[123456]|U|VAR|WBR|TD|IMG|SPAN|DIV|MATH)$/i; //Safe tags as subelements. They do not need to meet the "no class && no tag" criterion.
  63. }
  64. var ignoredTags=/^(math)$/i;
  65. var CJKclassList='CJK2Fix,MarksFixedE13,FontsFixedE137,\uE985,\uE211,Safe2FixCJK\uE000,Space2Add,CJKTested,SimSun2Fix,\uE699,checkSpacedQM';
  66. var re_autospace_url=/zhihu\.com|guokr\.com|changhai\.org|wikipedia\.org|greasyfork\.org|github\.com/;
  67. var preCodeTags='code,pre,tt';
  68. var t_start = performance.now();
  69. var t_stop = t_start;
  // Matches every spelling of the SimSun family that may show up in a font list:
  // the Latin name, the Chinese name, its mojibake form (GBK bytes decoded as
  // Latin-1) and the CSS-escaped form "\5b8b\4f53". The backslashes of the escaped
  // form must be doubled: a bare "\5" inside a regex literal is a legacy octal
  // escape (U+0005), so the previous pattern could never match that spelling.
  // A CSS hex escape may be terminated by a single space, hence the optional " ?".
  var re_simsun = / *simsun *| *宋体 *| *ËÎÌå *| *\\5b8b ?\\4f53 */gi;
  71. var all = document.getElementsByTagName('*');
  72. var NumAllDOMs=all.length;
  73. var bodyhtml=document.getElementsByTagName("HTML");
  74. if (bodyhtml[0].innerHTML.length > maxlength) {
  75. console.log('FixCJK!: HTML too long, skip everything. Exiting now...');
  76. ifRound1=false;
  77. ifRound2=false;
  78. ifRound3=false;
  79. FixPunct=false;
  80. }
  81. //Note that if one prefers using pure Latin punctuation for CJK contents, I'll leave it untouched. (maybe in 0.10.x)
  82. //else if (!(bodyhtml[0].innerHTML.match(/[\u3000-\u303F\uFF00-\uFFEF]/m))) {
  83. else if (!(bodyhtml[0].innerHTML.match(/[\u3400-\u9FBF]/))) {
  84. if (debug_verbose===true) {console.log('FixCJK!: Checking for CJK took '+((performance.now()-t_stop)/1000.0).toFixed(3)+' seconds. No CJK found.');}
  85. if (debug_verbose===true) {console.log('FixCJK!: No need to check CJK punctuations.');}
  86. FixPunct=false;
  87. }
  88. else {
  89. if (debug_verbose===true) {console.log('FixCJK!: Checking for CJK took '+((performance.now()-t_stop)/1000.0).toFixed(3)+' seconds. CJK found.');}
  90. FixPunct=true;
  91. }
  92. var sig_sim = 'RealCJKBold\u0020易'; //Just for SimSun;
  93. var sig_song = 'RealCJKBold\u0020宋'; // signature to check if change is sucssful or not.
  94. var sig_hei = 'RealCJKBold\u0020黑'; // signature to check if change is sucssful or not.
  95. var sig_bold = 'RealCJKBold\u0020粗'; // signature to check if change is sucssful or not.
  96. var sig_default = 'RealCJKBold\u0020默'; // signature to check if change is sucssful or not.
  97. var sig_mono= 'RealCJKBold\u0020均';
  98. var sig_punct = '\uE135'; //will be attached to CJKPunct; This is used in punct fixing not font fixing(?)
  99. var qsig_sim = '"' + sig_sim + '"'; //Quoted sinagure; Actually no need to quote.
  100. var qsig_song= '"'+sig_song+'"';
  101. var qsig_hei = '"' + sig_hei + '"'; //Quoted sinagure;
  102. var qsig_bold = '"' + sig_bold + '"';
  103. var qsig_default = '"' + sig_default + '"';
  104. //var qpreCJK = '"' + CJKdefault + '"'; //Quoted "CJK font".
  105. var genPunct='General Punct \uE137'; //Different from sig_punct
  106. var qpreCJK = CJKdefault;
  107. var qCJK = LatinInSimSun + ',' + CJKdefault + ',' + qsig_default;
  108. var qSimSun = qsig_sim+','+LatinInSimSun + ',' + CJKSimSun;
  109. var qBold = LatinInSimSun + ',' + CJKBold + ',' + qsig_bold;
  110. var qsans = LatinSans + ',' + CJKsans + ',' + qsig_hei + ',' + 'sans-serif'; //To replace "sans-serif"
  111. var qserif = LatinSerif + ',' + CJKserif +','+qsig_song+ ',' + 'serif'; //To replace "serif"
  112. var qmono = sig_mono+','+LatinMono + ',' + CJKdefault + ',' + qsig_default + ',' + 'monospace'; //To replace "monospace".
  113. var i = 0;
  114. var max = all.length;
  115. var child = all[i].firstChild;
  116. var if_replace = false;
  117. var font_str = window.getComputedStyle(all[i], null).getPropertyValue('font-family');
  118. var fweight = window.getComputedStyle(all[i], null).getPropertyValue('font-weight');
  119. var re_sans0 = /^ ?sans ?$|^ ?sans-serif ?$/i;
  120. var re_serif = /^ ?serif ?$/i;
  121. var re_mono0 = /^ ?mono ?$|^ ?monospace ?$/i;
  122. //letter-spacing options
  123. var kern_consec_ll='-0.45em'; //。” or ))
  124. var kern_consec_rr='-0.4em'; //((
  125. var kern_consec_lr='-0.8em'; //)(
  126. var kern_consec_pq='-0.5em'; //kern for ,. before right ” Just in case, do not use.
  127. var kern_consec_qp='-0.5em'; //quote followed by period. Just in case, do not use.
  128. var kern_sq='-0.5em'; //Just in case, do not use.
  129. var kern_ind_left_dq='-0.2em';
  130. var kern_ind_right_dq='-0.2em';
  131. var kern_ind_right_dq_tail='-0.3em'; //different from above one b/c the possible extra \n (which will show as a space in most cases).
  132. var kern_dq_right_end='-0.3em'; //Just in case, do not use.
  133. var kern_dq_right_left='-0.8em'; //Just in case, do not use.
  134. //Check if the font definitions are valid
  135. if (check_fonts(CJKdefault, 'CJKdefault') === false)
  136. return false;
  137. else if (check_fonts(CJKserif, 'CJKserif') === false)
  138. return false;
  139. else if (check_fonts(CJKsans, 'CJKsans') === false)
  140. return false;
  141. else if (check_fonts(CJKBold, 'CJKBold') === false)
  142. return false;
  143. else if (check_fonts(LatinInSimSun, 'LatinInSimSun') === false)
  144. return false;
  145. else if (check_fonts(LatinSans, 'LatinSans') === false)
  146. return false;
  147. else if (check_fonts(LatinSerif, 'LatinSerif') === false)
  148. return false;
  149. else if (check_fonts(LatinMono, 'LatinMono') === false)
  150. return false;
  151. else {
  152. }
  153. if (debug_00===true) {console.log(dequote('"SimSun","Times New Roman"""""'));}
  154. //Assign fonts for puncts:
  155. var punctStyle='@font-face { font-family: '+genPunct+';\n src: '+AddLocal(CJKPunct)+';\n unicode-range: U+3000-303F,U+FF00-FFEF;}';
  156. punctStyle=punctStyle+'\n@font-face {font-family:RealCJKBold\u0020易;\n src:local(SimHei);\n unicode-range: U+A0-2FF,U+2000-2FFF;}';
  157. var useCSSforSimSun=false;
  158. if (useCSSforSimSun===true) {
  159. punctStyle=punctStyle+'\n @font-face { font-family: SimSun;\n src: local('+FirstFontOnly('SimSun')+');\n unicode-range: U+3400-9FBF;}';
  160. punctStyle=punctStyle+'\n @font-face { font-family: 宋体;\n src: local('+FirstFontOnly('SimSun')+');\n unicode-range: U+3400-9FBF;}';
  161. punctStyle=punctStyle+'\n @font-face { font-family: ËÎÌå;\n src: local('+FirstFontOnly('SimSun')+');\n unicode-range: U+3400-9FBF;}';
  162. punctStyle=punctStyle+'\n @font-face { font-family: 宋体;\n src: local('+FirstFontOnly(LatinInSimSun)+');\n unicode-range: U+0000-2C7F;}';
  163. }
  164. if (debug_00===true) console.log(punctStyle);
  165. GM_addStyle(punctStyle);
  166. ///----------------------------
  167. qpreCJK = dequote(qpreCJK);
  168. qCJK = dequote(qCJK);//LatinInSimSun + ',' + CJKdefault + ',' + qsig_default;
  169. qSimSun = dequote(qSimSun);//LatinInSimSun + ',' + CJKserif + ',' + qsig_sun;
  170. qBold = dequote(qBold);//LatinInSimSun + ',' + CJKBold + ',' + qsig_bold;
  171. qsans = dequote(qsans);//LatinSans + ',' + CJKsans + ',' + qsig_hei + ',' + 'sans-serif'; //To replace "sans-serif"
  172. qserif = dequote(qserif);//LatinSerif + ',' + CJKserif + ',' + qsig_sun + ',' + 'serif'; //To replace "serif"
  173. qmono = dequote(qmono);//LatinMono + ',' + CJKdefault + ',' + qsig_default + ',' + 'monospace'; //To replace "monospace".
  174. CJKPunct=dequote(CJKPunct)+','+sig_punct;
  175. if (debug_00===true) {console.log('Entering Loops...');}
  176. /// ===== Labeling CJK elements === ///
  177. t_stop=performance.now();
  178. for (i=0;i < all.length;i++) {
  179. if (performance.now()-t_stop>300) {console.log("FIXME: Too slow. Stopped @"+all[i].nodeName+"#"+i.toString());break;}
  180. if ((all[i].nodeName.match(SkippedTags)) || all[i] instanceof SVGElement){
  181. continue;
  182. }
  183. all[i].classList.add("CJKTested");
  184. font_str=dequote(window.getComputedStyle(all[i], null).getPropertyValue('font-family'));
  185. if (debug_01===true) console.log(font_str);
  186. if (font_str.match(re_simsun)) {
  187. var font_size=(window.getComputedStyle(all[i], null).getPropertyValue('font-size')).slice(0,-2);
  188. if (font_size < 18) {
  189. all[i].classList.add("CJK2Fix");
  190. all[i].classList.add("SimSun2Fix");
  191. all[i].classList.add("Space2Add");
  192. }
  193. else {
  194. all[i].style.fontFamily=font_str;
  195. all[i].classList.add("CJK2Fix");
  196. all[i].classList.add("Space2Add");
  197. }
  198. continue;
  199. }
  200. child = all[i].firstChild;
  201. while (child) {
  202. if (child.nodeType == 3 && (child.data.match(/[\u3400-\u9FBF]/))) {
  203. all[i].classList.add("CJK2Fix");
  204. all[i].classList.add("Space2Add");
  205. if (!(all[i].parentNode.nodeName.match(SkippedTags))) {
  206. all[i].parentNode.classList.add("CJK2Fix");
  207. all[i].parentNode.classList.add("Space2Add");
  208. }
  209. break;
  210. }
  211. child=child.nextSibling;
  212. }
  213. }
  214. //Do not try to fixpuncts if it is an English site. Just trying to save time.
  215. if ((document.getElementsByClassName('CJK2Fix')).length < 1) {
  216. FixPunct=false;
  217. }
  218. if (debug_verbose===true) {console.log('FixCJK!: Labling took '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
  219. ///===FixFonts, Rounds 1-3===///
  220. FixAllFonts();
  221. ///===Round 4, FixPunct===///
  222. if (debug_verbose===true) {console.log('FixCJK!: Labling and Fixing fonts took '+((t_stop-t_start)/1000).toFixed(3)+' seconds.');}
  223. if ((t_stop-t_start)*2 > timeOut || max > maxNumElements ) {
  224. console.log('FixCJK!: Too slow or too many elements.');
  225. FixPunct=false;
  226. }
  227. if (FixPunct===false) {
  228. if (debug_verbose===true) {console.log('FixCJK!: Skipping fixing punctuations...');}
  229. }
  // Flags controlling the punctuation pass (Round 4).
  var useDelayedFix=false; //Set to true to postpone the punctuation pass by DelayedTimer ms.
  var useLoop=false;
  var returnNow=true;
  var returnLater=false; //Do the actual fixing.
  var MaxNumLoops=1;
  // NOTE(review): the pattern has "x" where a dot would be expected, so it matches
  // no real zhihu/sina URL; this looks like a deliberate "disabled" switch -- confirm.
  if (document.URL.match(/zhihuxcom|sinaxcom/)) {
    useLoop=true;
  }
  // setTimeout() expects a function. The previous code passed the *result* of
  // FunFixPunct(...), i.e. it ran the pass synchronously and then scheduled a
  // non-function value. The default path therefore calls FunFixPunct directly
  // (identical observable behaviour, and the timing/termination logs below stay
  // meaningful), while the opt-in delayed path now hands setTimeout a real callback.
  if (useDelayedFix===true) {
    var DelayedTimer=200;
    window.setTimeout(function () {FunFixPunct(useLoop,MaxNumLoops,returnLater);},DelayedTimer);
  }
  else {
    FunFixPunct(useLoop,MaxNumLoops,returnLater);
  }
  ///===End of Solving the picture problem===///
  if (debug_verbose===true) {console.log('FixCJK!: Fixing punctuations took '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
  247. ///===Add onClick listener before exiting===///
  248. var NumClicks=0;
  249. var t_last=performance.now();
  250. var t_interval=timeOut; //The interval between two checks.
  251. var NumAllCJKs=(document.getElementsByClassName('CJK2Fix')).length;
  252. var NumPureEng=0;
  253. var LastURL=document.URL;
  254. var LastMod=document.lastModified;
  255. var ItvScl=3.0;
  256. if (NumAllCJKs*1.0/NumAllDOMs*100 < 1.0) {
  257. NumPureEng++;
  258. }
  259. //document.onClick will cause problems on some webpages on Firefox.
  260. var downtime=performance.now();
  261. var downX=0;
  262. var downY=0;
  263. document.body.addEventListener("mousedown",function (e){downtime=performance.now();downX=e.clientX;downY=e.clientY;},false);
  264. document.body.addEventListener("mouseup",function (e){
  265. if (((performance.now()-downtime) > 800) && (Math.abs(e.clientX-downX)+Math.abs(e.clientY-downY)) < 3) {
  266. e.target.classList.add("SafedByUser");
  267. e.target.classList.add("CJK2Fix");
  268. e.target.classList.remove("MarksFixedE135");
  269. e.target.classList.remove("CJKTested");
  270. NumClicks=1;
  271. if (debug_verbose===true) {console.log(e.target.nodeName+"."+e.target.className+":: "+(Math.abs(e.clientX-downX)+Math.abs(e.clientY-downY)).toString());}
  272. //ReFix after other things are done.
  273. setTimeout(ReFixCJK,5,e);
  274. if (document.URL.match(/zhihu\.com/mg)) {
  275. FixLazy();
  276. setTimeout(addSpaces,15);
  277. }
  278. else if (document.URL.match(re_autospace_url)) {
  279. setTimeout(addSpaces,15);
  280. }
  281. }
  282. else if (((performance.now()-downtime) < 300) && (Math.abs(e.clientX-downX)+Math.abs(e.clientY-downY)) ===0 ) {
  283. //ReFix after other things are done.
  284. setTimeout(ReFixCJK,10,e);
  285. }
  286. },false);
  287. document.body.addEventListener("dblclick",function() {setTimeout(addSpaces,10);},false);
  288. ///===Time to exit the main function===///
  289. var t_fullstop=performance.now();
  290. if (processedAll===true) {
  291. console.log('FixCJK!: NORMAL TERMINATION: '+((t_fullstop-t_start)/1000).toFixed(3)+' seconds is the overall execution time. No skipped step(s).');
  292. }
  293. else {
  294. console.log('FixCJK!: EXECUTION ABORTED: '+((t_fullstop-t_start)/1000).toFixed(3)+' seconds is the overall execution time. Some step(s) were skipped due to performance issues.');
  295. }
  296. ////////////////////======== Main Function Ends Here ==============/////////////////////////////
  297. //===The actual listening functions===//
  /**
   * Tag every element found under a code-like container (the tag names listed
   * in preCodeTags) with the "preCode" class, including the container itself.
   * SVG elements and non-element nodes are left untouched.
   */
  function labelPreCode() {
    // Depth-first walk: descendants are marked before the node itself.
    function markSubtree(root) {
      for (var kid=root.firstChild; kid; kid=kid.nextSibling) {
        if (kid.nodeType!==1 || kid instanceof SVGElement) {
          continue;
        }
        markSubtree(kid);
      }
      root.classList.add("preCode");
    }
    preCodeTags.split(',').forEach(function (tagName) {
      var containers=document.getElementsByTagName(tagName);
      for (var n=0;n<containers.length;n++) {
        markSubtree(containers[n]);
      }
    });
  }
  /**
   * Insert a small spacer <span> (class "\uE699") between adjacent Latin/Greek
   * and CJK characters inside elements carrying the "SafedByUser" or "Space2Add"
   * class. An element is rewritten only if it is marked "Safe2FixCJK\uE000" or
   * "SafedByUser", is not marked "preCode", and its parent is not itself a
   * "Safe2FixCJK\uE000" + "Space2Add" element (the parent's rewrite covers it).
   * Afterwards removeSpacesForSimSun is scheduled and the elapsed time is logged.
   *
   * Fixes relative to the previous version (both in the currently disabled
   * pre-check helper): "currE" was read before being assigned, and the walk up to
   * BODY never advanced to the parent node, which would loop forever.
   */
  function addSpaces() {
    var t_spaces=performance.now();
    if (debug_spaces===true) console.log('FixCJK!: Adding spaces...');
    var checkSpaces=false; //seems no need to check first at all.
    if (checkSpaces===true) {
      checkSpacesHelper(document.getElementsByClassName("SafedByUser"));
      checkSpacesHelper(document.getElementsByClassName("Space2Add"));
    }
    // Optional pre-check: elements whose computed font list matches the pattern
    // below are flagged "noAddedSpances" and lose their "Space2Add" class.
    function checkSpacesHelper(allE) {
      for (var ic=0;ic<allE.length;ic++) {
        // Local variable: do not clobber the font_str shared with the main routine.
        var fontList=dequote(window.getComputedStyle(allE[ic], null).getPropertyValue('font-family'));
        if (fontList.match(/General Punct[^,]*[,][^,]*均/)) {
          var currE=allE[ic];
          if (debug_spaces===true) {console.log(currE.innerHTML.slice(0,20));}
          var toBODY=false; //This is some problem with "toBODY=TRUE" now.
          if (toBODY===true) {
            // Walk up to BODY, flagging every ancestor on the way.
            while (currE && currE.nodeName !== "BODY") {
              currE.classList.add("noAddedSpances");
              currE=currE.parentNode;
            }
          }
          else {
            currE.classList.add("noAddedSpances");
            currE.parentNode.classList.add("noAddedSpances");
          }
        }
      }
      var allNoES=document.getElementsByClassName("noAddedSpances");
      if (debug_spaces===true) {console.log(allNoES.length);}
      for (ic=0;ic<allNoES.length;ic++) {
        allNoES[ic].classList.remove("Space2Add");
      }
    }
    addSpacesHelper(document.getElementsByClassName("SafedByUser"));
    addSpacesHelper(document.getElementsByClassName("Space2Add"));
    function addSpacesHelper(allE) {
      //Now the tag protection is fixed, I'll alway use the "useSpan" method (different from 0.13.0)
      // Patterns and the spacer markup are loop-invariant, so build them once.
      // Tag protection: a CJK/Latin boundary *inside* a tag (attribute values) gets a
      // temporary \uED20 marker so that the spacing passes below cannot split it.
      var re_tag_zhen=/(<[^><]*[\u3400-\u9FBF][\u0020\u00A0]?)([“‘\u0021-\u003B\u003D\u003F-\u005A\u005E-\u007E\u0391-\u03FF][^><]*>)/mg;
      var re_tag_enzh=/(<[^><]*[\u0021-\u003B\u003D\u003F-\u005A\u005E-\u007E\u0391-\u03FF’”])([\u0020\u00A0]?[\u3400-\u9FBF][^><]*>)/mg;
      //en:zh;
      var re_enzh=/([\u0021\u0023-\u0026\u0028-\u003B\u003D\u003F-\u005A\u005E-\u007E\u0391-\u03FF])(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}((?:<[\u002F]?(?:span|sup|b|i|strong|em|u|var|a)[^\uE985\uE211><]*>){0,5})(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}([\u3400-\u9FBF])/img;
      //Special treatment of ’” because of lacking signature in the closing tag (</span>)
      /////first after tags
      var re_enzh_quoteAfterTag=/((?:<[^\uE985\uE211><]*>)+[\u201D\u2019])(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}((?:<[\u002F]?(?:span|sup|b|i|strong|em|u|var|a)[^\uE985\uE211><]*>){0,5})(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}([\u3400-\u9FBF])/img;
      /////then without tags
      var re_enzh_quotePlain=/([^>][\u201D\u2019])(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}((?:<[\u002F]?(?:span|sup|b|i|strong|em|u|var|a)[^\uE985\uE211><]*>){0,5})(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}([\u3400-\u9FBF])/img;
      //now zh:en
      var re_zhen=/([\u3400-\u9FBF])(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}((?:<[\u002F]?(?:span|sup|b|i|strong|em|u|var|a)[^\uE985\uE211><]*>){0,5})(?:[\u0020\u00A0\u200B-\u200E]|&nbsp;){0,5}([‘“\u0021\u0023-\u0026\u0028-\u003B\u003D\u003F-\u005A\u005E-\u007E\u0391-\u03FF])/img;
      var space2BeAdded='<span class="\uE699 FontsFixedE137" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:Arial,Helvetica,sans-serif;font-size:60%;">\u0020</span>';
      var enzh_withSpace='$1$2'+space2BeAdded+'$3';
      var zhen_withSpace='$1'+space2BeAdded+'$2$3';
      for (var is=0;is<allE.length;is++) {
        if (!(allE[is].parentNode.classList.contains("Safe2FixCJK\uE000") && allE[is].parentNode.classList.contains("Space2Add")) ) {
          if (allE[is].classList.contains("Safe2FixCJK\uE000") || allE[is].classList.contains("SafedByUser")) {
            if ( !(allE[is].classList.contains("preCode")) ) {
              var tmp_str=allE[is].innerHTML;
              //protect the Latins in tags
              while (tmp_str.match(re_tag_zhen) ) {
                tmp_str=tmp_str.replace(re_tag_zhen,'$1\uED20$2'); //use \uED20 to replace spaces
                if (debug_spaces===true) {console.log(tmp_str);}
              }
              while (tmp_str.match(re_tag_enzh) ) {
                tmp_str=tmp_str.replace(re_tag_enzh,'$1\uED20$2'); //use \uED20 to replace spaces
                if (debug_spaces===true) {console.log(tmp_str);}
              }
              tmp_str=tmp_str.replace(re_enzh,enzh_withSpace);
              tmp_str=tmp_str.replace(re_enzh_quoteAfterTag,enzh_withSpace);
              tmp_str=tmp_str.replace(re_enzh_quotePlain,enzh_withSpace);
              tmp_str=tmp_str.replace(re_zhen,zhen_withSpace);
              //now en["']zh (TODO in 0.15?)
              //now zh['"]en (TODO in 0.15?)
              // Drop the temporary tag-protection markers again.
              tmp_str=tmp_str.replace(/\uED20/mg,'');
              allE[is].innerHTML=tmp_str;
            }
            else {
              if (debug_spaces===true) {console.log("Skipping banned tags:"+allE[is].tagName);}
            }
          }
        }
      }
    }
    window.setTimeout(removeSpacesForSimSun,10);
    console.log("FixCJK: Adding spaces took "+((performance.now()-t_spaces)/1000).toFixed(3)+" seconds.");
  }
  /**
   * Clean up the spacer spans (class "\uE699") created by addSpaces.
   *
   * For every spacer the SECOND entry of its parent's computed font-family list
   * is inspected: if it is a SimSun spelling the spacer is emptied; if it is the
   * "RealCJKBold 易" signature the parent is tagged "checkSpacedQM". Every
   * "checkSpacedQM" element then has spacers next to curly quotation marks
   * stripped from its HTML.
   *
   * Fix: a font list with a single family has no second entry; the previous code
   * then called .match() on undefined and aborted the whole pass with a TypeError.
   */
  function removeSpacesForSimSun() {
    var allS=document.getElementsByClassName("\uE699");
    var font_str='';
    for (var i=0;i<allS.length;i++) {
      var families=(dequote(window.getComputedStyle(allS[i].parentNode, null).getPropertyValue('font-family'))).split(',');
      // Fall back to an empty name when there is no second family.
      font_str=(families.length > 1) ? families[1] : '';
      if (font_str.match(re_simsun)) {
        allS[i].innerHTML='';
      }
      else if (font_str.match(/RealCJKBold.易/)) {
        allS[i].parentNode.classList.add("checkSpacedQM");
      }
    }
    // Spacer directly followed (possibly through tags) by an opening curly quote.
    var spacerBeforeOpenQuote=/(<span[^><]*\uE699[^><]*>\u0020<\/span>)((?:<[^><\uE985\uE211]*>)*[\u2018\u201C])/g;
    //No closing tag: En"Zh
    var spacerAfterCloseQuote=/([\u2019\u201D])<span[^><]*\uE699[^><]*>\u0020<\/span>/g;
    //With closing tag: En"Zh
    var spacerAfterCloseQuoteTags=/((?:^|[^>]|<[^><\uE211\uE985]*>)[\u2019\u201D](?:<[^><\uE211\uE985]*>)+)(<span[^><]*\uE699[^><]*>\u0020<\/span>)/mg;
    allS=document.getElementsByClassName("checkSpacedQM");
    for (i=0;i<allS.length;i++){
      // innerHTML is only written when something matched, to avoid needless re-parsing.
      if (allS[i].innerHTML.match(spacerBeforeOpenQuote)) {
        allS[i].innerHTML=allS[i].innerHTML.replace(spacerBeforeOpenQuote,'$2');
      }
      if (allS[i].innerHTML.match(spacerAfterCloseQuote)) {
        allS[i].innerHTML=allS[i].innerHTML.replace(spacerAfterCloseQuote,'$1');
      }
      if (allS[i].innerHTML.match(spacerAfterCloseQuoteTags)) {
        allS[i].innerHTML=allS[i].innerHTML.replace(spacerAfterCloseQuoteTags,'$1');
      }
    }
  }
  /**
   * Re-label and re-fix the page after a mouse interaction. The mouseup
   * listener schedules this function through setTimeout and passes the event.
   *
   * @param {Event} e - the mouse event; only e.target is read.
   * @returns {boolean|undefined} false when the click landed inside a banned
   *   element, true when the re-check is skipped (document unchanged after
   *   more than two clicks, or page counted as pure Latin at least twice),
   *   otherwise nothing is returned.
   *
   * Side effects: resets t_start, updates LastURL/LastMod/t_last, increments
   * NumClicks, may overwrite the shared tuning flags and the shared loop
   * variables i and child, and calls FixAllFonts, labelPreCode and FunFixPunct.
   */
  function ReFixCJK (e) {
  // Clicks on (or inside) any of these elements never trigger a re-fix.
  var bannedTagsInReFix=/^(A|BUTTON|TEXTAREA|AUDIO|VIDEO|SOURCE|FORM|INPUT|select|option|label|fieldset|datalist|keygen|output|canvas|nav|svg|img|figure|map|area|track|menu|menuitem)$/i;
  if (debug_verbose===true) {console.log(e.target.nodeName);}
  t_start=performance.now();
  // A changed URL resets the "pure English" counter.
  if (document.URL!==LastURL) {
  NumPureEng = 0;
  LastURL=document.URL;
  }
  var clickedNode=e.target;
  document.body.classList.remove("SafedByUser"); //Remove the SafedByUser of the "BODY" element if it is clicked by user.
  // Walk from the clicked node up to BODY; bail out if any node on the way is banned.
  while (clickedNode.nodeName!=="BODY") {
  if (clickedNode.nodeName.match(bannedTagsInReFix)) {
  console.log("FixCJK!: Not a valid click on DOM element \u201C"+clickedNode.nodeName+"."+clickedNode.className+"\u201D");
  return false;
  }
  if (debug_verbose===true) {console.log("Clicked: "+clickedNode.nodeName);}
  clickedNode=clickedNode.parentNode;
  }
  // Skip when document.lastModified is unchanged and there were already more than two clicks.
  if ((document.lastModified===LastMod) && (NumClicks >2)) {
  console.log('FixCJK!: Document modified at '+document.lastModified+', no change.');
  return true;
  }
  else {
  if (debug_verbose===true) {console.log('FixCJK!: Document modified at '+document.lastModified);}
  }
  //NumPureEng method is still usefull because document.lastModified method is only partially reliable.
  if (NumPureEng >= 2) {
  console.log('Probably pure English/Latin site, re-checking skipped.');
  return true;
  }
  if (debug_verbose===true) {alert('FixCJK!: '+NumClicks.toString());}
  //First remove the "CJK2Fix" attibute for those already processed.
  var AllCJKFixed=document.getElementsByClassName("FontsFixedE137");
  for (i=0;i<AllCJKFixed.length;i++) {
  if (debug_verbose===true) {console.log(AllCJKFixed[i].className);}
  if (AllCJKFixed[i].classList.contains("MarksFixedE135")) {
  AllCJKFixed[i].classList.remove("CJK2Fix");
  }
  }
  // Throttle: do the full re-fix only for the first clicks or when enough time
  // has passed since the last run ((t_start-t_last)*ItvScl > t_interval).
  if ((NumClicks < 2) || ((t_start-t_last)*ItvScl > t_interval) ) {
  // Reset the shared tuning flags before re-running the rounds.
  FixRegular = true; //Also fix regular fonts. You need to keep this true if you want to use "LatinInSimSun" in Latin/CJK mixed context.
  FixMore = false; //Appendent CJK fonts to all elements. No side effects found so far.
  FixPunct = true; //If Latin punctions in CJK paragraph need to be fixed. Usually one needs full-width punctions in CJK context. Turn it off if the script runs too slow or HTML strings are adding to your editing area.
  maxlength = 1100200; //maximum length of the page HTML to check for CJK punctuations.
  maxNumElements = 8000; // maximum number of elements to process.
  CJKOnlyThreshold = 2000; // Only CJK if the number of elements reaches this threshold.
  invForLimit=6; //the time limit factor (actual limit is timeOut/invForLimit) for the "for loop" in Round 2 & 3.
  processedAll=true;
  ifRound1=true;
  ifRound2=true;
  ifRound3=false;
  //FixCJK();
  var ReFixAll=document.getElementsByTagName('*');
  var NumFixed=0;
  var NumReFix=0;
  // Re-label: skipped tags, SVG elements and elements already marked "CJKTested" are ignored.
  for (i=0;i<ReFixAll.length;i++) {
  if ((ReFixAll[i].nodeName.match(SkippedTags)) || ReFixAll[i] instanceof SVGElement || ReFixAll[i].classList.contains("CJKTested")){
  continue;
  }
  else if (ReFixAll[i].className.match("SafedByUser")) {
  ReFixAll[i].classList.add("CJK2Fix");
  NumReFix++;
  }
  else if ((ReFixAll[i].hasAttribute('class') ===true) && (ReFixAll[i].className.match(/FixedE1/))) {
  NumFixed++;
  continue;
  }
  else {
  // Label the element if any of its direct text-node children contains a CJK ideograph.
  child = ReFixAll[i].firstChild;
  while (child) {
  if (child.nodeType == 3 && (child.data.match(/[\u3400-\u9FBF]/))) {
  if (debug_verbose===true) {
  console.log(ReFixAll[i].className+':: '+child.data);
  console.log(ReFixAll[i].outerHTML);
  }
  ReFixAll[i].classList.add("CJK2Fix");
  //ReFixAll[i].className=(ReFixAll[i].className).replace(/(?: CJK2Fix)+/g,' CJK2Fix');
  NumReFix++;
  break;
  }
  child=child.nextSibling;
  }
  }
  }
  FixAllFonts();
  if (debug_verbose===true) {console.log('FixCJK!: '+NumFixed.toString()+' elements has been fixed.');}
  if (debug_verbose===true) {console.log('FixCJK!: '+NumReFix.toString()+' elements to Re-Fix.');}
  labelPreCode();
  FunFixPunct(useLoop,2,returnLater);
  console.log('FixCJK!: ReFixing took '+((performance.now()-t_start)/1000).toFixed(3)+' seconds.');
  // Count the page as "pure English" when under 1% of the initially counted elements carry "MarksFixedE135".
  NumAllCJKs=(document.getElementsByClassName('MarksFixedE135')).length;
  if (NumAllCJKs*1.0/NumAllDOMs*100 < 1.0) {
  NumPureEng++;
  }
  }
  else {
  console.log('FixCJK!: No need to rush. Just wait for '+(t_interval/1000/ItvScl).toFixed(1)+' seconds before clicking again. (But I did fix the spaces between CJK & \w');
  }
  // Bookkeeping for the next invocation.
  NumClicks++;
  LastMod=document.lastModified;
  t_last=performance.now();
  }
  532. ///===various aux functions===///
  /**
   * Validate a user-supplied, comma-separated font list.
   *
   * Each entry must be either unquoted or wrapped in double quotes, and must not
   * start or end with a space. On the first malformed entry an alert is shown
   * and false is returned.
   *
   * Fixes: (1) the loop used the shared counter "i" of the enclosing script and
   * clobbered it; it now has its own index. (2) One- and two-character family
   * names (e.g. 宋体) were rejected because the pattern required at least three
   * characters; they are now accepted in both quoted and unquoted form.
   *
   * @param {string} font_var - the font list to check.
   * @param {string} fvname - name of the setting, used in the alert message.
   * @returns {boolean} true when every entry is well-formed.
   */
  function check_fonts(font_var, fvname) {
    var fl = font_var.split(',');
    // First two alternatives: the original 3+ character rules; last two: short names.
    var re_entry = /^[^" ][^"]+[^" ]$|^"[^ ][^"]+[^ ]"$|^[^" ]{1,2}$|^"[^" ]{1,2}"$/;
    for (var k = 0; k < fl.length; k++) {
      if (!(fl[k].match(re_entry))) {
        alert('Check your font definition: ' + fl[k] + ' in ' + fvname);
        return false;
      }
    }
    return true;
  }
  /**
   * Find the first entry of a comma-separated font list that matches "family".
   *
   * @param {string} font_str - comma-separated font list.
   * @param {RegExp|string} family - pattern handed to String.prototype.match.
   * @returns {number|boolean} index of the first matching entry, or false when
   *   nothing matches. Note that a hit on the first entry yields 0, which is
   *   falsy; compare against false explicitly.
   */
  function list_has(font_str, family) {
    var entries = font_str.split(',');
    var idx = 0;
    while (idx < entries.length) {
      if (entries[idx].match(family)) {
        return idx;
      }
      idx += 1;
    }
    return false;
  }
  /**
   * Replace every entry of a comma-separated font list that matches "family"
   * with the replacement list "qBold" and return the re-joined list.
   *
   * @param {string} font_str - comma-separated font list.
   * @param {RegExp|string} family - pattern handed to String.prototype.match.
   * @param {string} qBold - text substituted for each matching entry.
   * @returns {string} the rewritten, comma-joined font list.
   */
  function replace_font(font_str, family, qBold) {
    return font_str.split(',').map(function (entry) {
      return entry.match(family) ? qBold : entry;
    }).join(',');
  }
  /**
   * Test whether a font list contains a generic family keyword
   * (sans/sans-serif, serif, or mono/monospace).
   *
   * Fix: list_has returns the index of the match, so a generic family in the
   * FIRST position yields 0. The previous truthiness test treated that as
   * "not found"; the result is now compared against false explicitly.
   *
   * @param {string} font_str - comma-separated font list.
   * @returns {boolean} true when any entry is a generic family keyword.
   */
  function has_genfam(font_str) {
    var generic = [re_sans0, re_serif, re_mono0];
    for (var g = 0; g < generic.length; g++) {
      if (list_has(font_str, generic[g]) !== false) {
        return true;
      }
    }
    return false;
  }
  /**
   * Normalise a comma-separated font list: every entry loses all leading and
   * trailing double quotes, single quotes and spaces; the entries are then
   * re-joined with a bare comma.
   *
   * @param {string} font_str - font list, possibly with sloppy quoting.
   * @returns {string} the cleaned font list.
   */
  function dequote(font_str) {
    return font_str.split(',').map(function (entry) {
      return entry.replace(/^["' ]+/, '').replace(/["' ]+$/, '');
    }).join(',');
  }
  /**
   * Return the first font name of a list, after dequote() has cleaned it.
   *
   * @param {string} font_str - Comma-separated font list.
   * @returns {string} The first entry of the dequoted list.
   */
  function FirstFontOnly(font_str) {
    var cleaned = dequote(font_str);
    var names = cleaned.split(',');
    return names[0];
  }
  /**
   * Build a list of `local("Name")` / `local("Name Regular")` pairs, one
   * pair per font in the list, with pairs separated by ',\n'.
   *
   * @param {string} font_str - Comma-separated font list (quotes allowed).
   * @returns {string} The generated `local(...)` list.
   */
  function AddLocal(font_str) {
    var names = dequote(font_str).split(',');
    var pairs = names.map(function (name, idx) {
      // Only the very first pair has a blank after its inner comma.
      var gap = (idx === 0) ? ', ' : ',';
      return 'local("' + name + '")' + gap + 'local("' + name + ' Regular")';
    });
    return pairs.join(',\n');
  }
  609. /// ======================== FixAllFonts, 3 Rounds ==============================///
  /**
   * Rewrite inline `font-family` on page elements in up to three rounds.
   *
   *  - Pre-pass: elements of class "SimSun2Fix" get re_simsun replaced by qSimSun.
   *  - Round 1 (ifRound1): "CJK2Fix" elements that directly contain CJK text in
   *    bold weight get qBold merged into their font list.
   *  - Round 2 (ifRound2): remaining "CJK2Fix" elements whose font list carries
   *    no signature get the regular-weight CJK fonts merged in.
   *  - Round 3 (ifRound3, only when FixMore is on): the same treatment for every
   *    element, or only for "CJK2Fix" elements on pages above CJKOnlyThreshold.
   *
   * Every round checks the clock once per 500 elements and switches the later
   * rounds (and FixPunct) off when it exceeds its time budget.
   *
   * Note: font_str, fweight, all, i, max, child, if_replace, t_stop and the
   * ifRoundN / FixPunct / processedAll flags are not declared here; they are
   * assigned as shared variables of the enclosing script.
   *
   * @returns {boolean|undefined} false when it stops early because FixRegular
   *   or FixMore is off, otherwise undefined.
   */
  function FixAllFonts () {
    if (debug_verbose===true) {
      console.log("Round 1: "+ifRound1.toString());
      console.log("Round 2: "+ifRound2.toString());
      console.log("Round 3: "+ifRound3.toString());
    }
    SkippedTags=SkippedTagsForFonts;
    t_stop=performance.now();
    // Pre-pass: fix all SimSun parts before Rounds 1 and 2.
    var allSuns=document.getElementsByClassName("SimSun2Fix");
    for (var isun=0;isun< allSuns.length;isun++) {
      if (allSuns[isun].classList.contains("FontsFixedE137")) {
        continue;
      }
      font_str = dequote(window.getComputedStyle(allSuns[isun], null).getPropertyValue('font-family'));
      if (font_str.match(re_simsun) && !(font_str.match(sig_sim)) ) {
        allSuns[isun].style.fontFamily = font_str.replace(re_simsun,qSimSun);
      }
    }
    /// ===== First round: Replace all bold fonts to CJKBold ===== ///
    all = document.getElementsByClassName('CJK2Fix');
    if (ifRound1===true) {
      for (i = 0; i < all.length; i++) {
        if (i % 500===0) { //Check every 500 elements.
          if ((performance.now()-t_stop)*invForLimit > timeOut) {
            ifRound1=false;
            ifRound2=false;
            ifRound3=false;
            FixPunct=false;
            processedAll=false;
            console.log('FixCJK!: Round 1 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds. Too slow to continue.');
            break;
          }
          else {
            if (debug_verbose===true) {console.log('FixCJK!: Round 1 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
          }
        }
        child = all[i].firstChild;
        if_replace = false;
        font_str = dequote(window.getComputedStyle(all[i], null).getPropertyValue('font-family'));
        fweight = window.getComputedStyle(all[i], null).getPropertyValue('font-weight');
        while (child) {
          //Only change if the current node itself (not a child element) contains CJK characters in bold and is not signed yet.
          if (child.nodeType === 3 && (child.data.match(/[\u3400-\u9FBF]/)) && (fweight === 'bold' || fweight > 500) && (!(font_str.match(sig_bold)))) {
            if (debug_01===true) {all[i].style.color="Blue";} //Bold-->Blue;
            if (font_str.match(re_simsun)) {
              //Contains SimSun: swap it for the real bold font.
              all[i].style.fontFamily = genPunct+','+font_str.replace(re_simsun, qBold);
              if (!(has_genfam(all[i].style.fontFamily))) {
                all[i].style.fontFamily = genPunct+','+all[i].style.fontFamily + ',' + 'sans-serif';
              }
            }
            else if (list_has(font_str, re_sans0) !== false) {
              //Contains a generic sans entry.
              all[i].style.fontFamily = genPunct+','+ replace_font(font_str, re_sans0, LatinSans+','+qBold) + ',sans-serif';
            }
            else if (list_has(font_str, re_serif) !== false) {
              //Contains a generic serif entry.
              all[i].style.fontFamily = genPunct+','+ replace_font(font_str, re_serif, LatinSerif + ',' +qBold) + ',serif';
            }
            else if (list_has(font_str, re_mono0) !== false) {
              //Contains a generic monospace entry.
              all[i].style.fontFamily = genPunct+','+ replace_font(font_str, re_mono0, LatinMono + ',' +qBold) + ',monospace';
            }
            else {
              //No generic family at all: just append the fonts to the preference list.
              all[i].style.fontFamily = genPunct+','+font_str + ',' + LatinSans + ',' + qBold + ',' + ' sans-serif';
            }
          }
          child = child.nextSibling;
        }
      }
    }
    if (FixRegular === false) {
      return false;
    }
    /// ===== Second Round: Deal with regular weight. ===== ///
    var tmp_idx=0;
    max = all.length;
    if ((performance.now()-t_stop)*4 > timeOut) {
      ifRound2=false;
      ifRound3=false;
      FixPunct=false;
      processedAll=false;
      console.log('FixCJK!: Round 1 has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds. Skipping following steps.');
    }
    t_stop=performance.now();
    if (ifRound2===true) {
      for (i = 0; i < all.length; i++) {
        if (i % 500===0) { //Check every 500 elements.
          if ((performance.now()-t_stop)*invForLimit > timeOut) {
            ifRound2=false;
            ifRound3=false;
            FixPunct=false;
            processedAll=false;
            console.log('FixCJK!: Round 2 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds. Too slow to continue.');
            break;
          }
          else {
            if (debug_verbose===true) {console.log('FixCJK!: Round 2 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
          }
        }
        if (all[i].classList.contains("FontsFixedE137") ) {
          continue;
        }
        font_str = dequote(window.getComputedStyle(all[i], null).getPropertyValue('font-family'));
        fweight = window.getComputedStyle(all[i], null).getPropertyValue('font-weight');
        if (font_str.match(sig_hei) || font_str.match(sig_song) ||font_str.match(sig_bold) || font_str.match(sig_mono) || font_str.match(sig_default)) {
          //Already carries one of the signatures: nothing to do (and it is not tagged below).
          continue;
        }
        else {
          if (debug_02===true) {all[i].style.color='Teal';} //Teal for true;
          if (debug_02===true) {if (all[i].innerHTML.match(re_to_check)) {console.log('\\\\\\\\\\\\afterall:'+i.toString()+'::'+all[i].style.fontFamily+'\n-->if_replace:'+if_replace);}}
          if (list_has(font_str, re_sans0) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_sans0, qsans);
          }
          else if (list_has(font_str, re_serif) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_serif, qserif);
          }
          else if (list_has(font_str, re_mono0) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_mono0, qmono);
          }
          else {
            if (debug_02===true) {all[i].style.color='Fuchsia';}
            //SimSun lists are deliberately left alone here; everything else gets the default CJK fonts appended.
            if (!(font_str.match(re_simsun))) {
              all[i].style.fontFamily = genPunct+','+font_str + ',' + qCJK + ',' + 'sans-serif';
            }
          }
        }
        if (FixMore === false) {
          //Add FontsFixed if Round 3 is skipped intentionally.
          all[i].classList.add("FontsFixedE137");
        }
      }
    }
    if (debug_verbose===true) {console.log('FixCJK!: Round 2 took '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
    t_stop=performance.now();
    //Guarded: `all` may be empty, in which case all[tmp_idx] is undefined.
    if (debug_02===true && all[tmp_idx]) {
      console.log('Just before Round 3:'+tmp_idx.toString()+'::'+all[tmp_idx].innerHTML);
      console.log('Just before Round 3:'+tmp_idx.toString()+'::'+dequote(window.getComputedStyle(all[tmp_idx], null).getPropertyValue('font-family')));
    }
    /// ===== The Third round: Add CJKdefault to all elements ===== ///
    if (FixMore === false) {
      t_stop=performance.now();
      if (debug_verbose===true) {console.log('FixCJK!: FixMore/Round 3 is intentionally skipped.');}
      return false;
    }
    all = document.getElementsByTagName('*');
    max = all.length;
    if (max > maxNumElements) {
      ifRound3=false;
      FixPunct=false;
      processedAll=false;
      console.log('FixCJK!: '+max.toString()+' elements, too many. Skip Round 3 and punctuation fixing. Exiting now...');
    }
    else if (max > CJKOnlyThreshold) {
      ifRound3=true;
      FixPunct=true;
      processedAll=true;
      //"CJK2Fix" is a class name, not a tag name: looking it up by tag would always give an empty list.
      all = document.getElementsByClassName('CJK2Fix');
      console.log('FixCJK!: '+max.toString()+' elements, too many. Only CJK elements will be processed in Round 3.');
    }
    else {
      if (debug_verbose===true) {console.log('FixCJK!: All elements will be processed in Round 3.');}
    }
    if (ifRound3===true) {
      for (i = 0; i < all.length; i++) {
        if (i % 500===0) { //Check every 500 elements.
          if ((performance.now()-t_stop)*invForLimit > timeOut) {
            ifRound3=false;
            FixPunct=false;
            processedAll=false;
            console.log('FixCJK!: Round 3 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds. Too slow to continue. Exiting now...');
            break;
          }
          else {
            if (debug_verbose===true) {console.log('FixCJK!: Round 3 itself has been running for '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
          }
        }
        if (all[i].nodeName.match(SkippedTags)) {
          continue;
        }
        font_str = dequote(window.getComputedStyle(all[i], null).getPropertyValue('font-family'));
        if (!(font_str.match(sig_song) || font_str.match(sig_hei) || font_str.match(sig_bold) || font_str.match(sig_default) || font_str.match(/\uE137/))) {
          if (list_has(font_str, re_sans0) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_sans0, qsans);
          }
          else if (list_has(font_str, re_serif) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_serif, qserif);
          }
          else if (list_has(font_str, re_mono0) !== false) {
            all[i].style.fontFamily = genPunct+','+replace_font(font_str, re_mono0, qmono);
          }
          else {
            //SimSun should be taken care of through the "SimSun2Fix" class.
            if (debug_03 === true) { all[i].style.color='Olive';}
            all[i].style.fontFamily = genPunct+','+font_str + ',' + qCJK + ',' + 'sans-serif';
          }
        }
        all[i].classList.add("FontsFixedE137");
      }
    }
    if (debug_verbose===true) {console.log('FixCJK!: Round 3 took '+((performance.now()-t_stop)/1000).toFixed(3)+' seconds.');}
    t_stop=performance.now();
  }
  833. ///===The Actual Round 4===///
  /**
   * Entry point of the punctuation-fixing round.
   *
   * By default every "CJK2Fix" element not yet marked "MarksFixedE135" is
   * handed to FixPunctRecursion(). The loop implementation FixPunctLoop() is
   * used instead when `useLoop` is true or when the page holds more than
   * loopThreshold "CJK2Fix" elements. Both paths stop once more than timeOut
   * has elapsed since t_start, clearing processedAll.
   *
   * @param {boolean} useLoop - Force the loop implementation.
   * @param {number} MaxNumLoops - Maximum number of FixPunctLoop() passes.
   * @param {boolean} returnNow - When true, do nothing except select
   *   SkippedTagsForMarks as the active SkippedTags.
   * @returns {boolean|undefined} true when returnNow is set, else undefined.
   */
  function FunFixPunct(useLoop,MaxNumLoops,returnNow) {
    SkippedTags=SkippedTagsForMarks;
    if (returnNow===true) {
      return true;
    }
    var recursion_start=0;
    var useRecursion=true;
    if (useLoop===true) {useRecursion=false;}
    //Compare the NUMBER of elements: an HTMLCollection compared to a number is always false.
    if (document.getElementsByClassName("CJK2Fix").length > loopThreshold) {
      useRecursion=false;
    }
    if (useRecursion===true) {
      if (debug_verbose===true) {console.log('Using Recursion');}
      labelPreCode();
      var allrecur=document.getElementsByClassName("CJK2Fix");
      for (var ir=0; ir<allrecur.length; ir++) {
        if ( !(allrecur[ir].classList.contains("MarksFixedE135")) ) {
          recursion_start=performance.now();
          FixPunctRecursion(allrecur[ir]);
          if ( (performance.now()-t_start) > timeOut ) {
            processedAll=false;
            console.log("FixCJK!: Time out. Last fixing took "+((performance.now()-recursion_start)/1000).toFixed(3)+" seconds.");
            console.log("FIXME:"+allrecur[ir].nodeName+"."+allrecur[ir].className);
            break;
          }
        }
      }
    }
    else {
      //FixPunctLoop() clears FixPunct itself once nothing is left to change.
      while ((FixPunct === true) && (MaxNumLoops>0)) {
        if ((performance.now()-t_start) > timeOut) {
          processedAll=false;
          console.log('FixCJK!: Time out, stopping now...');
          break;
        }
        FixPunctLoop(MaxNumLoops);
        MaxNumLoops--;
      }
    }
  }
  876. /////=====The Recursive Implementation=====/////
  /**
   * Recursively fix CJK punctuation inside `node`.
   *
   * Child elements are processed first. A node's innerHTML is only rewritten
   * (through FixMarksInCurrHTML) when all of these hold: every child element
   * ended up in class "Safe2FixCJK\uE000", no child has a tabooed tag, the
   * node is in class "CJK2Fix", and it is not yet in class "MarksFixedE135".
   * A node in class "SafedByUser" is forced through these checks.
   * Every visited node is finally tagged "MarksFixedE135".
   *
   * @param {Element} node - Element to process.
   * @returns {boolean} false when `node` itself has a tabooed tag, else true.
   */
  function FixPunctRecursion(node) {
    if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) {console.log("Checking node: "+node.nodeName+"."+node.className+"@"+node.parentNode.nodeName+":: "+node.innerHTML.slice(0,216));}
    var tabooedTags=SkippedTagsForMarks;
    var child=node.firstChild;
    var allSubSafe=true;
    var node2fix=true;
    if (node.classList.contains("MarksFixedE135")) {
      return true;
    }
    if (node.nodeName.match(tabooedTags)) {
      //Although BODY is tabooed, this is OK because a loop is outside this recursive implementation.
      node.classList.remove("Safe2FixCJK\uE000");
      node.classList.add("MarksFixedE135");
      return false;
    }
    if (window.getComputedStyle(node,null).getPropertyValue('text-align').match(/start/) && useJustify===true) {
      node.style.textAlign="justify";
    }
    //Add the lang attribute. Firefox cannot detect lang=zh automatically and will treat CJK characters as
    //letters without it: with justified text only the blank spaces get stretched, not the character spacing.
    node.lang="zh";
    while (child) {
      if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) {console.log("Checking subnode: "+child+"@"+node.nodeName);}
      if (child.nodeType === 3) {
        //Text nodes need no work here; they are only reported when debugging.
        if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) {console.log("Found as Type 3 subnode: "+child.nodeName+"."+child.className+"@"+node.nodeName+":: "+child.data);}
        if (debug_verbose===true) {
          console.log("Permitted to check: "+node.nodeName+"."+node.className);
        }
      }
      if (child.nodeType===1 && !(child instanceof SVGElement)) {
        if (child.nodeName.match(tabooedTags) ) {
          //A tabooed child blocks any rewrite of this node's innerHTML.
          child.classList.remove("Safe2FixCJK\uE000");
          child.classList.remove("CJK2Fix");
          child.classList.add("MarksFixedE135");
          node2fix=false;
        }
        else if (child.nodeName.match(ignoredTags)) {
          //Simply do nothing. Such as <math> tag.
          child.classList.add("Safe2FixCJK\uE000");
          child.classList.add("MarksFixedE135");
        }
        else if (!(child.classList.contains("MarksFixedE135"))) {
          FixPunctRecursion(child); //This is the recursion part. The classes of child may be changed by it.
        }
        //Test again after fixing child:
        if (!(child.classList.contains("Safe2FixCJK\uE000"))) {allSubSafe=false;} //\uE000 is Tux in Linux Libertine.
      }
      child=child.nextSibling;
    }
    if (allSubSafe===true && (!(node instanceof SVGElement))) {
      //Safe2FixCJK\uE000 means the node is safe as a subelement, i.e. its innerHTML may be rewritten by a
      //parent. The node itself may still have event listeners attached to it.
      var safeAsSubelement=false;
      if (node.tagName.match(SafeTags)) {
        safeAsSubelement=true;
      }
      else if (node.id.length===0) {
        //Without an id and without any class besides the script's own ones, listeners could only
        //have been attached by tag name, which would be crazy.
        //The class list is inspected read-only instead of being stripped and restored.
        var CJKclasses=CJKclassList.split(',');
        safeAsSubelement=true;
        for (var icl=0;icl<node.classList.length;icl++) {
          if (CJKclasses.indexOf(node.classList.item(icl)) === -1) {
            safeAsSubelement=false;
            break;
          }
        }
      }
      if (safeAsSubelement===true) {
        node.classList.add("Safe2FixCJK\uE000");
      }
    }
    //Force to fix if Safed by User
    if (!(node instanceof SVGElement) && node.classList.contains("SafedByUser") ) {
      console.log("SAFED BY USER: "+node.nodeName+"."+node.className);
      allSubSafe=true;
      node.classList.add("CJK2Fix");
      node.classList.remove("MarksFixedE135");
      node2fix=true;
      //Do not add it to "Safe2FixCJK\uE000" class, otherwise re-check may destroy the listeners attached to the "outerHTML".
    }
    //Config and Filtering Done. Fix puncts if necessary.
    if (allSubSafe===true && node2fix===true && node.classList.contains("CJK2Fix") && !(node.classList.contains("MarksFixedE135"))) {
      if (debug_verbose===true) {console.log("USING Recursion: "+node.nodeName+'.'+node.className);}
      if (node.classList.contains("SafedByUser")) {
        //The "SafedByUser" class is kept: it is still needed if the spaces are to be fixed later.
        if (debug_verbose===true) {console.log("SAFEDDD BY USER: "+node.nodeName+"."+node.className);}
      }
      if (debug_verbose===true) { console.log("WARNING: Danger Operation on: "+node.nodeName+"."+node.className+":: "+node.innerHTML.slice(0,216)); }
      if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) {console.log("Checking if contain punctuations to fix");}
      //Only rewrite when a CJK mark is present. The escapes are the FULLWIDTH forms
      //(\uFF0C comma, \uFF1A colon, \uFF1B semicolon, \uFF01 !, \uFF1F ?, \uFF09 and \uFF08 parentheses):
      //their ASCII look-alikes occur in almost any markup and would force a needless innerHTML rewrite.
      if (node.innerHTML.match(/[“”‘’、\uFF0C。\uFF1A\uFF1B\uFF01\uFF1F\uFF09】〉》」』『「《〈【\uFF08]/m)) {
        if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) { console.log("WARNING: Danger Operation on: "+node.nodeName+"."+node.className);}
        if (node.classList.contains("preCode")) {
          node.classList.remove("Safe2FixCJK\uE000"); //Do not perform fixing on "fully banned" tags.
          node.classList.remove("Space2Add");
        }
        else if (window.getComputedStyle(node, null).getPropertyValue("white-space").match(/pre/)){
          //Preformatted text: fix the marks but do not delete the extra spaces.
          node.innerHTML=FixMarksInCurrHTML(node.innerHTML,false,false);
        }
        else {
          if (debug_re_to_check===true && (node.innerHTML.match(re_to_check))) {console.log("Now fixing --> "+node.nodeName+"."+node.className+":: "+node.innerHTML.slice(0,216));}
          node.innerHTML=FixMarksInCurrHTML(node.innerHTML,true,false);
        }
      }
    }
    node.classList.add("MarksFixedE135");
    return true;
  }
  996. ///== Each Loop in FunFixPunct() ==///
  /**
   * One pass of the loop-based (non-recursive) punctuation fixer.
   *
   * Phase 1 scans the direct text nodes of every "CJK2Fix" element and queues
   * the elements that need fixing (cases A-D below). Elements for which
   * if_replace stays false are tagged "MarksFixedE135" immediately.
   * Phase 2 rewrites the innerHTML of the queued elements, last queued first,
   * through FixMarksInCurrHTML() and tags them "MarksFixedE135".
   * When nothing was queued, the shared FixPunct flag is cleared.
   *
   * @param {number} MaxNumLoops - Remaining pass budget of the caller; a value
   *   of 0 additionally switches on per-element logging.
   * @returns {boolean|undefined} false on time-out or when nothing was queued,
   *   otherwise undefined.
   */
  function FixPunctLoop(MaxNumLoops) {
    SkippedTags=SkippedTagsForMarks;
    console.log('FixCJK!: Using loops'); //Recursion is the default implementation.
    var i=0;
    var puncnode=[]; //Indices (into `all`) of the elements to rewrite.
    var currpunc=0;
    var numnodes=0;
    var maxChildDataLength=80;
    var delete_all_extra_spaces=true;
    var AlsoChangeFullStop=false;
    //Scratch variables kept local so that a pass does not touch shared state.
    var child=null;
    var if_replace=false;
    var font_str='';
    var all = document.getElementsByClassName('CJK2Fix');
    for (i = 0; i < all.length; i++) {
      child = all[i].firstChild;
      if_replace = false;
      font_str = dequote(window.getComputedStyle(all[i], null).getPropertyValue('font-family'));
      if (debug_04===true) {
        if (font_str.match('monospace')) {
          all[i].style.color='MidnightBlue';
        }
      }
      //Only the element's own text nodes count, not the text of child elements.
      while (child) {
        if (child.nodeType === 3 && !(child.data.match(/^[\s]+$/mg))) {
          //"mg" is used to also match paragraphs with punctuation at the end or beginning of a line.
          if (all[i].nodeName.match(SkippedTags)) {
            if (MaxNumLoops===0) {
              console.log('FixCJK!: Skipped Change (Case 0): '+all[i].nodeName+'#'+i.toString()+': '+child.data.slice(0,Math.min(maxChildDataLength,child.data.length)));
            }
            if (debug_04===true) { console.log('Processing node '+i+'::'+all[i].nodeName); }
            break;
          }
          else {
            if ((child.data.match(/[“‘][ \n\t]*[\u3400-\u9FBF\u3000-\u303F\uFF00-\uFFEF]+|[\u3400-\u9FBF\u3000-\u303F\uFF00-\uFFEF][ \n\t]*[”’]/mg)) && (!(font_str.match('monospace')))) {
              //Case A: curly quotation mark next to CJK text, outside monospace.
              if (debug_04===true) {all[i].style.color='Purple';} //Punctuation-->Purple;
              numnodes++;
              puncnode.push(i);
              if (MaxNumLoops===0) {
                console.log('FixCJK!: To Change (Case A): '+all[i].nodeName+'#'+i.toString()+': '+child.data.slice(0,Math.min(maxChildDataLength,child.data.length)));
              }
              if_replace=true;
              break;
            }
            else if ((delete_all_extra_spaces===true) && (child.data.match(/[\u3000-\u303F\uFF00-\uFFEF][\n]?[ ][^ |$]/mg))) {
              //Case B: a blank right after a CJK mark. if_replace is left false here, so the
              //element is tagged as fixed right after the scan and still rewritten in phase 2.
              if (debug_04===true) {all[i].style.color='Purple';} //Punctuation-->Purple;
              numnodes++;
              puncnode.push(i);
              if (MaxNumLoops===0) {
                console.log('FixCJK!: To Change (Case B): '+all[i].nodeName+'#'+i.toString()+': '+child.data.slice(0,Math.min(maxChildDataLength,child.data.length)));
              }
              break;
            }
            else if ((AlsoChangeFullStop===true) && child.data.match(/[\uFF1F\uFF01\uFF1A\uFF1B、\uFF0C。]/mg)) {
              //Case C (currently switched off): fullwidth ? ! : ; and the CJK comma/full stop marks.
              if (MaxNumLoops===0) {
                console.log('FixCJK!: To Change (Case C): '+all[i].nodeName+'#'+i.toString()+': '+child.data.slice(0,Math.min(maxChildDataLength,child.data.length)));
              }
              numnodes++;
              puncnode.push(i);
              if_replace=true;
              break;
            }
            else if (child.data.match(/[\u3000-\u303F\uFF00-\uFFEF][\u3000-\u303F\uFF00-\uFFEF]/mg)) {
              //Case D: two consecutive CJK marks.
              if (MaxNumLoops===0) {
                console.log('FixCJK!: To Change (Case D): '+all[i].nodeName+'#'+i.toString()+': '+child.data.slice(0,Math.min(maxChildDataLength,child.data.length)));
              }
              numnodes++;
              puncnode.push(i);
              if_replace=true;
              break;
            }
          }
        }
        child = child.nextSibling;
      }
      if (if_replace === false) {
        all[i].classList.add("MarksFixedE135"); //The "CJK2Fix" class cannot be removed now because the collection is "live".
        if (debug_04===true) {console.log(all[i].nodeName+'::'+all[i].className);}
      }
      else {
        if (debug_04===true) {console.log(all[i].nodeName+'::'+all[i].innerHTML);}
      }
    }
    if ((performance.now()-t_start) > timeOut) {
      processedAll=false;
      console.log('FixCJK!: Time out, stopping now...');
      return false;
    }
    if (numnodes===0) {
      FixPunct=false;
      return false;
    }
    if (debug_verbose===true) {console.log('FixCJK!: '+MaxNumLoops.toString()+' (or less) loop(s) left.');}
    if (debug_verbose===true) {console.log('FixCJK!: '+numnodes.toString()+' element(s) to change.');}
    while(numnodes>0) {
      if ((performance.now()-t_start) > timeOut) {
        processedAll=false;
        console.log('FixCJK!: Time out, some elements are left unchanged...');
        break;
      }
      numnodes--;
      currpunc=puncnode.pop();
      if (MaxNumLoops===0) {
        //Log the element's own markup (there is no `currHTML` variable in this function).
        console.log('FixCJK!: currpunc='+currpunc.toString()+': '+all[currpunc].nodeName+': '+all[currpunc].innerHTML.slice(0,maxChildDataLength));
      }
      if (debug_04===true) {console.log(currpunc);}
      all[currpunc].innerHTML=FixMarksInCurrHTML(all[currpunc].innerHTML,true,false);
      all[currpunc].classList.add("MarksFixedE135"); //The "CJK2Fix" class cannot be removed here because the collection is "live".
    }
  }
  1125. ///==Fix punct in a currHTML===///
  1126. function FixMarksInCurrHTML(currHTML,delete_all_extra_spaces,AlsoChangeFullStop) {
  1127. //“<-->\u201C, ”<-->\u201D
  1128. //‘<-->\u2018, ’<-->\u2019
  1129. var changhai_style=false;
  1130. var Squeezing=true;
  1131. var SqueezeInd=true;
  1132. var tmp_str='';
  1133. var FixMarks_start=performance.now();
  1134. if (changhai_style===true) {
  1135. //Simply inserting blanck space, like changhai.org.
  1136. currHTML=currHTML.replace(/([\u3400-\u9FBF\u3000-\u303F\uFF00-\uFFEF]?)([“‘])([\u3400-\u9FBF\u3000-\u303F\uFF00-\uFFEF]+)/g,'$1 $2$3');
  1137. currHTML=currHTML.replace(/([\u3400-\u9FBF\u3000-\u303F\uFF00-\uFFEF])([”’])([^,, ])/g,'$1$2 $3');
  1138. if (debug_04===true) {console.log(currHTML);}
  1139. all[currpunc].innerHTML=currHTML;
  1140. return true;
  1141. }
  1142. //==We need to protect the quotation marks within tags first===//
  1143. // \uE862,\uE863 <==> ‘,’
  1144. // \uE972,\uE973 <==> “,”
  1145. while (currHTML.match(/<[^>]*[“”‘’、,。:;!?)】〉》」』『「《〈【(][^<]*>/m)) {
  1146. currHTML=currHTML.replace(/(<[^>]*)‘([^<]*>)/mg,'$1\uE862$2');
  1147. currHTML=currHTML.replace(/(<[^>]*)’([^<]*>)/mg,'$1\uE863$2');
  1148. currHTML=currHTML.replace(/(<[^>]*)“([^<]*>)/mg,'$1\uE972$2');
  1149. currHTML=currHTML.replace(/(<[^>]*)”([^<]*>)/mg,'$1\uE973$2');
  1150. currHTML=currHTML.replace(/(<[^>]*)、([^<]*>)/mg,'$1\uEA01$2');
  1151. currHTML=currHTML.replace(/(<[^>]*),([^<]*>)/mg,'$1\uEA02$2');
  1152. currHTML=currHTML.replace(/(<[^>]*)。([^<]*>)/mg,'$1\uEA03$2');
  1153. currHTML=currHTML.replace(/(<[^>]*):([^<]*>)/mg,'$1\uEA04$2');
  1154. currHTML=currHTML.replace(/(<[^>]*);([^<]*>)/mg,'$1\uEA05$2');
  1155. currHTML=currHTML.replace(/(<[^>]*)!([^<]*>)/mg,'$1\uEA06$2');
  1156. currHTML=currHTML.replace(/(<[^>]*)?([^<]*>)/mg,'$1\uEA07$2');
  1157. currHTML=currHTML.replace(/(<[^>]*))([^<]*>)/mg,'$1\uEA08$2');
  1158. currHTML=currHTML.replace(/(<[^>]*)】([^<]*>)/mg,'$1\uEA09$2');
  1159. currHTML=currHTML.replace(/(<[^>]*)〉([^<]*>)/mg,'$1\uEA10$2');
  1160. currHTML=currHTML.replace(/(<[^>]*)》([^<]*>)/mg,'$1\uEA11$2');
  1161. currHTML=currHTML.replace(/(<[^>]*)」([^<]*>)/mg,'$1\uEA12$2');
  1162. currHTML=currHTML.replace(/(<[^>]*)』([^<]*>)/mg,'$1\uEA13$2');
  1163. currHTML=currHTML.replace(/(<[^>]*)『([^<]*>)/mg,'$1\uEA14$2');
  1164. currHTML=currHTML.replace(/(<[^>]*)「([^<]*>)/mg,'$1\uEA15$2');
  1165. currHTML=currHTML.replace(/(<[^>]*)《([^<]*>)/mg,'$1\uEA16$2');
  1166. currHTML=currHTML.replace(/(<[^>]*)〈([^<]*>)/mg,'$1\uEA17$2');
  1167. currHTML=currHTML.replace(/(<[^>]*)【([^<]*>)/mg,'$1\uEA18$2');
  1168. currHTML=currHTML.replace(/(<[^>]*)(([^<]*>)/mg,'$1\uEA19$2');
  1169. }
  1170. var time2protect=performance.now()-FixMarks_start;
  1171. //Now let's fix the punctions.
  1172. //First we need to fix the "reverse-paired" punctuations.
  1173. var fixpair=false; //the current code has problems if unpaired quotation marks are present.
  1174. var fixpair_timeout = noBonusTimeout; //Don't spend too much time on this "bonus" function.
  1175. var fixpair_start=performance.now();
  1176. if ( currHTML.length > noBonusLength ) {fixpair=false;}
  1177. if (debug_re_to_check===true && (currHTML.match(re_to_check))) {console.log("Reversing "+currHTML);}
  1178. if (fixpair===true) { //[\w,./<>?;:[]\{}|`~!@#$%^&*()_+-=]*
  1179. var revpaired=/(^[^\u201C\u201D]?(?:[^\u201C\u201D]*\u201C[^\u201C\u201D]*\u201D)*[^\u201C\u201D]*)\u201D([^\u201C\u201D]{2,})\u201C/;
  1180. while (currHTML.match(revpaired) && (performance.now()-fixpair_start)<fixpair_timeout ) {
  1181. if (debug_re_to_check===true && currHTML.match(re_to_check)) {console.log("Pair reversed: "+(performance.now()-t_start).toString());}
  1182. currHTML=currHTML.replace(revpaired,'$1\u201C$2\u201D');
  1183. }
  1184. }
  1185. var fixpair_stop=performance.now()-fixpair_start;
  1186. var paired_start=performance.now();
  1187. //Find and preserve paired Latin marks.
  1188. var paired=/(\u201C)([^\u3000-\u303F\u3400-\u9FBF\uE000-\uED00\uFF00-\uFFEF]*)(\u201D)/mg;
  1189. while (currHTML.match(paired)) {
  1190. if (debug_re_to_check===true && currHTML.match(re_to_check)) console.log("Quotation mark pair found@"+currHTML);
  1191. currHTML=currHTML.replace(paired,'\uEC1C$2\uEC1D');
  1192. }
  1193. //Find paired CJK marks. Seems like O(n^2) without the "g" modifier?
  1194. paired=/(\u201C)([^\u201D]*[\u3400-\u9FBF][^\u201D]*)(\u201D)/mg;
  1195. while (currHTML.match(paired)) {
  1196. currHTML=currHTML.replace(paired,'\uEB1C$2\uEB1D');
  1197. }
  1198. var paired_stop=performance.now()-paired_start;
  1199. //"unpaired \u201C or \u201D", not just use at the beginning of a paragraph.
  1200. var unpaired_timeout = noBonusTimeout; //not so important, therefore cannot spend too much time here.
  1201. var unpaired_start=performance.now();
  1202. var unpaired=/\u201C([^\u201D\u3400-\u9FBF]{0,3}[\u3400-\u9FBF][^\u201C\u201D]*$)/m;
  1203. while ( currHTML.length< noBonusLength && currHTML.match(unpaired) && (performance.now()-unpaired_start)<unpaired_timeout) {
  1204. currHTML=currHTML.replace(unpaired,'\uEB1C$1'); //We need the greedy method to get the longest match.
  1205. }
  1206. unpaired=/(^[^\u201C\u201D]*[\u3400-\u9FBF][^\u201D\u3400-\u9FBF]{0,3})\u201D/m;
  1207. while ( currHTML.length< noBonusLength && currHTML.match(unpaired) && (performance.now()-unpaired_start)<unpaired_timeout) {
  1208. currHTML=currHTML.replace(unpaired,'$1\uEB1D'); //We need the greedy method to get the longest match.
  1209. }
  1210. //For single quotations:
  1211. var paired_single_start=performance.now();
  1212. paired=/(\u2018)([^\u2019]*[\u3000-\u303F\u3400-\u9FBF\uFF00-\uFFEF][^\u2019]*)(\u2019)/mg;
  1213. while (currHTML.match(paired)) {
  1214. currHTML=currHTML.replace(paired,'\uEB18$2\uEB19');
  1215. }
  1216. var paired_single_stop=performance.now()-paired_single_start;
  1217. //"unpaired ‘ (\u2018)", not just use at the beginning of a paragraph.
  1218. unpaired_start=performance.now();
  1219. unpaired=/\u2018([^\u201D\u3400-\u9FBF]{0,3}[\u3400-\u9FBF][^\u2018\u2019]*$)/m;
  1220. while ( currHTML.length< noBonusLength && currHTML.match(unpaired) && (performance.now()-unpaired_start)<unpaired_timeout) {
  1221. currHTML=currHTML.replace(unpaired,'\uEB18$1'); //We need the greedy method to get the longest match.
  1222. }
  1223. //CJK’, otherwise words like it's might be affected.
  1224. unpaired=/(^[^\u2018\u2019]*[\u3400-\u9FBF])\u2019/m;
  1225. while ( currHTML.length< noBonusLength && currHTML.match(unpaired) && (performance.now()-unpaired_start)<unpaired_timeout) {
  1226. currHTML=currHTML.replace(unpaired,'$1\uEB19'); //We need the greedy method to get the longest match.
  1227. }
  1228. ///=== Unicode Shifting Ends ===///
  1229. var time2shift=performance.now()-FixMarks_start-time2protect;
  1230. //Remove extra spaces if necessary
  1231. if (delete_all_extra_spaces===true) {
  1232. //For changhai.org and similar sites.
  1233. currHTML=currHTML.replace(/([、,。:;!?)】〉》」』\uEB1D\uEB19]+)(?:[\s]|&nbsp;){0,2}/g,'$1');
  1234. currHTML=currHTML.replace(/([^\s])(?:[\s]|&nbsp;){0,2}([『「《〈【(\uEB1C\uEB18]+)/g,'$1$2');
  1235. }
  1236. else {
  1237. //Delete at most 1 spaces before and after because of the wider CJK marks.
  1238. currHTML=currHTML.replace(/([\uEB1D\uEB19])[ ]?/mg,'$1');
  1239. currHTML=currHTML.replace(/[ ]?([\uEB1C\uEB18])/mg,'$1');
  1240. }
  1241. ///--Group Left: [、,。:;!?)】〉》」』\uEB1D\uEB19] //Occupies the left half width.
  1242. ///--Group Right:[『「《〈【(\uEB1C\uEB18] //Occupies the right half width.
  1243. ///=====Use \uE211 as the calss name for TWO-PUNCT RULES====//
  1244. ///===Do not use the "g" modefier because we are using loops===//
  1245. var reLL=/([\n]?[、,。:;!?)】〉》」』\uEB1D\uEB19][\n]?)([、,。:;!?)】〉》」』\uEB1D\uEB19])/m;
  1246. var reLR=/([\n]?[、,。:;!?)】〉》」』\uEB1D\uEB19][\n]?)([『「《〈【(\uEB1C\uEB18])/m;
  1247. var reRR=/([\n]?[『「《〈【(\uEB1C\uEB18][\n]?)([『「《〈【(\uEB1C\uEB18])/m;
  1248. var reRL=/([\n]?[『「《〈【(\uEB1C\uEB18][\n]?)([、,。:;!?)】〉》」』\uEB1D\uEB19])/m;
  1249. var sqz_start=performance.now();
  1250. while (currHTML.match(/[、,。:;!?)】〉》」』\uEB1D\uEB19『「《〈【(\uEB1C\uEB18]{2,}/m) && (performance.now()-sqz_start)<sqz_timeout) {
  1251. if (currHTML.match(reLL)) {
  1252. //--TWO PUNCTS: {Left}{Left}--//
  1253. tmp_str='<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;letter-spacing:'+kern_consec_ll+';">$1</span>$2';
  1254. currHTML=currHTML.replace(reLL,tmp_str);
  1255. }
  1256. else if (currHTML.match(reLR)) {
  1257. //--TWO PUNCTS: {Left}{Right}--//
  1258. tmp_str='<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;letter-spacing:'+kern_consec_lr+';">$1</span>$2';
  1259. currHTML=currHTML.replace(reLR,tmp_str);
  1260. }
  1261. else if (currHTML.match(reRR)) {
  1262. //--TWO PUNCTS: {Right}{Right}--//
  1263. tmp_str='<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;letter-spacing:'+kern_consec_rr+';">$1</span>$2';
  1264. currHTML=currHTML.replace(reRR,tmp_str);
  1265. }
  1266. else if (currHTML.match(reRL)) {
  1267. //--TWO PUNCTS: no letter-spacing adjustment for {Right}-{Left}--//
  1268. currHTML=currHTML.replace(reRL,'$1<wbr>$2');
  1269. }
  1270. else {
  1271. console.log("FIXME: current combination of punctuations has not been considered!");
  1272. break;
  1273. }
  1274. }
  1275. ///---Done with conseqtive puncts--///
  1276. if (debug_04===true) {all[currpunc].style.color="Pink";}
  1277. if ((AlsoChangeFullStop===true) && (currHTML.match(/[?!:;、,。]/mg))) {
  1278. currHTML=currHTML.replace(/([?!:;、,。])/mg,'<span class="\uE985" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:'+dequote(CJKPunct)+';">$1</span>');
  1279. }
  1280. if (SqueezeInd===true) {
  1281. //Do not squeeze the last punctuation marks in a paragraph. Too risky.
  1282. currHTML=currHTML.replace(/([<[^\uE211]*>]|[^><])([『「《〈【(\uEB1C\uEB18])/mg,'$1<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;margin-left:-0.2em;">$2</span>');
  1283. //But the first punctuation marks in a paragraph seems OK.
  1284. currHTML=currHTML.replace(/^([『「《〈【(\uEB1C\uEB18])/mg,'<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;margin-left:-0.3em;">$1</span>');
  1285. currHTML=currHTML.replace(/([、,。:;!?)】〉》」』\uEB1D\uEB19])([<[^\uE211]*>]|[^><])/mg,'<span class="\uE211" style="display:inline;padding-left:0px;padding-right:0px;float:none;margin-right:-0.2em;">$1</span>$2');
  1286. }
  1287. ///=== Squeezing Ends ===///
  1288. var time2squeeze=performance.now()-FixMarks_start-time2shift-time2protect;
  1289. ///=== Change the protected punctuations in tags back==///
  1290. currHTML=currHTML.replace(/\uE862/mg,'\u2018');
  1291. currHTML=currHTML.replace(/\uE863/mg,'\u2019');
  1292. currHTML=currHTML.replace(/\uE972/mg,'\u201C');
  1293. currHTML=currHTML.replace(/\uE973/mg,'\u201D');
  1294. currHTML=currHTML.replace(/\uEA01/mg,'、');
  1295. currHTML=currHTML.replace(/\uEA02/mg,',');
  1296. currHTML=currHTML.replace(/\uEA03/mg,'。');
  1297. currHTML=currHTML.replace(/\uEA04/mg,':');
  1298. currHTML=currHTML.replace(/\uEA05/mg,';');
  1299. currHTML=currHTML.replace(/\uEA06/mg,'!');
  1300. currHTML=currHTML.replace(/\uEA07/mg,'?');
  1301. currHTML=currHTML.replace(/\uEA08/mg,')');
  1302. currHTML=currHTML.replace(/\uEA09/mg,'】');
  1303. currHTML=currHTML.replace(/\uEA10/mg,'〉');
  1304. currHTML=currHTML.replace(/\uEA11/mg,'》');
  1305. currHTML=currHTML.replace(/\uEA12/mg,'」');
  1306. currHTML=currHTML.replace(/\uEA13/mg,'』');
  1307. currHTML=currHTML.replace(/\uEA14/mg,'『');
  1308. currHTML=currHTML.replace(/\uEA15/mg,'「');
  1309. currHTML=currHTML.replace(/\uEA16/mg,'《');
  1310. currHTML=currHTML.replace(/\uEA17/mg,'〈');
  1311. currHTML=currHTML.replace(/\uEA18/mg,'【');
  1312. currHTML=currHTML.replace(/\uEA19/mg,'(');
  1313. ///////==== Change quotation marks back =====/////
  1314. currHTML=currHTML.replace(/\uEC1C/mg,'\u201C');
  1315. currHTML=currHTML.replace(/\uEC1D/mg,'\u201D');
  1316. currHTML=currHTML.replace(/\uEB1C/mg,'<span class="\uE985" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:'+dequote(CJKPunct)+';">\u201C</span>');
  1317. currHTML=currHTML.replace(/\uEB1D/mg,'<span class="\uE985" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:'+dequote(CJKPunct)+';">\u201D</span>');
  1318. currHTML=currHTML.replace(/\uEB18/mg,'<span class="\uE985" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:'+dequote(CJKPunct)+';">\u2018</span>');
  1319. currHTML=currHTML.replace(/\uEB19/mg,'<span class="\uE985" style="display:inline;padding-left:0px;padding-right:0px;float:none;font-family:'+dequote(CJKPunct)+';">\u2019</span>');
  1320. ///=== Replacing and Restoring Ends ===///
  1321. var time2replace=performance.now()-FixMarks_start-time2squeeze-time2shift-time2protect;
  1322. if ( (performance.now()-FixMarks_start)>200 ) {
  1323. console.log("FIXME: String Operation Too Slow: "+(performance.now()-FixMarks_start).toFixed(0)+" ms.");
  1324. console.log("Protect: "+time2protect.toFixed(0)+" ms.");
  1325. console.log("Shift: "+time2shift.toFixed(0)+" ms.");
  1326. console.log(" ----->rev: "+fixpair_stop.toFixed(0)+" ms.");
  1327. console.log(" ----->\u201C,\u201D: "+paired_stop.toFixed(0)+" ms.");
  1328. console.log(" ----->\u2018,\u2019: "+paired_single_stop.toFixed(0)+" ms.");
  1329. console.log("Squeeze: "+time2squeeze.toFixed(0)+" ms.");
  1330. console.log("Replace: "+time2replace.toFixed(0)+" ms.");
  1331. console.log("String(Length): "+currHTML.slice(0,216)+"...("+currHTML.length+")");
  1332. }
  1333. return currHTML;
  1334. }
  1335. ///===The following function works around the lazy-loaded image problem on zhihu.com===///
  1336. //Not needed when the recursive implementation is used; it is still required when the "forced fixing" is triggered.
  /**
   * Work around lazily loaded images (seen on zhihu.com): copy each <img>'s
   * `data-actualsrc` attribute into its `src` so the real picture is shown.
   *
   * Images without the attribute, or with an empty one, are left untouched,
   * and `src` is only written when it actually differs from the target value.
   * Takes no arguments and returns nothing; it only mutates the matching
   * <img> elements of the current document.
   */
  function FixLazy() {
    var imgs = document.getElementsByTagName('img');
    for (var i = 0; i < imgs.length; i++) {
      var img = imgs[i];
      // getAttribute() yields null when the attribute is absent, so a single
      // truthiness check covers both "missing" and "present but empty".
      var actualSrc = img.getAttribute('data-actualsrc');
      if (!actualSrc) {
        // An empty value would replace a working placeholder with a URL that
        // resolves to the page itself, so keep the current src instead.
        continue;
      }
      // Assigning src makes the browser re-run its image loading steps even
      // for an identical value; skip the write when nothing would change so
      // repeated calls are cheap and do not cause reloads.
      if (img.getAttribute('src') !== actualSrc) {
        img.src = actualSrc;
      }
    }
  }
  1345. }
  1346. ) ();