Extract Special Characters

Extract special character data from ocr-gt-tools wiki

目前為 2016-04-19 提交的版本,檢視 最新版本

  1. // ==UserScript==
  2. // @name Extract Special Characters
  3. // @namespace http://github.com/kba/
  4. // @include https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Special-Characters
  5. // @version 1
  6. // @require https://code.jquery.com/jquery-2.2.3.min.js
  7. // @require https://cdnjs.cloudflare.com/ajax/libs/z-schema/3.17.0/ZSchema-browser.js
  8. // @grant GM_addStyle
  9. // @description Extract special character data from ocr-gt-tools wiki
  10. // ==/UserScript==
  11. /*globals GM_addStyle */
  12. /*globals ZSchema */
  13.  
  /**
   * JSON Schema describing one scraped glyph descriptor.
   *
   * scrapeSpecialGlyphs() passes every descriptor it builds to ZSchema together
   * with this schema. Unknown properties are rejected, and `sample`,
   * `recognition` and `baseLetter` are mandatory.
   */
  var SCHEMA = {
    type: 'object',
    additionalProperties: false,
    properties: {
      // Each sample must be an HTML string starting with a link wrapping an image.
      sample: {
        type: 'array',
        items: { type: 'string', pattern: '^<a.*<img.*' },
      },
      recognition: { type: 'string' },
      baseLetter: { type: 'array' },
      // `name` and `notes` are objects; the scraper fills them keyed by a
      // two-letter suffix taken from the wiki label.
      name: { type: 'object' },
      notes: { type: 'object' },
      // Keyboard shortcuts must start with a <kbd> element.
      shortcutLinux: { type: 'string', pattern: '^<kbd' },
      shortcutWindows: { type: 'string', pattern: '^<kbd' },
    },
    required: ['sample', 'recognition', 'baseLetter'],
  };
  48.  
  /**
   * Turn the label of a wiki list item (the text before the first colon) into
   * a camelCase property name, e.g. "Base Letter" -> "baseLetter" and
   * "Shortcut (Linux)" -> "shortcutLinux".
   *
   * @param {string} label - Raw label text or markup.
   * @returns {string} The camelCase property name.
   */
  function toPropertyName(label) {
    return label
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '_')
      // Strip the separator at BOTH ends (hence the `g` flag) and do it before
      // camel-casing: otherwise a leading separator is consumed by the next
      // replacement and capitalises the first letter (" Recognition" would
      // become "Recognition" instead of "recognition").
      .replace(/^_|_$/g, '')
      .replace(/_([a-z])/g, function(match, ch) {
        return ch.toUpperCase();
      });
  }

  /**
   * Scrape the glyph descriptions from the wiki page.
   *
   * Every `h2` inside `.markdown-body` names a glyph; the `li` elements of the
   * `ul` that immediately follows it hold "Label: value" pairs. Each glyph is
   * validated against SCHEMA with ZSchema, and validation failures are reported
   * through showError(). A glyph that fails validation is still included in
   * the result.
   *
   * @returns {Object<string, Object>} Glyph descriptors keyed by the trimmed
   *     heading text.
   */
  function scrapeSpecialGlyphs() {
    const glyphJson = {};
    const validator = new ZSchema();
    $('.markdown-body h2').get().forEach(function(h2) {
      const $h2 = $(h2);
      const glyphId = $h2.text().trim();
      const glyphDesc = {};
      glyphJson[glyphId] = glyphDesc;
      $h2.next('ul').find('li').get().forEach(function(li) {
        const liHtml = $(li).html();
        // With no colon, indexOf() yields -1: the property name is then ''
        // and the whole item becomes its value, which the schema's
        // additionalProperties check reports as an error.
        const colonIndex = liHtml.indexOf(':');
        const varName = toPropertyName(liHtml.substring(0, colonIndex));
        const rawValue = liHtml.substring(colonIndex + 1).trim();
        if (varName === 'baseLetter' || varName === 'sample') {
          // Multi-valued fields are separated by semicolons.
          // NOTE(review): the value is markup from .html(), so a ';' that is
          // part of an HTML entity would also split -- confirm samples never
          // contain entities.
          glyphDesc[varName] = rawValue.split(/\s*;\s*/);
        } else if (/[A-Z][a-z]$/.test(varName)) {
          // A trailing two-letter camelCase suffix ("nameDe") is treated as a
          // key: the value is stored under glyphDesc.name.de.
          const lang = varName.slice(-2).toLowerCase();
          const baseName = varName.slice(0, -2);
          glyphDesc[baseName] = glyphDesc[baseName] || {};
          glyphDesc[baseName][lang] = rawValue;
        } else {
          glyphDesc[varName] = rawValue;
        }
      });
      if (!validator.validate(glyphDesc, SCHEMA)) {
        showError(glyphId, validator.getLastErrors());
      }
    });
    return glyphJson;
  }
  87.  
  // Register the script's stylesheet through GM_addStyle. The CSS covers the
  // inline validation errors (pre.glyph-error), the toolbar fixed to the bottom
  // of the page (div#glyph-bar) and the list of invalid glyphs
  // (div#glyph-invalid), which starts hidden via `display: none`.
  88. GM_addStyle(
  89. `
  90. pre.glyph-error
  91. {
  92. background: #a00;
  93. color: white;
  94. white-space: pre-wrap;
  95. }
  96. div#glyph-bar
  97. {
  98. font-size: x-large;
  99. position:fixed;
  100. bottom: 0;
  101. height: 48px;
  102. border: 2px solid black;
  103. background: white;
  104. width: 100%;
  105. }
  106. div#glyph-bar .left * { float: left; }
  107. div#glyph-bar .right * { float: right; }
  108. div#glyph-bar *
  109. {
  110. height: 100%;
  111. font-size: x-large;
  112. }
  113. div#glyph-bar input[type='text']
  114. {
  115. font-family: "Garamond", "Bookman", serif;
  116. }
  117. div#glyph-invalid
  118. {
  119. display: none;
  120. background: #900;
  121. color: white !important;
  122. max-width: 50%;
  123. overflow-y: scroll;
  124. }
  125. div#glyph-invalid a
  126. {
  127. display: inline-block;
  128. color: white !important;
  129. float: none;
  130. margin: 0 2px;
  131. }
  132. `
  133. );
  // Append the toolbar markup to <body>. The left side holds the text input
  // (#glyph-input) and the proposal container (#glyph-propose); the right side
  // holds the invalid-glyph list (#glyph-invalid) and the "Schema" / "JSON"
  // buttons. The ids match the selectors used by the CSS and the event
  // handlers elsewhere in this script.
  134. $("body").append(
  135. `
  136. <div id="glyph-bar">
  137. <div class="left">
  138. <label for="glyph-input" style="font-family: monospace; font-size: 30px">TRY&gt;</label>
  139. <input id="glyph-input" type="text"/>
  140. <div id="glyph-propose">&nbsp;</div>
  141. </div>
  142. <div class="right">
  143. <div id="glyph-invalid">!! INVALID </div>
  144. <button id="glyph-schema">Schema</button>
  145. <button id="glyph-json">JSON</button>
  146. </div>
  147. </div>
  148. `);
  149.  
  /**
   * Escape a value for inclusion in HTML markup.
   *
   * The value is converted with String() and each of the characters
   * & < > " ' / is replaced by its HTML entity; everything else is kept.
   *
   * @param {*} str - Value to escape.
   * @returns {string} The escaped text.
   */
  function escapeHTML(str) {
    const entityFor = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#39;',
      '/': '&#x2F;',
    };
    return String(str)
      .split('')
      .map(function(ch) {
        const isSpecial = Object.prototype.hasOwnProperty.call(entityFor, ch);
        return isSpecial ? entityFor[ch] : ch;
      })
      .join('');
  }
  163.  
  /**
   * Display the validation errors of one glyph on the page.
   *
   * The errors are appended, pretty-printed and HTML-escaped, to the heading of
   * the glyph, and a numbered link is added to the #glyph-invalid box, which is
   * made visible.
   *
   * @param {string} glyphId - Trimmed text of the glyph's `h2` heading.
   * @param {*} err - Error details; serialised with JSON.stringify.
   */
  function showError(glyphId, err) {
    const errorHtml =
      `<pre class='glyph-error'>${escapeHTML(JSON.stringify(err, null, 2))}</pre>`;
    // Compare the heading text exactly instead of building an
    // `h2:contains('...')` selector: `:contains` matches substrings (an id of
    // "s" would hit every heading containing an "s"), and interpolating the id
    // into the selector breaks as soon as it contains a quote.
    $('h2')
      .filter(function() {
        return $(this).text().trim() === glyphId;
      })
      .append(errorHtml);
    const linkNumber = $('#glyph-invalid a').length + 1;
    // The id comes from page content, so escape it before placing it in an
    // attribute value.
    $('#glyph-invalid').show().append(
      `<a href="#${escapeHTML(glyphId)}">[${linkNumber}]</a>`);
  }
  170.  
  /**
   * Remove every proposal currently shown in the #glyph-propose container.
   */
  function clearProposals() {
    const $proposalBox = $('#glyph-propose');
    $proposalBox.empty();
  }
  174.  
  /**
   * Show the glyph samples that can replace the selected text of the input.
   *
   * The text between `from` and `to` is looked up in the `baseLetter` list of
   * every descriptor in `window.glyphJson`. Each sample of a matching glyph is
   * appended to #glyph-propose; clicking it replaces the selected range with
   * the glyph's `recognition` string.
   *
   * @param {jQuery} $input - The text input being edited.
   * @param {number} from - Start offset of the selection.
   * @param {number} to - End offset of the selection.
   */
  function showProposals($input, from, to) {
    clearProposals();
    const val = $input.val();
    const chosen = val.substring(from, to);
    if (chosen === '') {
      // Nothing selected (e.g. caret at the very start): nothing to propose.
      return;
    }
    const $propose = $('#glyph-propose');
    const glyphs = window.glyphJson || {};
    Object.keys(glyphs).forEach(function(glyphId) {
      const glyphDesc = glyphs[glyphId];
      // Descriptors that failed schema validation stay in glyphJson and may
      // lack the required fields; skip them instead of throwing.
      if (!Array.isArray(glyphDesc.baseLetter) ||
          !Array.isArray(glyphDesc.sample) ||
          typeof glyphDesc.recognition !== 'string') {
        return;
      }
      if (glyphDesc.baseLetter.indexOf(chosen) === -1) {
        return;
      }
      glyphDesc.sample.forEach(function(sample) {
        const $sample = $(sample).on('click', function(e) {
          // Samples are links; keep the browser from following them.
          e.preventDefault();
          $input.val(val.substring(0, from) + glyphDesc.recognition + val.substring(to));
        });
        $propose.append($sample);
      });
    });
  }
  195.  
  // Once the DOM is ready: scrape the glyph data (unless window.glyphJson is
  // already set) and wire up the toolbar controls.
  $(function() {
    window.glyphJson = window.glyphJson || scrapeSpecialGlyphs();

    // After every key press, propose glyphs for the current selection or, when
    // nothing is selected, for the character just before the caret.
    $('#glyph-input').on('keyup', function() {
      const $input = $('#glyph-input');
      let from = $input[0].selectionStart;
      const to = $input[0].selectionEnd;
      // Only step back when there is a character before the caret, so `from`
      // never becomes negative.
      if (from === to && from > 0) {
        from -= 1;
      }
      showProposals($input, from, to);
    });

    $('#glyph-schema').on('click', function() {
      window.prompt('Ctrl-C to copy schema', JSON.stringify(SCHEMA, null, 2));
    });

    // The JSON button exports the scraped glyph data, not the schema.
    $('#glyph-json').on('click', function() {
      window.prompt('Ctrl-C to copy JSON data', JSON.stringify(window.glyphJson, null, 2));
    });
  });