MRLookup

Extract BibTeX data automatically and modify BibTeX Key to AUTHOR_YEAR_TITLE.

当前为 2018-05-28 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @namespace vanabeljs
  3. // @name MRLookup
  4. // @description Extract BibTeX data automatically and modify BibTeX Key to AUTHOR_YEAR_TITLE.
  5. // @description:ZH-CN 自动提取BibTeX数据并修改BibTeX关键字为AUTHOR_YEAR_TITLE的形式.
  6. // @copyright 2017, Van Abel (https://home.vanabel.cn)
  7. // @license OSI-SPDX-Short-Identifier
  8. // @version 1.5.2
  9. // @include https://www.ams.org/mrlookup
  10. // @include https://mathscinet.ams.org/mrlookup
  11. // @include http://mathscinet.ams.org/mrlookup
  12. // @include http://www.ams.org/mathscinet/search/publications.html?fmt=bibtex*
  13. // @include http://mathscinet.ams.org/mathscinet/search/publications.html?fmt=bibtex*
  14. // @include https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=bibtex*
  15. // @include http://ams2.math.uni-bielefeld.de/mathscinet/search/publications.html?fmt=bibtex*
  16. // @include http://mathscinet.ams.org/mathscinet/clipboard.html
  17. // @grant GM_setClipboard
  18. // ==/UserScript==
  19.  
  20. // ==OpenUserJS==
  21. // @author Van Abel
  22. // ==/OpenUserJS==
  23.  
  24. /**
  25. *
  26. * Please begin typing or paste your Userscript now.
  27. *
  28. */
  29.  
  /*
   * Words that are skipped when they appear at the very start of a title,
   * so that the generated key uses the first meaningful title word instead.
   */
  var IgnoreStringInTitle = ['a', 'an', 'on', 'the', 'another'];
  /**
   * Build the source text of a regular expression that matches any one of the
   * given words at the start of a string, followed by at least one whitespace
   * character.
   *
   * The words are inserted verbatim (no escaping), each in its own capture
   * group, e.g. ['a', 'an'] -> '^((a)|(an))\\s+'.
   *
   * @param {string[]} arr - words to match at the beginning of a string
   * @returns {string} pattern source suitable for `new RegExp(source, flags)`
   */
  function IgnoreStringToRegExp(arr) {
    var groups = [];
    for (var k = 0; k < arr.length; k += 1) {
      groups.push('(' + arr[k] + ')');
    }
    return '^(' + groups.join('|') + ')\\s+';
  }
  51. /*split bibdata*/
  52.  
  /**
   * Parse the first `NAME = value` field found at the start of `text`.
   *
   * The value is scanned delimiter by delimiter (newline, carriage return,
   * comma, `{`, `}`), tracking brace depth. Scanning stops at the first
   * delimiter seen while the depth is zero: the closing brace of a braced
   * value, or a comma / line break for an unbraced one.
   *
   * @param {string} text - remaining entry text, beginning with a field
   * @returns {{field: string, value: string, length: number}}
   *   field  - the field name exactly as written;
   *   value  - the text after `NAME =` up to and INCLUDING the delimiter that
   *            ended the scan (so a braced value keeps its outer braces);
   *   length - number of characters of `text` the caller should drop: the
   *            `NAME =` prefix, the value, plus one more character (the comma
   *            that normally follows a closing brace).
   * @throws {Error} if the text does not start with `NAME =`, if a closing
   *   brace appears before any opening brace, or if the text ends before the
   *   value is terminated.
   */
  function parseBibTexLine(text) {
    var header = text.match(/^\s*(\S+)\s*=\s*/);
    if (!header) {
      console.log('line: "' + text + '"');
      throw new Error('Unrecogonised line format');
    }
    var name = header[1];
    var prefixLength = header[0].length;
    var search = text.slice(prefixLength);
    var re = /[\n\r,{}]/g;
    var braceCount = 0;
    var m;
    do {
      m = re.exec(search);
      if (m === null) {
        // No delimiter left: previously this dereferenced null and surfaced
        // as an opaque TypeError. Report the real problem instead.
        throw new Error('Unterminated value for field "' + name + '"');
      }
      if (m[0] === '{') {
        braceCount++;
      } else if (m[0] === '}') {
        if (braceCount === 0) {
          throw new Error('Unexpected closing brace: "}"');
        }
        braceCount--;
      }
    } while (braceCount > 0);
    return {
      field: name,
      value: search.slice(0, re.lastIndex),
      // m[0] is always a single character, so this skips exactly one
      // character past the terminating delimiter.
      length: prefixLength + re.lastIndex + m[0].length
    };
  }
  /**
   * Parse a single BibTeX entry of the form `@type {key, NAME = value, ... }`.
   *
   * The header yields `typeName` and `citationKey`; every following field is
   * read with parseBibTexLine and stored on the result under its own name,
   * with the value exactly as parseBibTexLine returned it. Reading stops when
   * the remaining text starts with `}`.
   *
   * @param {string} text - the entry text
   * @returns {Object} object with `typeName`, `citationKey` and one property
   *   per field
   * @throws {Error} if the header is not recognised, or if parseBibTexLine
   *   rejects a field
   */
  function parseBibTex(text) {
    var head = text.match(/^\s*@([^{]+){([^,\n]+)[,\n]/);
    if (head === null) {
      throw new Error('Unrecogonised header format');
    }

    var entry = {};
    entry.typeName = head[1].trim();
    entry.citationKey = head[2].trim();

    // Consume one field at a time until the entry's closing brace is next.
    var rest = text.slice(head[0].length).trim();
    while (rest.charAt(0) !== '}') {
      var parsed = parseBibTexLine(rest);
      entry[parsed.field] = parsed.value;
      rest = rest.slice(parsed.length).trim();
    }
    return entry;
  }
  /*
   * Main script body.
   *
   * For every <pre> element on the page: parse its text as a BibTeX entry,
   * build a key made of the authors' surnames, the year and the first
   * significant title word, substitute that key for the "MR<digits>" citation
   * key, and copy the entry to the clipboard when the element is clicked.
   */

  /* Braces, backslashes and quotes are markup, never part of a key. */
  var bibValueNoise = /[{}\\'"]/g;

  /* Leading title words to skip; built once instead of once per entry. */
  var titleclean = new RegExp(IgnoreStringToRegExp(IgnoreStringInTitle), 'gi');

  /**
   * Concatenate the surnames found in a BibTeX AUTHOR value.
   * Authors are separated by the word "and" surrounded by whitespace; the
   * surname is whatever precedes the first comma of each author. Splitting on
   * the separator (rather than deleting every "and" substring) keeps surnames
   * such as "Landau" or "Brandt" intact and prevents pieces of given names
   * from leaking into the key.
   */
  function surnamesFromAuthorField(authorField) {
    return authorField
      // drop one-letter TeX accent commands followed by a space, inline math, and \"
      .replace(/\\[a-z]\s|(\$.*?\$)|(\\")/gi, '')
      .split(/\s+and\s+/)
      .map(function (person) {
        return person.split(',')[0].replace(bibValueNoise, '').replace(/\s+/g, '');
      })
      .join('');
  }

  /**
   * Return the first word of a BibTeX TITLE value after removing TeX control
   * words, punctuation, inline math and the leading words listed in
   * IgnoreStringInTitle.
   */
  function firstTitleWord(titleField) {
    var title = titleField
      .replace(/\\\w+\s*/g, '')
      // A wrapped line becomes a single space so the two words stay separate.
      .replace(/\s*[\r\n]\s*/g, ' ')
      .replace(/[\\'"{}:,-]/g, '')
      .replace(/\$.*?\$/g, '')
      .trim();
    // Strip ignored leading words repeatedly ("On the ..." -> "...").
    var stripped = title.replace(titleclean, '');
    while (stripped !== title) {
      title = stripped;
      stripped = title.replace(titleclean, '');
    }
    return title.replace(/(\w+).*$/gi, '$1').replace(/\s*/g, '');
  }

  /**
   * Click handler: copy the clicked element's text to the clipboard with CRLF
   * line endings. textContent is used so that HTML entities (e.g. "&amp;")
   * are copied as the characters they stand for.
   */
  function copyEntryToClipboard() {
    var entryText = this.textContent
      .replace(/\r|\n/g, '\r\n')
      .replace(/^\r\n/g, '')
      .replace(/\s*$/g, '\r\n')
      .replace(/\r\n\r\n/g, '\r\n');
    GM_setClipboard(entryText);
  }

  var els = document.getElementsByTagName('pre');
  for (var i = 0, l = els.length; i < l; i++) {
    var el = els[i];
    var bibdata;
    try {
      bibdata = parseBibTex(el.textContent);
    } catch (err) {
      // One unparsable <pre> must not stop the remaining entries.
      console.error('MRLookup: skipping <pre> #' + i + ': ' + err.message);
      continue;
    }

    // Missing fields contribute nothing to the key instead of throwing.
    var au = surnamesFromAuthorField(bibdata.AUTHOR || '');
    var year = (bibdata.YEAR || '').replace(/[{}\\'",\s]/g, '');
    var title = firstTitleWord(bibdata.TITLE || '');
    var bibkey = au + year + title;

    // Replace only the citation key itself (first occurrence), and only when
    // it is an MR number. The function form keeps "$" in the key literal.
    if (bibkey !== '' && /^MR\d+$/i.test(bibdata.citationKey)) {
      el.innerHTML = el.innerHTML.replace(bibdata.citationKey, function () {
        return bibkey;
      });
    }

    /*click to copy to clipboard*/
    el.addEventListener('click', copyEntryToClipboard);
  }