DSA Wiki Crawler

Crawls the wiki and refactors it the right way

当前为 2017-02-20 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name DSA Wiki Crawler
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.1.2
  5. // @description Crawling the Wiki and refactor it the good way
  6. // @author David Mahl
  7. // @match http://www.ulisses-regelwiki.de/index.php/sonderfertigkeiten.html
  8. // @grant none
  9. // @require http://code.jquery.com/jquery-latest.js
  10. // @require https://cdn.jsdelivr.net/lodash/4.14.2/lodash.min.js
  11. // ==/UserScript==
  12.  
  (function() {
    'use strict';

    // Manual kill switch: while it is false, every discovered navigation entry
    // is persisted and its target page is crawled. Nothing in this script sets
    // it to true.
    let stop = false;

    // Crawl result tree. Nodes are addressed with lodash string paths of the
    // form 'root.<title>' and '<parent path>.children.<title>'.
    const storage = {};

    const CONST = {
      // Labels that introduce a known field inside an article paragraph.
      'CONTENT': {
        'PUBLIKATION': 'Publikation:',
        'ASP': 'AsP-Kosten:',
        'AP': 'AP-Wert:',
        'ERSCHWERNIS': 'Erschwernis:',
        'VORAUSSETZUNGEN': 'Voraussetzungen:',
        'KAMPFTECHNIKEN': 'Kampftechniken:'
      },
      'GLOBAL': {
        'CUTPARAGRAPH': 4,
        'CUTSTRONG': 10,
        // Selector for the article body of a page.
        'ARTICLEQUERY': '.mod_article .ce_text',
        // Selector for the links of the sub navigation of a page.
        'ITEMQUERY': 'nav table a[role="menuitem"]'
      }
    };

    // Ordered label -> content key table. The first label found in a paragraph
    // decides which key receives the paragraph's value.
    // NOTE(review): the key 'vorraussetzungen' is misspelled, but it is kept
    // because it is the key name used in the stored data.
    const CONTENT_FIELDS = [
      { label: CONST.CONTENT.PUBLIKATION, key: 'publikation' },
      { label: CONST.CONTENT.ASP, key: 'asp' },
      { label: CONST.CONTENT.AP, key: 'ap' },
      { label: CONST.CONTENT.ERSCHWERNIS, key: 'erschwernis' },
      { label: CONST.CONTENT.VORAUSSETZUNGEN, key: 'vorraussetzungen' },
      { label: CONST.CONTENT.KAMPFTECHNIKEN, key: 'kampftechniken' }
    ];

    const app = {
      /**
       * Extracts the article content and the navigation entries of one page,
       * stores both in `storage` and starts crawling every navigation entry.
       *
       * @param {string} [result] HTML of a loaded page. When undefined, the
       *   current document is inspected instead.
       * @param {string} [entryPoint] lodash path of the node in `storage` that
       *   this page belongs to. When undefined, the found navigation entries
       *   are stored directly below 'root'.
       */
      getAllNavigationEntrys(result, entryPoint) {
        let $itemQuery;
        let $article;

        if (result === undefined) {
          $itemQuery = $(CONST.GLOBAL.ITEMQUERY);
          $article = $(CONST.GLOBAL.ARTICLEQUERY);
        } else {
          // Parse the loaded markup once and run both queries against it.
          const $page = $(result);
          $itemQuery = $page.find(CONST.GLOBAL.ITEMQUERY);
          $article = $page.find(CONST.GLOBAL.ARTICLEQUERY);
        }

        // Content is only extracted when the page has exactly one article.
        if ($article.length === 1) {
          const content = {
            'publikation': undefined,
            'asp': undefined,
            'ap': undefined,
            'erschwernis': undefined,
            'vorraussetzungen': undefined,
            'kampftechniken': undefined,
            'info': ""
          };

          $article.find('p').each(function(i, e) {
            const paragraph = $(e).prop('outerText');
            const field = CONTENT_FIELDS.find(function(candidate) {
              return paragraph.indexOf(candidate.label) >= 0;
            });

            if (field === undefined) {
              // Paragraphs without a known label are appended to the free text.
              content.info = content.info + paragraph;
              return;
            }

            // Each label writes to its own key. Previously the last three
            // labels all overwrote `content.ap`.
            // The +1 skips the single character that follows the label
            // (presumably the separating space).
            const valueStart = paragraph.indexOf(field.label) + field.label.length + 1;
            content[field.key] = paragraph.substring(valueStart);
          });

          // NOTE(review): for the start page `entryPoint` is undefined, so the
          // content ends up below the literal key 'undefined' - confirm whether
          // that is intended.
          _.setWith(storage, `${entryPoint}.content`, content);
        }

        $itemQuery.each(function() {
          const title = $(this).attr('title');
          const href = $(this).attr('href');
          // NOTE(review): titles containing '.' or '[' are split into several
          // path segments by lodash - verify against the real wiki titles.
          const objectPath = entryPoint === undefined
            ? `root.${title}`
            : `${entryPoint}.children.${title}`;

          _.setWith(storage, objectPath, {'title': title, 'href': href});
          if (stop === false) {
            app.writeToLocalStorage();
            app.crawlSubNavigation(_.get(storage, `${objectPath}.href`), objectPath);
          }
        });
      },

      /**
       * Loads a sub page and parses it as a child of `parent`.
       *
       * @param {string} url Address of the page to load.
       * @param {string} parent lodash path of the node the page belongs to.
       * @returns {Object} The promise of the request chain.
       */
      crawlSubNavigation(url, parent) {
        return app.loadPage(url)
          .then(app.parsePage.bind(null, parent))
          .fail(function(err) {
            // An exception thrown here could not be caught by any caller, so
            // the failure is reported together with the affected URL instead.
            console.error('Error while loading page:', url, err);
          });
      },

      /**
       * Wraps a value in an object with a single `text` property.
       *
       * @param {*} $text Value to wrap.
       * @returns {{text: *}} The wrapper object.
       */
      updateContent($text) {
        return {'text': $text};
      },

      /**
       * Adapter that swaps the argument order for `getAllNavigationEntrys`, so
       * the entry point can be pre-bound before the loaded markup arrives.
       *
       * @param {string} [entryPoint] lodash path of the node being parsed.
       * @param {string} [result] HTML of the loaded page.
       */
      parsePage(entryPoint, result) {
        app.getAllNavigationEntrys(result, entryPoint);
      },

      /**
       * Requests a page as HTML.
       *
       * NOTE(review): the request is sent as POST although it only reads a
       * page - confirm whether the server requires this before changing it.
       *
       * @param {string} url Address of the page to load.
       * @returns {Object} The jqXHR object returned by `$.ajax`.
       */
      loadPage(url) {
        return $.ajax({
          url: url,
          type: "POST",
          dataType: "html"
        });
      },

      /**
       * Serializes the whole crawl result into localStorage.
       */
      writeToLocalStorage() {
        localStorage.setItem('wikiData', JSON.stringify(storage));
        // TODO: replace the placeholder with a real checksum of the stored data.
        localStorage.setItem('wikiData.checksum', '123');
      },

      /**
       * Starts the crawl from the current document and logs the storage object.
       */
      init() {
        // TODO: resuming from the 'wikiData' entry in localStorage instead of
        // crawling again was sketched earlier but is not implemented.
        app.parsePage();
        // The crawl is asynchronous; the logged object is filled while it runs.
        console.log('Storage:', storage);
      }
    };

    app.init();
  })();