DSA Wiki Crawler

Crawls the wiki and refactors its content into a cleaner structure

目前为 2017-02-20 提交的版本。查看 最新版本

// ==UserScript==
// @name         DSA Wiki Crawler
// @namespace    http://tampermonkey.net/
// @version      0.1.3
// @description  Crawling the Wiki and refactor it the good way
// @author       David Mahl
// @match        http://www.ulisses-regelwiki.de/index.php/sonderfertigkeiten.html
// @grant        none
// @require      http://code.jquery.com/jquery-latest.js
// @require      https://cdn.jsdelivr.net/lodash/4.14.2/lodash.min.js
// ==/UserScript==

(function() {
    'use strict';
    var stop= false,
        storage = {},
        CONST = {
            'CONTENT' : {
                'PUBLIKATION':'Publikation:',
                'ASP':'AsP-Kosten:',
                'AP':'AP-Wert:',
                'ERSCHWERNIS':'Erschwernis:',
                'VORAUSSETZUNGEN':'Voraussetzungen:',
                'KAMPFTECHNIKEN':'Kampftechniken:'
            },
            'GLOBAL' : {
                'CUTPARAGRAPH': 4,
                'CUTSTRONG': 10,
                'ARTICLEQUERY':'.mod_article .ce_text',
                'ITEMQUERY':'nav table a[role="menuitem"]'
            }
        },
        app = {
            getAllNavigationEntrys: function(result, entryPoint) {
                var $itemQuery = $(CONST.GLOBAL.ITEMQUERY),
                    $article = $(CONST.GLOBAL.ARTICLEQUERY),
                    content = {
                        'publikation': undefined,
                        'asp':undefined,
                        'ap':undefined,
                        'erschwernis':undefined,
                        'vorraussetzungen':undefined,
                        'kampftechniken':undefined,
                        'info': ""
                    };

                if (result !== undefined) {
                    $itemQuery = $(result).find(CONST.GLOBAL.ITEMQUERY);
                    $article = $(result).find(CONST.GLOBAL.ARTICLEQUERY);
                }

                if ($article.length === 1) {
                    $article.find('p').each(function(i, e){
                        var element = $(e).prop('outerText'),
                            publikationIndex = element.indexOf(CONST.CONTENT.PUBLIKATION),
                            aspIndex = element.indexOf(CONST.CONTENT.ASP),
                            apIndex = element.indexOf(CONST.CONTENT.AP),
                            erschwernisIndex = element.indexOf(CONST.CONTENT.ERSCHWERNIS),
                            voraussetzungIndex = element.indexOf(CONST.CONTENT.VORAUSSETZUNGEN),
                            kampftechIndex = element.indexOf(CONST.CONTENT.KAMPFTECHNIKEN),
                            text = "";

                        if (publikationIndex >= 0) {
                            text = element.substring(publikationIndex + CONST.CONTENT.PUBLIKATION.length +1, element.length);
                            content.publikation = text;
                        } else if (aspIndex >= 0) {
                            content.asp = element.substring(aspIndex + CONST.CONTENT.ASP.length +1, element.length);
                        } else if (apIndex >= 0) {
                            content.ap = element.substring(apIndex + CONST.CONTENT.AP.length +1, element.length);
                        } else if (erschwernisIndex >= 0) {
                            content.ap = element.substring(erschwernisIndex + CONST.CONTENT.ERSCHWERNIS.length +1, element.length);
                        } else if (voraussetzungIndex >= 0) {
                            content.ap = element.substring(voraussetzungIndex + CONST.CONTENT.VORAUSSETZUNGEN.length +1, element.length);
                        } else if (kampftechIndex >= 0) {
                            content.ap = element.substring(kampftechIndex + CONST.CONTENT.KAMPFTECHNIKEN.length +1, element.length);
                        } else {
                            text = element.substring(0, element.length);
                            content.info = content.info + text;
                        }
                    });
                    _.setWith(storage, entryPoint+'.content', content);
                }

                $itemQuery.each(function(i, e) {
                    var title = $(this).attr('title'),
                        href= $(this).attr('href'),
                        objectPath= entryPoint === undefined?'root.'+title:entryPoint+'.children.'+title;

                    _.setWith(storage, objectPath, {'title':title, 'href':href});
                    app.crawlSubNavigation(_.get(storage, objectPath + '.href'), objectPath);
                });
                app.writeToLocalStorage();
            },
            crawlSubNavigation: function(url, parent) {
                app.loadPage(url)
                    .then(app.parsePage.bind(null, parent))
                    .fail(function (err) {
                    throw new Error('Error while Loading Time');
                });
            },
            updateContent: function($text){
                return {'text':$text};
            },
            parsePage: function(entryPoint, result) {
                app.getAllNavigationEntrys(result, entryPoint);
            },
            loadPage: function(url) {
                return $.ajax( {
                    url: url,
                    type: "POST",
                    dataType: "html"
                });
            },
            writeToLocalStorage() {
                var checksum = Object.keys(storage).length;
                localStorage.setItem('wikiData', JSON.stringify(storage));
                localStorage.setItem('wikiData.checksum',JSON.stringify(storage).length);
            },
            init: function() {
                // Retrieve the object from storage
                var retrievedObject = localStorage.getItem('wikiData'),
                    checksum = localStorage.getItem('wikiData.checksum');

                app.parsePage();
                if (retrievedObject === null) {
                    app.parsePage();
                } else {
                    storage = JSON.parse(retrievedObject);
                }
                console.log('Storage:',storage);
            }
        };
    app.init();
})();