NaziGramatical

Correcção ortográfica e gramatical automática em português europeu.

您需要先安装一个扩展,例如 篡改猴Greasemonkey暴力猴,之后才能安装此脚本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安装一个扩展,例如 篡改猴暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴Userscripts ,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         NaziGramatical
// @namespace    http://nazigramatical.x10.bz/
// @version      0.6
// @description  Correcção ortográfica e gramatical automática em português europeu.
// @author       NaziGramatical
// @match        https://www.reddit.com/*
// @grant        GM_setValue
// @grant        GM_getValue
// @grant        GM_xmlhttpRequest
// @grant        GM_getResourceText
// @connect      nazigramatical.x10.bz
// @resource     pt.dic http://nazigramatical.x10.bz/pt.dic
// ==/UserScript==

(function() {
    'use strict';

    // Compiled rules as [RegExp, replacement] pairs; the replacement is a string or a callback.
    var parsedRules = [];
    // Snapshot of the edited field taken just before the latest automatic replacement
    // (read by the Ctrl+Z handler); `count` is incremented by the input listener.
    var lastReplace = {count: 0};
    // Lookup table consulted by dicWordExistsCS before it searches the dictionary text.
    var dicWordsCache = {};
    // Dictionary text once loaded; false while unloaded (the input listener stores a
    // placeholder object here while the text is being loaded).
    var dic = false;
    // Timestamp (ms) recorded by the input listener after a passing language check;
    // read by the dictionary clean-up timer.
    var lastCheck = 0;
    // Number of automatic replacements undone with Ctrl+Z on this page.
    var undos = 0;
    // When true the input listener returns immediately.
    var disabled = false;

    // Debug logger: a no-op unless the console.log call inside is uncommented.
    var log = function () { /*console.log.apply(this, arguments);*/ };

    /**
     * Restores the rules persisted by saveRules and appends them to parsedRules.
     *
     * Each stored entry is a pair [regexText, replacement] where regexText has the
     * "/source/flags" shape produced by RegExp.prototype.toString. Entries that are
     * malformed or that no longer compile are skipped, so a single bad stored rule
     * cannot stop the script from starting.
     */
    var loadRules = function () {
        var savedRules = GM_getValue('rules', []);
        if (!Array.isArray(savedRules)) return;
        for (var i = 0; i < savedRules.length; i++) {
            var entry = savedRules[i];
            if (!Array.isArray(entry) || typeof entry[0] != 'string') continue;
            // The flags part may legitimately be empty, hence "*" and the end anchor.
            var regs = entry[0].match(/^\/(.+)\/([^/]*)$/);
            if (!regs) continue;
            try {
                parsedRules.push([new RegExp(regs[1], regs[2]), entry[1]]);
            }
            catch (err) {
                log('Ignored stored rule that failed to compile:', entry[0], err);
            }
        }
    };

    /**
     * Case-sensitive dictionary lookup.
     *
     * The dictionary is one string in which every entry is surrounded by newlines,
     * so a word exists when "\n" + word + "\n" occurs in it. Results obtained from
     * a loaded dictionary are remembered in dicWordsCache.
     *
     * @param {string} word Word to look up, exactly as typed.
     * @returns {boolean} true when the word is in the dictionary; false when it is
     *     not, or when the dictionary text is not loaded and the word is not cached.
     */
    var dicWordExistsCS = function (word) {
        var time = Date.now();
        var result;
        // Own-property test: `cache[word] !== undefined` would report inherited
        // members such as "constructor" as known words.
        if (Object.prototype.hasOwnProperty.call(dicWordsCache, word)) result = dicWordsCache[word];
        // `dic` is false when unloaded and a placeholder object while loading;
        // such a miss is not cached because it says nothing about the word.
        else if (typeof dic != 'string') result = false;
        else {
            result = (dic.indexOf("\n" + word + "\n") != -1);
            dicWordsCache[word] = result;
        }
        log('Dic search: ' + word + ' Time: ' + (Date.now() - time) + ' ms. Result: ' + result);
        return result;
    };
    /**
     * Dictionary lookup that tolerates capitalisation: the word is accepted as
     * typed, or in its all-lowercase form when that differs from what was typed.
     *
     * @param {string} word Word to look up.
     * @returns {boolean} Whether either form is in the dictionary.
     */
    var dicWordExists = function (word) {
        if (!dicWordExistsCS(word)) {
            var lowered = word.toLowerCase();
            if (word == lowered) return false;
            return dicWordExistsCS(lowered);
        }
        return true;
    };

    /**
     * Checks that every word of three or more letters in `str` is in the dictionary.
     *
     * @param {string} str Text to check.
     * @returns {boolean} false as soon as one such word is unknown; true otherwise,
     *     including when the text contains no such word.
     */
    var dicCheckSentence = function (str) {
        var tokens = str.match(/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]{3,}/g) || [];
        return tokens.every(function (token) { return dicWordExists(token); });
    };

    /**
     * Persists the rules that can be serialised.
     *
     * Only rules whose replacement is a string are stored (callback replacements
     * are skipped); each regex is stored as its "/source/flags" text under the
     * 'rules' key.
     */
    var saveRules = function () {
        var serialisable = parsedRules
            .filter(function (rule) { return typeof rule[1] == 'string'; })
            .map(function (rule) { return [rule[0].toString(), rule[1]]; });
        GM_setValue('rules', serialisable);
    };

    /**
     * Applies a series of replacements in order: a[i] is replaced by b[i], each
     * step working on the output of the previous one.
     *
     * @param {string} str Input text.
     * @param {Array<RegExp|string>} a Patterns.
     * @param {Array<string>} b Replacements, index-aligned with `a`.
     * @returns {string} The text after all replacements.
     */
    var replaceMultiple = function (str, a, b) {
        return a.reduce(function (acc, pattern, idx) {
            return acc.replace(pattern, b[idx]);
        }, str);
    };

    /**
     * Expands the rule shorthands in a regex source string:
     *   \pL   a letter                  \pBL  left word boundary (lookbehind)
     *   \pV   a vowel                   \pBR  right word boundary (lookahead)
     *   \pVE  e / é / ê                 \pC   a consonant
     *   \pNUM a number, in digits or spelled out
     *
     * All shorthands are expanded in a single pass with the longer names listed
     * first, so that \pVE is not consumed as \pV followed by a literal "E".
     *
     * @param {string} regex Regex source text that may contain shorthands.
     * @returns {string} The source text with every shorthand expanded.
     */
    var regexGroups = function (regex) {
        var letters = 'a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ';
        var expansions = {
            L: '[' + letters + ']',
            BL: '(?<=^|[^' + letters + '])',
            BR: '(?=$|[^' + letters + '])',
            V: '[aeiouáâãéêíóôõúÁÂÃÉÊÍÓÔÕÚ]',
            VE: '(?:e|é|ê)',
            C: '[b-df-hj-np-tv-zçÇ]',
            NUM: '(?:[0-9]+(?:[,.][0-9]+)?|uma?|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|catorze|quinze|dezasseis|dezassete|dezoito|dezanove|vinte|trinta|quarenta|cinquenta|sessenta|setenta|oitenta|noventa|cem|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos|mil)'
        };
        // A replacer function is used so that "$" inside an expansion stays literal.
        return regex.replace(/\\p(NUM|BL|BR|VE|L|V|C)/g, function (whole, name) {
            return expansions[name];
        });
    };

    /**
     * Compiles rule definitions into [RegExp, replacement] pairs usable by checkWord.
     *
     * Every compiled regex gets a leading capture group (the text in front of the
     * word: start of line, a non-letter, or an emulated lookbehind) and a trailing
     * lookahead for "one last character, then end of line". Because of the extra
     * leading group:
     *   - string replacements get "$1" prepended and their own $n shifted to $n+1;
     *   - callback replacements are wrapped so the callback receives
     *     [matchedWord, group1, group2, ...] without the prefix, and the prefix is
     *     put back in front of whatever the callback returns. A falsy return
     *     becomes '' (no correction).
     * A leading lookbehind "(?<=x)" / "(?<!x)" is rewritten into the prefix group;
     * rules with a lookbehind anywhere else, or with a negative lookbehind that is
     * not plain alphanumerics/spaces, are skipped. Rules that fail to compile are
     * skipped too, so one bad rule does not discard the rest.
     *
     * @param {Array} rules Pairs [pattern, replacement]; pattern is a RegExp or a
     *     string (optionally in "/source/flags" form), replacement a string or callback.
     * @returns {Array} Compiled [RegExp, replacement] pairs. Flags default to 'i'.
     */
    var parseRules = function (rules) {
        var compiled = [];
        if (!Array.isArray(rules)) return compiled;

        // Adapts a rule callback to the String.prototype.replace replacer signature.
        var wrapCallback = function (cb) {
            return function () {
                var args = [];
                for (var k = 0; k < arguments.length; k++) args.push(arguments[k] ? arguments[k] : '');
                // args[1] is the prefix group; it can be longer than one character
                // for rewritten lookbehinds, so strip its real length.
                var prefix = args[1];
                // slice(2, -2) drops the whole match, the prefix, the offset and the input string.
                var r = cb([args[0].slice(prefix.length)].concat(args.slice(2, -2)));
                return r ? prefix + r : '';
            };
        };

        for (var i = 0; i < rules.length; i++) {
            var rule = regexGroups(rules[i][0].toString());
            var replace = (typeof rules[i][1] == 'string') ?
                '$1' + rules[i][1].replace(/\$(\d+)/g, function (match, p1) { return '$' + (Number(p1) + 1); }) :
                wrapCallback(rules[i][1]);
            var flags = '';
            var literal = rule.match(/^\/(.+)\/(.*?)$/);
            if (literal) {
                rule = literal[1];
                flags = literal[2];
            }
            var lookbehind = rule.match(/^\(\?<([!=])(.+?)\)(.+)/);
            if (lookbehind) {
                if (lookbehind[1] == '=') rule = '(^|[^a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]|'+lookbehind[2]+')' + lookbehind[3] + '(?=[^]$)';
                if (lookbehind[1] == '!') {
                    if (/[^a-zA-Z0-9 ]/.test(lookbehind[2])) {
                        log('Ignored imparsable rule with lookbehind:', rule);
                        continue;
                    }
                    rule = '(?!'+ lookbehind[2] +')(.{'+lookbehind[2].length+'}|^.{0,'+(lookbehind[2].length-1)+'})' + lookbehind[3] + '(?=[^]$)';
                }
            }
            else if (/\(\?</.test(rule)) {
                // A lookbehind that is not at the very start cannot be emulated.
                continue;
            }
            else rule = '(^|[^a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ])' + rule + '(?=[^]$)';
            if (!flags) flags = 'i';
            try {
                compiled.push([new RegExp(rule, flags), replace]);
            }
            catch (err) {
                log('Ignored rule that failed to compile:', rule, err);
            }
        }
        return compiled;
    };

    /**
     * Heuristic English detector: counts occurrences of common English words
     * (at the start of the text or after a space) and compares that count with
     * the total number of word-like tokens.
     *
     * @param {string} text Text to inspect.
     * @returns {boolean} true when more than 10% of the tokens are common English words.
     */
    var isEnglish = function (text) {
        var hits = text.match(/(?:^| )(the|is|was|are|be|were|been|did|to|of|and|in|that|have|had|i|it|not|on|with|he|you|at|this|but|his|by|from|they|we|say|her|she|or|an|will|my|one|all|would|there|their|what|up|out|if|about|who|get|which|go|when|make|can|like|time|just|him|know|take|people|into|year|your|good|some|could|them|see|other|than|then|now|look|only|its|over|think|also|back|after|two|how|our|work|first|well|way|even|new|want|because|any|these|give|day|most|thanks?)(?=$|[^a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ])/ig);
        if (!hits || !hits.length) {
            log("No English words found.");
            return false;
        }
        var tokens = text.match(/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ\'-]+/g);
        var ratio = hits.length / tokens.length;
        log("English words: "+hits.length+" Total words: "+tokens.length+" Ratio: "+ratio);
        return ratio > 0.1;
    };

    /**
     * Decides whether the text looks Portuguese by checking its last ten words
     * (two letters or longer) against the dictionary.
     *
     * @param {string} text Text to inspect.
     * @returns {boolean} true when at most 25% of the inspected words are unknown;
     *     false when the text contains no such word.
     */
    var isPortuguese = function (text) {
        var found = text.match(/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]{2,}/g);
        if (!found) return false;

        var recent = found.slice(-10);
        var unknown = recent.filter(function (candidate) { return !dicWordExists(candidate); }).length;
        log("Portuguese words: "+(recent.length - unknown)+" Total words: "+recent.length+" Non Portuguese Ratio: "+(unknown / recent.length));
        return unknown / recent.length <= 0.25;
    };

    /**
     * Strips everything that should not count towards language detection and
     * asks isPortuguese about the remaining tail of the text.
     * (isEnglish is not consulted.)
     *
     * @param {string} text Text in front of the caret.
     * @returns {boolean} Result of isPortuguese on the cleaned-up tail.
     */
    var checkLanguage = function (text) {
        // [pattern, replacement] pairs, applied in this order.
        var cleanups = [
            [/https?:\/\/[^ )]+/g, ''],                                  // URLs
            [/[ru]\/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ0-9_-]+/g, ''],         // r/sub and u/user mentions
            [/«.+?»/g, ''],                                              // «quoted» text
            [/".+?"/g, ''],                                              // "quoted" text
            [/\*\*(.+?)\*\*/g, '$1'],                                    // bold markers (text is kept)
            [/\*.+?\*/g, ''],                                            // *italic* text
            [/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ-]+[^]$/, '']                 // last word, compound words included
        ];
        var cleaned = cleanups.reduce(function (acc, step) {
            return acc.replace(step[0], step[1]);
        }, text);

        // Keep only the trailing run of up to ten words (each may be followed by a one-letter word).
        var tail = cleaned.match(/([a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]+[^a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]+([a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]{1,1}[^a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]+)?){0,10}$/);
        if (tail) cleaned = tail[0];
        else log('Error: no match on checkLanguage:', cleaned);
        log('checkLanguage', cleaned);
        return isPortuguese(cleaned);
    };

    /**
     * Runs the rules against a line and returns the first correction found.
     * When the line (or its corrected form) ends in a hyphenated word, the part
     * up to and including the last hyphen is scanned again on its own, and a
     * correction found there is combined with the untouched final part.
     *
     * @param {string} line Text that ends with the word to check plus one trailing character.
     * @returns {string|undefined} The corrected line, or a falsy value when no rule applied.
     */
    var scanLine = function (line) {
        log(`scanline: "${line}"`);
        var result = '';
        var idx = 0;
        // Stop at the first rule that yields a correction.
        while (idx < parsedRules.length && !result) {
            result = checkWord(line, parsedRules[idx][0], parsedRules[idx][1]);
            idx++;
        }
        log(`result: "${result}"`);

        var hyphenated = (result || line).match(/^(.+-)([a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ]+[^])$/);
        if (!hyphenated) return result;

        var head = scanLine(hyphenated[1]);
        return head ? head + hyphenated[2] : result;
    };

    /**
     * Gives `str2` the capitalisation pattern of `str1`.
     *
     * @param {string} str1 Original text whose case pattern is copied.
     * @param {string} str2 Replacement text.
     * @returns {string} `str2` unchanged when `str1` is all lowercase or mixed case,
     *     upper-cased when `str1` is all uppercase, and with its first character
     *     upper-cased when `str1` starts with an uppercase character.
     */
    var fixCase = function (str1, str2) {
        var capitalise = function (s) { return s.charAt(0).toUpperCase() + s.substr(1); };

        if (str1 == str1.toLowerCase()) return str2;
        if (str1 == str1.toUpperCase()) return str2.toUpperCase();
        return str1 == capitalise(str1) ? capitalise(str2) : str2;
    };

    /**
     * Applies one compiled rule to the last word of a line.
     *
     * The regex is expected to come from parseRules: capture group 1 is the text
     * in front of the word (possibly empty, possibly several characters) and the
     * final character of the line is matched only by a lookahead.
     *
     * @param {string} line Text ending with the word to check plus one trailing character.
     * @param {RegExp} regex Compiled rule.
     * @param {string|Function} replace Replacement accepted by String.prototype.replace.
     * @returns {string|undefined} The corrected line with the original word's
     *     capitalisation applied, or undefined when the rule does not match or
     *     produces no correction.
     */
    var checkWord = function (line, regex, replace) {
        // exec() always reports `.index`; lastIndex is reset around it so that a
        // rule carrying the g/y flag behaves the same on every call.
        regex.lastIndex = 0;
        var match = regex.exec(line);
        regex.lastIndex = 0;
        if (!match) return;

        // The word starts after the prefix group, whatever its length.
        var index = match.index + (match[1] ? match[1].length : 0);
        // Drop the text before the word and the trailing character.
        var replaced = line.replace(regex, replace).slice(index, -1);
        if (!replaced) return;

        log("Line: '"+line+"'");
        log("Regex: "+regex+" Replace: "+replace+" Match:", match);
        replaced = fixCase(line.slice(index, -1), replaced);
        return line.slice(0, index) + replaced + line.slice(-1);
    };

    /**
     * Replaces acute- and circumflex-accented vowels with their plain vowel.
     * Tilde vowels (ã, õ) and ç are left untouched.
     *
     * @param {string} str Input text.
     * @returns {string} The text with those accents removed.
     */
    var removeDiacritics = function (str) {
        var plain = {
            'á': 'a', 'â': 'a', 'é': 'e', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ô': 'o', 'ú': 'u',
            'Á': 'A', 'Â': 'A', 'É': 'E', 'Ê': 'E', 'Í': 'I', 'Ó': 'O', 'Ô': 'O', 'Ú': 'U'
        };
        return str.replace(/[áâéêíóôúÁÂÉÊÍÓÔÚ]/g, function (ch) { return plain[ch]; });
    };

    /**
     * Proposes `w2` as a correction for `w1`, but only when `w1` is not a
     * dictionary word and `w2` is.
     *
     * @param {string} w1 Word as typed.
     * @param {string} w2 Candidate correction.
     * @returns {string|undefined} `w2` when it should replace `w1`, otherwise undefined.
     */
    var tryCorrection = function (w1, w2) {
        log(`tryCorrection "${w1}" "${w2}"`);
        if (dicWordExists(w1)) return undefined;
        return dicWordExists(w2) ? w2 : undefined;
    };

    // Main entry point: after each edit, checks the word that was just completed
    // (the one directly in front of the caret) and replaces it when a rule applies.
    document.addEventListener('input', function (e) {
        if (disabled) return;
        if (!dic) {
            // Load the dictionary lazily on first use; the placeholder prevents
            // further input events from scheduling another load.
            dic = {};
            setTimeout(function () {
                var time = Date.now();
                dic = "\n"+GM_getResourceText('pt.dic')+"\n";
                log('Loaded dic in ' + (Date.now() - time) + ' ms.');
            }, 100);
            return;
        }
        // Still loading: the placeholder cannot be searched yet.
        if (typeof dic != 'string') return;
        lastReplace.count++;
        var input = e.target;
        var text = input.value;
        if (!text) return; // no text
        var caret = input.selectionEnd;
        // Elements without text selection have no numeric caret, and with the
        // caret at 0 there is no word in front of it.
        if (typeof caret != 'number' || caret < 1) return;
        var sep = text.substr(caret - 1, 1);
        // Only act once the word is finished, i.e. the last typed character is a separator.
        if (/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ-]/.test(sep)) return;
        var left = text.substr(0, caret);
        // The line to check is the last non-empty line before the caret together
        // with any newlines that follow it; everything before it is kept verbatim
        // in `head` so that blank lines are not lost when the value is rebuilt.
        var contentEnd = left.length;
        while (contentEnd > 0 && left.charAt(contentEnd - 1) == '\n') contentEnd--;
        if (contentEnd == 0) return; // nothing but newlines before the caret
        var lineStart = left.lastIndexOf('\n', contentEnd - 1) + 1;
        var head = left.slice(0, lineStart);
        var line = left.slice(lineStart);
        var right = text.substring(caret, text.length);
        if (/^ *>/.test(line)) return; // quotes
        if (/https?:\/\/[^ )]+[^]$/.test(line)) return; // urls
        if (/[ru]\/[a-zA-ZáâãéêíóôõúçÁÂÃÉÊÍÓÔÕÚÇ0-9_-]+[^]$/.test(line)) return; // users/subs
        if (/«[^»]+[^]$/.test(line)) return; // quoted
        if (/^[^"]*"[^"]*([^"]*"[^"]*"[^"]*)*[^]$/.test(line)) return; // quoted
        if (/^[^*]*\*[^*]*([^*]*\*[^*]*\*[^*]*)*[^]$/.test(line.replace(/\*\*.+?\*\*/g, ''))) return; // italic
        if (!checkLanguage(left)) return;
        lastCheck = Date.now(); // prevent cleanup
        log(`Checking: "${line}"`);
        var result = scanLine(line);
        if (!result) return;
        // Remember the pre-replacement state so that Ctrl+Z can restore it.
        lastReplace = {input: input, value: input.value, selectionEnd: input.selectionEnd, count: 0};
        log('Replacing "'+line+'" with "'+result+'"');
        left = head + result;
        input.value = left + right;
        input.selectionEnd = left.length;
    });

    // Ctrl+Z directly after an automatic replacement restores the text as it was
    // before the replacement. `lastReplace.count` is 0 only when no input event
    // has been handled since that replacement; it is decremented twice per
    // Ctrl+Z, exactly as before, so the counting of later undos is unchanged.
    document.addEventListener('keydown', function(e) {
        if (e.ctrlKey && e.keyCode == 90) {
            var undoPending = !(lastReplace.count--);
            // `lastReplace.input` is missing until the first replacement has
            // happened; without this guard Ctrl+Z on a fresh page would throw.
            if (undoPending && lastReplace.input) {
                log("NaziGramatical: Undo.");
                lastReplace.input.value = lastReplace.value;
                lastReplace.input.selectionEnd = lastReplace.selectionEnd;
                // Offer to switch the checker off after the second undone correction.
                if (++undos == 2 && confirm('The spellchecker will be disabled in this page.')) disabled = true;
                e.preventDefault();
            }
            lastReplace.count--;
        }
    });

    // Rules saved by a previous update are always loaded first, so they stay
    // active while a refresh is in flight and when the refresh fails.
    loadRules();

    // Refresh the rules from the server once the stored update interval has elapsed.
    if (Date.now() - GM_getValue('rulesTime', 0) > GM_getValue('updateInterval', 0) * 1000) {
        log("NaziGramatical: Updating rules.");
        GM_xmlhttpRequest({
            method: 'GET',
            url: 'http://nazigramatical.x10.bz/rules.php',
            onload: function (response) {
                var data;
                var downloaded;
                try {
                    data = JSON.parse(response.responseText);
                    downloaded = parseRules(data.rules);
                }
                catch (err) {
                    // Keep the rules that are already active.
                    log("NaziGramatical: Ignored invalid rules response.", err);
                    return;
                }
                // Downloaded and stored rules have string replacements, built-in
                // rules have callbacks (saveRules relies on the same distinction).
                // Swap the stored rules for the fresh ones and keep them in front
                // of the built-in rules, the same order used when loading from storage.
                var builtIn = parsedRules.filter(function (rule) { return typeof rule[1] != 'string'; });
                parsedRules = downloaded.concat(builtIn);
                // Only store a usable interval; Math.min() of a missing value is NaN.
                var interval = Number(data.updateInterval);
                if (data.updateInterval != null && isFinite(interval) && interval >= 0) {
                    GM_setValue('updateInterval', Math.min(604800, interval));
                }
                GM_setValue('dic.index', data.dic);
                saveRules();
                log("NaziGramatical: Rules updated. Next update: "+data.updateInterval+" secs.");
            },
            onerror: function () {
                log("NaziGramatical: Rules update failed.");
            },
            ontimeout: function () {
                log("NaziGramatical: Rules update timed out.");
            }
        });
        // Recorded when the request is issued, so a failing server is not
        // contacted again on every page load.
        GM_setValue('rulesTime', Date.now());
    }

    // Built-in rules.
    // Each rule is [pattern, callback]. The pattern may use the \p shorthands that
    // regexGroups expands. parseRules wraps the callback so that it receives an array
    // `r` where r[0] is the matched word and r[1], r[2], ... are the pattern's own
    // groups (empty string when a group did not take part in the match). A callback
    // returns the corrected word, or nothing when there is no correction.
    // The trailing comments on the rules are example misspellings.
    (function () {
        var rules = [
            // Drop the accent from the base word when "-mente" / "-zinho(s)" / "-zinha(s)" follows.
            [/(\pL*[áâéêíóôú]\pL*)(mente|zinh[ao]s?)/, function (r) { return removeDiacritics(r[1]) + r[2]; }], //sózinho, sómente
            // Swap "des" <-> "dis" at the start of a word (optionally after "in").
            [/(in)?d([ei])s(\pL{3,})/, function (r) { return tryCorrection(r[0], r[1] + (r[2] == 'e' ? 'dis' : 'des') + r[3]); }], //destraído, distoar
            // "-isse(s)" -> "-ice(s)".
            [/(\pL+i)ss(es?)/, function (r) { return tryCorrection(r[0], r[1]+'c'+r[2]); }], //chatisse
            // Remove the accent from í/ú after a vowel and before a final z, l or r.
            [/(\pL*[aeiou])([íú])([zlr])/, function (r) { return tryCorrection(r[0], r[1]+removeDiacritics(r[2])+r[3]); }], //saír
            // Remove the accent from a vowel before a final z, l or r.
            [/(\pL*)([áâéêíóôú])([zlr])/, function (r) { return tryCorrection(r[0], r[1]+removeDiacritics(r[2])+r[3]); }], //metêr, cristál
            // Remove the accent from a vowel followed by consonant(s) + a/e/o (+ s, m or ns).
            [/(\pL*[^aeiou])([áéíóúâêô])(\pC+[aeo](?:s|m|ns)?)/, function (r) { return tryCorrection(r[0], r[1]+removeDiacritics(r[2])+r[3]); }], //fála, páras
            // Remove the accent from a final í/ú (optionally followed by s, m or ns).
            [/(\pL*[^aeiou])([íú])(s|m|ns)?/, function (r) { return tryCorrection(r[0], r[1]+removeDiacritics(r[2])+r[3]); }], //perú, patíns
            //[/([aeiou])([iu]?\pC+[aeiou][oea]s?)/, function (r) { return tryCorrection(r[0], r[1]+removeDiacritics(r[2])+r[3]); }],
            // Missing tilde: "ao"/"ae" -> "ão"/"ãe".
            [/(\pL+)a([eo]s?)/, function (r) { return tryCorrection(r[0], r[1]+'ã'+r[2]); }], //mao, mae, maos, maes
            // Missing tilde: "oe" -> "õe".
            [/(\pL+)o(es?)/, function (r) { return tryCorrection(r[0], r[1]+'õ'+r[2]); }], //poe, poes
            // Missing cedilla (and possibly tilde): "cao"/"cão" -> "ção", same for the "e" forms.
            [/(\pL+)c[aã]([eo]s?)/, function (r) { return tryCorrection(r[0], r[1]+'çã'+r[2]); }], //bêncao, bêncão, bêncaos, bêncãos, opcoes, opcões
            // Missing cedilla (and possibly tilde): "coes"/"cões" -> "ções".
            [/(\pL+)c[oõ](es?)/, function (r) { return tryCorrection(r[0], r[1]+'çõ'+r[2]); }], //licoes, licões,
            // Drop a c/p that sits between a vowel and t/c/ç; an "m" left in front of c/ç/t becomes "n".
            [/(\pL*\pV)[cp]([tcç]\pV\pL*)/, function (r) { return tryCorrection(r[0], (r[1] + r[2]).replace(/m(?=[cçt])/i, 'n')); }], // inflacção
            // enclise
            // Verb ending in s/r/z followed by "-o(s)" / "-a(s)": the pronoun becomes "-lo(s)" / "-la(s)".
            // r[1] = verb form, r[2] = verb without its last letter, r[3] = stem, r[4] = last vowel,
            // r[5] = final consonant, r[6] = pronoun. Only applied when the verb form is a dictionary word.
            [/(((\pL+)([aeiouô]))([srz]))-([oa]s?)(?!-)/, function (r) {
                if (dicWordExists(r[1])) {
                    var replace = '';
                    if (r[1] == 'quer') replace = 'quere-' + r[6];
                    else if (r[5] == 's') {
                        // Final "s": dropped, vowel unchanged.
                        replace = r[2] + '-l' + r[6];
                    }
                    else {
                        // Final "r"/"z": dropped, and a/e/o take an accent (á, ê, ô).
                        replace = r[3] + r[4].replace('a', 'á').replace('e', 'ê').replace('o', 'ô') + '-l' + r[6];
                    }
                    return replace;
                }
            }],
            // hyphen
            // r[1] + r[2] = part before the hyphen, r[3] = part after it.
            [/(\pL{2,}?)(s?)-(\pL+)/, function (r) {
                if (!dicWordExists(r[1] + r[2]) || !dicWordExists(r[3])) {
                    // At least one part is not a word on its own: try the joined form, without an
                    // accent on the last letter of the left part and with an initial s/r doubled
                    // when it follows a vowel. Accepted only if the result is a dictionary word.
                    var left = r[1].slice(0, -1) + removeDiacritics(r[1].slice(-1)) + r[2];
                    var word = left + (/[aeiou]$/i.test(left) && /^[sr][aeiouáâãéêíóôõú]/i.test(r[3]) ? r[3].replace(/^([sr])/, '$1$1') : r[3]);
                    if (dicWordExists(word)) return word;
                }
                else if (
                    // Both parts are words: join only for the listed prefixes, and only when the
                    // second part does not start with one of the letters listed for that prefix.
                    (/^(contra|extra|auto|neo|proto|pseudo)$/i.test(r[1]+r[2]) && !/^[aeiouáâãéêíóôõúhrs]/i.test(r[3])) ||
                    (/^(anti|arqui|semi)$/i.test(r[1]+r[2]) && !/^[iíhrs]/i.test(r[3])) ||
                    (/^(ante|entre|sobre)$/i.test(r[1]+r[2]) && !/^h/i.test(r[3])) ||
                    (/^(hiper|inter|super)$/i.test(r[1]+r[2]) && !/^[hr]/i.test(r[3])) ||
                    (/^(com|mal)$/i.test(r[1]+r[2]) && !/^[aeiouáâãéêíóôõúh]/i.test(r[3]))
                ) {
                    var word = r[1]+r[2]+r[3];
                    if (dicWordExists(word)) return word;
                }
            }],
            // brute force
            // For a word that is not in the dictionary, search the dictionary for entries that
            // differ from it only by accents on a/e/i/o/u or by c/ç (case-insensitively), and use
            // the entry when there is exactly one.
            [/(\pL+)/, function (r) {
                if (!dicWordExists(r[0])) {
                    var time = Date.now();
                    var rule = replaceMultiple(r[0], [/a/gi, /e/gi, /i/gi, /o/gi, /u/gi, /c/gi], ['[aáâã]', '[eéê]', '[ií]', '[oóôõ]', '[uú]', '[cç]']);
                    var regex = new RegExp('\\n' + rule + '\\n', 'gi');
                    var match = dic.match(regex);
                    log('Possible matches:', match, 'Time took:', Date.now() - time);
                    if (match && match.length == 1) return match[0].replace(/\n/g, '');
                }
            }]
        ];
        // Built-in rules go after whatever rules are already in the list.
        parsedRules = parsedRules.concat(parseRules(rules));
    })();

    // Every 10 seconds, release the dictionary if more than 25 seconds have passed
    // since the last recorded check; the input listener loads it again on demand.
    setInterval(function () {
        var idleMs = Date.now() - lastCheck;
        if (idleMs <= 25000) return;
        dic = false;
    }, 10000);
})();