您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Инструмент для быстрого добавления и сохранения данных из НКРЯ в формате JSON
// ==UserScript==
// @name         НКРЯ Датасеты
// @namespace    http://tampermonkey.net/
// @version      0.2
// @description  Инструмент для быстрого добавления и сохранения данных из НКРЯ в JSON формат
// @author       Fyodor Sizov
// @match        https://processing.ruscorpora.ru/*
// @grant        none
// @run-at       document-end
// ==/UserScript==

(function () {
    'use strict';

    // localStorage keys. The same strings are used as property names in the
    // exported JSON file, so they must not change.
    const NAME_KEY = "nk:datasetName";
    const CONTENT_KEY = "nk:datasetContent";

    /**
     * Builds a fresh, empty dataset payload.
     * A new object is returned on every call so callers may mutate it freely.
     * @returns {{items: Array, processedNumber: number}}
     */
    function makeEmptyContent() {
        return { items: [], processedNumber: 0 };
    }

    /**
     * One selected search match, as stored in the dataset.
     */
    class Item {
        /**
         * @param {string} title - text of the example's `.b-doc-expl` element
         * @param {Array<{text: string, status: string}>} text - fragments produced by `getTextList`
         * @param {string} itemURL - URL of the page the match was taken from
         * @param {?string} itemPageIndex - text of the `.pager b` element, or null if absent
         * @param {?string} indexInPage - value of the example's `index-in-page` attribute
         */
        constructor(title, text, itemURL, itemPageIndex, indexInPage) {
            this.title = title;
            this.text = text;
            this.itemURL = itemURL;
            this.itemPageIndex = itemPageIndex;
            this.indexInPage = indexInPage;
        }
    }

    /**
     * Thin wrapper around the two localStorage entries that hold the dataset
     * name and its JSON-serialized content.
     */
    class Dataset {
        constructor() {
            this.nameDefault = "nameDefault";
            this.contentDefault = makeEmptyContent();
            // Seed storage only when no dataset name has been stored yet.
            if (this.nameInMemory() === null) {
                this.name = this.nameDefault;
                this.content = this.contentDefault;
            }
        }

        /** @returns {?string} stored dataset name, or null when nothing is stored */
        get name() {
            return localStorage.getItem(NAME_KEY);
        }

        set name(value) {
            localStorage.setItem(NAME_KEY, value);
        }

        /**
         * Parsed dataset content. Falls back to a fresh empty payload when the
         * content entry is missing (or holds JSON `null`), so callers can
         * always read `items` / `processedNumber`.
         * @returns {{items: Array, processedNumber: number}}
         */
        get content() {
            const stored = localStorage.getItem(CONTENT_KEY);
            const parsed = stored === null ? null : JSON.parse(stored);
            return parsed === null ? makeEmptyContent() : parsed;
        }

        set content(jsonValue) {
            localStorage.setItem(CONTENT_KEY, JSON.stringify(jsonValue));
        }

        /** @returns {?string} raw stored dataset name, or null when absent */
        nameInMemory() {
            return localStorage.getItem(NAME_KEY);
        }

        /** Intentionally empty; kept so existing callers do not break. */
        loadToMemory() { }
    }

    /**
     * Injects the dataset toolbar and the per-match select buttons into a
     * results page and handles all user actions on them.
     */
    class DatasetWizard {
        /**
         * @param {Dataset} dataset - storage wrapper the wizard reads and writes
         */
        constructor(dataset) {
            this.url = window.location.href;
            this.dataset = dataset;
            // Toolbar container; stays null on pages where nothing is injected.
            this.controls = null;

            if (!this.canInsert()) {
                return;
            }

            this.makeHolder();
            this.putIndices();
            for (const button of this.getButtons()) {
                button.style.marginLeft = "10px";
                button.style.marginRight = "10px";
                this.controls.appendChild(button);
            }
            this.addSelectButtons();

            // One delegated listener serves every select button. getTextList()
            // rewrites a match's innerHTML, which replaces the button element,
            // so listeners bound to individual buttons would be lost.
            document.addEventListener("click", (event) => {
                if (!(event.target instanceof Element)) {
                    return;
                }
                const button = event.target.closest(".matcher-button");
                if (button !== null) {
                    this.markAsSelected(button);
                }
            });
        }

        /** @returns {boolean} true when the page contains a link whose href includes "stat.xml" */
        canInsert() {
            return document.querySelector('[href*="stat.xml"]') !== null;
        }

        /**
         * Wraps the "stat.xml" link in an inline `<div id="wizardControls">`
         * and remembers that div as the toolbar container. The link node is
         * moved, not re-parsed, so anything attached to it survives.
         */
        makeHolder() {
            const statLink = document.querySelector('[href*="stat.xml"]');
            if (statLink === null) {
                return;
            }
            const holder = document.createElement("div");
            holder.id = "wizardControls";
            holder.style.display = "inline";
            statLink.parentNode.insertBefore(holder, statLink);
            holder.appendChild(statLink);
            this.controls = holder;
        }

        /**
         * Stamps every child of the first `<ol>` with an `index-in-page`
         * attribute, counting up from the list's start value.
         */
        putIndices() {
            const list = document.querySelector("ol");
            if (list === null) {
                return;
            }
            // `start` reflects the attribute and defaults to 1 when it is
            // absent, which matches the numbering the browser renders.
            let index = list.start;
            for (const child of list.children) {
                child.setAttribute("index-in-page", String(index));
                index += 1;
            }
        }

        /**
         * Adds a select button to every `<li>` found under the parent of each
         * `.b-doc-expl` element. Each `<li>` gets at most one button.
         */
        addSelectButtons() {
            const seen = new Set();
            for (const docLabel of document.querySelectorAll(".b-doc-expl")) {
                for (const matchLi of docLabel.parentNode.querySelectorAll("li")) {
                    if (seen.has(matchLi)) {
                        continue;
                    }
                    seen.add(matchLi);
                    this.addButtonToMatch(matchLi);
                }
            }
        }

        /**
         * Inserts a check-mark button as the first child of `matchLi`.
         * @param {Element} matchLi - list item representing one match
         */
        addButtonToMatch(matchLi) {
            const button = document.createElement("button");
            // Set as an attribute so it survives the innerHTML round-trip in
            // getTextList(); "button" keeps it from submitting a parent form.
            button.setAttribute("type", "button");
            button.className = "matcher-button";
            button.style.marginRight = "10px";
            button.textContent = "\u2713";
            matchLi.insertBefore(button, matchLi.firstChild);
        }

        /**
         * Creates the five toolbar buttons, already wired to their actions.
         * @returns {HTMLButtonElement[]} rename, clear, download, select-all, next
         */
        getButtons() {
            const specs = [
                ["Переименовать датасет", () => this.renameDataset()],
                ["Очистить датасет", () => this.clearDataset()],
                ["Скачать датасет", () => this.loadDataset()],
                ["Выбрать все", () => this.clickAllButtons()],
                ["След.", () => this.goNext()],
            ];
            return specs.map(([label, action]) => {
                const button = document.createElement("button");
                button.type = "button";
                button.textContent = label;
                button.addEventListener("click", action);
                return button;
            });
        }

        /**
         * Asks for a new dataset name and stores it. Cancelling the prompt
         * leaves the current name untouched.
         */
        renameDataset() {
            const current = this.dataset.name;
            const name = prompt("Имя датасета:", current !== null ? current : "");
            // prompt() returns null on cancel; storing it would save "null".
            if (name === null) {
                return;
            }
            this.dataset.name = name;
        }

        /** Resets the stored name and content to their defaults. */
        clearDataset() {
            localStorage.removeItem(NAME_KEY);
            localStorage.removeItem(CONTENT_KEY);
            this.dataset.name = this.dataset.nameDefault;
            this.dataset.content = this.dataset.contentDefault;
        }

        /**
         * Downloads the dataset (name + content) as a JSON file named after
         * the dataset. A Blob URL with a `download` link is used because
         * browsers block top-level navigation to `data:` URLs.
         */
        loadDataset() {
            const payload = JSON.stringify({
                [NAME_KEY]: this.dataset.name,
                [CONTENT_KEY]: this.dataset.content,
            });
            const blobUrl = URL.createObjectURL(
                new Blob([payload], { type: "application/json" })
            );
            const link = document.createElement("a");
            link.href = blobUrl;
            link.download = `${this.dataset.name}.json`;
            link.style.display = "none";
            document.body.appendChild(link);
            link.click();
            link.remove();
            // Release the blob once the click has been dispatched.
            setTimeout(() => URL.revokeObjectURL(blobUrl), 0);
        }

        /**
         * Adds the number of select buttons on this page to the processed
         * counter, then clicks the pager link matched by `.pager a:last-child`.
         */
        goNext() {
            const processedHere = document.querySelectorAll(".matcher-button").length;
            const content = this.dataset.content;
            content.processedNumber += processedHere;
            this.dataset.content = content;

            const nextLink = document.querySelector(".pager a:last-child");
            if (nextLink === null) {
                console.warn("[nk] pager link not found; staying on this page");
                return;
            }
            nextLink.click();
        }

        /**
         * Stores the match that owns `matchLiButton` in the dataset and paints
         * the button green. A match already selected on this page is ignored,
         * so repeated clicks (or "select all") do not create duplicates.
         * @param {Element} matchLiButton - select button inside the match's `<li>`
         */
        markAsSelected(matchLiButton) {
            if (matchLiButton.getAttribute("data-selected") === "true") {
                return;
            }
            const matchLi = matchLiButton.parentNode;
            if (matchLi === null) {
                return;
            }
            // The enclosing example is the nearest <li> that is a direct child
            // of an <ol> (possibly matchLi itself).
            const example = matchLi.closest("ol > li");
            if (example === null) {
                console.warn("[nk] no enclosing example found for match", matchLi);
                return;
            }

            // Both marks are written before getTextList() runs: that call
            // re-serializes the <li>, so only attributes carry over.
            matchLiButton.style.backgroundColor = "lightgreen";
            matchLiButton.setAttribute("data-selected", "true");

            const titleElement = example.querySelector(".b-doc-expl");
            const currentPage = document.querySelector(".pager b");
            const item = new Item(
                titleElement === null ? "" : titleElement.innerText,
                this.getTextList(matchLi),
                window.location.href,
                currentPage === null ? null : currentPage.innerText,
                example.getAttribute("index-in-page")
            );

            const content = this.dataset.content;
            content.items.push(item);
            this.dataset.content = content;
        }

        /**
         * Splits a match into an ordered list of text fragments.
         *
         * Bare text sitting between two spans is wrapped in
         * `<span class="contains-text">`, every `.b-wrd-expl` element also
         * gets that class, and the resulting elements are read in document
         * order. Note: this replaces `matchLi.innerHTML`, so all child nodes
         * of `matchLi` are re-created.
         *
         * @param {Element} matchLi - list item representing one match
         * @returns {Array<{text: string, status: string}>} status is
         *   "selected" (`.b-wrd-expl` with `g-em`), "token" (other
         *   `.b-wrd-expl`) or "special" (wrapped in-between text)
         */
        getTextList(matchLi) {
            matchLi.innerHTML = matchLi.innerHTML.replace(
                /(<\/span>)([^<]+)(<span)/g,
                '$1<span class="contains-text">$2</span>$3'
            );
            for (const word of matchLi.querySelectorAll(".b-wrd-expl")) {
                word.classList.add("contains-text");
            }

            const textList = [];
            for (const fragment of matchLi.querySelectorAll(".contains-text")) {
                let status = "special";
                if (/b-wrd-expl/.test(fragment.className)) {
                    status = /g-em/.test(fragment.className) ? "selected" : "token";
                }
                textList.push({ text: fragment.innerText, status: status });
            }
            return textList;
        }

        /** Clicks every select button currently on the page. */
        clickAllButtons() {
            for (const button of document.querySelectorAll(".matcher-button")) {
                button.click();
            }
        }
    }

    // Exposed on window for backward compatibility and console debugging.
    window.dataset = new Dataset();
    window.wizard = new DatasetWizard(window.dataset);
})();