您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Scrapes the website in csv format
当前为
- // ==UserScript==
- // @name cgcom-interno.vuds-omc.es website scraper
- // @namespace http://tampermonkey.net/
- // @version 0.13
- // @description Scrapes the website in csv format
- // @author You
- // @match https://cgcom-interno.cgcom.es/RegistroMedicos/PUBBusquedaPublica_busqueda.action
- // ==/UserScript==
- function parseTab() {
- return new Promise((resolve) => {
- var checkExist = setInterval(() => {
- if($("[id^=tabBusqueda_] > table > tbody > tr > td:last-child").length) {
- clearInterval(checkExist);
- var row = "";
- $("[id^=tabBusqueda_] > table > tbody > tr > td:last-child").each(function() {
- // This accounts for multiple items in a field (i.e. Specialties)
- row += '"' + this.innerHTML.trim().replace(/(<br>)+/g, ";").replace(/;$/, "").replace(/<!--(.)+-->/g, '').replace(/"/g, "").trim() + '",';
- });
- // Sometimes there's an extra field in the table, so add it as blank if it doesn't exist
- if($("[id^=tabBusqueda_] > table > tbody > tr > td:last-child").length === 6) {
- row += '""';
- }
- else {
- row = row.substring(0, row.length - 1);
- }
- // Close the tab
- $("[id^=botonCerrar_tabBusqueda_]").click();
- resolve(row);
- }
- }, 100); // check every 100ms
- });
- }
- function waitLoading() {
- return new Promise((resolve) => {
- var checkExist = setInterval(() => {
- if(!$(".blockUI").length) {
- clearInterval(checkExist);
- resolve();
- }
- }, 100); // check every 100ms
- });
- }
- function downloadResultSet(resultSet, provIndex) {
- return new Promise((resolve) => {
- var results = ['"Colegiado","Nombre","Provincia","Especialidad","Estado","DireccionTrabajo","ValidaciónPeriódicaColegiación"'];
- results = results.concat(resultSet);
- let csvContent = "";
- results.forEach(function(line){
- var lines = line.split(',');
- lines[6] = lines[6].replace(/[\n]/g, ";");
- line = lines.join(',');
- var cleaned = line.replace(/[\n\t]/g, " ").replace(/[ ]+/g, " ");
- csvContent += cleaned + "\r\n";
- });
- // Need to create a Blob because of size of content
- var blob = new Blob([csvContent], {encoding:"UTF-8", type:"text/plain;charset=UTF-8"});
- var url = window.URL.createObjectURL(blob);
- var a = document.createElement("a");
- a.href = url;
- a.download = `cgcom_raw_${provIndex}.csv`;
- a.click();
- window.URL.revokeObjectURL(url);
- results = [];
- resolve();
- });
- }
- // Ripped from original website to remove the setTimeout capatcha refresh
- var myBuscar = function myBuscar() {
- $("#numeroColegiadoDetalle").val(null);
- $("#pagina").val(1);
- var idForm = "formBuscarColegiados";
- var action = "PUBBusquedaPublica_busqueda_ajax.action";
- var idCapaContenido = "resultadoBusquedaColegiados";
- enviarFormAjax(idForm, action, idCapaContenido, true);
- //setTimeout(function(){$("#recalcularImg").attr("src",$("#contextUrl").val()+"/Captcha.png?"+Date.now());},500);
- //console.log($("#contextUrl").val()+"/Captcha.png?"+Date.now());
- };
- function initializeForm() {
- $("#busquedaPublicaColegiadoBuscar").removeAttr('onclick').bind( "click", myBuscar);
- $("#contenidoPublico")
- .before("<span class='wrapAll' id='idProvidences' style='margin-left: 5px;'></span>")
- .before("<span class='wrapAll' id='idPages' style='margin-left: 20px;'></span>")
- .before("<span class='wrapAll' id='idResults' style='margin-left: 20px;'></span><br class='wrapAll'>")
- .before("<span class='wrapAll'>Province Scrape Range.... Start</span><input id='txtStartProvIndex' class='wrapAll' style='margin-right: 10px;' type='text'><span class='wrapAll'>End</span><input id='txtEndProvIndex' class='wrapAll' type='text'><br class='wrapAll'>")
- .before("<span class='wrapAll'>Start at Page:</span><input id='txtStartPage' class='wrapAll' style='margin-right: 10px;' type='text'>")
- .before("<span class='wrapAll'>End at Page:</span><input id='txtEndPage' class='wrapAll' style='margin-right: 10px;' type='text'>");
- $( ".wrapAll" ).wrapAll( "<div style='position: absolute; right: 100px; top: 100px;' />");
- $("#comboProvincia").children().eq(1).attr('selected', true);
- $("#txtStartProvIndex").change(() => {
- $("#comboProvincia").children().eq($("#txtStartProvIndex").val()).attr('selected', true);
- });
- }
- (() => {
- 'use strict';
- initializeForm();
- $("#txtStartProvIndex").val(1);
- $("#txtEndProvIndex").val($("#comboProvincia").children().length);
- $("#txtStartPage").val("1");
- // Set default values
- $("#Nombre").val("%");
- $("#Apellido1").val("%");
- $("#codigoCaptcha").focus();
- var firstRun = true;
- $("#busquedaPublicaColegiadoBuscar").click(async () => {
- await waitLoading();
- var provIndex = parseInt($("#txtStartProvIndex").val() || 1);
- var provStopIndex = parseInt($("#txtEndProvIndex").val() || $("#comboProvincia").children().length);
- // This code is only ran once
- if(firstRun) {
- // Set the page
- document.getElementById('formBuscarColegiados').pagina.value = parseInt($("#txtStartPage").val());
- buscarPagina();
- await waitLoading();
- firstRun = false;
- }
- if(provIndex <= provStopIndex) {
- var pagesLeft = true;
- var results = [];
- do {
- var providencesLeft = provStopIndex - provIndex;
- $("#idProvidences").text("Provinces left: " + providencesLeft);
- $("#idPages").text("Pages: " + $(".this-page").text() + " / " + $("a.enlacePaginacion:nth-last-child(2)").text());
- var images = $("table.resultados > tbody > tr > td:last-child > img");
- for(var i = 0; i < images.length; i++) {
- if($(images[i]).parent().siblings(':first').text() === '') {
- // Skip blank ids which cause errors
- continue;
- }
- $(images[i]).click();
- var row = await parseTab();
- results.push(row);
- var resultString = "Records Collected: " + results.length;
- $("#idResults").text(resultString);
- // console.log(row);
- };
- images = null;
- if($("a.enlacePaginacion:last").text() === "Siguiente" && ( $("#txtEndPage").val() === "" || $(".this-page").text() != $("#txtEndPage").val() ) ) {
- // Go to the next page
- pagesLeft = true;
- $("a.enlacePaginacion:last")[0].click();
- await waitLoading();
- }
- else {
- pagesLeft = false;
- }
- } while (pagesLeft);
- // Download the results
- await downloadResultSet(results, provIndex);
- // Current result set has finished
- provIndex += 1;
- results = [];
- if(provIndex <= provStopIndex) {
- $("#txtStartProvIndex").val(provIndex);
- $("#comboProvincia").children().eq(provIndex).attr('selected', true);
- $("#busquedaPublicaColegiadoBuscar").click();
- await waitLoading();
- }
- }
- });
- })();