Mturk Qualification Database and Scraper

Scrape, display, sort and search your Mturk qualifications

当前为 2023-02-10 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Mturk Qualification Database and Scraper
  3. // @namespace https://greasyfork.org/en/users/1004048-elias041
  4. // @version 0.76
  5. // @description Scrape, display, sort and search your Mturk qualifications
  6. // @author Elias041
  7. // @match https://worker.mturk.com/qualifications/assigned
  8. // @match https://worker.mturk.com/qt
  9. // @require https://code.jquery.com/jquery-3.6.3.js
  10. // @require https://code.jquery.com/ui/1.13.1/jquery-ui.min.js
  11. // @require https://unpkg.com/dexie/dist/dexie.js
  12. // @require https://unpkg.com/ag-grid-community@29.0.0/dist/ag-grid-community.min.js
  13. // @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.29.4/moment.min.js
  14. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-grid.css
  15. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-theme-alpine.css
  16. // @icon https://www.google.com/s2/favicons?sz=64&domain=mturk.com
  17. // @license none
  18. // @grant none
  19. // ==/UserScript==
  20.  
  21.  
  22. let timeout = 1850;
  23. let counter = " ";
  24. let retry_count = 0;
  25. let page = "https://worker.mturk.com/qualifications/assigned.json?page_size=100";
  26. let timeoutId = undefined;
  27. let scraping = false;
  28. window.onload = function ()
  29. {
  30.  
  31.  
  32.  
  33. let t = document.getElementsByClassName("col-xs-5 col-md-3 text-xs-right p-l-0")[0],
  34. e = t.parentNode,
  35. o = document.createElement("div");
  36. o.style.color = "#fff";
  37. o.style.padding = "10px";
  38. o.style.boxShadow = "2px 2px 4px #888888";
  39. o.style.background = "#33773A";
  40. o.style.opacity = "0.5";
  41. o.style.cursor = "pointer";
  42. o.id = "button";
  43. o.innerHTML = "Scrape&nbspQuals";
  44. e.insertBefore(o, t);
  45.  
  46. let c = document.createElement("div");
  47. c.style.color = "#fff";
  48. c.style.background = "#C78D99";
  49. c.style.padding = "10px";
  50. c.style.boxShadow = "2px 2px 4px #888888";
  51. c.style.background = "#383c44";
  52. c.style.opacity = "0.5";
  53. c.style.cursor = "pointer";
  54. c.innerHTML = "Cancel";
  55. c.id = "cancelButton";
  56. e.insertBefore(c, t);
  57.  
  58.  
  59. let d = document.createElement("div");
  60. d.style.color = "#fff";
  61. d.style.background = "#fc0f03";
  62. d.style.padding = "10px";
  63. d.style.boxShadow = "2px 2px 4px #888888";
  64. d.style.background = "#323552";
  65. d.style.opacity = "0.5";
  66. d.style.cursor = "pointer";
  67. d.innerHTML = "Database";
  68. d.id = "dbButton";
  69. e.insertBefore(d, t);
  70.  
  71. let f = document.createElement("div");
  72. f.style.color = "#fff";
  73. f.style.padding = "10px";
  74. f.style.boxShadow = "2px 2px 4px #888888";
  75. f.style.background = "#33773A";
  76. f.style.opacity = "0.5";
  77. f.id = "progress";
  78. f.innerHTML = "-";
  79. e.insertBefore(f, t);
  80.  
  81. document.getElementById("dbButton").addEventListener("click", function e()
  82. {
  83. window.open("https://worker.mturk.com/qt", "_blank");
  84. });
  85.  
  86.  
  87.  
  88. document.getElementById("cancelButton").addEventListener("click", function e()
  89. {
  90. retry_count=0;
  91. scraping = false;
  92. $("#cancelButton").css('background', '#383c44')
  93. $("#button").css('background', '#33773A')
  94. $("#progress").html("-")
  95. })
  96. document.getElementById("button").addEventListener("click", function e()
  97. {
  98. scraping = true;
  99. $("#button").css('background', '#383c44')
  100. $("#cancelButton").css('background', '#CE3132')
  101.  
  102.  
  103.  
  104. /*init db*/
  105. var db = new Dexie("qualifications");
  106. db.version(1).stores(
  107. {
  108. quals: `
  109. id,
  110. requester,
  111. description,
  112. score,
  113. date,
  114. qualName,
  115. reqURL,
  116. reqQURL,
  117. retURL,
  118. canRetake,
  119. hasTest,
  120. canRequest,
  121. isSystem`
  122. });
  123.  
  124. /*main loop*/
  125. function getAssignedQualifications(nextPageToken = "")
  126. {
  127. if (!scraping)
  128. {
  129. return;
  130. } //cancel trap
  131. counter++
  132. $("#progress").html(counter);
  133. //$("#progressBar").html("&nbsp&nbsp&nbspProcessing&nbsppage&nbsp" + counter + "&nbsp&nbsp&nbsp");
  134. //console.log("scraping")
  135. $.getJSON(page)
  136.  
  137. .then(function (data)
  138. {
  139. data.assigned_qualifications.forEach(function (t)
  140. {
  141. db.quals.bulkAdd([
  142. {
  143. id: t.request_qualification_url,
  144. requester: t.creator_name,
  145. description: t.description,
  146. canRetake: t.can_retake_test_or_rerequest,
  147. retry: t.earliest_retriable_time,
  148. score: t.value,
  149. date: t.grant_time,
  150. qualName: t.name,
  151. reqURL: t.creator_url,
  152. retURL: t.retake_test_url,
  153. isSystem: t.is_system_qualification,
  154. canRequest: t.is_requestable,
  155. hasTest: t.has_test
  156. }])
  157. })
  158.  
  159. if (data.next_page_token !== null)
  160. {
  161. timeoutId = setTimeout(() =>
  162. {
  163. page = `https://worker.mturk.com/qualifications/assigned.json?page_size=100&next_token=${encodeURIComponent(data.next_page_token)}`
  164. getAssignedQualifications(data.next_page_token);
  165. }, timeout);
  166.  
  167.  
  168. }
  169. else if (data.next_page_token === null)
  170. {
  171. console.log("Scraping completed");
  172. console.log(counter + "pages");
  173. console.log("Timeout" + timeout);
  174. console.log(retry_count + "timeouts");
  175. $("#cancelButton").css('background', '#383c44');
  176. $("#progress").css('background', '#25dc12');
  177. $("#progress").html('✓');
  178. $("#dbButton").css('background', '#57ab4f');
  179. return;
  180.  
  181.  
  182. } else {
  183. console.log("Timeout or abort. Clock was " + timeout);
  184. $("#progress").css('background', '#FF0000');
  185. $("#progress").html('X');
  186. return;
  187. }
  188.  
  189. })
  190.  
  191. .catch(function (error)
  192. { //handle timeouts
  193. if (error.status === 429 && retry_count < 10)
  194. {
  195. retry_count++;
  196. timeout += 500;
  197. console.log("timed out, incrementing clock to " + timeout + " milliseconds")
  198. setTimeout(() =>
  199. {
  200. getAssignedQualifications(nextPageToken);
  201. }, 10000);
  202. } else if (error.status === 503) {
  203. $("#progress").css('background', '#FFFF00');
  204. $("#progress").html('&#33;');
  205. if (confirm("Mturk responded with 503: Service Unavailable. Retry?")) {
  206. $("#progress").css('background', '#33773A');
  207. setTimeout(() => {
  208. getAssignedQualifications(nextPageToken);
  209. }, 10000);
  210. } else {
  211. $("#progress").css('background', '#FF0000');
  212. $("#progress").html('&#88;');
  213. console.log("User declined retry.");
  214. return;
  215. }
  216. }
  217. else
  218. {
  219.  
  220. // $("#progress").css('background', '#FF0000');
  221. // $("#progress").html('&#88;');
  222. //console.log("Timeout or abort. Clock was " + timeout);
  223. return;
  224.  
  225. }
  226. /* $("#button").html("Retry?");
  227. $("#button").css("background-color", "#e80c0f");
  228. document.getElementById("button").addEventListener("click", function e() {
  229. location.reload()
  230.  
  231.  
  232.  
  233. }*/
  234. }
  235.  
  236. )
  237.  
  238. }
  239.  
  240. getAssignedQualifications();
  241.  
  242. }
  243.  
  244.  
  245. )
  246.  
  247. };
  248.  
  249. /*ag-grid*/
  250. if (location.href === "https://worker.mturk.com/qt")
  251. {
  252. document.body.innerHTML = "";
  253. let gridDiv = document.createElement("div");
  254. gridDiv.setAttribute("id", "gridDiv");
  255. document.body.appendChild(gridDiv);
  256. document.title = "Qualifications";
  257.  
  258.  
  259. /*init db*/
  260. var db = new Dexie("qualifications");
  261. db.version(1).stores(
  262. {
  263. quals: `
  264. id,
  265. requester,
  266. description,
  267. score,
  268. date,
  269. qualName,
  270. reqURL,
  271. reqQURL,
  272. retURL,
  273. canRetake,
  274. hasTest,
  275. canRequest,
  276. isSystem`
  277. });
  278.  
  279. gridDiv.innerHTML = `
  280. <div id="myGrid" class="ag-theme-alpine">
  281. <style>
  282. .ag-theme-alpine {
  283. --ag-grid-size: 3px;
  284. width: 100%;
  285. height: 100%;
  286. position: absolute;
  287. top: 0;
  288. left: 0;
  289. right: 0;
  290. bottom: 0;
  291. }
  292. </style>
  293. </div>`
  294.  
  295.  
  296. const gridOptions = {
  297. columnDefs: [
  298. {
  299. headerName: 'Mturk Qualification Database and Scraper',
  300. children: [
  301. {
  302. field: "qualName",
  303. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  304. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  305. } },
  306. {
  307. headerName: "Requester",
  308. field: "requester",
  309. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  310. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  311. }
  312.  
  313. }]
  314. },
  315.  
  316.  
  317. {
  318. headerName: ' ',
  319. children: [
  320. {field: "description",
  321. width: 350,
  322. cellRenderer: function(params) {
  323. return '<span title="' + params.value + '">' + params.value + '</span>';
  324. },
  325. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  326. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  327. }},
  328. {
  329. headerName: "Value",
  330. field: "score",
  331. width: 100
  332. },
  333. {
  334. headerName: "Date",
  335. field: "date",
  336. width: 100,
  337. valueGetter: function (params)
  338. {
  339. var date = new Date(params.data.date);
  340. return (date.getMonth() + 1) + "/" + date.getDate() + "/" + date.getFullYear();
  341. },
  342. comparator: function (valueA, valueB, nodeA, nodeB, isInverted)
  343. {
  344. var dateA = new Date(valueA);
  345. var dateB = new Date(valueB);
  346. return dateA - dateB;
  347. },
  348. //valueFormatter: function(params) {
  349. // return new Date(params.value).toString().substring(4, 15);
  350. //}
  351. },
  352. {
  353.  
  354. headerName: "Requester ID",
  355. width: 150,
  356. field: "reqURL",
  357. valueFormatter: function (params)
  358. {
  359. var parts = params.value.split("/");
  360. return parts[2];
  361.  
  362. },
  363.  
  364. },
  365. {
  366. headerName: "Qual ID",
  367. field: "id",
  368.  
  369. valueFormatter: function (params)
  370. {
  371. if (!params.value || params.value === '') return '';
  372. var parts = params.value.split("/");
  373. return parts[2];
  374.  
  375. }
  376. }]
  377. },
  378. {
  379. headerName: 'More',
  380. children: [
  381. {
  382. headerName: " ",
  383. field: " ",
  384. width: 100,
  385. columnGroupShow: 'closed'
  386. },
  387. {
  388. headerName: "Retake",
  389. field: "canRetake",
  390. width: 100,
  391. columnGroupShow: 'open',
  392. suppressMenu: true
  393. },
  394. {
  395. headerName: "hasTest",
  396. field: "hasTest",
  397. width: 100,
  398. columnGroupShow: 'open',
  399. suppressMenu: true
  400. },
  401. {
  402. headerName: "canReq",
  403. field: "canRequest",
  404. width: 100,
  405. columnGroupShow: 'open',
  406. suppressMenu: true
  407. },
  408. {
  409. headerName: "System",
  410. field: "isSystem",
  411. width: 100,
  412. columnGroupShow: 'open',
  413. suppressMenu: true
  414. },
  415.  
  416. ]
  417. }
  418. ],
  419. defaultColDef:
  420. {
  421. sortable: true,
  422. filter: true,
  423. editable: true,
  424. resizable: true,
  425. },
  426. rowSelection: 'multiple',
  427. animateRows: true,
  428. rowData: []
  429. };
  430.  
  431. window.addEventListener('load', function ()
  432. {
  433. const gridDiv = document.querySelector('#myGrid');
  434. db.quals.toArray().then(data =>
  435. {
  436.  
  437. var filteredData = data.filter(function (row)
  438. {
  439. return !row.qualName.includes("Exc: [");
  440. });
  441. gridOptions.rowData = filteredData;
  442. new agGrid.Grid(gridDiv, gridOptions);
  443.  
  444. })
  445. })
  446. };