Mturk Qualification Database and Scraper

Scrape, display, sort and search your Mturk qualifications

This is the version submitted on 2023-01-26. See the latest version.

  1. // ==UserScript==
  2. // @name Mturk Qualification Database and Scraper
  3. // @namespace https://greasyfork.org/en/users/1004048-elias041
  4. // @version 0.2
  5. // @description Scrape, display, sort and search your Mturk qualifications
  6. // @author Elias041
  7. // @match https://worker.mturk.com/qualifications/assigned
  8. // @match https://worker.mturk.com/qt
  9. // @require https://code.jquery.com/jquery-3.6.3.js
  10. // @require https://code.jquery.com/ui/1.13.1/jquery-ui.min.js
  11. // @require https://unpkg.com/dexie/dist/dexie.js
  12. // @require https://unpkg.com/ag-grid-community@29.0.0/dist/ag-grid-community.min.js
  13. // @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.29.4/moment.min.js
  14. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-grid.css
  15. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-theme-alpine.css
  16. // @icon https://www.google.com/s2/favicons?sz=64&domain=mturk.com
  17. // @grant none
  18. // ==/UserScript==
  19.  
  20.  
  /* Shared scraper state. These bindings are read and reassigned by the
     scraping handlers further down in the script, so they stay mutable. */

  /** Delay in milliseconds between successive page requests. */
  let timeout = 1850;
  /** Number of pages requested so far (a real number, so `counter++` needs no coercion). */
  let counter = 0;
  /** Number of HTTP 429 responses that have been retried. */
  let retry_count = 0;
  /** URL of the qualifications page that is fetched next. */
  let page = "https://worker.mturk.com/qualifications/assigned.json?page_size=100";
  /** Handle of the pending `setTimeout` that schedules the next page, if any. */
  let timeoutId;
  /** True while a scrape is running; set to false to make the loop stop. */
  let scraping = false;
  /** Class list of the MTurk header cell the toolbar is inserted in front of. */
  const TOOLBAR_ANCHOR_CLASSES = "col-xs-5 col-md-3 text-xs-right p-l-0";
  /** First page of the assigned-qualifications JSON feed. */
  const ASSIGNED_QUALS_URL = "https://worker.mturk.com/qualifications/assigned.json?page_size=100";
  /** How many HTTP 429 responses are retried before the scrape is aborted. */
  const MAX_RATE_LIMIT_RETRIES = 5;
  /** Pause in milliseconds before retrying a rate-limited request. */
  const RATE_LIMIT_PAUSE_MS = 10000;
  /** Extra delay in milliseconds added between pages after each rate limit. */
  const RATE_LIMIT_DELAY_STEP_MS = 500;
  /** Background colours used by the toolbar. */
  const TOOLBAR_COLORS = Object.freeze({
    scrapeIdle: "#33773A",
    idle: "#383c44",
    cancelActive: "#CE3132",
    database: "#323552",
    done: "#25dc12",
  });

  /**
   * Builds the URL of one page of the assigned-qualifications feed.
   * @param {string} [nextPageToken=""] Token returned by the previous page; empty for the first page.
   * @returns {string} URL to request.
   */
  function buildAssignedPageUrl(nextPageToken = "") {
    if (nextPageToken === "" || nextPageToken == null) {
      return ASSIGNED_QUALS_URL;
    }
    return `${ASSIGNED_QUALS_URL}&next_token=${encodeURIComponent(nextPageToken)}`;
  }

  /**
   * Maps one entry of the feed's `assigned_qualifications` array to the row
   * stored in the `quals` table.
   * @param {object} qual Qualification object as returned by the feed.
   * @returns {object} Row keyed by `id` (the qualification's request URL).
   */
  function toQualRecord(qual) {
    return {
      id: qual.request_qualification_url,
      requester: qual.creator_name,
      description: qual.description,
      canRetake: qual.can_retake_test_or_rerequest,
      retry: qual.earliest_retriable_time,
      score: qual.value,
      date: qual.grant_time,
      qualName: qual.name,
      reqURL: qual.creator_url,
      retURL: qual.retake_test_url,
      isSystem: qual.is_system_qualification,
      canRequest: qual.is_requestable,
      hasTest: qual.has_test,
    };
  }

  /**
   * Converts a feed page into rows that can be written to the `quals` table.
   * Rows without an `id` are dropped: `id` is the table's primary key, so they
   * could never be stored and would only make the batch write fail.
   * @param {Array<object>} quals The page's `assigned_qualifications` value.
   * @returns {Array<object>} Storable rows; empty when `quals` is not an array.
   */
  function toStorableRecords(quals) {
    if (!Array.isArray(quals)) {
      return [];
    }
    return quals.map((qual) => toQualRecord(qual)).filter((record) => record.id != null);
  }

  /**
   * Describes why a scrape was aborted.
   * @param {*} error Rejection value of the request (a jqXHR, or a thrown error).
   * @param {number} retries Number of rate-limit retries already made.
   * @param {number} delayMs Current delay between pages in milliseconds.
   * @returns {string} Human-readable message.
   */
  function describeScrapeFailure(error, retries, delayMs) {
    if (error && error.status === 429) {
      return `Timed out ${retries} times, aborting. ${delayMs} milliseconds.`;
    }
    const detail = error && error.status ? `HTTP ${error.status}` : String(error);
    return `Request failed (${detail}), aborting.`;
  }

  /**
   * Creates one of the toolbar's div "buttons".
   * @param {string} id Element id.
   * @param {string} label Visible text.
   * @param {string} background CSS background colour.
   * @returns {HTMLDivElement} The styled, not yet attached element.
   */
  function createToolbarButton(id, label, background) {
    const button = document.createElement("div");
    button.id = id;
    button.textContent = label;
    Object.assign(button.style, {
      color: "#fff",
      padding: "10px",
      boxShadow: "2px 2px 4px #888888",
      background,
      opacity: "0.5",
    });
    return button;
  }

  /**
   * Opens the Dexie database the scraped qualifications are written to.
   * @returns {Dexie} Database exposing the `quals` table.
   */
  function openScraperDb() {
    const db = new Dexie("qualifications");
    db.version(1).stores({
      quals: [
        "id",
        "requester",
        "description",
        "score",
        "date",
        "qualName",
        "reqURL",
        "reqQURL",
        "retURL",
        "canRetake",
        "hasTest",
        "canRequest",
        "isSystem",
      ].join(", "),
    });
    return db;
  }

  /**
   * Adds the Scrape / Cancel / Database / progress toolbar to the page and
   * wires up the scraping loop. Does nothing when the page has no toolbar
   * anchor (for example on the /qt grid page).
   *
   * Uses the script-level state `timeout`, `counter`, `retry_count`, `page`,
   * `timeoutId` and `scraping` declared earlier in the file.
   */
  function initScraperUi() {
    const anchor = document.getElementsByClassName(TOOLBAR_ANCHOR_CLASSES)[0];
    if (!anchor) {
      return;
    }

    // "\u00A0" is a non-breaking space, so the label cannot wrap.
    const scrapeButton = createToolbarButton("button", "Scrape\u00A0Quals", TOOLBAR_COLORS.scrapeIdle);
    const cancelButton = createToolbarButton("cancelButton", "Cancel", TOOLBAR_COLORS.idle);
    const dbButton = createToolbarButton("dbButton", "Database", TOOLBAR_COLORS.database);
    const progress = createToolbarButton("progress", "-", TOOLBAR_COLORS.scrapeIdle);
    for (const element of [scrapeButton, cancelButton, dbButton, progress]) {
      anchor.parentNode.insertBefore(element, anchor);
    }

    let db; // opened lazily on the first scrape and then reused
    let activeRun = 0; // identifies the current scrape so callbacks of older runs are ignored

    /** Stops the loop, cancels the pending timer and restores the idle colours. */
    function stopScraping() {
      scraping = false;
      clearTimeout(timeoutId);
      scrapeButton.style.background = TOOLBAR_COLORS.scrapeIdle;
      cancelButton.style.background = TOOLBAR_COLORS.idle;
    }

    /** Writes one feed page to the database, updating rows that already exist. */
    function storeQualifications(quals) {
      const records = toStorableRecords(quals);
      if (records.length === 0) {
        return;
      }
      db.quals.bulkPut(records).catch((error) => {
        console.error("Could not store qualifications", error);
      });
    }

    /**
     * Fetches one page of qualifications, stores it and schedules the next.
     * @param {string} nextPageToken Token of the page to fetch; empty for the first page.
     * @param {number} run Id of the scrape this call belongs to.
     * @param {boolean} [isRetry=false] True when re-requesting a rate-limited page.
     */
    function getAssignedQualifications(nextPageToken, run, isRetry = false) {
      const isStale = () => !scraping || run !== activeRun;
      if (isStale()) {
        return; // cancelled, or superseded by a newer scrape
      }
      if (!isRetry) {
        counter++; // a retried page must not be counted twice
      }
      progress.textContent = String(counter);
      page = buildAssignedPageUrl(nextPageToken);
      console.log("scraping");

      $.getJSON(page)
        .then((data) => {
          if (isStale()) {
            return;
          }
          storeQualifications(data.assigned_qualifications);

          if (data.next_page_token != null) {
            timeoutId = setTimeout(() => {
              getAssignedQualifications(data.next_page_token, run);
            }, timeout);
            return;
          }

          console.log("Scraping completed");
          console.log(`${counter} pages`);
          console.log(`Timeout ${timeout}`);
          console.log(`${retry_count} timeouts`);
          stopScraping();
          progress.style.background = TOOLBAR_COLORS.done;
          progress.textContent = "✓";
        })
        .catch((error) => {
          if (isStale()) {
            return;
          }
          if (error && error.status === 429 && retry_count < MAX_RATE_LIMIT_RETRIES) {
            retry_count++;
            timeout += RATE_LIMIT_DELAY_STEP_MS;
            console.log(`timed out, incrementing clock to ${timeout} milliseconds`);
            // Kept in timeoutId so that Cancel also cancels a pending retry.
            timeoutId = setTimeout(() => {
              getAssignedQualifications(nextPageToken, run, true);
            }, RATE_LIMIT_PAUSE_MS);
            return;
          }
          const message = describeScrapeFailure(error, retry_count, timeout);
          console.error(message, error);
          stopScraping();
          progress.textContent = message;
        });
    }

    dbButton.addEventListener("click", () => {
      window.open("https://worker.mturk.com/qt", "_blank");
    });

    cancelButton.addEventListener("click", () => {
      stopScraping();
      progress.textContent = "-";
    });

    scrapeButton.addEventListener("click", () => {
      if (scraping) {
        return; // a scrape is already running; do not start a second loop
      }
      scraping = true;
      activeRun += 1;
      counter = 0;
      retry_count = 0;
      scrapeButton.style.background = TOOLBAR_COLORS.idle;
      cancelButton.style.background = TOOLBAR_COLORS.cancelActive;
      progress.style.background = TOOLBAR_COLORS.scrapeIdle;
      if (!db) {
        db = openScraperDb();
      }
      // Always restart from the first page instead of resuming a stale one.
      getAssignedQualifications("", activeRun);
    });
  }

  // The typeof guard keeps the helpers above loadable outside a browser (e.g. in tests).
  if (typeof window !== "undefined") {
    if (document.readyState === "complete") {
      // The load event has already fired, so a listener would never run.
      initScraperUi();
    } else {
      window.addEventListener("load", initScraperUi, { once: true });
    }
  }
  219.  
  /*ag-grid*/

  /** Address of the page that is replaced by the qualification grid. */
  const QUAL_GRID_PAGE_URL = "https://worker.mturk.com/qt";

  /** Index definition of the `quals` table (primary key first). */
  const QUAL_GRID_SCHEMA = [
    "id",
    "requester",
    "description",
    "score",
    "date",
    "qualName",
    "reqURL",
    "reqQURL",
    "retURL",
    "canRetake",
    "hasTest",
    "canRequest",
    "isSystem",
  ].join(", ");

  /** Full-page grid container together with its sizing rules. */
  const QUAL_GRID_MARKUP = `
  <div id="myGrid" class="ag-theme-alpine">
  <style>
  .ag-theme-alpine {
  --ag-grid-size: 3px;
  width: 100%;
  height: 100%;
  position: absolute;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  }
  </style>
  </div>`;

  /**
   * Returns the third "/"-separated part of a URL-like string, which the grid
   * shows as the requester / qualification id.
   * @param {*} url Stored URL; may be missing.
   * @returns {string} The segment, or "" when the value is not a string or is too short.
   */
  function qualUrlSegment(url) {
    if (typeof url !== "string") {
      return "";
    }
    const segment = url.split("/")[2];
    return segment === undefined ? "" : segment;
  }

  /**
   * Converts a stored date value to a sortable timestamp.
   * @param {*} value Anything `new Date()` accepts.
   * @returns {number} Milliseconds since the epoch; -Infinity for missing or unparsable values.
   */
  function grantTimestamp(value) {
    if (value == null) {
      return -Infinity;
    }
    const time = new Date(value).getTime();
    return Number.isNaN(time) ? -Infinity : time;
  }

  /**
   * Formats a stored grant time as local `M/D/YYYY`.
   * @param {*} value Stored `date` field.
   * @returns {string} Formatted date, or "" when the value is missing or unparsable.
   */
  function formatGrantDate(value) {
    const time = grantTimestamp(value);
    if (time === -Infinity) {
      return "";
    }
    const date = new Date(time);
    return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`;
  }

  /**
   * Orders two stored grant times chronologically.
   * @returns {number} Negative, zero or positive; missing dates sort first.
   */
  function compareGrantDates(dateA, dateB) {
    const timeA = grantTimestamp(dateA);
    const timeB = grantTimestamp(dateB);
    if (timeA === timeB) {
      return 0;
    }
    return timeA < timeB ? -1 : 1;
  }

  /**
   * Decides whether a stored row is shown in the grid.
   * @param {object} row Row of the `quals` table.
   * @returns {boolean} False for rows whose name contains "Exc: [".
   */
  function isDisplayableQual(row) {
    return !(typeof row.qualName === "string" && row.qualName.includes("Exc: ["));
  }

  /**
   * Builds the ag-Grid options (column groups, defaults, empty row data).
   * @returns {object} A fresh options object.
   */
  function buildQualGridOptions() {
    const detailColumn = (headerName, field) => ({
      headerName,
      field,
      width: 100,
      columnGroupShow: "open",
      suppressMenu: true,
    });
    // Sort on the stored timestamp when the row is available; the displayed
    // M/D/YYYY text has no time of day and is not an ISO date string.
    const rawDate = (node, fallback) => (node && node.data ? node.data.date : fallback);

    return {
      columnDefs: [
        {
          headerName: "Mturk Qualification Database and Scraper",
          children: [{ field: "qualName" }, { field: "requester" }],
        },
        {
          headerName: " ",
          children: [
            { field: "description", width: 350 },
            { headerName: "Value", field: "score", width: 100 },
            {
              headerName: "Date",
              field: "date",
              width: 100,
              valueGetter: (params) => formatGrantDate(params.data ? params.data.date : undefined),
              comparator: (valueA, valueB, nodeA, nodeB) =>
                compareGrantDates(rawDate(nodeA, valueA), rawDate(nodeB, valueB)),
            },
            {
              headerName: "Requester ID",
              width: 150,
              field: "reqURL",
              valueFormatter: (params) => qualUrlSegment(params.value),
            },
            {
              headerName: "Qual ID",
              field: "id",
              valueFormatter: (params) => qualUrlSegment(params.value),
            },
          ],
        },
        {
          headerName: "More",
          children: [
            // Placeholder column that is visible while the group is collapsed.
            { headerName: " ", field: " ", width: 100, columnGroupShow: "closed" },
            detailColumn("Retake", "canRetake"),
            detailColumn("hasTest", "hasTest"),
            detailColumn("canReq", "canRequest"),
            detailColumn("System", "isSystem"),
          ],
        },
      ],
      defaultColDef: {
        sortable: true,
        filter: true,
        editable: true,
        resizable: true,
      },
      rowSelection: "multiple",
      animateRows: true,
      rowData: [],
    };
  }

  /**
   * Replaces the current page with a full-window grid of the stored
   * qualifications. The page is cleared immediately; the grid itself is
   * created once the page has finished loading.
   */
  function initQualGrid() {
    document.body.innerHTML = "";
    document.title = "Qualifications";
    const container = document.createElement("div");
    container.setAttribute("id", "gridDiv");
    container.innerHTML = QUAL_GRID_MARKUP;
    document.body.appendChild(container);

    /*init db*/
    const db = new Dexie("qualifications");
    db.version(1).stores({ quals: QUAL_GRID_SCHEMA });

    const renderGrid = () => {
      db.quals
        .toArray()
        .then((rows) => {
          const gridOptions = buildQualGridOptions();
          gridOptions.rowData = rows.filter(isDisplayableQual);
          new agGrid.Grid(document.querySelector("#myGrid"), gridOptions);
        })
        .catch((error) => {
          console.error("Could not load qualifications", error);
        });
    };

    if (document.readyState === "complete") {
      // The load event has already fired, so a listener would never run.
      renderGrid();
    } else {
      window.addEventListener("load", renderGrid, { once: true });
    }
  }

  // The typeof guard keeps the helpers above loadable outside a browser (e.g. in tests).
  if (typeof location !== "undefined" && location.href === QUAL_GRID_PAGE_URL) {
    initQualGrid();
  }
  409.  
  410.  
  411.