Mturk Qualification Database and Scraper

Scrape, display, sort and search your Mturk qualifications

当前为 2023-01-20 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Mturk Qualification Database and Scraper
  3. // @namespace https://greasyfork.org/en/users/1004048-elias041
  4. // @version 0.1
  5. // @description Scrape, display, sort and search your Mturk qualifications
  6. // @author Elias041
  7. // @license none
  8. // @match https://worker.mturk.com/qualifications/assigned
  9. // @match https://worker.mturk.com/qt
  10. // @require https://code.jquery.com/jquery-3.6.3.js
  11. // @require https://unpkg.com/dexie/dist/dexie.js
  12. // @require https://unpkg.com/ag-grid-community@29.0.0/dist/ag-grid-community.min.js
  13. // @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.29.4/moment.min.js
  14. // @resource agGridCss https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-grid.css
  15. // @resource agThemeAlpineCss https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-theme-alpine.css
  16. // @icon https://www.google.com/s2/favicons?sz=64&domain=mturk.com
  17. // @grant none
  18. // ==/UserScript==
  19.  
  /*variables*/
  // JSON endpoint returning one page (up to 100 entries) of assigned qualifications.
  const QUALS_URL = "https://worker.mturk.com/qualifications/assigned.json?page_size=100";
  // Maximum number of HTTP 429 responses tolerated during one scrape.
  const MAX_RETRIES = 5;
  // Pause before re-requesting a page after an HTTP 429, in milliseconds.
  const RETRY_DELAY_MS = 10000;
  // Class list of the page element the scraper controls are inserted in front of.
  const ANCHOR_CLASS = "col-xs-5 col-md-3 text-xs-right p-l-0";

  let timeout = 1850; // delay between page requests (ms); grows by 500 after each 429
  let counter = 0; // number of pages fetched successfully so far
  let retry_count = 0; // number of 429 responses seen during this scrape
  let page = QUALS_URL; // URL of the next page to request
  let timeoutId = undefined; // handle of the pending "next page" / "retry" timer
  let scraping = false; // true while a scrape is running; cleared by Cancel

  /**
   * Build the URL for the page identified by a pagination token.
   * @param {string} nextPageToken - `next_page_token` from the previous response.
   * @returns {string} Absolute URL of the next page.
   */
  function buildPageUrl(nextPageToken) {
    return `${QUALS_URL}&next_token=${encodeURIComponent(nextPageToken)}`;
  }

  /**
   * Map one entry of `assigned_qualifications` to the record stored in the
   * `quals` table. `request_qualification_url` is used as the primary key.
   * @param {object} t - One qualification object from the JSON response.
   * @returns {object} Record ready to be written to the database.
   */
  function toQualRecord(t) {
    return {
      id: t.request_qualification_url,
      requester: t.creator_name,
      description: t.description,
      canRetake: t.can_retake_test_or_rerequest,
      retry: t.earliest_retriable_time,
      score: t.value,
      date: t.grant_time,
      qualName: t.name,
      reqURL: t.creator_url,
      retURL: t.retake_test_url,
      isSystem: t.is_system_qualification,
      canRequest: t.is_requestable,
      hasTest: t.has_test,
    };
  }

  /**
   * Insert an element directly before the anchor element on the page.
   * @param {HTMLElement} el - Element to insert.
   * @returns {boolean} false when the anchor does not exist on this page.
   */
  function insertBeforeAnchor(el) {
    const anchor = document.getElementsByClassName(ANCHOR_CLASS)[0];
    if (!anchor) {
      return false;
    }
    anchor.parentNode.insertBefore(el, anchor);
    return true;
  }

  /** Replace the content of the progress indicator with the given HTML. */
  function setProgress(html) {
    $("#progressBar").html(html);
  }

  /**
   * Click handler of the "Scrape Quals" button: swaps the button for a Cancel
   * button and a progress indicator, opens the database and walks every page
   * of assigned qualifications.
   */
  function startScrape() {
    scraping = true;
    $("#button").remove();

    const cancelButton = document.createElement("button");
    cancelButton.style.color = "#fff";
    cancelButton.style.background = "#fc0f03";
    cancelButton.innerHTML = "Cancel";
    cancelButton.id = "cancelButton";
    insertBeforeAnchor(cancelButton);

    const bar = document.createElement("div");
    bar.id = "progressBar";
    bar.innerHTML = "&nbsp&nbsp&nbsp &nbsp&nbsp&nbsp";
    insertBeforeAnchor(bar);

    cancelButton.addEventListener("click", function () {
      scraping = false;
      clearTimeout(timeoutId); // drop the queued next-page / retry request
      $("#cancelButton").remove();
      setProgress("&nbsp&nbsp&nbsp&nbspCanceled&nbsp&nbsp<br>" + counter + "&nbsppages&nbspscraped");
    });

    /*init db*/
    const db = new Dexie("qualifications");
    db.version(1).stores({
      quals: [
        "id",
        "requester",
        "description",
        "score",
        "date",
        "qualName",
        "reqURL",
        "reqQURL",
        "retURL",
        "canRetake",
        "hasTest",
        "canRequest",
        "isSystem",
      ].join(","),
    });

    /** Stop the scrape after an unrecoverable error and report it. */
    function abortScrape(status, error) {
      scraping = false;
      $("#cancelButton").remove();
      if (status === 429) {
        setProgress("Timed&nbspout&nbsp5&nbsptimes,&nbspaborting.&nbsp" + timeout + "&nbspmilliseconds.");
        console.log("Timed out 5 times, aborting. " + timeout + " milliseconds.");
      } else {
        // Anything other than rate limiting is not a timeout; say so.
        setProgress("Scrape&nbspfailed,&nbspaborting.");
        console.error("Scrape failed, aborting.", error);
      }
    }

    /*main loop*/
    /**
     * Fetch the page at `page`, store its qualifications and schedule the
     * following page until the server stops returning a pagination token.
     * @param {string} [nextPageToken=""] - Token of the page being requested;
     *   only passed along so a retry requests the same page again.
     */
    function getAssignedQualifications(nextPageToken = "") {
      if (!scraping) {
        return;
      } //cancel trap
      // counter counts completed pages, so the page in flight is counter + 1.
      setProgress("&nbsp&nbsp&nbspProcessing&nbsppage&nbsp" + (counter + 1) + "&nbsp&nbsp&nbsp");

      $.getJSON(page)
        .then(function (data) {
          if (!scraping) {
            return; // cancelled while the request was in flight
          }
          counter++;

          // Records without a key cannot be stored; skip them instead of
          // letting one bad entry reject the write.
          const records = data.assigned_qualifications
            .map(toQualRecord)
            .filter((record) => record.id != null);
          // bulkPut overwrites rows from an earlier scrape, whereas bulkAdd
          // rejects every key that already exists.
          db.quals.bulkPut(records).catch(function (error) {
            console.error("Failed to store qualifications", error);
          });

          // `!= null` also stops on a missing (undefined) token.
          if (data.next_page_token != null) {
            page = buildPageUrl(data.next_page_token);
            timeoutId = setTimeout(() => {
              getAssignedQualifications(data.next_page_token);
            }, timeout);
            return;
          }

          scraping = false;
          $("#cancelButton").remove();
          console.log("Scraping completed");
          console.log(counter + "pages");
          console.log("Timeout" + timeout);
          console.log(retry_count + "timeouts");
          setProgress("&nbsp&nbsp&nbspScrape&nbspComplete<br>&nbsp&nbsp&nbsp" + counter + "&nbspPages<br>&nbsp&nbsp&nbsp<a href='https://worker.mturk.com/qt' target='_blank'>Click&nbspHere</a>");
        })
        .catch(function (error) { //handle timeouts
          const status = error && error.status;
          if (status === 429 && retry_count < MAX_RETRIES) {
            retry_count++;
            timeout += 500;
            console.log("timed out, incrementing clock to " + timeout + " milliseconds");
            timeoutId = setTimeout(() => {
              getAssignedQualifications(nextPageToken);
            }, RETRY_DELAY_MS);
            return;
          }
          abortScrape(status, error);
        });
    }

    getAssignedQualifications();
  }

  /**
   * Add the "Scrape Quals" button to the page. Does nothing when the anchor
   * element is missing, which is the case on pages other than the
   * qualifications list.
   */
  function initScraper() {
    /*buttons*/
    const scrapeButton = document.createElement("button");
    scrapeButton.style.background = "#343aeb";
    scrapeButton.style.color = "#fff";
    scrapeButton.id = "button";
    scrapeButton.innerHTML = "Scrape&nbspQuals";
    if (!insertBeforeAnchor(scrapeButton)) {
      return;
    }
    scrapeButton.addEventListener("click", startScrape);
  }

  // The typeof guard keeps the helpers above loadable outside a browser.
  if (typeof window !== "undefined" && typeof document !== "undefined") {
    if (document.readyState === "complete") {
      // The script was injected after the load event; it will not fire again.
      initScraper();
    } else {
      window.addEventListener("load", initScraper); //wait for page to load
    }
  }
  162.  
  /*ag-grid*/

  /**
   * Return one "/"-separated segment of a URL or path.
   * @param {string|null|undefined} url - Value to split.
   * @param {number} index - Index into `url.split("/")`.
   * @returns {string} The segment, or "" when `url` is not a string or the
   *   segment does not exist.
   */
  function pathSegment(url, index) {
    if (typeof url !== "string" || url === "") {
      return "";
    }
    const segment = url.split("/")[index];
    return segment === undefined ? "" : segment;
  }

  /**
   * Convert a stored grant date to milliseconds since the epoch.
   * @returns {number} Timestamp, or NaN when the value is missing or unparsable.
   */
  function grantTimestamp(value) {
    return value == null ? NaN : new Date(value).getTime();
  }

  /**
   * Format a stored grant date as M/D/YYYY in local time.
   * @returns {string} Formatted date, or "" when the value is missing or unparsable.
   */
  function formatGrantDate(value) {
    const time = grantTimestamp(value);
    if (Number.isNaN(time)) {
      return "";
    }
    const date = new Date(time);
    return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`;
  }

  /**
   * Sort comparator for stored grant dates; missing or unparsable dates sort first.
   * @returns {number} Negative, zero or positive, as for Array.prototype.sort.
   */
  function compareGrantDates(a, b) {
    const timeA = grantTimestamp(a);
    const timeB = grantTimestamp(b);
    const missingA = Number.isNaN(timeA);
    const missingB = Number.isNaN(timeB);
    if (missingA || missingB) {
      return Number(missingB) - Number(missingA);
    }
    return timeA - timeB;
  }

  // The typeof guard keeps the helpers above loadable outside a browser.
  if (typeof location !== "undefined" && location.href === "https://worker.mturk.com/qt") {
    // Replace whatever the site serves at /qt with an empty host for the grid.
    document.body.innerHTML = "";
    const gridHost = document.createElement("div");
    gridHost.setAttribute("id", "gridDiv");
    document.body.appendChild(gridHost);
    document.title = "Qualifications";

    /*init db*/
    const db = new Dexie("qualifications");
    db.version(1).stores({
      quals: [
        "id",
        "requester",
        "description",
        "score",
        "date",
        "qualName",
        "reqURL",
        "reqQURL",
        "retURL",
        "canRetake",
        "hasTest",
        "canRequest",
        "isSystem",
      ].join(","),
    });

    gridHost.innerHTML = `
    <div id="myGrid" class="ag-theme-alpine">
    <style>
    .ag-theme-alpine {
    --ag-grid-size: 3px;
    width: 100%;
    height: 100%;
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    }
    </style>
    </div>`;

    // Raw stored date of a row; falls back to the cell value when the node has no data.
    const rawDate = function (node, fallback) {
      return node && node.data ? node.data.date : fallback;
    };

    // Column shown only while the "More" group is expanded.
    const detailColumn = function (headerName, field) {
      return {
        headerName: headerName,
        field: field,
        width: 100,
        columnGroupShow: "open",
        suppressMenu: true,
      };
    };

    const gridOptions = {
      columnDefs: [
        {
          headerName: "Mturk Qualification Database and Scraper",
          children: [{ field: "qualName" }, { field: "requester" }],
        },
        {
          headerName: " ",
          children: [
            {
              field: "description",
              width: 350,
            },
            {
              headerName: "Value",
              field: "score",
              width: 100,
            },
            {
              headerName: "Date",
              field: "date",
              width: 100,
              valueGetter: function (params) {
                return formatGrantDate(params.data && params.data.date);
              },
              // Sort on the stored value rather than on the displayed
              // M/D/YYYY text, which has no time of day and relies on
              // implementation-defined date parsing.
              comparator: function (valueA, valueB, nodeA, nodeB) {
                return compareGrantDates(rawDate(nodeA, valueA), rawDate(nodeB, valueB));
              },
            },
            {
              headerName: "Requester ID",
              width: 150,
              field: "reqURL",
              valueFormatter: function (params) {
                return pathSegment(params.value, 2);
              },
            },
            {
              headerName: "Qual ID",
              field: "id",
              valueFormatter: function (params) {
                return pathSegment(params.value, 2);
              },
            },
          ],
        },
        {
          headerName: "More",
          children: [
            {
              // Empty placeholder column displayed while the group is collapsed.
              headerName: " ",
              field: " ",
              width: 100,
              columnGroupShow: "closed",
            },
            detailColumn("Retake", "canRetake"),
            detailColumn("hasTest", "hasTest"),
            detailColumn("canReq", "canRequest"),
            detailColumn("System", "isSystem"),
          ],
        },
      ],
      defaultColDef: {
        sortable: true,
        filter: true,
        editable: true,
        resizable: true,
      },
      rowSelection: "multiple",
      animateRows: true,
      rowData: [],
    };

    // Load every stored qualification and build the grid inside #myGrid.
    const renderGrid = function () {
      const gridDiv = document.querySelector("#myGrid");
      db.quals
        .toArray()
        .then((data) => {
          gridOptions.rowData = data;
          new agGrid.Grid(gridDiv, gridOptions);
        })
        .catch((error) => {
          console.error("Could not load qualifications from the database", error);
        });
    };

    if (document.readyState === "complete") {
      // The script was injected after the load event; it will not fire again.
      renderGrid();
    } else {
      window.addEventListener("load", renderGrid);
    }
  }
  346.  
  347.  
  348.