Mturk Qualification Database and Scraper

Scrape, display, sort and search your Mturk qualifications

当前为 2023-02-18 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Mturk Qualification Database and Scraper
  3. // @namespace https://greasyfork.org/en/users/1004048-elias041
  4. // @version 0.77
  5. // @description Scrape, display, sort and search your Mturk qualifications
  6. // @author Elias041
  7. // @match https://worker.mturk.com/qualifications/assigned
  8. // @match https://worker.mturk.com/qt
  9. // @require https://code.jquery.com/jquery-3.6.3.js
  10. // @require https://code.jquery.com/ui/1.13.1/jquery-ui.min.js
  11. // @require https://unpkg.com/dexie/dist/dexie.js
  12. // @require https://unpkg.com/ag-grid-community@29.0.0/dist/ag-grid-community.min.js
  13. // @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.29.4/moment.min.js
  14. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-grid.css
  15. // @resource https://cdn.jsdelivr.net/npm/ag-grid-community/styles/ag-theme-alpine.css
  16. // @icon https://www.google.com/s2/favicons?sz=64&domain=mturk.com
  17. // @license none
  18. // @grant none
  19. // ==/UserScript==
  20.  
  21.  
  22. let timeout = 1850;
  23. let counter = " ";
  24. let retry_count = 0;
  25. let page = "https://worker.mturk.com/qualifications/assigned.json?page_size=100";
  26. let timeoutId;
  27. let scraping = false;
  28. window.onload = function ()
  29. {
  30.  
  31.  
  32.  
  33. let t = document.getElementsByClassName("col-xs-5 col-md-3 text-xs-right p-l-0")[0],
  34. e = t.parentNode,
  35. o = document.createElement("div");
  36. o.style.color = "#fff";
  37. o.style.padding = "10px";
  38. o.style.boxShadow = "2px 2px 4px #888888";
  39. o.style.background = "#33773A";
  40. o.style.opacity = "0.5";
  41. o.style.cursor = "pointer";
  42. o.id = "button";
  43. o.innerHTML = "Scrape&nbspQuals";
  44. e.insertBefore(o, t);
  45.  
  46. let c = document.createElement("div");
  47. c.style.color = "#fff";
  48. c.style.background = "#C78D99";
  49. c.style.padding = "10px";
  50. c.style.boxShadow = "2px 2px 4px #888888";
  51. c.style.background = "#383c44";
  52. c.style.opacity = "0.5";
  53. c.style.cursor = "pointer";
  54. c.innerHTML = "Cancel";
  55. c.id = "cancelButton";
  56. e.insertBefore(c, t);
  57.  
  58.  
  59. let d = document.createElement("div");
  60. d.style.color = "#fff";
  61. d.style.background = "#fc0f03";
  62. d.style.padding = "10px";
  63. d.style.boxShadow = "2px 2px 4px #888888";
  64. d.style.background = "#323552";
  65. d.style.opacity = "0.5";
  66. d.style.cursor = "pointer";
  67. d.innerHTML = "Database";
  68. d.id = "dbButton";
  69. e.insertBefore(d, t);
  70.  
  71. let f = document.createElement("div");
  72. f.style.color = "#fff";
  73. f.style.padding = "10px";
  74. f.style.boxShadow = "2px 2px 4px #888888";
  75. f.style.background = "#33773A";
  76. f.style.opacity = "0.5";
  77. f.id = "progress";
  78. f.innerHTML = "-";
  79. e.insertBefore(f, t);
  80.  
  81. document.getElementById("dbButton").addEventListener("click", function e()
  82. {
  83. window.open("https://worker.mturk.com/qt", "_blank");
  84. });
  85.  
  86.  
  87.  
  88. document.getElementById("cancelButton").addEventListener("click", function e()
  89. {
  90. retry_count=0;
  91. scraping = false;
  92. $("#cancelButton").css('background', '#383c44')
  93. $("#button").css('background', '#33773A')
  94. $("#progress").html("-")
  95. })
  96. document.getElementById("button").addEventListener("click", function e()
  97. {
  98. scraping = true;
  99. $("#button").css('background', '#383c44')
  100. $("#cancelButton").css('background', '#CE3132')
  101.  
  102.  
  103.  
  104. /*init db*/
  105. var db = new Dexie("qualifications");
  106. db.version(1).stores(
  107. {
  108. quals: `
  109. id,
  110. requester,
  111. description,
  112. score,
  113. date,
  114. qualName,
  115. reqURL,
  116. reqQURL,
  117. retURL,
  118. canRetake,
  119. hasTest,
  120. canRequest,
  121. isSystem`
  122. });
  123.  
  124. /*main loop*/
  125. function getAssignedQualifications(nextPageToken = "")
  126. {
  127. if (!scraping)
  128. {
  129. return;
  130. } //cancel trap
  131. counter++
  132. $("#progress").html(counter);
  133. $.getJSON(page)
  134.  
  135. .then(function (data)
  136. {
  137. data.assigned_qualifications.forEach(function (t)
  138. {
  139. db.quals.bulkAdd([
  140. {
  141. id: t.request_qualification_url,
  142. requester: t.creator_name,
  143. description: t.description,
  144. canRetake: t.can_retake_test_or_rerequest,
  145. retry: t.earliest_retriable_time,
  146. score: t.value,
  147. date: t.grant_time,
  148. qualName: t.name,
  149. reqURL: t.creator_url,
  150. retURL: t.retake_test_url,
  151. isSystem: t.is_system_qualification,
  152. canRequest: t.is_requestable,
  153. hasTest: t.has_test
  154. }])
  155. })
  156.  
  157. if (data.next_page_token !== null)
  158. {
  159. timeoutId = setTimeout(() =>
  160. {
  161. page = `https://worker.mturk.com/qualifications/assigned.json?page_size=100&next_token=${encodeURIComponent(data.next_page_token)}`
  162. getAssignedQualifications(data.next_page_token);
  163. }, timeout);
  164.  
  165.  
  166. }
  167. else if (data.next_page_token === null)
  168. {
  169. console.log("Scraping completed");
  170. console.log(counter + "pages");
  171. console.log("Timeout" + timeout);
  172. console.log(retry_count + "timeouts");
  173. $("#cancelButton").css('background', '#383c44');
  174. $("#progress").css('background', '#25dc12');
  175. $("#progress").html('✓');
  176. $("#dbButton").css('background', '#57ab4f');
  177. return;
  178.  
  179.  
  180. } else {
  181. console.log("Timeout or abort. Clock was " + timeout);
  182. $("#progress").css('background', '#FF0000');
  183. $("#progress").html('X');
  184. return;
  185. }
  186.  
  187. })
  188.  
  189. .catch(function (error)
  190. { //handle timeouts
  191. if (error.status === 429 && retry_count < 10)
  192. {
  193. retry_count++;
  194. timeout += 500;
  195. console.log("timed out, incrementing clock to " + timeout + " milliseconds")
  196. setTimeout(() =>
  197. {
  198. getAssignedQualifications(nextPageToken);
  199. }, 10000);
  200. } else if (error.status === 503) {
  201. $("#progress").css('background', '#FFFF00');
  202. $("#progress").html('&#33;');
  203. if (confirm("Mturk responded with 503: Service Unavailable. Retry?")) {
  204. $("#progress").css('background', '#33773A');
  205. setTimeout(() => {
  206. getAssignedQualifications(nextPageToken);
  207. }, 10000);
  208. } else {
  209. $("#progress").css('background', '#FF0000');
  210. $("#progress").html('&#88;');
  211. console.log("User declined retry.");
  212. return;
  213. }
  214. }
  215. else
  216. {
  217.  
  218. return;
  219.  
  220. }
  221. }
  222.  
  223. )
  224.  
  225. }
  226.  
  227. getAssignedQualifications();
  228.  
  229. }
  230.  
  231.  
  232. )
  233.  
  234. };
  235.  
  236. /*ag-grid*/
  237. if (location.href === "https://worker.mturk.com/qt")
  238. {
  239. document.body.innerHTML = "";
  240. let gridDiv = document.createElement("div");
  241. gridDiv.setAttribute("id", "gridDiv");
  242. document.body.appendChild(gridDiv);
  243. document.title = "Qualifications";
  244.  
  245.  
  246. /*init db*/
  247. var db = new Dexie("qualifications");
  248. db.version(1).stores(
  249. {
  250. quals: `
  251. id,
  252. requester,
  253. description,
  254. score,
  255. date,
  256. qualName,
  257. reqURL,
  258. reqQURL,
  259. retURL,
  260. canRetake,
  261. hasTest,
  262. canRequest,
  263. isSystem`
  264. });
  265.  
  266. gridDiv.innerHTML = `
  267. <div id="myGrid" class="ag-theme-alpine">
  268. <style>
  269. .ag-theme-alpine {
  270. --ag-grid-size: 3px;
  271. width: 100%;
  272. height: 100%;
  273. position: absolute;
  274. top: 0;
  275. left: 0;
  276. right: 0;
  277. bottom: 0;
  278. }
  279. </style>
  280. </div>`
  281.  
  282.  
  283. const gridOptions = {
  284. columnDefs: [
  285. {
  286. headerName: 'Mturk Qualification Database and Scraper',
  287. children: [
  288. {
  289. field: "qualName",
  290. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  291. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  292. } },
  293. {
  294. headerName: "Requester",
  295. field: "requester",
  296. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  297. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  298. }
  299.  
  300. }]
  301. },
  302.  
  303.  
  304. {
  305. headerName: ' ',
  306. children: [
  307. {field: "description",
  308. width: 350,
  309. cellRenderer: function(params) {
  310. return '<span title="' + params.value + '">' + params.value + '</span>';
  311. },
  312. comparator: function (valueA, valueB, nodeA, nodeB, isInverted) {
  313. return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
  314. }},
  315. {
  316. headerName: "Value",
  317. field: "score",
  318. width: 100
  319. },
  320. {
  321. headerName: "Date",
  322. field: "date",
  323. width: 100,
  324. valueGetter: function (params)
  325. {
  326. var date = new Date(params.data.date);
  327. return (date.getMonth() + 1) + "/" + date.getDate() + "/" + date.getFullYear();
  328. },
  329. comparator: function (valueA, valueB, nodeA, nodeB, isInverted)
  330. {
  331. var dateA = new Date(valueA);
  332. var dateB = new Date(valueB);
  333. return dateA - dateB;
  334. },
  335. //valueFormatter: function(params) {
  336. // return new Date(params.value).toString().substring(4, 15);
  337. //}
  338. },
  339. {
  340.  
  341. headerName: "Requester ID",
  342. width: 150,
  343. field: "reqURL",
  344. valueFormatter: function (params)
  345. {
  346. var parts = params.value.split("/");
  347. return parts[2];
  348.  
  349. },
  350.  
  351. },
  352. {
  353. headerName: "Qual ID",
  354. field: "id",
  355.  
  356. valueFormatter: function (params)
  357. {
  358. if (!params.value || params.value === '') return '';
  359. var parts = params.value.split("/");
  360. return parts[2];
  361.  
  362. }
  363. }]
  364. },
  365. {
  366. headerName: 'More',
  367. children: [
  368. {
  369. headerName: " ",
  370. field: " ",
  371. width: 100,
  372. columnGroupShow: 'closed'
  373. },
  374. {
  375. headerName: "Retake",
  376. field: "canRetake",
  377. width: 100,
  378. columnGroupShow: 'open',
  379. suppressMenu: true
  380. },
  381. {
  382. headerName: "hasTest",
  383. field: "hasTest",
  384. width: 100,
  385. columnGroupShow: 'open',
  386. suppressMenu: true
  387. },
  388. {
  389. headerName: "canReq",
  390. field: "canRequest",
  391. width: 100,
  392. columnGroupShow: 'open',
  393. suppressMenu: true
  394. },
  395. {
  396. headerName: "System",
  397. field: "isSystem",
  398. width: 100,
  399. columnGroupShow: 'open',
  400. suppressMenu: true
  401. },
  402.  
  403. ]
  404. }
  405. ],
  406. defaultColDef:
  407. {
  408. sortable: true,
  409. filter: true,
  410. editable: true,
  411. resizable: true,
  412. },
  413. rowSelection: 'multiple',
  414. animateRows: true,
  415. rowData: []
  416. };
  417.  
  418. window.addEventListener('load', function ()
  419. {
  420. const gridDiv = document.querySelector('#myGrid');
  421. db.quals.toArray().then(data =>
  422. {
  423.  
  424. var filteredData = data.filter(function (row)
  425. {
  426. return !row.qualName.includes("Exc: [");
  427. });
  428. gridOptions.rowData = filteredData;
  429. new agGrid.Grid(gridDiv, gridOptions);
  430.  
  431. })
  432. })
  433. };