mmmturkeybacon Export Mturk History

Exports your MTurk history as tab-separated values so you can easily save the HITs you've worked on in a spreadsheet.

  1. // ==UserScript==
  2. // @name mmmturkeybacon Export Mturk History
  3. // @version 2.02
  4. // @description Exports Mturk history to tab separated values so you can easily save the HITs you've worked on in a spreadsheet.
  5. // @author mmmturkeybacon
  6. // @namespace http://userscripts.org/users/523367
  7. // @match https://www.mturk.com/mturk/status
  8. // @match https://www.mturk.com/mturk/statusdetail*
  9. // @require https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js
  10. // @grant GM_setClipboard
  11. // ==/UserScript==
  12.  
  // ---- Tunables -----------------------------------------------------------
  // Panel background color, plus the pauses (in milliseconds) used by the
  // polling/request loops defined further down in this script.
  var BACKGROUND_COLOR = "#FFFFFF",
      DATE_LIST_DELAY = 500,
      STATUSDETAIL_DELAY = 500,
      MPRE_DELAY = 2000; // wait before retrying after the "maximum allowed page request rate" page

  // ---- Shared state -------------------------------------------------------
  var global_run = false,                  // true while an export is in progress
      statusdetail_loop_finished = false,  // true once the current date has been fully processed
      page_num = 0,                        // page counter within the current date
      date_header = "",                    // header text scraped from the current date's page
      status_filter = "",                  // query-string suffix taken from the checked radio button
      history = {};                        // per-date totals, keyed by requester id + title + reward

  // ---- Output -------------------------------------------------------------
  var output_tsv = '',
      output_table = document.createElement("TABLE");

  // ---- Control panel ------------------------------------------------------
  // The panel is injected at the very top of the page as an HTML string and
  // then looked up again by id.
  var control_panel_HTML = [
      '<div id="control_panel" style="margin: 0 auto 0 auto;',
      'border-bottom: 1px solid #000000; margin-bottom: 5px; ',
      'background-color: ', BACKGROUND_COLOR, ';"></div>'
  ].join('');

  $('body').prepend(control_panel_HTML);
  var control_panel = document.getElementById("control_panel");

  // Widgets are created up front; most of them are only attached to the
  // panel later, by show_interface() and start_running().
  var date_input = document.createElement("INPUT"),
      radio_all = document.createElement("INPUT"),
      radio_pending = document.createElement("INPUT"),
      radio_rejected = document.createElement("INPUT"),
      big_red_button = document.createElement("BUTTON"),
      progress_report = document.createTextNode("Stopped"),
      p = document.createElement("P"),
      copy_button = document.createElement("BUTTON");

  big_red_button.textContent = "Show Interface";
  big_red_button.onclick = function()
  {
      show_interface();
  };
  control_panel.appendChild(big_red_button);

  copy_button.textContent = "Copy Table To Clipboard";
  copy_button.onclick = function()
  {
      // output_tsv is read at click time, so it always reflects the latest export.
      GM_setClipboard(output_tsv);
  };
  49.  
  /**
   * Show a status message in the progress_report text node.
   *
   * The message is ignored unless an export is currently running
   * (global_run) or the caller insists by passing force == true.
   *
   * @param {string} text   message to display
   * @param {boolean} force display the message even when no export is running
   */
  function set_progress_report(text, force)
  {
      var run_active = (global_run == true);
      var may_write = run_active || (force == true);
      if (!may_write)
      {
          return;
      }
      progress_report.textContent = text;
  }
  57.  
  /**
   * Extend the date links listed on the status page with 15 older dates by
   * constructing additional "encodedDate=mmddyyyy" URLs.
   *
   * The last entry of $date_URLs is taken as the oldest listed date. Older
   * days of the same month are generated by counting the day number down.
   * When that reaches the start of the month, the last URL generated carries
   * day "00"; that page is fetched synchronously and the date in its
   * "HITs You Worked On For ..." header is used as the last day of the
   * previous month (presumably the server resolves day 00 that way — this
   * relies on site behaviour that is not visible in this script).
   *
   * If the extra request fails, is rate limited, or its header cannot be
   * parsed, only the originally listed dates are returned.
   *
   * @param {jQuery} $date_URLs statusdetail links from the status page
   * @returns {Array} the links/URLs in reverse order (the first listed link
   *                  ends up last, so Array.pop() yields it first)
   */
  function last_sixty_days($date_URLs)
  {
      var EXTRA_DAYS = 15;
      var RATE_LIMIT_SELECTOR = 'td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")';
      var HEADER_SELECTOR = 'td[class="white_text_14_bold"]:contains("HITs You Worked On For")';

      // Zero-pad a day/month number to two characters.
      function two_digits(n)
      {
          return ("0" + n).slice(-2);
      }

      // Fallback result: just the dates that were listed on the page.
      function listed_dates_only()
      {
          return $.makeArray($date_URLs).reverse();
      }

      var date_URLs = $.makeArray($date_URLs);
      if (date_URLs.length == 0)
      {
          return date_URLs; // nothing listed, nothing to extend
      }

      var date_URL_parts = date_URLs[date_URLs.length-1].toString().split("encodedDate=");
      if (date_URL_parts.length < 2)
      {
          return listed_dates_only();
      }

      var url_prefix = date_URL_parts[0]+"encodedDate=";
      var last_date = date_URL_parts[1];
      var month = last_date.substring(0,2);
      var day = parseInt(last_date.substring(2,4), 10);
      var year = last_date.substring(4,8);
      if (isNaN(day))
      {
          return listed_dates_only();
      }

      // Count down within the oldest listed month. When i reaches day the
      // generated URL has day "00", which is used below to cross the month
      // boundary.
      var i = 1;
      while (i <= day && i <= EXTRA_DAYS)
      {
          date_URLs.push(url_prefix+month+two_digits(day - i)+year);
          i++;
      }

      if (i <= EXTRA_DAYS)
      {
          // Still short of 15 extra days: look up the previous month's last day.
          var $src = null;
          jQuery.ajax({
              url: date_URLs[date_URLs.length-1],
              success: function(data) {$src = $(data);},
              async: false
          });

          // $src stays null when the request failed (success never ran).
          if ($src == null || $src.find(RATE_LIMIT_SELECTOR).length != 0)
          {
              return listed_dates_only();
          }

          // Strip the 22-character "HITs You Worked On For" prefix from the header.
          last_date = $src.find(HEADER_SELECTOR).clone().children().remove().end().text().trim().substring(22);
          var previous_month_last_day = parseInt(last_date.split(",")[0].substr(-2), 10);
          if (isNaN(previous_month_last_day))
          {
              return listed_dates_only();
          }

          month = (month == "01") ? "12" : two_digits(month - 1);
          day = previous_month_last_day;
          year = last_date.substr(-4);

          // The day-00 URL already stands for `day` itself, so continue from day - 1.
          var j = 1;
          while (i <= EXTRA_DAYS)
          {
              date_URLs.push(url_prefix+month+two_digits(day - j)+year);
              i++;
              j++;
          }
      }
      return date_URLs.reverse();
  }
  106.  
  /**
   * Poll every 500 ms until the current date has been fully processed,
   * then switch the interface to its "Finished" state: the main button reads
   * "Start" again and the copy button is enabled.
   *
   * Does nothing at all (and stops polling) when no export is running.
   */
  function wait_until_stopped()
  {
      if (global_run != true)
      {
          return;
      }

      if (statusdetail_loop_finished != true)
      {
          // Last date is still being processed; check again shortly.
          setTimeout(function(){wait_until_stopped();}, 500);
          return;
      }

      big_red_button.textContent = "Start";
      set_progress_report("Finished", false);
      copy_button.disabled = false;
  }
  123.  
  /**
   * Increment the counter for one feedback text.
   *
   * Uses an own-property check so that feedback texts which happen to match
   * names inherited from Object.prototype (e.g. "toString") are counted
   * correctly. Empty feedback is not recorded.
   *
   * @param {Object} counts   map of feedback text -> number of occurrences
   * @param {string} feedback feedback text of one HIT
   */
  function tally_feedback(counts, feedback)
  {
      if (feedback == "")
      {
          return;
      }
      if (Object.prototype.hasOwnProperty.call(counts, feedback))
      {
          counts[feedback] = counts[feedback]+1;
      }
      else
      {
          counts[feedback] = 1;
      }
  }

  /**
   * Read every HIT row of one statusdetail page and add it to the
   * file-level `history` map.
   *
   * Rows are grouped by requester id + title + reward. For each group the
   * number of submitted / approved / rejected / pending HITs is counted and
   * the requester feedback texts are tallied separately for approved and
   * rejected HITs. Any status other than "Rejected" or "Pending Approval"
   * is counted as approved.
   *
   * The columns are matched up by position: the j-th requester link is
   * assumed to belong to the j-th title/reward/status/feedback cell.
   *
   * @param {jQuery} $src parsed statusdetail page
   */
  function scrape($src)
  {
      var $requester = $src.find('a[href^="/mturk/contact?"]');
      var $title = $src.find('td[class="statusdetailTitleColumnValue"]');
      var $reward = $src.find('td[class="statusdetailAmountColumnValue"]');
      var $approval = $src.find('td[class="statusdetailStatusColumnValue"]');
      var $feedback = $src.find('td[class="statusdetailRequesterFeedbackColumnValue"]');

      for (var j = 0; j < $requester.length; j++)
      {
          var requester_name = $requester.eq(j).text();
          var requester_id = $requester.eq(j).attr("href").split(/requesterId=|&/)[1];
          var title = $title.eq(j).text();
          var reward = $reward.eq(j).text();
          var approval = $approval.eq(j).text();
          // Tabs and line breaks would corrupt the tab separated output.
          var feedback = $feedback.eq(j).text().trim().replace(/\r\n|\n|\r|\t/g, ' ');

          // Declared locally; the original assignment created a global variable.
          var key = requester_id+title+reward;
          if (!Object.prototype.hasOwnProperty.call(history, key))
          {
              // Property order matters: it is the column order of the export.
              history[key] = {
                  submitted: 0,
                  approved: 0,
                  rejected: 0,
                  pending: 0,
                  requester_name: requester_name,
                  requester_id: requester_id,
                  title: title,
                  reward: reward,
                  approved_feedback: {},
                  rejected_feedback: {}
              };
          }

          var entry = history[key];
          entry.submitted = entry.submitted+1;

          if (approval == "Rejected")
          {
              entry.rejected = entry.rejected+1;
              tally_feedback(entry.rejected_feedback, feedback);
          }
          else if (approval == "Pending Approval")
          {
              entry.pending = entry.pending+1;
          }
          else // Approved or Paid
          {
              entry.approved = entry.approved+1;
              tally_feedback(entry.approved_feedback, feedback);
          }
      }
  }
  189.  
  /**
   * Render a feedback tally as "text | (count) text | ...".
   * Texts that occurred more than once are prefixed with their count.
   *
   * @param {Object} feedback_obj map of feedback text -> occurrences
   * @returns {string} the joined feedback texts ("" when there are none)
   */
  function format_feedback(feedback_obj)
  {
      var pieces = [];
      for (var feedback in feedback_obj)
      {
          if (Object.prototype.hasOwnProperty.call(feedback_obj, feedback))
          {
              var count = feedback_obj[feedback];
              pieces.push((count > 1) ? "("+count+") "+feedback : feedback);
          }
      }
      return pieces.join(" | ");
  }

  /**
   * Append the totals collected in `history` for the current date to both
   * outputs: the tab separated text (output_tsv) and the on-page table
   * (output_table). A row holding only the date header precedes the data.
   *
   * When a status filter is active, the counters that the filtered listing
   * cannot provide are shown as "-".
   *
   * All scraped text is written with textContent, never innerHTML: titles,
   * requester names and feedback come from the fetched page and must not be
   * interpreted as markup.
   */
  function write_history_to_output()
  {
      output_tsv += date_header + "\n";
      var header_row = document.createElement('TR');
      var header_cell = document.createElement('TD');
      header_cell.colSpan = 10; // the table has ten columns
      header_cell.textContent = date_header;
      header_row.appendChild(header_cell);
      output_table.appendChild(header_row);

      for (var key in history)
      {
          var row = document.createElement('TR');
          var obj = history[key];
          for (var prop in obj)
          {
              // only the record's own properties are columns
              if (!Object.prototype.hasOwnProperty.call(obj, prop))
              {
                  continue;
              }

              var cell = document.createElement('TD');
              // submitted, approved, rejected, pending, requester_name, requester_id, title, reward, approved_feedback, rejected_feedback
              if (prop == "approved_feedback" || prop == "rejected_feedback")
              {
                  var fb_str = format_feedback(obj[prop]);
                  output_tsv += fb_str+"\t";
                  cell.textContent = fb_str;
              }
              else
              {
                  var hidden_by_filter =
                      (status_filter == "&sortType=Approved" && (prop == "submitted" || prop == "rejected" || prop == "pending")) ||
                      (status_filter == "&sortType=Rejected" && (prop == "submitted" || prop == "approved" || prop == "pending"));
                  var shown = hidden_by_filter ? "-" : obj[prop];
                  output_tsv += shown+"\t";
                  cell.textContent = shown+"\t";
              }
              row.appendChild(cell);
          }
          output_tsv += "\n";
          output_table.appendChild(row);
      }
  }

  /**
   * Process one date: fetch its statusdetail pages one after another,
   * scrape each, and finally write the collected totals to the output.
   *
   * Pages are requested STATUSDETAIL_DELAY ms apart. If the site answers
   * with its "maximum allowed page request rate" error page, the same URL is
   * retried after MPRE_DELAY ms. When there is no "Next" link left the
   * function is called with an empty URL, which triggers the output step and
   * sets statusdetail_loop_finished.
   *
   * Nothing is done when no export is running (global_run is not true).
   *
   * @param {string} next_URL URL of the page to fetch, or "" when the date
   *                          has no further pages
   */
  function statusdetail_loop(next_URL)
  {
      if (global_run != true)
      {
          return;
      }

      if (next_URL.length == 0)
      {
          // All pages of this date have been scraped.
          write_history_to_output();
          statusdetail_loop_finished = true;
          return;
      }

      $.get(next_URL, function(data)
      {
          var $src = $(data);
          var maxpagerate = $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")');
          if (maxpagerate.length != 0)
          {
              // Rate limited: retry the same page after a longer pause.
              setTimeout(function(){statusdetail_loop(next_URL);}, MPRE_DELAY);
              return;
          }

          date_header = $src.find('td[class="white_text_14_bold"]:contains("HITs You Worked On For")').clone().children().remove().end().text().trim();
          page_num++;
          set_progress_report("Processing " + date_header + " page " + page_num, false);
          console.log(progress_report.textContent);
          scrape($src);

          // Declared locally; the original assignment created a global variable.
          var $next_link = $src.find('a[href^="/mturk/statusdetail"]:contains("Next")');
          next_URL = ($next_link.length != 0) ? $next_link.attr("href") : "";
          setTimeout(function(){statusdetail_loop(next_URL);}, STATUSDETAIL_DELAY);
      });
  }
  283.  
  /**
   * Work through the list of dates, one date at a time.
   *
   * The function re-schedules itself every DATE_LIST_DELAY ms. Whenever the
   * previous date has been completed (statusdetail_loop_finished), the next
   * URL is taken from the END of date_URLs, the per-date state is reset and
   * statusdetail_loop() is started for it. Once the list is empty,
   * wait_until_stopped() takes over.
   *
   * Nothing is done when no export is running.
   *
   * @param {Array} date_URLs remaining dates; consumed with pop()
   */
  function date_list_loop(date_URLs)
  {
      if (global_run != true)
      {
          return;
      }

      if (date_URLs.length == 0)
      {
          wait_until_stopped();
          return;
      }

      if (statusdetail_loop_finished == true)
      {
          // Fresh per-date state before starting on the next date.
          page_num = 0;
          statusdetail_loop_finished = false;
          history = {};
          var upcoming = date_URLs.pop();
          statusdetail_loop(upcoming+status_filter);
      }

      // Busy or not, look at the list again after the usual pause.
      setTimeout(function(){date_list_loop(date_URLs);}, DATE_LIST_DELAY);
  }
  311.  
  /**
   * Put the interface back into its idle state and show a final message.
   *
   * @param {string} message text for the progress report (always displayed)
   */
  function stop_running(message)
  {
      global_run = false;
      big_red_button.textContent = "Start";
      set_progress_report(message, true);
      copy_button.disabled = false;
  }

  /**
   * Translate the contents of the date input into the list of dates to export.
   *
   * On the status page the input may be blank (today), start with "t"
   * (today) or "y" (yesterday), be a single date mmddyyyy, a range
   * mmddyyyy:mmddyyyy, "45" or "60". On a statusdetail page the date is
   * taken from the page URL.
   *
   * The list is ordered for consumption with Array.pop().
   *
   * @returns {Array|null} list of links/URLs, or null when the input is invalid
   */
  function requested_date_URLs()
  {
      var page_URL = document.location.href;

      if (page_URL == 'https://www.mturk.com/mturk/status')
      {
          // Looked up before any branching: the blank-input case needs it too.
          var $date_URLs = $('a[href^="/mturk/statusdetail"]');
          var input = date_input.value;

          if (input == '')
          {
              return $.makeArray($date_URLs.eq(0)); // today
          }

          var date_input_parts = input.split(":");
          if (date_input_parts.length == 1)
          {
              var first_letter = input.substring(0,1).toLowerCase();
              if (first_letter == "t")
              {
                  return $.makeArray($date_URLs.eq(0)); // today
              }
              if (first_letter == "y")
              {
                  return $.makeArray($date_URLs.eq(1)); // yesterday
              }

              var $single_date = $('a[href^="/mturk/statusdetail?encodedDate='+input+'"]');
              if ($single_date.length != 0)
              {
                  return $.makeArray($single_date); // single date
              }
              if (input == "45")
              {
                  return $.makeArray($date_URLs).reverse(); // last 45 days
              }
              if (input == "60")
              {
                  return last_sixty_days($date_URLs); // last 60 days
              }
              return null;
          }

          if (date_input_parts.length == 2)
          {
              var $date1_URL = $('a[href^="/mturk/statusdetail?encodedDate='+date_input_parts[0]+'"]');
              var $date2_URL = $('a[href^="/mturk/statusdetail?encodedDate='+date_input_parts[1]+'"]');
              var date1_idx = $date_URLs.index($date1_URL);
              var date2_idx = $date_URLs.index($date2_URL);

              if (date1_idx == -1 || date2_idx == -1)
              {
                  return null;
              }
              if (date1_idx > date2_idx)
              { // ascending
                  return $.makeArray($date_URLs.slice(date2_idx, date1_idx+1));
              }
              // descending
              return $.makeArray($date_URLs.slice(date1_idx, date2_idx+1)).reverse();
          }

          return null; // more than one ':' is not a valid range
      }

      if (page_URL.indexOf('https://www.mturk.com/mturk/statusdetail') > -1)
      {
          var date1 = page_URL.split('encodedDate=')[1].split('&')[0];
          return ['https://www.mturk.com/mturk/statusdetail?encodedDate='+date1];
      }

      return [];
  }

  /**
   * Handler of the main button: starts an export when the button reads
   * "Start", cancels the running export otherwise.
   *
   * Starting resets both outputs, writes the column headers, attaches the
   * copy button and the table to the control panel, resolves the requested
   * dates and hands them to date_list_loop(). Invalid date input stops the
   * run again and leaves "Input Error!" on display.
   */
  function start_running()
  {
      if (big_red_button.textContent != 'Start')
      {
          // The button currently reads "Stop": cancel the running export.
          stop_running("Stopped");
          return;
      }

      global_run = true;
      // Lets date_list_loop() start on the first date right away.
      statusdetail_loop_finished = true;
      big_red_button.textContent = 'Stop';
      set_progress_report("Running", false);
      copy_button.disabled = true;

      var column_names = ['Submitted', 'Approved', 'Rejected', 'Pending', 'Requester Name',
                          'Requester ID', 'Title', 'Reward', 'Approved Feedback', 'Rejected Feedback'];

      output_table.innerHTML = '';
      output_tsv = column_names.join('\t') + '\n';
      control_panel.appendChild(copy_button);
      control_panel.appendChild(output_table);

      var row = document.createElement('TR');
      for (var c = 0; c < column_names.length; c++)
      {
          var cell = document.createElement('TD');
          cell.textContent = column_names[c] + '\t';
          row.appendChild(cell);
      }
      output_table.appendChild(row);

      var date_URLs = requested_date_URLs();
      if (date_URLs == null)
      {
          // Do not start the loop: it would immediately replace the error
          // message with "Finished".
          stop_running("Input Error!");
          return;
      }

      status_filter = $('input[type="radio"][name="mtb_hit_status"]:checked').val();
      date_list_loop(date_URLs);
  }
  448.  
  449. function show_interface()
  450. {
  451. control_panel.removeChild(big_red_button);
  452.  
  453. control_panel.appendChild(document.createTextNode("Please note in version 2.0 and up the order of Pending and Rejected columns has been switched and the requester field has been split into separate Requester Name and Requester ID fields."));
  454. control_panel.appendChild(document.createElement("BR"));
  455. control_panel.appendChild(document.createTextNode("Date: "));
  456.  
  457. if (document.location.href == 'https://www.mturk.com/mturk/status')
  458. {
  459. date_input.value = $('a[href^="/mturk/statusdetail"]:eq(0)').attr('href').split('=')[1];
  460. }
  461. else if (document.location.href.indexOf('https://www.mturk.com/mturk/statusdetail') > -1)
  462. {
  463. date_input.value = 'This date only.';
  464. date_input.disabled = true;
  465. date_input.style.color = '#555555';
  466. date_input.style.background = '#EEEEEE';
  467. }
  468. date_input.onkeydown = function(event){if (event.keyCode == 13){start_running();}};
  469. date_input.title = "Leave blank to get today\'s completed HITs\n"
  470. +"Enter 'yesterday' to get yesterday\'s HITs\n"
  471. +"To get a single day use mmddyyyy\n"
  472. +"To get a range of days use mmddyyyy:mmddyyyy\n"
  473. +"Enter 45 to get the last 45 days\n"
  474. +"Enter 60 to get the last 60 days";
  475. control_panel.appendChild(date_input);
  476.  
  477. radio_all.type = 'radio';
  478. radio_all.name = 'mtb_hit_status';
  479. radio_all.title = 'All';
  480. radio_all.value = '';
  481. radio_all.checked = true;
  482. radio_pending.type = 'radio';
  483. radio_pending.name = 'mtb_hit_status';
  484. radio_pending.title = 'Approved - Pending Payment';
  485. radio_pending.value = '&sortType=Approved';
  486. radio_rejected.type = 'radio';
  487. radio_rejected.name = 'mtb_hit_status';
  488. radio_rejected.title = 'Rejected';
  489. radio_rejected.value = '&sortType=Rejected';
  490. control_panel.appendChild(radio_all);
  491. control_panel.appendChild(radio_pending);
  492. control_panel.appendChild(radio_rejected);
  493. big_red_button.textContent = "Start";
  494. big_red_button.onclick = function(){start_running();};
  495. control_panel.appendChild(big_red_button);
  496. control_panel.appendChild(document.createTextNode(" "));
  497. control_panel.appendChild(progress_report);
  498. control_panel.appendChild(p);
  499. }
  500.  
  // Build the full control panel immediately when the script runs, so the
  // initial "Show Interface" button is replaced right away.
  501. show_interface();