HIT Scraper WITH EXPORT

Snag HITs.

目前为 2014-06-04 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name HIT Scraper WITH EXPORT
  3. // @author Kerek and TJ
  4. // @description Snag HITs.
  5. // Based in part on code from mmmturkeybacon Export Mturk History and mmmturkeybacon Color Coded Search with Checkpoints
  6. // @namespace http://userscripts.org/users/536998
  7. // @match https://www.mturk.com/mturk/findhits?match=true#hit_scraper*
  8. // @match https://www.mturk.com/mturk/findhits?match=true?hit_scraper*
  9. // @version 1.3.0.7
  10. // @grant GM_xmlhttpRequest
  11. // @grant GM_getValue
  12. // @grant GM_setValue
  13. // @grant GM_deleteValue
  14. // @require http://code.jquery.com/jquery-latest.min.js
  15. // ==/UserScript==
  16.  
  // ---- User-tunable settings ----
  17. //Alter the requester ignore list as you desire, following the format below. Entries must be written in lowercase (e.g., "crowdsource"): requester names are lowercased before they are compared against this list.
  18. var ignore_list = ["oscar smith", "jonathon weber", "jerry torres", "crowdsource", "we-pay-you-fast", "turk experiment", "jon brelig"];
  19.  
  20. //When true, extra result pages are searched to make up for HITs skipped because their requester is ignored. Change to true for this behavior.
  21. var correct_for_skips = false;
  22.  
  23. //Weights of the four TO ratings used for row coloring. By default pay counts twice as much as fairness, and communication and speed are not counted.
  24. var COMM_WEIGHT = 0;
  25. var PAY_WEIGHT = 10;
  26. var FAIR_WEIGHT = 5;
  27. var FAST_WEIGHT = 0;
  28.  
  29. //Display your hitdb records if applicable (adds the "R" and "T" columns to each result row)
  30. var check_hitDB = true;
  31.  
  32. //Default text size for result rows
  33. var default_text_size=11;
  34.  
  35.  
  36.  
  // Namespace for the HIT database (IndexedDB "HITDB") lookups performed by this script.
  var HITStorage = {};

  // Resolve the IndexedDB entry points, falling back to vendor-prefixed names.
  var indexedDB = window.indexedDB || window.webkitIndexedDB || window.mozIndexedDB;
  window.IDBTransaction = window.IDBTransaction || window.webkitIDBTransaction || window.mozIDBTransaction;
  window.IDBKeyRange = window.IDBKeyRange || window.webkitIDBKeyRange || window.mozIDBKeyRange;

  // Transaction mode strings passed to db.transaction().
  HITStorage.IDBTransactionModes = { "READ_ONLY": "readonly", "READ_WRITE": "readwrite", "VERSION_CHANGE": "versionchange" };
  var IDBKeyRange = window.IDBKeyRange;

  // Container for the database helpers; db holds the most recently opened connection.
  // (The original assigned this object twice in a row; once is enough.)
  HITStorage.indexedDB = {};
  HITStorage.indexedDB.db = null;

  // Shared error handler: just logs whatever event or error it is given.
  HITStorage.indexedDB.onerror = function(e) {
    console.log(e);
  };

  // Version number passed to indexedDB.open("HITDB", v).
  var v = 4;
  54.  
  /**
   * Looks up `value` in the given index of the "HIT" object store of the
   * "HITDB" database and records whether a match exists on history[button][field].
   *
   * This script only reads the database, so it must never create or upgrade it:
   * if opening would trigger an upgrade (database missing or older than `v`),
   * the upgrade transaction is aborted and the open request fails via onerror.
   *
   * @param {string} indexName  name of the index on the "HIT" store to query
   * @param {*}      value      key to look up in that index
   * @param {string} button     key of the entry in `history` to update
   * @param {string} field      property of that entry that receives true/false
   */
  HITStorage.indexedDB.checkIndex = function(indexName, value, button, field) {
    var request = indexedDB.open("HITDB", v);

    request.onupgradeneeded = function(e) {
      // Abort instead of leaving behind an empty/upgraded database without stores.
      e.target.transaction.abort();
    };

    request.onsuccess = function(e) {
      var db = e.target.result;
      HITStorage.indexedDB.db = db;
      if (!db.objectStoreNames.contains("HIT"))
      {
        db.close();
        return;
      }

      var lookup;
      try
      {
        var trans = db.transaction(["HIT"], HITStorage.IDBTransactionModes.READ_ONLY);
        lookup = trans.objectStore("HIT").index(indexName).get(value);
      }
      catch (err)
      {
        // e.g. the index does not exist; make sure the connection is released.
        HITStorage.indexedDB.onerror(err);
        db.close();
        return;
      }

      lookup.onsuccess = function(event) {
        var found = event.target.result !== undefined;
        console.log(value + (found ? ' found' : ' not found'));
        // The history entry may have been pruned while the lookup was in flight.
        if (history[button])
        {
          history[button][field] = found;
        }
        db.close();
      };
      lookup.onerror = function(event) {
        HITStorage.indexedDB.onerror(event);
        db.close();
      };
    };

    request.onerror = HITStorage.indexedDB.onerror;
  };

  /**
   * Sets history[button].titledb to true/false depending on whether a HIT with
   * this title exists in the HIT database ("title" index).
   */
  HITStorage.indexedDB.checkTitle = function(title,button) {
    HITStorage.indexedDB.checkIndex("title", title, button, "titledb");
  };

  /**
   * Sets history[button].reqdb to true/false depending on whether a HIT from
   * this requester id exists in the HIT database ("requesterId" index).
   */
  HITStorage.indexedDB.checkRequester = function(id,button) {
    HITStorage.indexedDB.checkIndex("requesterId", id, button, "reqdb");
  };
  117.  
  // ---- Search state and defaults (several are overwritten from the input fields by start_running) ----
  118. var PAGES_TO_SCRAPE = 3;
  119. var MINIMUM_HITS = 100;
  // Auto-refresh delay in seconds; 0 means a single search with no refresh.
  120. var SEARCH_REFRESH=0;
  121. var URL_BASE = "/mturk/searchbar?searchWords=&selectedSearchType=hitgroups";
  122. var initial_url = URL_BASE;
  123. var TO_REQ_URL = "http://turkopticon.ucsd.edu/reports?id=";
  // Keys (into `history`) of the HITs seen during the current search.
  124. var found_key_list=[];
  125. var last_clear_time = new Date().getTime();
  126. var searched_once = false;
  // Both values are in seconds; they are multiplied by 1000 where compared with timestamps.
  127. var save_new_results_time = 120;
  128. var save_results_time = 3600;
  // 1 while sorting by "Most Available" (batch search), otherwise 0.
  129. var default_type = 0;
  130. var cur_loc = window.location.href;
  // ---- Input elements; they are laid out on the page by show_interface ----
  131. var time_input = document.createElement("INPUT");
  132. time_input.value = 0;
  133. var page_input = document.createElement("INPUT");
  134. page_input.value = 3;
  135. var min_input = document.createElement("INPUT");
  136. var new_time_display_input = document.createElement("INPUT");
  137. new_time_display_input.value = 300;
  138. var reward_input = document.createElement("INPUT");
  139. var qual_input = document.createElement("INPUT");
  140. qual_input.type = "checkbox";
  141. qual_input.checked = true;
  142. var masters_input = document.createElement("INPUT");
  143. masters_input.type = "checkbox";
  // Three radio buttons sharing the name "sort_type"; "latest" is preselected.
  144. var sort_input1 = document.createElement("INPUT");
  145. sort_input1.type = "radio";
  146. sort_input1.name = "sort_type";
  147. sort_input1.value = "latest";
  148. sort_input1.checked = true;
  149. var sort_input2 = document.createElement("INPUT");
  150. sort_input2.type = "radio";
  151. sort_input2.name = "sort_type";
  152. sort_input2.value = "most";
  153. var sort_input3 = document.createElement("INPUT");
  154. sort_input3.type = "radio";
  155. sort_input3.name = "sort_type";
  156. sort_input3.value = "amount";
  157.  
  158. var search_input = document.createElement("INPUT");
  159.  
  160. var LINK_BASE = "https://www.mturk.com";
  161. var BACKGROUND_COLOR = "rgb(19, 19, 19)";
  // Delays in milliseconds: between result-page requests, and before retrying after a
  // "maximum allowed page request rate" error page.
  162. var STATUSDETAIL_DELAY = 250;
  163. var MPRE_DELAY = 3000;
  164.  
  165. var next_page = 1;
  166.  
  // ---- Colors used for result rows and cells ----
  167. var GREEN = '#66CC66'; // > 4
  168. var LIGHTGREEN = '#ADFF2F'; // > 3 GREEN YELLOW
  169. var YELLOW = '#FFD700';
  170. var ORANGE = '#FF9900'; // > 2
  171. var RED = '#FF3030'; // <= 2
  172. var BLUE = '#C0D9D9'; // no TO
  173. var GREY = 'lightGrey';
  174. var BROWN = '#94704D';
  175. var DARKGREY = '#9F9F9F';
  176. $('body').css('background', BACKGROUND_COLOR);
  177.  
  178. var API_PROXY_BASE = 'https://api.turkopticon.istrack.in/';
  179. var API_MULTI_ATTRS_URL = API_PROXY_BASE + 'multi-attrs.php?ids=';
  180. var REVIEWS_BASE = 'http://turkopticon.ucsd.edu/';
  181.  
  // ---- Replace the page content with the scraper's control panel ----
  182. var control_panel_HTML = '<div id="control_panel" style="margin: 0 auto 0 auto;' +
  183. 'border-bottom: 1px solid #000000; margin-bottom: 5px; ' +
  184. 'background-color: ' + BACKGROUND_COLOR + ';"></div>';
  185. $('body > :not(#control_panel)').hide(); //hide all nodes directly under the body
  186. $('body').prepend(control_panel_HTML);
  187.  
  188. var control_panel = document.getElementById("control_panel");
  189. var big_red_button = document.createElement("BUTTON");
  190. var progress_report = document.createTextNode("Stopped");
  191. var text_area = document.createElement("TABLE");
  192. big_red_button.textContent = "Show Interface";
  193. big_red_button.onclick = function(){show_interface();};
  194. control_panel.appendChild(big_red_button);
  195.  
  // The interface is built immediately; show_interface removes the button appended above
  // and re-adds it as the "Start" button.
  196. show_interface();
  197.  
  // ---- Run state shared by the search loop ----
  198. var global_run = false;
  199. var statusdetail_loop_finished = false;
  200. var date_header = "";
  // Map from HIT key (requester name + title + reward + group id, built in scrape) to its record.
  201. var history = {};
  // Timer id of the pending auto-refresh, cleared when the user stops the search.
  202. var wait_loop;
  203.  
  /**
   * Replaces the status text shown next to the Start/Stop button.
   * The text is only changed while a search is running, unless `force` is true.
   *
   * @param {string}  text   new status text
   * @param {boolean} force  update even when no search is running
   */
  function set_progress_report(text, force)
  {
    var may_update = (global_run == true) || (force == true);
    if (!may_update)
    {
      return;
    }
    progress_report.textContent = text;
  }
  211.  
  /**
   * @returns {string} the status text currently shown next to the Start/Stop button
   */
  function get_progress_report()
  {
    var current_text = progress_report.textContent;
    return current_text;
  }
  216.  
  /**
   * While a search is running, polls every 500 ms until the search loop reports
   * that it has finished, then resets the button to "Start" and reports "Finished".
   * Does nothing when no search is running.
   */
  function wait_until_stopped()
  {
    if (global_run != true)
    {
      return;
    }
    if (statusdetail_loop_finished != true)
    {
      // Not done yet: check again in half a second.
      setTimeout(function(){wait_until_stopped();}, 500);
      return;
    }
    big_red_button.textContent = "Start";
    set_progress_report("Finished", false);
  }
  232.  
  /**
   * Shows a per-second countdown ("Searching again in N seconds") in the status
   * text while a search is running. An existing countdown in the status text is
   * updated in place; otherwise the countdown is appended to the current text.
   * Re-schedules itself once per second until the remaining time reaches 1.
   *
   * @param {number} wait_time  seconds remaining until the next search
   */
  function display_wait_time(wait_time)
  {
    if (global_run != true)
    {
      return;
    }
    var countdown = "Searching again in " + wait_time + " seconds";
    var shown_text = get_progress_report();
    var has_countdown = shown_text.indexOf("Searching again in") !== -1;
    var next_text = has_countdown
      ? shown_text.replace(/Searching again in \d+ seconds/ , countdown)
      : shown_text + " " + countdown + ".";
    set_progress_report(next_text, false);
    if (wait_time > 1)
    {
      setTimeout(function(){display_wait_time(wait_time-1);}, 1000);
    }
  }
  248.  
  /**
   * Debug helper: logs every element of an array-like value on a single line,
   * formatted as "id <index> is <value> " for each element.
   *
   * @param {Array} ar  array (or array-like with length and indices) to log
   */
  function dispArr(ar)
  {
    var pieces = [];
    var idx = 0;
    while (idx < ar.length)
    {
      pieces.push("id " + idx + " is " + ar[idx] + " ");
      idx++;
    }
    console.log(pieces.join(""));
  }
  258.  
  /**
   * Extracts every HIT group from one search-results page and merges it into
   * `history`, keyed by requester name + title + reward + group id.
   *
   * New HITs get a full record and are checked against the HIT database
   * (HITStorage.indexedDB.checkRequester / checkTitle). HITs already in
   * `history` only get their age (`new_result`), `found_this_time` and `hits`
   * refreshed. Every key seen is also appended to `found_key_list`.
   *
   * @param {jQuery} $src  jQuery-wrapped markup of one search-results page
   */
  function scrape($src)
  {
    var $requester = $src.find('a[href^="/mturk/searchbar?selectedSearchType=hitgroups&requester"]');
    var $title = $src.find('a[class="capsulelink"]');
    var $reward = $src.find('span[class="reward"]');
    var $qualified = $src.find('a[href^="/mturk/notqualified?"]');
    var $times = $src.find('a[id^="duration_to_complete"]');
    var $descriptions = $src.find('a[id^="description"]');
    var $quals = $src.find('a[id^="qualificationsRequired"]');

    // Ids taken from the "not qualified" links on this page.
    var not_qualified_group_IDs = [];
    $qualified.each(function(){
      var groupy = $(this).attr('href');
      groupy = groupy.replace("/mturk/notqualified?hitId=","");
      not_qualified_group_IDs.push(groupy);
    });

    // Ids of all HITs on the page in document order, taken from either link type
    // and de-duplicated. The loop below pairs the j-th requester link with listy[j].
    var $mixed = $src.find('a[href^="/mturk/preview?"],a[href^="/mturk/notqualified?"]');
    var listy = [];
    $mixed.each(function(){
      var groupy = $(this).attr('href');
      groupy = groupy.replace("/mturk/notqualified?hitId=","");
      groupy = groupy.replace("/mturk/preview?groupId=","");
      listy.push(groupy);
    });
    listy = listy.filter(function(elem, pos) {
      return listy.indexOf(elem) == pos;
    });

    for (var j = 0; j < $requester.length; j++)
    {
      var $hits = $requester.eq(j).parent().parent().parent().parent().parent().parent().find('td[class="capsule_field_text"]');
      var requester_name = $requester.eq(j).text().trim();
      var requester_link = $requester.eq(j).attr('href');
      var group_ID = listy[j];
      var preview_link = "/mturk/preview?groupId=" + group_ID;
      var title = $title.eq(j).text().trim();
      var reward = $reward.eq(j).text().trim();
      var hits = $hits.eq(4).text().trim();
      // The value cell is two siblings after the label cell.
      var time = $times.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
      var description = $descriptions.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
      var requester_id = requester_link.replace('/mturk/searchbar?selectedSearchType=hitgroups&requesterId=','');
      var accept_link = preview_link.replace('preview','previewandaccept');

      /*HIT SCRAPER ADDITION*/
      // Qualification rows; row 0 is skipped. Masters qualifications are highlighted
      // with vBulletin markup for the export feature.
      var qElements = $quals.eq(j).parent().parent().parent().find('tr');
      var qualifications = [];
      for (var i = 1; i < qElements.length; i++) {
        var qual_text = qElements[i].childNodes[1].textContent.trim().replace(/\s+/g, ' ');
        qualifications.push(qual_text.indexOf("Masters") != -1 ? "[color=red][b]"+qual_text+"[/b][/color]" : qual_text);
      }
      var qualList = qualifications.join(', ') || "None";

      // Declared locally: the original assigned to an undeclared (implicit global) `key`.
      var key = requester_name+title+reward+group_ID;
      found_key_list.push(key);

      if (history[key] == undefined)
      {
        history[key] = {
          requester: requester_name,
          title: title,
          description: description,
          reward: reward,
          hits: hits,
          req_link: requester_link,
          quals: qualList,
          prev_link: preview_link,
          rid: requester_id,
          acc_link: accept_link,
          new_result: "",
          qualified: "",
          found_this_time: "",
          initial_time: "",
          reqdb: "",
          titledb: "",
          time: time
        };
        // Asynchronous lookups; they fill in reqdb/titledb on the record later.
        HITStorage.indexedDB.checkRequester(requester_id,key);
        HITStorage.indexedDB.checkTitle(title,key);
        if (searched_once)
        {
          history[key].initial_time = new Date().getTime();
          history[key].new_result = 0;
        }
        else
        {
          // On the very first search nothing is treated as new: back-date the HIT
          // by the full highlight period.
          history[key].initial_time = new Date().getTime()-1000*save_new_results_time;
          history[key].new_result = 1000*save_new_results_time;
        }
        history[key].qualified = (not_qualified_group_IDs.indexOf(group_ID) === -1);
        history[key].found_this_time = true;
      }
      else
      {
        history[key].new_result = new Date().getTime() - history[key].initial_time;
        history[key].found_this_time = true;
        history[key].hits = hits;
      }
    }
  }
  356.  
  /**
   * Escapes text for safe insertion into HTML markup (element content or a
   * quoted attribute value).
   */
  function statusdetail_escape_html(text)
  {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /**
   * Handles one fetched search-results page: scrapes it, decides whether another
   * page should be fetched, and schedules the next statusdetail_loop step.
   *
   * @param {jQuery} $src         jQuery-wrapped markup of the fetched page
   * @param {string} current_URL  URL the page was fetched from (retried on rate limiting)
   */
  function statusdetail_process_page($src, current_URL)
  {
    var maxpagerate = $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")');
    if (maxpagerate.length != 0)
    {
      // Rate limited: retry the same URL after a longer delay.
      console.log("MPRE");
      setTimeout(function(){statusdetail_loop(current_URL);}, MPRE_DELAY);
      return;
    }

    set_progress_report("Processing page " + next_page, false);
    scrape($src);
    var $next_link = $src.find('a[href^="/mturk/viewsearchbar"]:contains("Next")');
    var following_URL = ($next_link.length != 0) ? $next_link.attr("href") : "";
    next_page++;

    var stop_paging = false;
    var j;
    if (default_type == 1)
    {
      // Batch search: stop as soon as any HIT seen so far has fewer HITs than the minimum.
      var hmin = MINIMUM_HITS+1;
      for (j = 0; j < found_key_list.length; j++)
      {
        if (history[found_key_list[j]].hits < hmin)
        {
          stop_paging = true;
          break;
        }
      }
    }
    else if (next_page > PAGES_TO_SCRAPE && correct_for_skips)
    {
      // Allow one extra page per 10 ignored HITs (rounded up when the remainder exceeds 6).
      var skipped_hits = 0;
      for (j = 0; j < found_key_list.length; j++)
      {
        var seen = history[found_key_list[j]];
        if (! ignore_check(seen.requester,seen.title))
          skipped_hits++;
      }
      var added_pages = Math.floor(skipped_hits/10);
      if (skipped_hits%10 >6)
        added_pages++;
      if (next_page > PAGES_TO_SCRAPE + added_pages)
        stop_paging = true;
    }
    else if (next_page > PAGES_TO_SCRAPE)
    {
      stop_paging = true;
    }

    if (stop_paging)
    {
      following_URL = "";
      next_page = -1;
    }
    setTimeout(function(){statusdetail_loop(following_URL);}, STATUSDETAIL_DELAY);
  }

  /**
   * Colors the given result rows from the Turkopticon API response.
   *
   * @param {Object} rdata  parsed API response, keyed by requester id
   * @param {Array}  rids   requester ids, one per displayed row
   * @param {Array}  rows   the table row elements, parallel to `rids`
   */
  function statusdetail_apply_ratings(rdata, rids, rows)
  {
    for (var k = 0; k < rids.length; k++)
    {
      var row = rows[k];
      var entry = rdata[rids[k]];
      row.style.backgroundColor = GREY;
      if (!entry || !entry.attrs)
      {
        row.cells[4].innerHTML = "No data";
        continue;
      }

      var pay = entry.attrs.pay;
      var reviews = entry.reviews;
      var ratings = [
        [COMM_WEIGHT, entry.attrs.comm],
        [PAY_WEIGHT, pay],
        [FAIR_WEIGHT, entry.attrs.fair],
        [FAST_WEIGHT, entry.attrs.fast]
      ];
      // Weighted average over the ratings that are present (> 0).
      var sum = 0;
      var divisor = 0;
      for (var r = 0; r < ratings.length; r++)
      {
        if (ratings[r][1] > 0)
        {
          sum += ratings[r][0]*ratings[r][1];
          divisor += ratings[r][0];
        }
      }
      var average = (divisor > 0) ? sum/divisor : 0;

      row.cells[4].innerHTML = "<a href='"+ TO_REQ_URL+rids[k] +"' target='_blank'>" + pay + "</a>";
      // Only color requesters with more than 4 reviews.
      if (reviews > 4)
      {
        if (average > 4.49)
          row.style.backgroundColor = GREEN;
        else if (average > 3.49)
          row.style.backgroundColor = LIGHTGREEN;
        else if (average > 1.99)
          row.style.backgroundColor = ORANGE;
        else if (average > 0)
          row.style.backgroundColor = RED;
      }
    }
  }

  /**
   * Rebuilds the results table from the HITs found in this search, requests
   * Turkopticon ratings for the displayed requesters, and either schedules the
   * next auto-refresh or stops the run.
   */
  function statusdetail_render_results()
  {
    searched_once = true;
    var found_hits = found_key_list.length;
    var shown_hits = 0;
    var new_hits = 0;
    var rids = [];
    var rows = [];
    var i;
    var j;

    // Remove every row except the header row.
    for (i = text_area.rows.length - 1; i > 0; i--)
      text_area.deleteRow(i);

    for (j = 0; j < found_key_list.length; j++)
    {
      var obj = history[found_key_list[j]];
      if (!(ignore_check(obj.requester,obj.title) && obj.found_this_time))
        continue;
      ++shown_hits;

      // Requester name and title come from the scraped page; escape them before
      // they are inserted as HTML.
      var col_heads = [
        "<a href='"+ LINK_BASE+obj.req_link +"' target='_blank'>" + statusdetail_escape_html(obj.requester) + "</a>",
        "<a href='"+ LINK_BASE+obj.prev_link +"' target='_blank'>" + statusdetail_escape_html(obj.title) + "</a>",
        obj.reward,
        obj.hits,
        "TO down", // placeholder, replaced when the Turkopticon response arrives
        "<a href='"+ LINK_BASE+obj.acc_link +"' target='_blank'>Accept</a>"
      ];
      if (check_hitDB)
      {
        col_heads.push("R");
        col_heads.push("T");
      }
      if (!obj.qualified)
      {
        col_heads.push("Not Qualified");
      }

      var row = text_area.insertRow(text_area.rows.length);
      rids.push(obj.rid);
      rows.push(row);

      for (i = 0; i < col_heads.length; i++)
      {
        var this_cell = row.insertCell(i);
        this_cell.style.fontSize = default_text_size;
        this_cell.innerHTML = col_heads[i];
        if (i > 1)
          this_cell.style.textAlign = 'center';
        if (check_hitDB)
        {
          // Columns 6/7 are the "R"/"T" hit-database flags, column 8 is "Not Qualified".
          // reqdb/titledb are filled in asynchronously, so a lookup that has not
          // completed yet shows as red.
          if (i == 6)
            this_cell.style.backgroundColor = obj.reqdb ? GREEN : RED;
          else if (i == 7)
            this_cell.style.backgroundColor = obj.titledb ? GREEN : RED;
          else if (i == 8)
            this_cell.style.backgroundColor = DARKGREY;
        }
        else if (i == 6)
        {
          this_cell.style.backgroundColor = DARKGREY;
        }
      }

      // Highlight HITs first seen within the "new HIT" period.
      if (obj.new_result < 1000*save_new_results_time)
      {
        new_hits++;
        for (i = 0; i < col_heads.length; i++)
        {
          row.cells[i].style.fontSize = default_text_size + 1;
          row.cells[i].style.fontWeight = "bold";
        }
      }

      var button = document.createElement('button'); //HIT SCRAPER ADDITION
      button.textContent = 'vB';
      button.title = 'Export this HIT description as vBulletin formatted text';
      button.style.height = '14px';
      button.style.width = '30px';
      button.style.fontSize = '8px';
      button.style.border = '1px solid';
      button.style.padding = '0px';
      button.style.backgroundColor = 'transparent';
      // Bind this row's record at creation time.
      button.addEventListener("click", (function (obj,j) { return function() {export_func_deleg(obj,j);}})(obj,j));
      row.cells[1].appendChild(button);
    }

    set_progress_report("Scrape complete. " + shown_hits + " HITs found (" + new_hits + " new results). " + (found_hits - shown_hits) + " HITs ignored.", false);

    // Rows are grey until (and unless) ratings arrive.
    for (i = 0; i < rows.length; i++)
      rows[i].style.backgroundColor = GREY;

    // Nothing to look up when no HITs are displayed.
    if (rids.length > 0)
    {
      GM_xmlhttpRequest(
      {
        method: "GET",
        url: API_MULTI_ATTRS_URL + rids.join(','),
        onload: function (results)
        {
          var rdata;
          try
          {
            rdata = $.parseJSON(results.responseText);
          }
          catch (err)
          {
            // Unparseable response: leave the "TO down" placeholders in place.
            console.log(err);
            return;
          }
          if (!rdata)
            return;
          // Uses the row elements captured above, so a table rebuilt by a later
          // search is not colored with stale data.
          statusdetail_apply_ratings(rdata, rids, rows);
        },
        onerror: function (err)
        {
          console.log(err);
        }
      });
    }

    statusdetail_loop_finished = true;
    if (SEARCH_REFRESH>0)
    {
      wait_loop = setTimeout(function(){if (global_run) start_it();}, 1000*SEARCH_REFRESH);
      display_wait_time(SEARCH_REFRESH);
    }
    else
    {
      global_run = false;
      big_red_button.textContent = "Start";
    }
  }

  /**
   * One step of the search loop. While a search is running: fetches and processes
   * `next_URL` if it is non-empty, otherwise renders the collected results.
   * Does nothing when the search has been stopped.
   *
   * @param {string} next_URL  URL of the next results page, or "" when paging is done
   */
  function statusdetail_loop(next_URL)
  {
    if (global_run != true)
    {
      return;
    }
    if (next_URL.length != 0)
    {
      $.get(next_URL, function(data)
      {
        statusdetail_process_page($(data), next_URL);
      });
      return;
    }
    statusdetail_render_results();
  }
  596.  
  /**
   * Tells whether a HIT should be shown, based on the requester ignore list.
   * The requester name is lowercased before it is looked up in `ignore_list`.
   *
   * @param {string} r  requester name
   * @param {string} t  HIT title (currently not used)
   * @returns {boolean} true when the requester is NOT on the ignore list
   */
  function ignore_check(r,t){
    var is_ignored = ignore_list.indexOf(r.toLowerCase()) != -1;
    return !is_ignored;
  }
  604.  
  /**
   * Click/Enter handler for the Start/Stop button.
   *
   * When the button reads "Start": reads the settings from the input fields,
   * builds the search URL in `initial_url` and starts the search via start_it().
   * Otherwise: stops the running search, cancels the pending auto-refresh and
   * resets the button and status text.
   */
  function start_running()
  {
    if (big_red_button.textContent != "Start")
    {
      global_run = false;
      clearTimeout(wait_loop);
      big_red_button.textContent = "Start";
      set_progress_report("Stopped", true);
      return;
    }

    // Returns the numeric value of an input, or `fallback` when the field holds no
    // digits or is not a valid number (e.g. "3p"). Without the validity check a NaN
    // page count would never satisfy `next_page > PAGES_TO_SCRAPE`, so paging would
    // not stop at the requested limit.
    function read_number(input, fallback)
    {
      var raw = input.value;
      var parsed = Number(raw);
      if (raw.replace(/[^0-9]+/g,"") == "" || !isFinite(parsed))
      {
        return fallback;
      }
      return parsed;
    }

    global_run = true;
    initial_url = URL_BASE;
    if (search_input.value.length>0)
    {
      // Encode the search words so spaces, '&', '#', etc. cannot corrupt the query string.
      initial_url = initial_url.replace("searchWords=", "searchWords=" + encodeURIComponent(search_input.value));
    }

    SEARCH_REFRESH = read_number(time_input, SEARCH_REFRESH);
    PAGES_TO_SCRAPE = read_number(page_input, PAGES_TO_SCRAPE);
    MINIMUM_HITS = read_number(min_input, MINIMUM_HITS);
    save_new_results_time = read_number(new_time_display_input, save_new_results_time);

    if (reward_input.value.replace(/[^0-9]+/g,"") != "")
    {
      initial_url += "&minReward=" + encodeURIComponent(reward_input.value.trim());
    }
    else
    {
      initial_url += "&minReward=0.00";
    }

    initial_url += qual_input.checked ? "&qualifiedFor=on" : "&qualifiedFor=off";
    if (masters_input.checked)
    {
      initial_url += "&requiresMasterQual=on";
    }

    // default_type 1 selects the batch-search stop rule in the search loop.
    if (sort_input1.checked)
    {
      initial_url+= "&sortType=LastUpdatedTime%3A1";
      default_type = 0;
    }
    else if (sort_input2.checked)
    {
      initial_url+= "&sortType=NumHITs%3A1";
      default_type = 1;
    }
    else if (sort_input3.checked)
    {
      initial_url+= "&sortType=Reward%3A1";
      default_type = 0;
    }
    initial_url+="&pageNumber=1&searchSpec=HITGroupSearch";
    start_it();
  }
  677.  
  /**
   * Starts one search pass: resets the per-search state, prunes `history` when
   * the last pruning is old enough, and kicks off the search loop at `initial_url`.
   *
   * Pruning keeps only records whose age (`new_result`) is below
   * save_results_time seconds. Kept records that were found in the previous
   * search are marked as not found yet; if such a record is past the "new HIT"
   * period, its first-seen time is reset to exactly that period ago.
   */
  function start_it()
  {
    statusdetail_loop_finished = false;
    big_red_button.textContent = "Stop";
    found_key_list = [];

    var now = new Date().getTime();
    var prune_due = (now - last_clear_time) > save_results_time*666;
    if (prune_due)
    {
      var previous = history;
      var kept = {};
      for (var hit_key in previous)
      {
        var record = previous[hit_key];
        if (!(record.new_result < save_results_time*1000))
        {
          continue;
        }
        kept[hit_key] = record;
        if (!record.found_this_time)
        {
          continue;
        }
        record.found_this_time = false;
        if (record.new_result > save_new_results_time*1000)
        {
          record.initial_time = now - 1000*save_new_results_time;
        }
      }
      history = kept;
      last_clear_time = now;
    }

    next_page = 1;
    statusdetail_loop(initial_url);
  }
  707.  
  708.  
  /**
   * Builds the scraper's user interface inside the control panel: the settings
   * inputs, the Start button, the status text and the (initially empty) results
   * table with its header row.
   */
  function show_interface()
  {
    // Small local helpers for the repetitive DOM building below.
    function add_text(text)
    {
      control_panel.appendChild(document.createTextNode(text));
    }
    function add_paragraph()
    {
      control_panel.appendChild(document.createElement("P"));
    }
    // Pressing Enter in a settings field starts/stops the search.
    function start_on_enter(event)
    {
      if (event.keyCode == 13)
      {
        start_running();
      }
    }
    function add_setting(label, input, title, size)
    {
      add_text(label);
      input.onkeydown = start_on_enter;
      input.title = title;
      input.size = size;
      control_panel.appendChild(input);
    }

    control_panel.style.color = BROWN;
    control_panel.style.fontSize = 14;
    control_panel.removeChild(big_red_button);

    add_setting("Auto-refresh delay: ", time_input,
      "Enter search refresh delay in seconds\n" + "Enter 0 for no auto-refresh\n" + "Default is 0 (no auto-refresh)", 3);
    add_text(" ");
    // The tooltip used to say "Default is 4", but the field is initialised to 3.
    add_setting("Pages to scrape: ", page_input,
      "Enter number of pages to scrape\n" + "Default is 3", 3);
    add_text(" ");
    add_setting("Minimum batch size: ", min_input,
      "Enter minimum HITs for batch search\n" + "Default is 100", 3);
    add_text(" ");
    add_setting("New HIT highlighting: ", new_time_display_input,
      "Enter time (in seconds) to keep new HITs highlighted\n" + "Default is 300 (5 minutes)", 6);
    add_paragraph();

    add_text("Minimum reward: ");
    reward_input.size = 6;
    control_panel.appendChild(reward_input);
    add_text(" ");

    add_text("Qualified");
    control_panel.appendChild(qual_input);
    add_text(" ");
    add_text("Masters");
    control_panel.appendChild(masters_input);
    add_text(" ");
    add_text("Sort types: ");
    control_panel.appendChild(sort_input1);
    add_text("Latest");
    control_panel.appendChild(sort_input2);
    add_text("Most Available");
    control_panel.appendChild(sort_input3);
    add_text("Amount");
    add_paragraph();

    control_panel.appendChild(search_input);
    search_input.size = 20;
    search_input.title = "Enter a search term to include\n" + "Default is blank (no included terms)";
    search_input.placeholder="Enter search terms here";
    add_text(" ");

    big_red_button.textContent = "Start";
    big_red_button.onclick = function(){start_running();};
    control_panel.appendChild(big_red_button);
    add_text(" ");
    control_panel.appendChild(progress_report);
    add_paragraph();

    // Results table: caption plus a header row; data rows are added after each search.
    text_area.style.fontWeight = 400;
    text_area.createCaption().innerHTML = "HITs";
    var col_heads = ['Requester','Title','Reward','HITs Available','TO pay',"Accept HIT"];
    var row = text_area.createTHead().insertRow(0);
    text_area.caption.style.fontWeight = 800;
    text_area.caption.style.color = BROWN;
    if (default_text_size > 10)
      text_area.cellPadding=Math.min(Math.max(1,Math.floor((default_text_size-10)/2)),5);
    text_area.caption.style.fontSize = 28;
    text_area.rows[0].style.fontWeight = 800;
    text_area.rows[0].style.color = BROWN;
    for (var i = 0; i < col_heads.length; i++)
    {
      var this_cell = row.insertCell(i);
      this_cell.innerHTML = col_heads[i];
      this_cell.style.fontSize = 14;
      if (i > 1)
        this_cell.style.textAlign = 'center';
    }
    control_panel.appendChild(text_area);
  }
  791.  
  792.  
  793. /********HIT EXPORT ADDITIONS*****/
  794.  
  // State of the export dialog: whether the template is being edited, and the HIT
  // record currently shown.
  var EDIT = false;
  var HIT;

  var TO_BASE = "http://turkopticon.ucsd.edu/";
  var API_BASE = "https://api.turkopticon.istrack.in/";
  var API_URL = API_BASE + "multi-attrs.php?ids=";

  // Built-in vBulletin export template; {placeholders} are substituted per HIT.
  // Declared with `var`: the original assigned to an undeclared (implicit global) name.
  var DEFAULT_TEMPLATE = [
    '[table][tr][td][b]Title:[/b] [url={prev_link}][COLOR=blue]{title}[/COLOR][/url]\n',
    '[b]Requester:[/b] [url=https://www.mturk.com/mturk/searchbar?selectedSearchType=hitgroups&requesterId={rid}][COLOR=blue]{requester}[/COLOR][/url]',
    ' [{rid}] ([url='+TO_BASE+'{rid}][COLOR=blue]TO[/COLOR][/url])',
    '\n[b]TO Ratings:[/b]{to_stuff}',
    '\n[b]Description:[/b] {description}',
    '\n[b]Time:[/b] {time}',
    '\n[b]Reward:[/b] [COLOR=green][b]{reward}[/b][/COLOR]',
    '\n[b]Qualifications:[/b] {quals}[/td][/tr][/table]'
  ].join('');

  var TEMPLATE;
  var EASYLINK;

  // Load the user's saved template and easylink setting when the GM storage API is
  // available; otherwise (or when nothing was saved) use the built-in template.
  if (typeof GM_getValue === 'undefined')
    TEMPLATE = null;
  else {
    TEMPLATE = GM_getValue('HITScraper Template');
    EASYLINK = GM_getValue('HITScraper Easylink');
  }
  if (TEMPLATE == null) {
    TEMPLATE = DEFAULT_TEMPLATE;
  }
  822.  
  /**
   * Builds the Turkopticon API URL for the given requester id(s).
   *
   * @param {string} rai  value appended to API_URL as the `ids` query value
   * @returns {string} API_URL followed by `rai`
   */
  function buildXhrUrl(rai) {
    return API_URL + rai;
  }
  829.  
  /**
   * Performs a synchronous GET request and parses the response as JSON.
   *
   * @param {string} url  URL to request
   * @returns {*} the parsed JSON response, or the string "TO DOWN" when the
   *              request or the parsing throws
   */
  function makeXhrQuery(url) {
    var request = new XMLHttpRequest();
    var parsed;
    try {
      // Third argument `false` makes the request synchronous.
      request.open('GET', url, false);
      request.send(null);
      parsed = $.parseJSON(request.response);
    }
    catch (err) {
      parsed = "TO DOWN";
    }
    return parsed;
  }
  841.  
  /**
   * Fills in a name-only entry for every requester id whose API response is empty.
   *
   * @param {Object} rai   map from requester id to a list whose first item is an
   *                       element; its innerHTML is used as the requester name
   * @param {Object} resp  API response keyed by requester id; modified in place
   * @returns {Object} `resp`, with each empty entry replaced by {name: ...}
   */
  function getNamesForEmptyResponses(rai, resp) {
    for (var rid in rai) {
      // Loose comparison kept on purpose so the same values count as "empty" as before.
      if (rai.hasOwnProperty(rid) && resp[rid] == "") {
        // Build the object directly: assembling JSON text by concatenation broke on
        // names containing quotes or backslashes.
        resp[rid] = { name: rai[rid][0].innerHTML };
      }
    }
    return resp;
  }
  850.  
  /**
   * Lists the enumerable property names of an object, including inherited ones,
   * in for...in order.
   *
   * @param {Object} obj  object to inspect
   * @returns {Array} the property names
   */
  function getKeys(obj) {
    var names = [];
    var name;
    for (name in obj) {
      names[names.length] = name;
    }
    return names;
  }
  858.  
  /**
   * Click handler target for a row's "vB" export button; forwards the HIT record
   * to export_func.
   *
   * @param {Object} item   HIT record to export
   * @param {number} index  position of the HIT in the found list (not used)
   */
  function export_func_deleg(item,index) {
    var hit_record = item;
    export_func(hit_record);
  }
  863.  
  /**
   * Opens the export dialog for a HIT: remembers the record in `HIT`, resets the
   * edit button label, applies the export template to the record, shows the
   * dialog and selects the text in its textarea.
   *
   * @param {Object} item  HIT record to export
   */
  function export_func(item) {
    HIT = item;
    edit_button.textContent = 'Edit Template';
    apply_template(HIT);
    div.style.display = 'block';
    textarea.select();
  }
  871.  
  // Render the current TEMPLATE for one HIT and put the result in the export
  // textarea.
  //   hit_data - object read by placeholder name: title, requester, rid,
  //              description, reward, quals, prev_link, time, hits.
  // Placeholders have the form {name}. {to_text} and {to_stuff} are filled
  // from a Turkopticon lookup, which is only performed when the template
  // contains at least one of them. {prev_link} is prefixed with the mturk
  // host. Missing hit_data fields render as the text "undefined".
  // Side effect: assigns textarea.value. Returns nothing.
  function apply_template(hit_data) {
    // Label used in the exported text for a Turkopticon attribute key.
    function attrLabel(key) {
      switch (key) {
        case "comm": return "Communicativity";
        case "pay": return "Generosity";
        case "fast": return "Promptness";
        case "fair": return "Fairness";
        default: return key;
      }
    }

    // BBCode colour for a floored rating; anything other than 0-3 is green.
    function ratingColor(num) {
      switch (num) {
        case 0:
        case 1: return "red";
        case 2: return "orange";
        case 3: return "yellow";
        default: return "green";
      }
    }

    var txt = TEMPLATE;
    var vars = ['title', 'requester', 'rid', 'description', 'reward', 'quals', 'prev_link', 'time', 'hits', 'to_stuff', 'to_text'];

    var rid = hit_data["rid"];
    var wantsText = txt.indexOf('{to_text}') >= 0;
    var wantsStuff = txt.indexOf('{to_stuff}') >= 0;
    var reportUrl = TO_BASE + "report?requester[amzn_id]=" + rid + "&requester[amzn_name]=" + hit_data['requester'];

    // Only hit the network when the template actually needs TO data.
    var resp = null;
    if (wantsText || wantsStuff) {
      resp = makeXhrQuery(buildXhrUrl(rid));
    }

    var toDown = resp === "TO DOWN";
    // Entry for this requester; null when there was no lookup, the lookup
    // failed, or the response has no key for this id (previously a TypeError).
    var entry = (resp == null || toDown) ? null : resp[rid];

    var toText = "";
    var toStuff = "";

    if (toDown) {
      toStuff = " [URL=\"" + TO_BASE + rid + "\"]TO down.[/URL]";
      toText = toStuff;
    }
    else if (entry == null || entry.attrs == null) {
      toText = " No TO ";
      toStuff = " No TO ";
      toStuff += "[URL=\"" + reportUrl + "\"]";
      toStuff += "(Submit a new TO rating for this requester)[/URL]";
    }
    else {
      var numResp = entry.reviews;
      var toData = "";
      for (var key in entry.attrs) {
        var rating = entry.attrs[key];
        var num = Math.floor(rating);
        var star = 0;

        // `num` coloured symbols, then white ones to pad the row to five.
        toText += (num > 0 ? "\n[color=" + ratingColor(num) + "]" : "\n");
        for (; star < num; star++) {
          toText += "[b]☢[/b]";
        }
        toText += (num > 0 ? "[/color]" : "");
        if (star < 5) {
          toText += "[color=white]";
          for (; star < 5; star++) {
            toText += "[b]☢[/b]";
          }
          toText += "[/color]";
        }
        toText += " " + Number(rating).toFixed(2) + " " + attrLabel(key);
        toData += Number(rating).toFixed(2) + ",";
      }

      // When {to_stuff} is present it carries the review count and report
      // link itself, so they are only appended to the text form otherwise.
      if (!wantsStuff) {
        toText += "\nNumber of Reviews: " + numResp + "\n[URL=\"" + reportUrl + "\"](Submit a new TO rating for this requester)[/URL]";
      }
      toStuff = '\n[img]http://data.istrack.in/to/' + toData.slice(0, -1) + '.png[/img]';
      if (wantsStuff && !wantsText) {
        toStuff += toText;
      }
      toStuff += "\nNumber of Reviews: " + numResp;
      toStuff += "[URL=\"" + reportUrl + "\"]";
      toStuff += "\n(Submit a new TO rating for this requester)[/URL]";
    }

    for (var v = 0; v < vars.length; v++) {
      var name = vars[v];
      var value;
      if (name === "to_stuff") {
        value = toStuff;
      }
      else if (name === "to_text") {
        value = toText;
      }
      else if (name === "prev_link") {
        value = "https://www.mturk.com/" + hit_data[name];
      }
      else {
        value = String(hit_data[name]);
      }
      // split/join substitutes the value literally. String.replace would
      // interpret "$$", "$&", "$`" and "$'" inside HIT text as replacement
      // patterns and corrupt the output.
      txt = txt.split('{' + name + '}').join(value);
    }
    textarea.value = txt;
  }
  978.  
  // Hide the given dialog element unless the template is being edited.
  //   div - element whose style.display is set to 'none'.
  function hide_func(div) {
    var editing = !(EDIT == false);
    if (editing) {
      return;
    }
    div.style.display = 'none';
  }
  983.  
  // Toggle template editing from the edit button.
  // Entering edit mode shows the raw TEMPLATE in the textarea and enables the
  // save button; leaving it adopts the textarea content as the new TEMPLATE
  // and re-renders the current HIT.
  function edit_func() {
    if (EDIT != true) {
      // Not editing yet: switch to edit mode.
      console.log("Editing");
      EDIT = true;
      edit_button.textContent = 'Show Changes';
      save_button.disabled = false;
      textarea.value = TEMPLATE;
      return;
    }

    // Already editing: apply what was typed and show the rendered result.
    EDIT = false;
    TEMPLATE = textarea.value;
    edit_button.textContent = 'Edit Template';
    apply_template(HIT);
  }
  999.  
  // Restore the built-in template: remove the stored custom template, leave
  // edit mode and re-render the current HIT with DEFAULT_TEMPLATE.
  function default_func() {
    GM_deleteValue('HITScraper Template');

    EDIT = false;
    TEMPLATE = DEFAULT_TEMPLATE;

    edit_button.textContent = 'Edit Template';
    apply_template(HIT);
  }
  1007.  
  // Persist the template to userscript storage. While in edit mode the
  // textarea content is adopted as the template first.
  function save_func() {
    if (EDIT) {
      TEMPLATE = textarea.value;
    }
    GM_setValue('HITScraper Template', TEMPLATE);
  }
  1013.  
  // ---- Export dialog ------------------------------------------------------
  // A fixed-position box holding the export textarea and the template
  // buttons. It is created hidden and inserted at the top of <body>.
  var div = document.createElement('div');
  var textarea = document.createElement('textarea');
  var div2 = document.createElement('label');

  // Dialog container.
  div.style.position = 'fixed';
  div.style.width = '500px';
  div.style.height = '235px';
  div.style.left = '50%';
  div.style.right = '50%';
  div.style.margin = '-250px 0px 0px -250px';
  div.style.top = '300px';
  div.style.padding = '5px';
  // NOTE(review): a border shorthand with only a width leaves border-style at
  // its default of none, so this probably draws nothing. Left unchanged.
  div.style.border = '2px';
  div.style.backgroundColor = 'black';
  div.style.color = 'white';
  div.style.zIndex = '100';

  // Text area that receives the rendered template.
  textarea.style.padding = '2px';
  textarea.style.width = '500px';
  textarea.style.height = '200px';
  // Tooltip listing the placeholders understood by apply_template. It used to
  // advertise {hit}, but the placeholder that is actually substituted is {hits}.
  textarea.title = '{title}\n{requester}\n{rid}\n{description}\n{reward}\n{quals}\n{prev_link}\n{time}\n{hits}\n{to_stuff}\n{to_text}';

  div.textContent = 'Press Ctrl+C to copy to clipboard. Click textarea to close';
  div.style.fontSize = '12px';
  div.appendChild(textarea);

  var edit_button = document.createElement('button');
  var save_button = document.createElement('button');
  var default_button = document.createElement('button');
  var easy_button = document.createElement('button');

  // "Edit Template" toggles between the rendered text and the raw template.
  edit_button.textContent = 'Edit Template';
  edit_button.setAttribute('id', 'edit_button');
  edit_button.style.height = '18px';
  edit_button.style.width = '100px';
  edit_button.style.fontSize = '10px';
  edit_button.style.paddingLeft = '3px';
  edit_button.style.paddingRight = '3px';
  edit_button.style.backgroundColor = 'white';

  // "Save Template" persists the template; disabled until editing starts.
  save_button.textContent = 'Save Template';
  save_button.setAttribute('id', 'save_button');
  save_button.style.height = '18px';
  save_button.style.width = '100px';
  save_button.style.fontSize = '10px';
  save_button.style.paddingLeft = '3px';
  save_button.style.paddingRight = '3px';
  save_button.style.backgroundColor = 'white';
  save_button.style.marginLeft = '5px';

  // "Change Adfly Url" button; no click listener is attached in this section.
  easy_button.textContent = 'Change Adfly Url';
  easy_button.setAttribute('id', 'easy_button');
  easy_button.style.height = '18px';
  easy_button.style.width = '100px';
  easy_button.style.fontSize = '10px';
  easy_button.style.paddingLeft = '3px';

  // " D " restores the default template.
  default_button.textContent = ' D ';
  default_button.setAttribute('id', 'default_button');
  default_button.style.height = '18px';
  default_button.style.width = '20px';
  default_button.style.fontSize = '10px';
  default_button.style.paddingLeft = '3px';
  default_button.style.paddingRight = '3px';
  default_button.style.backgroundColor = 'white';
  default_button.style.marginLeft = '5px';
  default_button.title = 'Return default template';

  div.appendChild(edit_button);
  div.appendChild(save_button);
  div.appendChild(default_button);
  div.appendChild(easy_button);
  save_button.disabled = true;

  // Start hidden; export_func reveals the dialog.
  div.style.display = 'none';
  textarea.addEventListener("click", function() {hide_func(div);}, false);
  edit_button.addEventListener("click", function() {edit_func();}, false);
  save_button.addEventListener("click", function() {save_func();}, false);
  default_button.addEventListener("click", function() {default_func();}, false);
  document.body.insertBefore(div, document.body.firstChild);