HIT Scraper WITH EXPORT

Snag HITs.

目前为 2014-10-27 提交的版本。查看 最新版本

// ==UserScript==
// @name HIT Scraper WITH EXPORT
// @author Kerek and TJ
// @description Snag HITs.
// Based in part on code from mmmturkeybacon Export Mturk History and mmmturkeybacon Color Coded Search with Checkpoints
// @namespace http://userscripts.org/users/536998
// @match https://www.mturk.com/mturk/findhits?match=true#hit_scraper*
// @match https://www.mturk.com/mturk/findhits?match=true?hit_scraper*
// @version 1.3.1.1
// @grant GM_xmlhttpRequest
// @grant GM_getValue
// @grant GM_setValue
// @grant GM_deleteValue
// @require http://code.jquery.com/jquery-latest.min.js
// ==/UserScript==
  16.  
  17. //alter the requester ignore last as you desire, case insensitive
  18. var default_list = ["oscar smith", "Diamond Tip Research LLC", "jonathon weber", "jerry torres", "Crowdsource", "we-pay-you-fast", "turk experiment", "jon brelig"];
  19. var ignore_list = default_list;
  20. if (GM_getValue("scraper_ignore_list"))
  21. ignore_list = GM_getValue("scraper_ignore_list");
  22.  
  23. //This is to update the hit export symbol
  24. var symbol = "☭";
  25.  
  26. //this searches extra pages if you skip too much, helps fill out results if you hit a chunk of ignored HITs. Change to true for this behavior.
  27. var correct_for_skips = true;
  28.  
  29. //weight the four TO ratings for the coloring. Default has pay twice as important as fairness and nothing for communication and fast.
  30. var COMM_WEIGHT = 0;
  31. var PAY_WEIGHT = 10;
  32. var FAIR_WEIGHT = 5;
  33. var FAST_WEIGHT = 0;
  34.  
  35. //display your hitdb records if applicable
  36. var check_hitDB = true;
  37.  
  38. //default text size
  39. var default_text_size=11;
  40.  
  41.  
  42. //DO NOT EDIT ANYTHING BELOW THIS LINE UNLESS YOU KNOW WHAT YOU ARE DOING!
  43.  
  44. //For editing the blocklist
  45. var div = document.createElement('div');
  46. var textarea = document.createElement('textarea');
  47.  
  48. div.style.position = 'fixed';
  49. div.style.width = '500px';
  50. div.style.height = '235px';
  51. div.style.left = '50%';
  52. div.style.right = '50%';
  53. div.style.margin = '-250px 0px 0px -250px';
  54. div.style.top = '300px';
  55. div.style.padding = '5px';
  56. div.style.border = '2px';
  57. div.style.backgroundColor = 'black';
  58. div.style.color = 'white';
  59. div.style.zIndex = '100';
  60. div.setAttribute('id','block_div');
  61.  
  62. textarea.style.padding = '2px';
  63. textarea.style.width = '500px';
  64. textarea.style.height = '200px';
  65. textarea.title = 'Block list';
  66. textarea.setAttribute('id','block_text');
  67.  
  68. div.textContent = 'Change the blocklist to be whatever you like, save to save it. Separate requesters by commas. After clicking "Save", you\'ll need to scrape again to apply the changes.';
  69. div.style.fontSize = '12px';
  70. div.appendChild(textarea);
  71.  
  72. var save_button = document.createElement('button');
  73.  
  74. save_button.textContent = 'Save';
  75. save_button.setAttribute('id', 'save_blocklist');
  76. save_button.style.height = '18px';
  77. save_button.style.width = '100px';
  78. save_button.style.fontSize = '10px';
  79. save_button.style.paddingLeft = '3px';
  80. save_button.style.paddingRight = '3px';
  81. save_button.style.backgroundColor = 'white';
  82. save_button.style.marginLeft = '5px';
  83.  
  84. div.appendChild(save_button);
  85.  
  86. $("#block_div").hide();
  87. save_button.addEventListener("click", function() {save_blocklist();}, false);
  88. document.body.insertBefore(div, document.body.firstChild);
  89.  
  90. function save_blocklist() {
  91. console.log("Save");
  92. var textarea = $("#block_text");
  93. var text = textarea.val();
  94. var block_list = text.split(",");
  95. console.log(block_list);
  96. var trimmed_list = [];
  97. for (var requester in block_list){
  98. if (block_list[requester].trim().length != 0)
  99. trimmed_list.push(block_list[requester].toLowerCase().trim());
  100. }
  101. GM_setValue("scraper_ignore_list",trimmed_list);
  102. ignore_list = GM_getValue("scraper_ignore_list");
  103. console.log(ignore_list);
  104. $("#block_div").hide();
  105. }
  106.  
  107. var HITStorage = {};
  108. var indexedDB = window.indexedDB || window.webkitIndexedDB ||
  109. window.mozIndexedDB;
  110. window.IDBTransaction = window.IDBTransaction || window.webkitIDBTransaction || window.mozIDBTransaction;
  111. window.IDBKeyRange = window.IDBKeyRange || window.webkitIDBKeyRange || window.mozIDBKeyRange;
  112. HITStorage.IDBTransactionModes = { "READ_ONLY": "readonly", "READ_WRITE": "readwrite", "VERSION_CHANGE": "versionchange" };
  113. var IDBKeyRange = window.IDBKeyRange;
  114.  
  115. HITStorage.indexedDB = {};
  116. HITStorage.indexedDB = {};
  117. HITStorage.indexedDB.db = null;
  118.  
  119. HITStorage.indexedDB.onerror = function(e) {
  120. console.log(e);
  121. };
  122.  
  123. var v=4;
  124.  
  125. HITStorage.indexedDB.checkTitle = function(title,button) {
  126. var request = indexedDB.open("HITDB", v);
  127. request.onsuccess = function(e) {
  128. HITStorage.indexedDB.db = e.target.result;
  129. var db = HITStorage.indexedDB.db;
  130. if (!db.objectStoreNames.contains("HIT"))
  131. {
  132. db.close();
  133. return;
  134. }
  135. var trans = db.transaction(["HIT"], HITStorage.IDBTransactionModes.READ_ONLY);
  136. var store = trans.objectStore("HIT");
  137.  
  138. var index = store.index("title");
  139. index.get(title).onsuccess = function(event)
  140. {
  141. if (event.target.result === undefined)
  142. {
  143. console.log(title + ' not found');
  144. history[button].titledb=false;
  145. }
  146. else
  147. {
  148. console.log(title + ' found');
  149. history[button].titledb=true;
  150. }
  151. db.close();
  152. };
  153. };
  154. request.onerror = HITStorage.indexedDB.onerror;
  155. };
  156.  
  157. HITStorage.indexedDB.checkRequester = function(id,button) {
  158. var request = indexedDB.open("HITDB", v);
  159. request.onsuccess = function(e) {
  160. HITStorage.indexedDB.db = e.target.result;
  161. var db = HITStorage.indexedDB.db;
  162. if (!db.objectStoreNames.contains("HIT"))
  163. {
  164. db.close();
  165. return;
  166. }
  167. var trans = db.transaction(["HIT"], HITStorage.IDBTransactionModes.READ_ONLY);
  168. var store = trans.objectStore("HIT");
  169.  
  170. var index = store.index("requesterId");
  171. index.get(id).onsuccess = function(event)
  172. {
  173. if (event.target.result === undefined)
  174. {history[button].reqdb=false;
  175. console.log(id + ' not found');
  176. }
  177. else
  178. {
  179. history[button].reqdb=true;
  180. console.log(id + ' found');
  181. }
  182. db.close();
  183. };
  184. };
  185. request.onerror = HITStorage.indexedDB.onerror;
  186. };
  187.  
  188. var PAGES_TO_SCRAPE = 3;
  189. var MINIMUM_HITS = 100;
  190. var SEARCH_REFRESH=0;
  191. var URL_BASE = "/mturk/searchbar?searchWords=&selectedSearchType=hitgroups";
  192. var initial_url = URL_BASE;
  193. var TO_REQ_URL = "http://turkopticon.ucsd.edu/reports?id=";
  194. var found_key_list=[];
  195. var last_clear_time = new Date().getTime();
  196. var searched_once = false;
  197. var save_new_results_time = 120;
  198. var save_results_time = 3600;
  199. var default_type = 0;
  200. var cur_loc = window.location.href;
  201. var time_input = document.createElement("INPUT");
  202. time_input.value = 0;
  203. var page_input = document.createElement("INPUT");
  204. page_input.value = 3;
  205. var min_input = document.createElement("INPUT");
  206. var new_time_display_input = document.createElement("INPUT");
  207. new_time_display_input.value = 300;
  208. var reward_input = document.createElement("INPUT");
  209. var qual_input = document.createElement("INPUT");
  210. qual_input.type = "checkbox";
  211. qual_input.checked = true;
  212. var masters_input = document.createElement("INPUT");
  213. masters_input.type = "checkbox";
  214. var sort_input1 = document.createElement("INPUT");
  215. sort_input1.type = "radio";
  216. sort_input1.name = "sort_type";
  217. sort_input1.value = "latest";
  218. sort_input1.checked = true;
  219. var sort_input2 = document.createElement("INPUT");
  220. sort_input2.type = "radio";
  221. sort_input2.name = "sort_type";
  222. sort_input2.value = "most";
  223. var sort_input3 = document.createElement("INPUT");
  224. sort_input3.type = "radio";
  225. sort_input3.name = "sort_type";
  226. sort_input3.value = "amount";
  227.  
  228. var search_input = document.createElement("INPUT");
  229.  
  230. var LINK_BASE = "https://www.mturk.com";
  231. var BACKGROUND_COLOR = "rgb(19, 19, 19)";
  232. var STATUSDETAIL_DELAY = 250;
  233. var MPRE_DELAY = 3000;
  234.  
  235. var next_page = 1;
  236.  
  237. var GREEN = '#66CC66'; // > 4
  238. var LIGHTGREEN = '#ADFF2F'; // > 3 GREEN YELLOW
  239. var YELLOW = '#FFD700';
  240. var ORANGE = '#FF9900'; // > 2
  241. var RED = '#FF3030'; // <= 2
  242. var BLUE = '#C0D9D9'; // no TO
  243. var GREY = 'lightGrey';
  244. var BROWN = '#94704D';
  245. var DARKGREY = '#9F9F9F';
  246. $('body').css('background', BACKGROUND_COLOR);
  247.  
  248. var API_PROXY_BASE = 'https://mturk-api.istrack.in/';
  249. var API_MULTI_ATTRS_URL = API_PROXY_BASE + 'multi-attrs.php?ids=';
  250. var REVIEWS_BASE = 'http://turkopticon.ucsd.edu/';
  251.  
  252. var control_panel_HTML = '<div id="control_panel" style="margin: 0 auto 0 auto;' +
  253. 'border-bottom: 1px solid #000000; margin-bottom: 5px; ' +
  254. 'background-color: ' + BACKGROUND_COLOR + ';"></div>';
  255. $('body > :not(#control_panel)').hide(); //hide all nodes directly under the body
  256. $('body').prepend(control_panel_HTML);
  257.  
  258. var control_panel = document.getElementById("control_panel");
  259. var big_red_button = document.createElement("BUTTON");
  260. var reset_blocks = document.createElement("BUTTON");
  261. var progress_report = document.createTextNode("Stopped");
  262. var text_area = document.createElement("TABLE");
  263. big_red_button.textContent = "Show Interface";
  264. big_red_button.onclick = function(){show_interface();};
  265. control_panel.appendChild(big_red_button);
  266.  
  267. show_interface();
  268.  
  269. var global_run = false;
  270. var statusdetail_loop_finished = false;
  271. var date_header = "";
  272. var history = {};
  273. var wait_loop;
  274.  
  275. function set_progress_report(text, force)
  276. {
  277. if (global_run == true || force == true)
  278. {
  279. progress_report.textContent = text;
  280. }
  281. }
  282.  
  283. function get_progress_report()
  284. {
  285. return progress_report.textContent;
  286. }
  287.  
  288. function wait_until_stopped()
  289. {
  290. if (global_run == true)
  291. {
  292. if (statusdetail_loop_finished == true)
  293. {
  294. big_red_button.textContent = "Start";
  295. set_progress_report("Finished", false);
  296. }
  297. else
  298. {
  299. setTimeout(function(){wait_until_stopped();}, 500);
  300. }
  301. }
  302. }
  303.  
  304. function display_wait_time(wait_time)
  305. {
  306. if (global_run == true)
  307. {
  308. var current_progress = get_progress_report();
  309. if (current_progress.indexOf("Searching again in")!==-1)
  310. {
  311. set_progress_report(current_progress.replace(/Searching again in \d+ seconds/ , "Searching again in " + wait_time + " seconds"),false);
  312. }
  313. else
  314. set_progress_report(current_progress + " Searching again in " + wait_time + " seconds.", false);
  315. if (wait_time>1)
  316. setTimeout(function(){display_wait_time(wait_time-1);}, 1000);
  317. }
  318. }
  319.  
  320. function dispArr(ar)
  321. {
  322. var disp = "";
  323. for (var z = 0; z < ar.length; z++)
  324. {
  325. disp += "id " + z + " is " + ar[z] + " ";
  326. }
  327. console.log(disp);
  328. }
  329.  
  330. function scrape($src)
  331. {
  332. var $requester = $src.find('a[href^="/mturk/searchbar?selectedSearchType=hitgroups&requester"]');
  333. var $title = $src.find('a[class="capsulelink"]');
  334. var $reward = $src.find('span[class="reward"]');
  335. var $preview = $src.find('a[href^="/mturk/preview?"]');
  336. var $qualified = $src.find('a[href^="/mturk/notqualified?"]');
  337. var $times = $src.find('a[id^="duration_to_complete"]');
  338. var $descriptions = $src.find('a[id^="description"]');
  339. var not_qualified_group_IDs=[];
  340. var $quals = $src.find('a[id^="qualificationsRequired"]');
  341. $qualified.each(function(){
  342. var groupy = $(this).attr('href');
  343. groupy = groupy.replace("/mturk/notqualified?hitId=","");
  344. not_qualified_group_IDs.push(groupy);
  345. });
  346. var $mixed = $src.find('a[href^="/mturk/preview?"],a[href^="/mturk/notqualified?"]');
  347. var listy =[];
  348. $mixed.each(function(){
  349. var groupy = $(this).attr('href');
  350. groupy = groupy.replace("/mturk/notqualified?hitId=","");
  351. groupy = groupy.replace("/mturk/preview?groupId=","");
  352. listy.push(groupy);
  353. });
  354. listy = listy.filter(function(elem, pos) {
  355. return listy.indexOf(elem) == pos;
  356. });
  357.  
  358. for (var j = 0; j < $requester.length; j++)
  359. {
  360. var $hits = $requester.eq(j).parent().parent().parent().parent().parent().parent().find('td[class="capsule_field_text"]');
  361. var requester_name = $requester.eq(j).text().trim();
  362. var requester_link = $requester.eq(j).attr('href');
  363. var group_ID=listy[j];
  364. var preview_link = "/mturk/preview?groupId=" + group_ID;
  365. var title = $title.eq(j).text().trim();
  366. var reward = $reward.eq(j).text().trim();
  367. var hits = $hits.eq(4).text().trim();
  368. var time = $times.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
  369. var description = $descriptions.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
  370. //console.log(description);
  371. var requester_id = requester_link.replace('/mturk/searchbar?selectedSearchType=hitgroups&requesterId=','');
  372. var accept_link;
  373. accept_link = preview_link.replace('preview','previewandaccept');
  374. /*HIT SCRAPER ADDITION*/
  375. var qElements = $quals.eq(j).parent().parent().parent().find('tr');
  376. //console.log(qElements);
  377.  
  378. var qualifications = [];
  379. for (var i = 1; i < qElements.length; i++) {
  380. qualifications.push((qElements[i].childNodes[1].textContent.trim().replace(/\s+/g, ' ').indexOf("Masters") != -1 ? "[color=red][b]"+qElements[i].childNodes[1].textContent.trim().replace(/\s+/g, ' ')+"[/b][/color]" : qElements[i].childNodes[1].textContent.trim().replace(/\s+/g, ' ')));
  381. }
  382. var qualList = (qualifications.join(', ') ? qualifications.join(', ') : "None");
  383.  
  384. key = requester_name+title+reward+group_ID;
  385. found_key_list.push(key);
  386. if (history[key] == undefined)
  387. {
  388. history[key] = {requester:"", title:"", description:"", reward:"", hits:"", req_link:"", quals:"", prev_link:"", rid:"", acc_link:"", new_result:"", qualified:"", found_this_time:"", initial_time:"", reqdb:"",titledb:"",time:""};
  389. history[key].req_link = requester_link;
  390. history[key].prev_link = preview_link;
  391. history[key].requester = requester_name;
  392. history[key].title = title;
  393. history[key].reward = reward;
  394. history[key].hits = hits;
  395. history[key].rid = requester_id;
  396. history[key].acc_link = accept_link;
  397. history[key].time = time;
  398. history[key].quals = qualList;
  399. history[key].description = description;
  400. HITStorage.indexedDB.checkRequester(requester_id,key);
  401. HITStorage.indexedDB.checkTitle(title,key);
  402. if (searched_once)
  403. {
  404. history[key].initial_time = new Date().getTime();//-1000*(save_new_results_time - SEARCH_REFRESH);
  405. history[key].new_result = 0;
  406. }
  407. else
  408. {
  409. history[key].initial_time = new Date().getTime()-1000*save_new_results_time;
  410. history[key].new_result = 1000*save_new_results_time;
  411. }
  412. if (not_qualified_group_IDs.indexOf(group_ID)!==-1)
  413. history[key].qualified = false;
  414. else
  415. history[key].qualified = true;
  416.  
  417. history[key].found_this_time = true;
  418. }
  419. else
  420. {
  421. history[key].new_result = new Date().getTime() - history[key].initial_time;
  422. history[key].found_this_time = true;
  423. history[key].hits = hits;
  424. }
  425. }
  426. }
  427.  
  428. function statusdetail_loop(next_URL)
  429. {
  430. if (global_run == true)
  431. {
  432. if (next_URL.length != 0)
  433. {
  434. $.get(next_URL, function(data)
  435. {
  436. var $src = $(data);
  437. var maxpagerate = $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")');
  438. if (maxpagerate.length == 0)
  439. {
  440. set_progress_report("Processing page " + next_page, false);
  441. scrape($src);
  442. $next_URL = $src.find('a[href^="/mturk/viewsearchbar"]:contains("Next")');
  443. next_URL = ($next_URL.length != 0) ? $next_URL.attr("href") : "";
  444. next_page++;
  445. if (default_type == 1)
  446. {
  447. var hmin = MINIMUM_HITS+1;
  448. for (j = 0; j < found_key_list.length; j++)
  449. {
  450. if (history[found_key_list[j]].hits < hmin)
  451. {
  452. next_URL = "";
  453. next_page = -1;
  454. break;
  455. }
  456. }
  457. }
  458. else if (next_page > PAGES_TO_SCRAPE && correct_for_skips)
  459. {
  460. var skipped_hits = 0;
  461. var added_pages = 0;
  462. for (j = 0; j < found_key_list.length; j++)
  463. {
  464. var obj = history[found_key_list[j]];
  465. if (! ignore_check(obj.requester,obj.title))
  466. skipped_hits++;
  467. }
  468. added_pages = Math.floor(skipped_hits/10);
  469. if (skipped_hits%10 >6)
  470. added_pages++;
  471. if (next_page > PAGES_TO_SCRAPE + added_pages)
  472. {
  473. next_URL = "";
  474. next_page = -1;
  475. }
  476. }
  477. else if (next_page > PAGES_TO_SCRAPE)
  478. {
  479. next_URL = "";
  480. next_page = -1;
  481. }
  482. setTimeout(function(){statusdetail_loop(next_URL);}, STATUSDETAIL_DELAY);
  483. }
  484. else
  485. {
  486. console.log("MPRE");
  487. setTimeout(function(){statusdetail_loop(next_URL);}, MPRE_DELAY);
  488. }
  489. });
  490. }
  491. else
  492. {
  493. searched_once = true;
  494. var found_hits = found_key_list.length;
  495. var shown_hits = 0;
  496. var new_hits = 0;
  497. var url = API_MULTI_ATTRS_URL;
  498. var rids = [];
  499. var lastRow = text_area.rows.length - 1;
  500. for (i = lastRow; i>0; i--)
  501. text_area.deleteRow(i);
  502. for (j = 0; j < found_key_list.length; j++)
  503. {
  504. //(function(url,rids,j) {
  505. var obj = history[found_key_list[j]];
  506. if (ignore_check(obj.requester,obj.title) && obj.found_this_time){
  507. ++shown_hits;
  508. //console.log(obj);
  509. //hit export will update col_heads[1]
  510. var col_heads = ["<a href='"+ LINK_BASE+obj.req_link +"' target='_blank'>" + obj.requester + "</a>","<a href='"+ LINK_BASE+obj.prev_link +"' target='_blank' title='"+ obj.description +"'>" + obj.title + "</a>",obj.reward,obj.hits,"TO down","<a href='"+ LINK_BASE+obj.acc_link +"' target='_blank'>Accept</a>"];
  511. var row = text_area.insertRow(text_area.rows.length);
  512. url += obj.rid + ',';
  513. rids.push(obj.rid);
  514. if (check_hitDB)
  515. {
  516. col_heads.push("R");
  517. col_heads.push("T");
  518. }
  519. if (!obj.qualified)
  520. {
  521. col_heads.push("Not Qualified");
  522. }
  523. for (i=0; i<col_heads.length; i++)
  524. {
  525. var this_cell = row.insertCell(i);
  526. row.cells[i].style.fontSize = default_text_size;
  527. this_cell.innerHTML = col_heads[i];
  528. if(i>1)
  529. this_cell.style.textAlign = 'center';
  530. if (check_hitDB)
  531. {
  532. if (i==6)
  533. {
  534. if (obj.reqdb)
  535. this_cell.style.backgroundColor = GREEN;
  536. else
  537. this_cell.style.backgroundColor = RED;
  538. }
  539. else if (i==7)
  540. {
  541. if (obj.titledb)
  542. this_cell.style.backgroundColor = GREEN;
  543. else
  544. this_cell.style.backgroundColor = RED;
  545. }
  546. else if (i==8)
  547. this_cell.style.backgroundColor = DARKGREY;
  548. }
  549. else if (i==6)
  550. this_cell.style.backgroundColor = DARKGREY;
  551. }
  552. if (Object.keys(history).length>0)
  553. {
  554. if (obj.new_result < 1000*save_new_results_time)
  555. {
  556. new_hits++;
  557. for (i in col_heads)
  558. {
  559. row.cells[i].style.fontSize = default_text_size + 1;
  560. row.cells[i].style.fontWeight = "bold";
  561. }
  562. }
  563. }
  564. button = document.createElement('button'); //HIT SCRAPER ADDITION
  565. button.textContent = 'vB';
  566. button.title = 'Export this HIT description as vBulletin formatted text';
  567. button.style.height = '14px';
  568. button.style.width = '30px';
  569. button.style.fontSize = '8px';
  570. button.style.border = '1px solid';
  571. button.style.padding = '0px';
  572. button.style.backgroundColor = 'transparent';
  573. button2 = document.createElement('button'); //BUTTON TO BLOCK REQUESTER
  574. button2.textContent = '☢';
  575. button2.title = 'Add requester to block list';
  576. button2.style.height = '14px';
  577. button2.style.width = '30px';
  578. button2.style.fontSize = '10px';
  579. button2.style.border = '1px solid';
  580. button2.style.padding = '0px';
  581. button2.style.backgroundColor = 'transparent';
  582. //button.addEventListener("click", function() {export_func_deleg(j);}.bind(null,j), false);
  583. button.addEventListener("click", (function (obj,j) { return function() {export_func_deleg(obj,j);}})(obj,j));
  584. row.cells[1].appendChild(button);
  585. button2.addEventListener("click", (function (obj,j) { return function() {block_deleg(obj,j);}})(obj,j));
  586. row.cells[0].appendChild(button2);
  587. }
  588. //});
  589. }
  590. set_progress_report("Scrape complete. " + shown_hits + " HITs found (" + new_hits + " new results). " + (found_hits - shown_hits) + " HITs ignored.", false);
  591. url = url.substring(0,url.length - 1);
  592. //console.log(url);
  593. var success_flag = false;
  594. GM_xmlhttpRequest(
  595. {
  596. method: "GET",
  597. url: url,
  598. onload: function (results)
  599. {
  600. //console.log(results.responseText);
  601. rdata = $.parseJSON(results.responseText);
  602. for (i = 0; i < rids.length; i++)
  603. {
  604. text_area.rows[i+1].style.backgroundColor = GREY;
  605. if (rdata[rids[i]])
  606. {
  607. var pay = rdata[rids[i]].attrs.pay
  608. var reviews = rdata[rids[i]].reviews
  609. var average = 0;
  610. var sum = 0;
  611. var divisor = 0;
  612. var comm = rdata[rids[i]].attrs.comm;
  613. var fair = rdata[rids[i]].attrs.fair;
  614. var fast = rdata[rids[i]].attrs.fast;
  615. if (comm > 0)
  616. {
  617. sum += COMM_WEIGHT*comm;
  618. divisor += COMM_WEIGHT;
  619. }
  620. if (pay > 0)
  621. {
  622. sum += PAY_WEIGHT*pay;
  623. divisor += PAY_WEIGHT;
  624. }
  625. if (fair > 0)
  626. {
  627. sum += FAIR_WEIGHT*fair;
  628. divisor += FAIR_WEIGHT;
  629. }
  630. if (fast > 0)
  631. {
  632. sum += FAST_WEIGHT*fast;
  633. divisor += FAST_WEIGHT;
  634. }
  635. if (divisor > 0)
  636. {
  637. average = sum/divisor;
  638. }
  639. text_area.rows[i+1].cells[4].innerHTML = "<a href='"+ TO_REQ_URL+rids[i] +"' target='_blank'>" + pay + "</a>";
  640. if (reviews > 4)
  641. {
  642. if (average > 4.49)
  643. text_area.rows[i+1].style.backgroundColor = GREEN;
  644. else if (average > 3.49)
  645. text_area.rows[i+1].style.backgroundColor = LIGHTGREEN;
  646. //else if (average > 2.99)
  647. // text_area.rows[i+1].style.backgroundColor = YELLOW;
  648. else if (average > 1.99)
  649. text_area.rows[i+1].style.backgroundColor = ORANGE;
  650. else if (average > 0)
  651. text_area.rows[i+1].style.backgroundColor = RED;
  652. }
  653. }
  654. else
  655. {
  656. text_area.rows[i+1].cells[4].innerHTML = "No data";
  657. }
  658. }
  659. success_flag = true;
  660. }
  661. });
  662. if (!success_flag)
  663. for (i = 0; i < rids.length; i++) text_area.rows[i+1].style.backgroundColor = GREY;
  664. statusdetail_loop_finished = true;
  665. if (SEARCH_REFRESH>0)
  666. {
  667. wait_loop = setTimeout(function(){if (global_run) start_it();}, 1000*SEARCH_REFRESH);
  668. display_wait_time(SEARCH_REFRESH);
  669. }
  670. else
  671. {
  672. global_run = false;
  673. big_red_button.textContent = "Start";
  674. }
  675. }
  676. }
  677. }
  678.  
  679. function ignore_check(r,t){
  680. return -1 == ignore_list.map(function(item) { return item.toLowerCase(); }).indexOf(r.toLowerCase());
  681. }
  682.  
  683. function start_running()
  684. {
  685. if (big_red_button.textContent == "Start")
  686. {
  687. global_run = true;
  688. initial_url = URL_BASE;
  689. if (search_input.value.length>0)
  690. {
  691. initial_url = initial_url.replace("searchWords=", "searchWords=" + search_input.value);
  692. }
  693. if (time_input.value.replace(/[^0-9]+/g,"") != "")
  694. {
  695. SEARCH_REFRESH = Number(time_input.value);
  696. }
  697. if (page_input.value.replace(/[^0-9]+/g,"") != "")
  698. {
  699. PAGES_TO_SCRAPE = Number(page_input.value);
  700. }
  701. if (min_input.value.replace(/[^0-9]+/g,"") != "")
  702. {
  703. MINIMUM_HITS = Number(min_input.value);
  704. }
  705. if (new_time_display_input.value.replace(/[^0-9]+/g,"") != "")
  706. {
  707. save_new_results_time = Number(new_time_display_input.value);
  708. }
  709. if (reward_input.value.replace(/[^0-9]+/g,"") != "")
  710. {
  711. initial_url += "&minReward=" + reward_input.value;
  712. }
  713. else
  714. {
  715. initial_url += "&minReward=0.00";
  716. }
  717. if (qual_input.checked)
  718. {
  719. initial_url += "&qualifiedFor=on"
  720. }
  721. else
  722. {
  723. initial_url += "&qualifiedFor=off"
  724. }
  725. if (masters_input.checked)
  726. {
  727. initial_url += "&requiresMasterQual=on"
  728. }
  729. if (sort_input1.checked)
  730. {
  731. initial_url+= "&sortType=LastUpdatedTime%3A1";
  732. default_type = 0;
  733. }
  734. else if (sort_input2.checked)
  735. {
  736. initial_url+= "&sortType=NumHITs%3A1";
  737. default_type = 1;
  738. }
  739. else if (sort_input3.checked)
  740. {
  741. initial_url+= "&sortType=Reward%3A1";
  742. default_type = 0;
  743. }
  744. initial_url+="&pageNumber=1&searchSpec=HITGroupSearch"
  745. start_it();
  746. }
  747. else
  748. {
  749. global_run = false;
  750. clearTimeout(wait_loop);
  751. big_red_button.textContent = "Start";
  752. set_progress_report("Stopped", true);
  753. }
  754. }
  755.  
  756. function start_it()
  757. {
  758. statusdetail_loop_finished = false;
  759. big_red_button.textContent = "Stop";
  760. found_key_list=[];
  761. var ctime = new Date().getTime()
  762. if (ctime - last_clear_time > save_results_time*666)
  763. {
  764. var last_history=history;
  765. history = {};
  766. for (var key in last_history)
  767. {
  768. if (last_history[key].new_result<save_results_time*1000)
  769. {
  770. history[key]=last_history[key];
  771. if (last_history[key].found_this_time)
  772. {
  773. last_history[key].found_this_time = false;
  774. if (last_history[key].new_result>save_new_results_time*1000)
  775. last_history[key].initial_time = ctime-1000*save_new_results_time;
  776. }
  777. }
  778.  
  779. }
  780. last_clear_time = ctime;
  781. }
  782. next_page = 1;
  783. statusdetail_loop(initial_url);
  784. }
  785.  
  786.  
  787. function show_interface()
  788. {
  789. control_panel.style.color = BROWN;
  790. control_panel.style.fontSize = 14;
  791. control_panel.removeChild(big_red_button);
  792. control_panel.appendChild(document.createTextNode("Auto-refresh delay: "));
  793. time_input.onkeydown = function(event){if (event.keyCode == 13){start_running();}};
  794. time_input.title = "Enter search refresh delay in seconds\n" + "Enter 0 for no auto-refresh\n" + "Default is 0 (no auto-refresh)";
  795. time_input.size = 3;
  796. control_panel.appendChild(time_input);
  797. control_panel.appendChild(document.createTextNode(" "));
  798. control_panel.appendChild(document.createTextNode("Pages to scrape: "));
  799. page_input.onkeydown = function(event){if (event.keyCode == 13){start_running();}};
  800. page_input.title = "Enter number of pages to scrape\n" + "Default is 4";
  801. page_input.size = 3;
  802. control_panel.appendChild(page_input);
  803. control_panel.appendChild(document.createTextNode(" "));
  804. control_panel.appendChild(document.createTextNode("Minimum batch size: "));
  805. min_input.onkeydown = function(event){if (event.keyCode == 13){start_running();}};
  806. min_input.title = "Enter minimum HITs for batch search\n" + "Default is 100";
  807. min_input.size = 3;
  808. control_panel.appendChild(min_input);
  809. control_panel.appendChild(document.createTextNode(" "));
  810. control_panel.appendChild(document.createTextNode("New HIT highlighting: "));
  811. new_time_display_input.onkeydown = function(event){if (event.keyCode == 13){start_running();}};
  812. new_time_display_input.title = "Enter time (in seconds) to keep new HITs highlighted\n" + "Default is 300 (5 minutes)";
  813. new_time_display_input.size = 6;
  814. control_panel.appendChild(new_time_display_input);
  815. control_panel.appendChild(document.createElement("P"));
  816. control_panel.appendChild(document.createTextNode("Minimum reward: "));
  817. reward_input.size = 6;
  818. control_panel.appendChild(reward_input);
  819. control_panel.appendChild(document.createTextNode(" "));
  820.  
  821. control_panel.appendChild(document.createTextNode("Qualified"));
  822. control_panel.appendChild(qual_input);
  823. control_panel.appendChild(document.createTextNode(" "));
  824. control_panel.appendChild(document.createTextNode("Masters"));
  825. control_panel.appendChild(masters_input);
  826. control_panel.appendChild(document.createTextNode(" "));
  827. control_panel.appendChild(document.createTextNode("Sort types: "));
  828. control_panel.appendChild(sort_input1);
  829. control_panel.appendChild(document.createTextNode("Latest"));
  830. control_panel.appendChild(sort_input2);
  831. control_panel.appendChild(document.createTextNode("Most Available"));
  832. control_panel.appendChild(sort_input3);
  833. control_panel.appendChild(document.createTextNode("Amount"));
  834. control_panel.appendChild(document.createElement("P"));
  835. control_panel.appendChild(search_input);
  836. search_input.size = 20;
  837. search_input.title = "Enter a search term to include\n" + "Default is blank (no included terms)";
  838. search_input.placeholder="Enter search terms here";
  839. control_panel.appendChild(document.createTextNode(" "));
  840. big_red_button.textContent = "Start";
  841. big_red_button.onclick = function(){start_running();};
  842. reset_blocks.textContent = "Edit blocklist";
  843. reset_blocks.onclick = function(){
  844. console.log("in");
  845. var div = $("#block_div");
  846. var textarea = $("#block_text");
  847. textarea.val(ignore_list.join());
  848. $("#block_div").show();
  849. };
  850. control_panel.appendChild(big_red_button);
  851. control_panel.appendChild(reset_blocks);
  852. control_panel.appendChild(document.createTextNode(" "));
  853. control_panel.appendChild(progress_report);
  854. control_panel.appendChild(document.createElement("P"));
  855. text_area.style.fontWeight = 400;
  856. text_area.createCaption().innerHTML = "HITs";
  857. var col_heads = ['Requester','Title','Reward','HITs Available','TO pay',"Accept HIT"];
  858. var row = text_area.createTHead().insertRow(0);
  859. text_area.caption.style.fontWeight = 800;
  860. text_area.caption.style.color = BROWN;
  861. if (default_text_size > 10)
  862. text_area.cellPadding=Math.min(Math.max(1,Math.floor((default_text_size-10)/2)),5);
  863. //console.log(text_area.cellPadding);
  864. //text_area.cellPadding=2;
  865. text_area.caption.style.fontSize = 28;
  866. text_area.rows[0].style.fontWeight = 800;
  867. text_area.rows[0].style.color = BROWN;
  868. for (i=0; i<col_heads.length; i++)
  869. {
  870. var this_cell = row.insertCell(i);
  871. this_cell.innerHTML = col_heads[i];
  872. this_cell.style.fontSize = 14;
  873. if (i > 1)
  874. this_cell.style.textAlign = 'center';
  875. }
  876. control_panel.appendChild(text_area);
  877. }
  878.  
  /********HIT EXPORT ADDITIONS*****/

  // True while the export textarea shows the raw template for editing
  // (toggled by edit_func; hide_func will not hide the popup while it is set).
  var EDIT = false;
  // The HIT most recently handed to export_func; re-rendered by edit_func/default_func.
  var HIT;

  var TO_BASE = "http://turkopticon.ucsd.edu/";
  var API_BASE = "https://mturk-api.istrack.in/";
  var API_URL = API_BASE + "multi-attrs.php?ids=";

  // Built-in BBCode export template. Placeholders in {braces} are filled in by
  // apply_template. Declared with `var`: assigning to an undeclared name creates
  // an implicit global and throws a ReferenceError under strict mode.
  var DEFAULT_TEMPLATE = [
      '[table][tr][td][b]Title:[/b] [url={prev_link}][COLOR=blue]{title}[/COLOR][/url]\n',
      '[b]Requester:[/b] [url=https://www.mturk.com/mturk/searchbar?selectedSearchType=hitgroups&requesterId={rid}][COLOR=blue]{requester}[/COLOR][/url]',
      ' [{rid}] ([url=' + TO_BASE + '{rid}][COLOR=blue]TO[/COLOR][/url])',
      '\n[b]TO Ratings:[/b]{to_stuff}',
      '\n[b]Description:[/b] {description}',
      '\n[b]Time:[/b] {time}',
      '\n[b]Hits Available:[/b] {hits}',
      '\n[b]Reward:[/b] [COLOR=green][b]{reward}[/b][/COLOR]',
      '\n[b]Qualifications:[/b] {quals}[/td][/tr][/table]'
  ].join('');

  // Template currently in effect (user-saved value, or the default below).
  var TEMPLATE;
  // Read from storage key 'HITScraper Easylink'; not otherwise used in this section.
  var EASYLINK;

  // Without the GM storage API there is nothing to load, so fall back to the default.
  if (typeof GM_getValue === 'undefined') {
      TEMPLATE = null;
  }
  else {
      TEMPLATE = GM_getValue('HITScraper Template');
      EASYLINK = GM_getValue('HITScraper Easylink');
  }
  // `== null` also matches undefined, i.e. no template has been saved yet.
  if (TEMPLATE == null) {
      TEMPLATE = DEFAULT_TEMPLATE;
  }
  909.  
  /**
   * Build the ratings-API request URL for one or more requester ids.
   *
   * @param {string} rai - value appended verbatim after API_URL's "ids=" parameter.
   * @returns {string} API_URL followed by rai.
   */
  function buildXhrUrl(rai) {
      return API_URL + rai;
  }
  916.  
  /**
   * Fetch a URL with a blocking (synchronous) GET and parse the body as JSON.
   *
   * @param {string} url - address to request.
   * @returns {*} the parsed JSON value, or the string "TO DOWN" when opening,
   *          sending or parsing throws.
   */
  function makeXhrQuery(url) {
      var request = new XMLHttpRequest();
      var outcome;
      try {
          // Third argument false = synchronous: send() returns only once the response is in.
          request.open('GET', url, false);
          request.send(null);
          outcome = $.parseJSON(request.response);
      }
      catch (failure) {
          outcome = "TO DOWN";
      }
      return outcome;
  }
  928.  
  /**
   * Fill in a name-only record for every requester whose API entry is empty.
   *
   * @param {Object} rai  - map of requester id to an array-like whose first
   *                        element exposes the requester name via innerHTML.
   * @param {Object} resp - API response keyed by requester id; mutated in place.
   * @returns {Object} the same resp object.
   */
  function getNamesForEmptyResponses(rai, resp) {
      for (var rid in rai) {
          // Loose comparison kept on purpose so every value that previously
          // counted as "empty" (anything == "") still does.
          if (rai.hasOwnProperty(rid) && resp[rid] == "") {
              // Build the record directly. Concatenating the name into a JSON
              // string and parsing it throws as soon as the name contains a
              // double quote or a backslash.
              resp[rid] = { name: rai[rid][0].innerHTML };
          }
      }
      return resp;
  }
  937.  
  /**
   * List every enumerable property name of an object.
   *
   * Uses for-in without an own-property filter, so enumerable names
   * inherited through the prototype chain are included as well.
   *
   * @param {Object} obj - object to inspect.
   * @returns {string[]} property names in enumeration order.
   */
  function getKeys(obj) {
      var names = [];
      var name;
      for (name in obj) {
          names[names.length] = name;
      }
      return names;
  }
  945.  
  /**
   * (item, index)-shaped adapter that forwards only the item to export_func.
   *
   * @param {Object} item  - HIT record to export.
   * @param {number} index - accepted for the callback signature; not used.
   */
  function export_func_deleg(item,index) {
      export_func(item);
  }
  950.  
  /**
   * (item, index)-shaped adapter that forwards only the item to block.
   *
   * @param {Object} item  - HIT record whose requester should be blocked.
   * @param {number} index - accepted for the callback signature; not used.
   */
  function block_deleg(item,index) {
      block(item);
  }
  955.  
  /**
   * Add a HIT's requester to the ignore list, persist the list and notify the user.
   *
   * The requester is appended only when no existing entry matches it
   * case-insensitively, so blocking the same requester repeatedly no longer
   * grows the stored list with duplicates.
   *
   * @param {Object} hit - HIT record; only hit["requester"] is read.
   */
  function block(hit){
      var requester = hit["requester"];
      var wanted = String(requester).toLowerCase();
      var alreadyListed = false;
      for (var k = 0; k < ignore_list.length; k++) {
          if (String(ignore_list[k]).toLowerCase() === wanted) {
              alreadyListed = true;
              break;
          }
      }
      if (!alreadyListed)
          ignore_list.push(requester);
      // Persist unconditionally, as before, so storage always mirrors the in-memory list.
      GM_setValue("scraper_ignore_list",ignore_list);
      console.log(GM_getValue("scraper_ignore_list"));
      alert(requester+" ignored. Re-scrape");
  }
  963.  
  /**
   * Open the export popup for a HIT with the rendered template pre-selected.
   *
   * Records the HIT in the shared HIT variable, resets the edit button label,
   * renders the template into the textarea, shows the popup and selects the
   * text so it can be copied.
   *
   * @param {Object} item - HIT record to export.
   */
  function export_func(item) {
      HIT = item;
      edit_button.textContent = 'Edit Template';
      apply_template(item);

      var popupStyle = div.style;
      popupStyle.display = 'block';
      textarea.select();
  }
  971.  
  // Display label for a Turkopticon attribute key; unknown keys are shown as-is.
  function tpl_attr_label(key) {
      switch (key) {
          case "comm": return "Communicativity";
          case "pay":  return "Generosity";
          case "fast": return "Promptness";
          case "fair": return "Fairness";
          default:     return key;
      }
  }

  // BBCode colour for a floored score: 0-1 red, 2 orange, 3 yellow, anything else green.
  function tpl_score_color(num) {
      switch (num) {
          case 0:
          case 1:  return "red";
          case 2:  return "orange";
          case 3:  return "yellow";
          default: return "green";
      }
  }

  // One "\n<coloured symbols><white filler symbols> <score> <label>" line for a single attribute.
  function tpl_rating_line(key, rawScore) {
      var num = Math.floor(rawScore);
      var line = (num > 0 ? "\n[color=" + tpl_score_color(num) + "]" : "\n");
      var i = 0;
      for (; i < num; i++) {
          line += "[b]" + symbol + "[/b]";
      }
      line += (num > 0 ? "[/color]" : "");
      // Pad up to five symbols in white so the rows line up.
      if (i < 5) {
          line += "[color=white]";
          for (; i < 5; i++)
              line += "[b]" + symbol + "[/b]";
          line += "[/color]";
      }
      return line + " " + Number(rawScore).toFixed(2) + " " + tpl_attr_label(key);
  }

  /**
   * Render TEMPLATE for one HIT and put the result in the export textarea.
   *
   * When the template contains {to_text} or {to_stuff}, the requester's
   * ratings are fetched through buildXhrUrl/makeXhrQuery and turned into BBCode.
   *
   * Differences from the previous implementation:
   *  - placeholder values are inserted literally, so "$&", "$$" etc. inside a
   *    title or description are no longer interpreted as replacement patterns;
   *  - no undeclared `t` variable is assigned;
   *  - a response with no entry for the requester id is treated as "No TO"
   *    instead of raising a TypeError;
   *  - the branch for an "acc_link" placeholder was removed: that name was
   *    never in the placeholder list, so the branch could not run.
   *
   * @param {Object} hit_data - HIT record; reads the properties named in
   *                            `vars` below (rid and requester are also used
   *                            to build the Turkopticon links).
   */
  function apply_template(hit_data) {
      var txt = TEMPLATE;

      var vars = ['title', 'requester', 'rid', 'description', 'reward', 'quals', 'prev_link', 'time', 'hits', 'to_stuff', 'to_text'];

      var wantsText = txt.indexOf('{to_text}') >= 0;
      var wantsStuff = txt.indexOf('{to_stuff}') >= 0;
      var rid = hit_data["rid"];

      // Only hit the network when the template actually shows ratings.
      var resp = null;
      if (wantsText || wantsStuff) {
          resp = makeXhrQuery(buildXhrUrl(rid));
      }

      var toDown = (resp === "TO DOWN");
      // Ratings record for this requester; stays null when there is no usable response.
      var entry = (resp == null || toDown) ? null : resp[rid];

      var reportUrlOpen = "[URL=\"" + TO_BASE + "report?requester[amzn_id]=" + hit_data['rid'] + "&requester[amzn_name]=" + hit_data['requester'] + "\"]";

      var toText = "";
      var toStuff = "";
      if (toDown) {
          toStuff = " [URL=\"" + TO_BASE + hit_data['rid'] + "\"]TO down.[/URL]";
          toText = toStuff;
      }
      else if (entry == null || entry.attrs == null) {
          toText = " No TO ";
          toStuff = " No TO " + reportUrlOpen + "(Submit a new TO rating for this requester)[/URL]";
      }
      else {
          var numResp = entry.reviews;
          var toData = "";
          for (var key in entry.attrs) {
              toText += tpl_rating_line(key, entry.attrs[key]);
              toData += Number(entry.attrs[key]).toFixed(2) + ",";
          }
          // {to_text} carries its own review count and link only when {to_stuff} is absent.
          if (!wantsStuff) {
              toText += "\nNumber of Reviews: " + numResp + "\n" + reportUrlOpen + "(Submit a new TO rating for this requester)[/URL]";
          }
          // slice(0,-1) drops the trailing comma from the score list.
          toStuff = '\n[img]http://data.istrack.in/to/' + toData.slice(0, -1) + '.png[/img]';
          if (wantsStuff && !wantsText) {
              toStuff += toText;
          }
          toStuff += "\nNumber of Reviews: " + numResp;
          toStuff += reportUrlOpen;
          toStuff += "\n(Submit a new TO rating for this requester)[/URL]";
      }

      // Substitute placeholders one after another, in list order.
      for (var v = 0; v < vars.length; v++) {
          var name = vars[v];
          var value;
          if (name == "to_stuff") {
              value = toStuff;
          }
          else if (name == "to_text") {
              value = toText;
          }
          else if (name == "prev_link") {
              value = "https://www.mturk.com" + hit_data[name];
          }
          else {
              value = hit_data[name];
          }
          // split/join replaces every occurrence literally (no "$" pattern expansion).
          txt = txt.split('{' + name + '}').join(String(value));
      }
      textarea.value = txt;
  }
  1078.  
  /**
   * Hide the given element unless the template is currently being edited.
   *
   * @param {HTMLElement} div - element whose style.display is set to 'none'.
   */
  function hide_func(div) {
      // While EDIT is set, clicks in the textarea must not close the popup.
      if (EDIT != false) {
          return;
      }
      div.style.display = 'none';
  }
  1083.  
  /**
   * Toggle the export popup between "edit template" and "show result" modes.
   *
   * Entering edit mode loads the raw TEMPLATE into the textarea and enables
   * the save button. Leaving it adopts the textarea content as the new
   * TEMPLATE and re-renders the current HIT.
   */
  function edit_func() {
      if (EDIT != true) {
          // Switch to editing: show the raw template.
          console.log("Editing");
          EDIT = true;
          edit_button.textContent = 'Show Changes';
          save_button.disabled = false;
          textarea.value = TEMPLATE;
          return;
      }

      // Leave editing: take over the edited text and render it.
      EDIT = false;
      TEMPLATE = textarea.value;
      edit_button.textContent = 'Edit Template';
      apply_template(HIT);
  }
  1099.  
  /**
   * Discard the saved template and go back to DEFAULT_TEMPLATE.
   *
   * Deletes the stored 'HITScraper Template' value, leaves edit mode and
   * re-renders the current HIT with the default template.
   */
  function default_func() {
      GM_deleteValue('HITScraper Template');

      TEMPLATE = DEFAULT_TEMPLATE;
      EDIT = false;
      edit_button.textContent = 'Edit Template';

      apply_template(HIT);
  }
  1107.  
  /**
   * Persist the current template under the 'HITScraper Template' key.
   *
   * While in edit mode the textarea content is adopted as TEMPLATE first, so
   * unsaved edits are included; otherwise TEMPLATE is stored unchanged.
   */
  function save_func() {
      TEMPLATE = EDIT ? textarea.value : TEMPLATE;
      GM_setValue('HITScraper Template', TEMPLATE);
  }
  1113.  
  // ---- Export popup: a fixed-position panel holding the textarea and its buttons ----
  // NOTE(review): `div` is also declared near the top of the file for the
  // blocklist editor; this statement rebinds the name to the export popup.
  // Confirm nothing still needs the earlier element through this variable.
  var div = document.createElement('div');
  var textarea = document.createElement('textarea');
  // Created here but not attached or configured anywhere in this section.
  var div2 = document.createElement('label');

  // Panel placement and colours.
  div.style.position = 'fixed';
  div.style.width = '500px';
  div.style.height = '235px';
  div.style.left = '50%';
  div.style.right = '50%';
  div.style.margin = '-250px 0px 0px -250px';
  div.style.top = '300px';
  div.style.padding = '5px';
  div.style.border = '2px';
  div.style.backgroundColor = 'black';
  div.style.color = 'white';
  div.style.zIndex = '100';

  textarea.style.padding = '2px';
  textarea.style.width = '500px';
  textarea.style.height = '200px';
  // Tooltip listing the placeholders apply_template substitutes. It used to
  // advertise {hit}, but the placeholder that is actually replaced is {hits}.
  textarea.title = '{title}\n{requester}\n{rid}\n{description}\n{reward}\n{quals}\n{prev_link}\n{time}\n{hits}\n{to_stuff}\n{to_text}';

  // textContent must be set before appending children: assigning it replaces
  // everything already inside the element.
  div.textContent = 'Press Ctrl+C to copy to clipboard. Click textarea to close';
  div.style.fontSize = '12px';
  div.appendChild(textarea);

  var edit_button = document.createElement('button');
  var save_button = document.createElement('button');
  var default_button = document.createElement('button');
  var easy_button = document.createElement('button');

  // "Edit Template" / "Show Changes" toggle (see edit_func).
  edit_button.textContent = 'Edit Template';
  edit_button.setAttribute('id', 'edit_button');
  edit_button.style.height = '18px';
  edit_button.style.width = '100px';
  edit_button.style.fontSize = '10px';
  edit_button.style.paddingLeft = '3px';
  edit_button.style.paddingRight = '3px';
  edit_button.style.backgroundColor = 'white';

  // Persists the template (see save_func); starts disabled until editing begins.
  save_button.textContent = 'Save Template';
  save_button.setAttribute('id', 'save_button');
  save_button.style.height = '18px';
  save_button.style.width = '100px';
  save_button.style.fontSize = '10px';
  save_button.style.paddingLeft = '3px';
  save_button.style.paddingRight = '3px';
  save_button.style.backgroundColor = 'white';
  save_button.style.marginLeft = '5px';
  save_button.disabled = true;

  // No click handler is registered for this button in this section.
  easy_button.textContent = 'Change Adfly Url';
  easy_button.setAttribute('id', 'easy_button');
  easy_button.style.height = '18px';
  easy_button.style.width = '100px';
  easy_button.style.fontSize = '10px';
  easy_button.style.paddingLeft = '3px';

  // Restores the built-in template (see default_func).
  default_button.textContent = ' D ';
  default_button.setAttribute('id', 'default_button');
  default_button.style.height = '18px';
  default_button.style.width = '20px';
  default_button.style.fontSize = '10px';
  default_button.style.paddingLeft = '3px';
  default_button.style.paddingRight = '3px';
  default_button.style.backgroundColor = 'white';
  default_button.style.marginLeft = '5px';
  default_button.title = 'Return default template';

  div.appendChild(edit_button);
  div.appendChild(save_button);
  div.appendChild(default_button);
  div.appendChild(easy_button);

  // Hidden until export_func shows it.
  div.style.display = 'none';
  textarea.addEventListener("click", function() {hide_func(div);}, false);
  edit_button.addEventListener("click", function() {edit_func();}, false);
  save_button.addEventListener("click", function() {save_func();}, false);
  default_button.addEventListener("click", function() {default_func();}, false);
  document.body.insertBefore(div, document.body.firstChild);