HIT Scraper WITH EXPORT

Snag HITs.

当前为 2014-08-31 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name HIT Scraper WITH EXPORT
  3. // @author Kerek and TJ
  4. // @description Snag HITs.
  5. // Based in part on code from mmmturkeybacon Export Mturk History and mmmturkeybacon Color Coded Search with Checkpoints
  6. // @namespace http://userscripts.org/users/536998
  7. // @match https://www.mturk.com/mturk/findhits?match=true#hit_scraper*
  8. // @match https://www.mturk.com/mturk/findhits?match=true?hit_scraper*
  9. // @version 1.3.0.14
  10. // @grant GM_xmlhttpRequest
  11. // @grant GM_getValue
  12. // @grant GM_setValue
  13. // @grant GM_deleteValue
  14. // @require http://code.jquery.com/jquery-latest.min.js
  15. // ==/UserScript==
  16.  
  // Requesters whose HITs are hidden from the results. Edit as desired;
  // names are compared case-insensitively.
  var default_list = ["oscar smith", "Diamond Tip Research LLC", "jonathon weber", "jerry torres", "Crowdsource", "we-pay-you-fast", "turk experiment", "jon brelig"];
  // Start from a COPY of the defaults: the ignore list is pushed to / spliced
  // at runtime, and aliasing default_list would silently rewrite the defaults.
  var ignore_list = default_list.slice();
  // A previously saved list (if any) replaces the defaults.
  if (GM_getValue("scraper_ignore_list"))
    ignore_list = GM_getValue("scraper_ignore_list");

  // Symbol used by the HIT export feature.
  var symbol = "☭";

  // When true, extra pages are searched if many HITs were skipped (ignored),
  // which helps fill out the results. Set to false to disable this behavior.
  var correct_for_skips = true;

  // Weights of the four TO ratings used for row coloring. By default pay counts
  // twice as much as fairness; communication and speed are not counted.
  var COMM_WEIGHT = 0;
  var PAY_WEIGHT = 10;
  var FAIR_WEIGHT = 5;
  var FAST_WEIGHT = 0;

  // Display your HIT database records (requester/title seen before) if applicable.
  var check_hitDB = true;

  // Default text size for result rows.
  var default_text_size=11;

  // Namespace for the IndexedDB helpers, plus vendor-prefixed fallbacks.
  var HITStorage = {};
  var indexedDB = window.indexedDB || window.webkitIndexedDB ||
    window.mozIndexedDB;
  window.IDBTransaction = window.IDBTransaction || window.webkitIDBTransaction || window.mozIDBTransaction;
  window.IDBKeyRange = window.IDBKeyRange || window.webkitIDBKeyRange || window.mozIDBKeyRange;
  HITStorage.IDBTransactionModes = { "READ_ONLY": "readonly", "READ_WRITE": "readwrite", "VERSION_CHANGE": "versionchange" };
  var IDBKeyRange = window.IDBKeyRange;

  HITStorage.indexedDB = {};
  // Most recently opened database handle (set by the lookup helpers).
  HITStorage.indexedDB.db = null;
  54.  
  /** Default IndexedDB error handler: logs the failing event (or exception). */
  HITStorage.indexedDB.onerror = function(e) {
    console.log(e);
  };

  // Version passed to indexedDB.open() for the "HITDB" database.
  var v=4;

  /**
   * Shared implementation of checkTitle / checkRequester.
   *
   * Opens "HITDB", looks `value` up in index `indexName` of the "HIT" object
   * store and stores the boolean outcome in history[button][flagName].
   * The database handle is closed on every path, including lookup failures.
   *
   * @param {string} indexName  name of the index on the "HIT" store
   * @param {*}      value      key to look up in that index
   * @param {string} button     key of the entry in the global `history` map
   * @param {string} flagName   property of the history entry to set (true/false)
   */
  HITStorage.indexedDB._checkIndex = function(indexName, value, button, flagName) {
    var request = indexedDB.open("HITDB", v);
    request.onsuccess = function(e) {
      HITStorage.indexedDB.db = e.target.result;
      var db = HITStorage.indexedDB.db;
      if (!db.objectStoreNames.contains("HIT"))
      {
        db.close();
        return;
      }
      try
      {
        var trans = db.transaction(["HIT"], HITStorage.IDBTransactionModes.READ_ONLY);
        var store = trans.objectStore("HIT");
        // index() throws if the index does not exist and get() throws on an
        // invalid key; both are handled below so the handle is not leaked.
        var lookup = store.index(indexName).get(value);
        lookup.onsuccess = function(event)
        {
          var found = event.target.result !== undefined;
          console.log(value + (found ? ' found' : ' not found'));
          // Guard against the entry having been removed from history meanwhile.
          if (history[button])
            history[button][flagName] = found;
          db.close();
        };
        lookup.onerror = function(event)
        {
          HITStorage.indexedDB.onerror(event);
          db.close();
        };
      }
      catch (err)
      {
        HITStorage.indexedDB.onerror(err);
        db.close();
      }
    };
    request.onerror = HITStorage.indexedDB.onerror;
  };

  /**
   * Records in history[button].titledb whether the "title" index of the "HIT"
   * store contains `title`.
   *
   * @param {string} title   HIT title to look up
   * @param {string} button  key of the entry in the global `history` map
   */
  HITStorage.indexedDB.checkTitle = function(title,button) {
    HITStorage.indexedDB._checkIndex("title", title, button, "titledb");
  };

  /**
   * Records in history[button].reqdb whether the "requesterId" index of the
   * "HIT" store contains `id`.
   *
   * @param {string} id      requester id to look up
   * @param {string} button  key of the entry in the global `history` map
   */
  HITStorage.indexedDB.checkRequester = function(id,button) {
    HITStorage.indexedDB._checkIndex("requesterId", id, button, "reqdb");
  };
  123.  
  // ---- Search settings (start_running overwrites several of these from the form) ----
  var PAGES_TO_SCRAPE = 3;
  var MINIMUM_HITS = 100;
  var SEARCH_REFRESH=0;
  var URL_BASE = "/mturk/searchbar?searchWords=&selectedSearchType=hitgroups";
  var initial_url = URL_BASE;
  var TO_REQ_URL = "http://turkopticon.ucsd.edu/reports?id=";

  // ---- Result bookkeeping ----
  var found_key_list=[];
  var last_clear_time = new Date().getTime();
  var searched_once = false;
  var save_new_results_time = 120;
  var save_results_time = 3600;
  var default_type = 0;
  var cur_loc = window.location.href;

  /**
   * Creates an INPUT element and copies the given properties onto it, in the
   * order they are listed (so `type` is applied before `value`/`checked`).
   */
  function make_input(props)
  {
    var element = document.createElement("INPUT");
    for (var prop in props)
      element[prop] = props[prop];
    return element;
  }

  // ---- Form controls (laid out later by show_interface) ----
  var time_input = make_input({value: 0});
  var page_input = make_input({value: 3});
  var min_input = make_input({});
  var new_time_display_input = make_input({value: 300});
  var reward_input = make_input({});
  var qual_input = make_input({type: "checkbox", checked: true});
  var masters_input = make_input({type: "checkbox"});
  var sort_input1 = make_input({type: "radio", name: "sort_type", value: "latest", checked: true});
  var sort_input2 = make_input({type: "radio", name: "sort_type", value: "most"});
  var sort_input3 = make_input({type: "radio", name: "sort_type", value: "amount"});

  var search_input = make_input({});

  // ---- Page constants and request pacing (milliseconds) ----
  var LINK_BASE = "https://www.mturk.com";
  var BACKGROUND_COLOR = "rgb(19, 19, 19)";
  var STATUSDETAIL_DELAY = 250;
  var MPRE_DELAY = 3000;

  var next_page = 1;

  // ---- Palette used for rows and cells ----
  var GREEN = '#66CC66'; // > 4
  var LIGHTGREEN = '#ADFF2F'; // > 3 GREEN YELLOW
  var YELLOW = '#FFD700';
  var ORANGE = '#FF9900'; // > 2
  var RED = '#FF3030'; // <= 2
  var BLUE = '#C0D9D9'; // no TO
  var GREY = 'lightGrey';
  var BROWN = '#94704D';
  var DARKGREY = '#9F9F9F';
  $('body').css('background', BACKGROUND_COLOR);

  // ---- Turkopticon endpoints ----
  var API_PROXY_BASE = 'https://api.turkopticon.istrack.in/';
  var API_MULTI_ATTRS_URL = API_PROXY_BASE + 'multi-attrs.php?ids=';
  var REVIEWS_BASE = 'http://turkopticon.ucsd.edu/';

  // ---- Replace the page content with the scraper's control panel ----
  var control_panel_HTML = '<div id="control_panel" style="margin: 0 auto 0 auto;' +
    'border-bottom: 1px solid #000000; margin-bottom: 5px; ' +
    'background-color: ' + BACKGROUND_COLOR + ';"></div>';
  $('body > :not(#control_panel)').hide(); //hide all nodes directly under the body
  $('body').prepend(control_panel_HTML);

  var control_panel = document.getElementById("control_panel");
  var big_red_button = document.createElement("BUTTON");
  var reset_blocks = document.createElement("BUTTON");
  var progress_report = document.createTextNode("Stopped");
  var text_area = document.createElement("TABLE");
  big_red_button.textContent = "Show Interface";
  big_red_button.onclick = function(){show_interface();};
  // show_interface() removes this button from the panel again, so it has to be
  // attached before the call below.
  control_panel.appendChild(big_red_button);

  show_interface();

  // ---- Run state ----
  var global_run = false;
  var statusdetail_loop_finished = false;
  var date_header = "";
  var history = {};
  var wait_loop;
  222.  
  /**
   * Updates the status text shown in the control panel.
   *
   * The text is only replaced while a scrape is running (global_run) unless
   * `force` is set.
   *
   * @param {string}  text   new status text
   * @param {boolean} force  write the text even when no scrape is running
   */
  function set_progress_report(text, force)
  {
    var may_write = (global_run == true) || (force == true);
    if (!may_write)
      return;
    progress_report.textContent = text;
  }
  230.  
  /**
   * @returns {string} the status text currently held by progress_report
   */
  function get_progress_report()
  {
    var current_text = progress_report.textContent;
    return current_text;
  }
  235.  
  /**
   * While a scrape is running, polls every 500 ms until
   * statusdetail_loop_finished is set, then resets the main button to "Start"
   * and reports "Finished". Does nothing when no scrape is running.
   */
  function wait_until_stopped()
  {
    if (global_run != true)
      return;
    if (statusdetail_loop_finished != true)
    {
      // Not done yet: check again in half a second.
      setTimeout(function(){wait_until_stopped();}, 500);
      return;
    }
    big_red_button.textContent = "Start";
    set_progress_report("Finished", false);
  }
  251.  
  /**
   * Shows a once-per-second countdown ("Searching again in N seconds") in the
   * status text while a scrape is running.
   *
   * An existing countdown in the status text is updated in place; otherwise
   * the countdown sentence is appended. Reschedules itself until 1 is reached.
   *
   * @param {number} wait_time  seconds left until the next search
   */
  function display_wait_time(wait_time)
  {
    if (global_run != true)
      return;

    var countdown = "Searching again in " + wait_time + " seconds";
    var shown = get_progress_report();
    var updated;
    if (shown.indexOf("Searching again in") !== -1)
      updated = shown.replace(/Searching again in \d+ seconds/ , countdown);
    else
      updated = shown + " " + countdown + ".";
    set_progress_report(updated, false);

    if (wait_time > 1)
      setTimeout(function(){display_wait_time(wait_time-1);}, 1000);
  }
  267.  
  /**
   * Debug helper: logs every element of `ar` with its index on a single line,
   * in the form "id 0 is <value> id 1 is <value> ".
   *
   * @param {Array} ar  array (or array-like) to dump
   */
  function dispArr(ar)
  {
    var pieces = [];
    for (var idx = 0; idx < ar.length; idx++)
      pieces.push("id " + idx + " is " + ar[idx] + " ");
    console.log(pieces.join(""));
  }
  277.  
  /**
   * Extracts every HIT group from one search-results page and merges it into
   * the global `history` map.
   *
   * For each requester link found, the i-th title / reward / duration /
   * description / qualification anchors are read positionally. The key
   * (requester name + title + reward + group id) is appended to
   * found_key_list. New keys get a fresh history entry and trigger the HIT
   * database lookups; known keys only have their age (new_result), hit count
   * and found_this_time flag refreshed.
   *
   * @param {jQuery} $src  parsed HTML of a search-results page
   */
  function scrape($src)
  {
    var $requester = $src.find('a[href^="/mturk/searchbar?selectedSearchType=hitgroups&requester"]');
    var $title = $src.find('a[class="capsulelink"]');
    var $reward = $src.find('span[class="reward"]');
    var $qualified = $src.find('a[href^="/mturk/notqualified?"]');
    var $times = $src.find('a[id^="duration_to_complete"]');
    var $descriptions = $src.find('a[id^="description"]');
    var $quals = $src.find('a[id^="qualificationsRequired"]');
    var $mixed = $src.find('a[href^="/mturk/preview?"],a[href^="/mturk/notqualified?"]');

    // Group ids the worker is not qualified for.
    var not_qualified_group_IDs=[];
    $qualified.each(function(){
      not_qualified_group_IDs.push($(this).attr('href').replace("/mturk/notqualified?hitId=",""));
    });

    // Group ids in page order, first occurrence only; index j below pairs the
    // j-th requester link with the j-th distinct id.
    var listy =[];
    $mixed.each(function(){
      var groupy = $(this).attr('href');
      groupy = groupy.replace("/mturk/notqualified?hitId=","");
      groupy = groupy.replace("/mturk/preview?groupId=","");
      if (listy.indexOf(groupy) == -1)
        listy.push(groupy);
    });

    for (var j = 0; j < $requester.length; j++)
    {
      var $hits = $requester.eq(j).parent().parent().parent().parent().parent().parent().find('td[class="capsule_field_text"]');
      var requester_name = $requester.eq(j).text().trim();
      var requester_link = $requester.eq(j).attr('href');
      var group_ID=listy[j];
      var preview_link = "/mturk/preview?groupId=" + group_ID;
      var title = $title.eq(j).text().trim();
      var reward = $reward.eq(j).text().trim();
      var hits = $hits.eq(4).text().trim();
      var time = $times.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
      var description = $descriptions.eq(j).parent()[0].nextSibling.nextSibling.innerHTML;
      var requester_id = requester_link.replace('/mturk/searchbar?selectedSearchType=hitgroups&requesterId=','');
      var accept_link = preview_link.replace('preview','previewandaccept');

      /*HIT SCRAPER ADDITION*/
      // Qualification rows; row 0 is skipped. Masters qualifications are
      // highlighted with BBCode for the export.
      var qElements = $quals.eq(j).parent().parent().parent().find('tr');
      var qualifications = [];
      for (var i = 1; i < qElements.length; i++) {
        var qual_text = qElements[i].childNodes[1].textContent.trim().replace(/\s+/g, ' ');
        qualifications.push(qual_text.indexOf("Masters") != -1 ? "[color=red][b]"+qual_text+"[/b][/color]" : qual_text);
      }
      var qualList = (qualifications.join(', ') ? qualifications.join(', ') : "None");

      // Declared locally: the original leaked `key` as an implicit global.
      var key = requester_name+title+reward+group_ID;
      found_key_list.push(key);
      if (history[key] == undefined)
      {
        var now = new Date().getTime();
        history[key] = {
          requester: requester_name,
          title: title,
          description: description,
          reward: reward,
          hits: hits,
          req_link: requester_link,
          quals: qualList,
          prev_link: preview_link,
          rid: requester_id,
          acc_link: accept_link,
          // On the very first search nothing is flagged as new: the entry is
          // back-dated by the highlight window.
          new_result: searched_once ? 0 : 1000*save_new_results_time,
          qualified: not_qualified_group_IDs.indexOf(group_ID) === -1,
          found_this_time: true,
          initial_time: searched_once ? now : now-1000*save_new_results_time,
          reqdb: "",
          titledb: "",
          time: time
        };
        // Asynchronous lookups; they fill in reqdb / titledb later.
        HITStorage.indexedDB.checkRequester(requester_id,key);
        HITStorage.indexedDB.checkTitle(title,key);
      }
      else
      {
        history[key].new_result = new Date().getTime() - history[key].initial_time;
        history[key].found_this_time = true;
        history[key].hits = hits;
      }
    }
  }
  375.  
  /**
   * Drives one scrape: fetches result pages one after another and, once there
   * is no further page to fetch, renders the collected HITs.
   *
   * Does nothing unless a scrape is running (global_run).
   *
   * @param {string} next_URL  URL of the next results page, or "" to render
   */
  function statusdetail_loop(next_URL)
  {
    if (global_run != true)
      return;
    if (next_URL.length != 0)
      statusdetail_fetch_page(next_URL);
    else
      statusdetail_render();
  }

  /**
   * Fetches and scrapes one results page, decides whether another page is
   * needed and schedules the next statusdetail_loop() step.
   */
  function statusdetail_fetch_page(next_URL)
  {
    $.get(next_URL, function(data)
    {
      var $src = $(data);
      var maxpagerate = $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")');
      if (maxpagerate.length != 0)
      {
        // Rate limited: retry the same page after a longer pause.
        console.log("MPRE");
        setTimeout(function(){statusdetail_loop(next_URL);}, MPRE_DELAY);
        return;
      }

      set_progress_report("Processing page " + next_page, false);
      scrape($src);
      var $next_URL = $src.find('a[href^="/mturk/viewsearchbar"]:contains("Next")');
      next_URL = ($next_URL.length != 0) ? $next_URL.attr("href") : "";
      next_page++;

      var j;
      if (default_type == 1)
      {
        // Sorted by batch size: stop as soon as any batch is below the minimum.
        var hmin = MINIMUM_HITS+1;
        for (j = 0; j < found_key_list.length; j++)
        {
          if (history[found_key_list[j]].hits < hmin)
          {
            next_URL = "";
            next_page = -1;
            break;
          }
        }
      }
      else if (next_page > PAGES_TO_SCRAPE && correct_for_skips)
      {
        // Allow extra pages to make up for ignored requesters (10 HITs per page).
        var skipped_hits = 0;
        var added_pages = 0;
        for (j = 0; j < found_key_list.length; j++)
        {
          var obj = history[found_key_list[j]];
          if (! ignore_check(obj.requester,obj.title))
            skipped_hits++;
        }
        added_pages = Math.floor(skipped_hits/10);
        if (skipped_hits%10 >6)
          added_pages++;
        if (next_page > PAGES_TO_SCRAPE + added_pages)
        {
          next_URL = "";
          next_page = -1;
        }
      }
      else if (next_page > PAGES_TO_SCRAPE)
      {
        next_URL = "";
        next_page = -1;
      }
      setTimeout(function(){statusdetail_loop(next_URL);}, STATUSDETAIL_DELAY);
    });
  }

  /** Creates one of the small per-row action buttons. */
  function statusdetail_make_button(label, title, font_size)
  {
    var action_button = document.createElement('button');
    action_button.textContent = label;
    action_button.title = title;
    action_button.style.height = '14px';
    action_button.style.width = '30px';
    action_button.style.fontSize = font_size;
    action_button.style.border = '1px solid';
    action_button.style.padding = '0px';
    action_button.style.backgroundColor = 'transparent';
    return action_button;
  }

  /**
   * Rebuilds the results table from found_key_list, requests the Turkopticon
   * ratings for the shown requesters and either schedules the next search
   * (SEARCH_REFRESH > 0) or stops the run.
   */
  function statusdetail_render()
  {
    searched_once = true;
    var found_hits = found_key_list.length;
    var shown_hits = 0;
    var new_hits = 0;
    var rids = [];
    // Row elements in the same order as rids, so the asynchronous ratings
    // callback never has to index into a table that may have been rebuilt.
    var rows = [];
    var i, j;

    // Remove every row except the header.
    for (i = text_area.rows.length - 1; i>0; i--)
      text_area.deleteRow(i);

    for (j = 0; j < found_key_list.length; j++)
    {
      var obj = history[found_key_list[j]];
      if (!(ignore_check(obj.requester,obj.title) && obj.found_this_time))
        continue;
      ++shown_hits;
      //hit export will update col_heads[1]
      var col_heads = ["<a href='"+ LINK_BASE+obj.req_link +"' target='_blank'>" + obj.requester + "</a>","<a href='"+ LINK_BASE+obj.prev_link +"' target='_blank' title='"+ obj.description +"'>" + obj.title + "</a>",obj.reward,obj.hits,"TO down","<a href='"+ LINK_BASE+obj.acc_link +"' target='_blank'>Accept</a>"];
      var row = text_area.insertRow(text_area.rows.length);
      rids.push(obj.rid);
      rows.push(row);
      if (check_hitDB)
      {
        col_heads.push("R");
        col_heads.push("T");
      }
      if (!obj.qualified)
      {
        col_heads.push("Not Qualified");
      }
      for (i=0; i<col_heads.length; i++)
      {
        var this_cell = row.insertCell(i);
        this_cell.style.fontSize = default_text_size;
        this_cell.innerHTML = col_heads[i];
        if(i>1)
          this_cell.style.textAlign = 'center';
        if (check_hitDB)
        {
          // Columns 6/7: requester / title already present in the HIT database.
          if (i==6)
            this_cell.style.backgroundColor = obj.reqdb ? GREEN : RED;
          else if (i==7)
            this_cell.style.backgroundColor = obj.titledb ? GREEN : RED;
          else if (i==8)
            this_cell.style.backgroundColor = DARKGREY;
        }
        else if (i==6)
          this_cell.style.backgroundColor = DARKGREY;
      }
      // Emphasize HITs first seen within the highlight window.
      if (obj.new_result < 1000*save_new_results_time)
      {
        new_hits++;
        for (i = 0; i < col_heads.length; i++)
        {
          row.cells[i].style.fontSize = default_text_size + 1;
          row.cells[i].style.fontWeight = "bold";
        }
      }
      var export_button = statusdetail_make_button('vB', 'Export this HIT description as vBulletin formatted text', '8px'); //HIT SCRAPER ADDITION
      var block_button = statusdetail_make_button('☢', 'Add requester to block list', '10px'); //BUTTON TO BLOCK REQUESTER
      export_button.addEventListener("click", (function (obj,j) { return function() {export_func_deleg(obj,j);}})(obj,j));
      row.cells[1].appendChild(export_button);
      block_button.addEventListener("click", (function (obj,j) { return function() {block_deleg(obj,j);}})(obj,j));
      row.cells[0].appendChild(block_button);
    }
    set_progress_report("Scrape complete. " + shown_hits + " HITs found (" + new_hits + " new results). " + (found_hits - shown_hits) + " HITs ignored.", false);

    // Rows stay grey until (and unless) the ratings arrive.
    for (i = 0; i < rows.length; i++)
      rows[i].style.backgroundColor = GREY;
    // Nothing to look up when no HIT is shown.
    if (rids.length > 0)
    {
      GM_xmlhttpRequest(
      {
        method: "GET",
        url: API_MULTI_ATTRS_URL + rids.join(','),
        onload: function (results)
        {
          statusdetail_apply_ratings(rids, rows, results.responseText);
        }
      });
    }

    statusdetail_loop_finished = true;
    if (SEARCH_REFRESH>0)
    {
      wait_loop = setTimeout(function(){if (global_run) start_it();}, 1000*SEARCH_REFRESH);
      display_wait_time(SEARCH_REFRESH);
    }
    else
    {
      global_run = false;
      big_red_button.textContent = "Start";
    }
  }

  /**
   * Writes the Turkopticon pay rating into column 4 of each row and colors the
   * row by the weighted average of the ratings (only when the requester has
   * more than 4 reviews). An unparsable response leaves the rows untouched.
   *
   * @param {string[]} rids          requester ids, one per row
   * @param {Array}    rows          table rows, same order as rids
   * @param {string}   responseText  JSON body returned by the ratings API
   */
  function statusdetail_apply_ratings(rids, rows, responseText)
  {
    var rdata;
    try
    {
      rdata = $.parseJSON(responseText);
    }
    catch (err)
    {
      console.log(err);
      return;
    }
    if (!rdata)
      return;

    for (var i = 0; i < rids.length; i++)
    {
      var row = rows[i];
      row.style.backgroundColor = GREY;
      var entry = rdata[rids[i]];
      if (!entry)
      {
        row.cells[4].innerHTML = "No data";
        continue;
      }
      var pay = entry.attrs.pay;
      var reviews = entry.reviews;
      var comm = entry.attrs.comm;
      var fair = entry.attrs.fair;
      var fast = entry.attrs.fast;
      var average = 0;
      var sum = 0;
      var divisor = 0;
      // Only ratings that are present (> 0) take part in the weighted average.
      if (comm > 0)
      {
        sum += COMM_WEIGHT*comm;
        divisor += COMM_WEIGHT;
      }
      if (pay > 0)
      {
        sum += PAY_WEIGHT*pay;
        divisor += PAY_WEIGHT;
      }
      if (fair > 0)
      {
        sum += FAIR_WEIGHT*fair;
        divisor += FAIR_WEIGHT;
      }
      if (fast > 0)
      {
        sum += FAST_WEIGHT*fast;
        divisor += FAST_WEIGHT;
      }
      if (divisor > 0)
      {
        average = sum/divisor;
      }
      row.cells[4].innerHTML = "<a href='"+ TO_REQ_URL+rids[i] +"' target='_blank'>" + pay + "</a>";
      if (reviews > 4)
      {
        if (average > 4.49)
          row.style.backgroundColor = GREEN;
        else if (average > 3.49)
          row.style.backgroundColor = LIGHTGREEN;
        else if (average > 1.99)
          row.style.backgroundColor = ORANGE;
        else if (average > 0)
          row.style.backgroundColor = RED;
      }
    }
  }
  626.  
  /**
   * Tells whether a HIT should be shown.
   *
   * @param {string} r  requester name
   * @param {string} t  HIT title (currently not used)
   * @returns {boolean} true when the requester is NOT on ignore_list
   *                    (case-insensitive exact match), false when it is
   */
  function ignore_check(r,t){
    var lowered_list = ignore_list.map(function(item) { return item.toLowerCase(); });
    var wanted = r.toLowerCase();
    var position = lowered_list.indexOf(wanted);
    return position == -1;
  }
  630.  
  /**
   * Reads a numeric setting from a form input.
   *
   * @param {HTMLInputElement} input    the form control
   * @param {number}           current  value to keep when the input is unusable
   * @returns {number} the parsed value, or `current` when the input contains
   *                   no digits or is not a finite number (e.g. "3 pages")
   */
  function read_numeric_setting(input, current)
  {
    if (input.value.replace(/[^0-9]+/g,"") == "")
      return current;
    var parsed = Number(input.value);
    return isFinite(parsed) ? parsed : current;
  }

  /**
   * Click handler of the main button.
   *
   * When the button reads "Start": takes over the form values, builds the
   * search URL (initial_url) and starts a scrape. Otherwise stops the running
   * scrape and cancels the pending auto-refresh.
   */
  function start_running()
  {
    if (big_red_button.textContent != "Start")
    {
      // Stop request.
      global_run = false;
      clearTimeout(wait_loop);
      big_red_button.textContent = "Start";
      set_progress_report("Stopped", true);
      return;
    }

    global_run = true;
    initial_url = URL_BASE;
    if (search_input.value.length>0)
    {
      // Encoded so spaces, '&', '#', ... in the search words cannot break the query string.
      initial_url = initial_url.replace("searchWords=", "searchWords=" + encodeURIComponent(search_input.value));
    }

    // Unusable input keeps the previous value instead of turning into NaN
    // (a NaN page count would disable the page limit altogether).
    SEARCH_REFRESH = read_numeric_setting(time_input, SEARCH_REFRESH);
    PAGES_TO_SCRAPE = read_numeric_setting(page_input, PAGES_TO_SCRAPE);
    MINIMUM_HITS = read_numeric_setting(min_input, MINIMUM_HITS);
    save_new_results_time = read_numeric_setting(new_time_display_input, save_new_results_time);

    if (reward_input.value.replace(/[^0-9]+/g,"") != "")
    {
      initial_url += "&minReward=" + encodeURIComponent(reward_input.value.trim());
    }
    else
    {
      initial_url += "&minReward=0.00";
    }
    initial_url += qual_input.checked ? "&qualifiedFor=on" : "&qualifiedFor=off";
    if (masters_input.checked)
    {
      initial_url += "&requiresMasterQual=on";
    }

    // default_type 1 makes the scrape stop by batch size instead of page count.
    if (sort_input1.checked)
    {
      initial_url+= "&sortType=LastUpdatedTime%3A1";
      default_type = 0;
    }
    else if (sort_input2.checked)
    {
      initial_url+= "&sortType=NumHITs%3A1";
      default_type = 1;
    }
    else if (sort_input3.checked)
    {
      initial_url+= "&sortType=Reward%3A1";
      default_type = 0;
    }
    initial_url+="&pageNumber=1&searchSpec=HITGroupSearch";
    start_it();
  }
  703.  
  /**
   * Begins one scrape pass.
   *
   * Resets the per-pass state and the button label. When more than
   * save_results_time*666 ms have passed since the last clean-up, rebuilds
   * `history`, keeping only entries whose age (new_result) is below
   * save_results_time seconds; kept entries that were found have their
   * found_this_time flag cleared and, if older than the highlight window,
   * their initial_time moved to exactly one highlight window ago.
   * Finally starts the page loop at page 1.
   */
  function start_it()
  {
    statusdetail_loop_finished = false;
    big_red_button.textContent = "Stop";
    found_key_list=[];

    var now = new Date().getTime();
    var cleanup_due = (now - last_clear_time > save_results_time*666);
    if (cleanup_due)
    {
      var previous = history;
      history = {};
      for (var key in previous)
      {
        var entry = previous[key];
        if (!(entry.new_result<save_results_time*1000))
          continue;               // too old: drop it
        history[key] = entry;
        if (!entry.found_this_time)
          continue;
        entry.found_this_time = false;
        if (entry.new_result>save_new_results_time*1000)
          entry.initial_time = now-1000*save_new_results_time;
      }
      last_clear_time = now;
    }

    next_page = 1;
    statusdetail_loop(initial_url);
  }
  733.  
  734.  
  /**
   * Builds the control panel: the settings form, the Start and
   * "Edit blocklist" buttons, the status text and the (initially empty)
   * results table with its header row.
   *
   * Expects big_red_button to be a child of control_panel when called (it is
   * removed and re-appended at its final position).
   */
  function show_interface()
  {
    // Pressing Enter in a settings field starts (or stops) the scrape.
    var start_on_enter = function(event){if (event.keyCode == 13){start_running();}};
    var add_text = function(text){control_panel.appendChild(document.createTextNode(text));};
    var add_break = function(){control_panel.appendChild(document.createElement("P"));};
    // Appends "label" followed by a text input wired to start_on_enter.
    var add_setting = function(label, input, title, size){
      add_text(label);
      input.onkeydown = start_on_enter;
      input.title = title;
      input.size = size;
      control_panel.appendChild(input);
    };

    control_panel.style.color = BROWN;
    control_panel.style.fontSize = 14;
    control_panel.removeChild(big_red_button);

    // First line: numeric settings.
    add_setting("Auto-refresh delay: ", time_input, "Enter search refresh delay in seconds\n" + "Enter 0 for no auto-refresh\n" + "Default is 0 (no auto-refresh)", 3);
    add_text(" ");
    // The tooltip used to say "Default is 4" although the field is preset to 3.
    add_setting("Pages to scrape: ", page_input, "Enter number of pages to scrape\n" + "Default is 3", 3);
    add_text(" ");
    add_setting("Minimum batch size: ", min_input, "Enter minimum HITs for batch search\n" + "Default is 100", 3);
    add_text(" ");
    add_setting("New HIT highlighting: ", new_time_display_input, "Enter time (in seconds) to keep new HITs highlighted\n" + "Default is 300 (5 minutes)", 6);
    add_break();

    // Second line: reward, qualification filters and sort order.
    add_text("Minimum reward: ");
    reward_input.size = 6;
    control_panel.appendChild(reward_input);
    add_text(" ");

    add_text("Qualified");
    control_panel.appendChild(qual_input);
    add_text(" ");
    add_text("Masters");
    control_panel.appendChild(masters_input);
    add_text(" ");
    add_text("Sort types: ");
    control_panel.appendChild(sort_input1);
    add_text("Latest");
    control_panel.appendChild(sort_input2);
    add_text("Most Available");
    control_panel.appendChild(sort_input3);
    add_text("Amount");
    add_break();

    // Third line: search box, buttons and status text.
    control_panel.appendChild(search_input);
    search_input.size = 20;
    search_input.title = "Enter a search term to include\n" + "Default is blank (no included terms)";
    search_input.placeholder="Enter search terms here";
    add_text(" ");
    big_red_button.textContent = "Start";
    big_red_button.onclick = function(){start_running();};
    reset_blocks.textContent = "Edit blocklist";
    reset_blocks.onclick = function(){
      var answer = prompt("Current list:\n"+ignore_list.join()+"\nEnter items to add or remove in the box below, separated by comma.");
      // prompt() returns null when the dialog is cancelled.
      if (answer === null)
        return;
      var entries = answer.split(",");
      for (var n = 0; n < entries.length; n++){
        var name = entries[n].trim().toLowerCase();
        // Skip blanks ("a,,b", trailing comma, empty input) instead of
        // adding an empty requester name to the list.
        if (name === "")
          continue;
        // Names already on the list are removed, all others are added.
        var position = ignore_list.map(function(item) { return item.toLowerCase(); }).indexOf(name);
        if (position > -1)
          ignore_list.splice(position,1);
        else
          ignore_list.push(name);
      }
      if (confirm("New ignore list: "+ignore_list.join()+". Save?")){
        GM_setValue("scraper_ignore_list",ignore_list);
        alert("Ignore list updated, please re-scrape");
      }
    };
    control_panel.appendChild(big_red_button);
    control_panel.appendChild(reset_blocks);
    add_text(" ");
    control_panel.appendChild(progress_report);
    add_break();

    // Results table: caption plus header row.
    text_area.style.fontWeight = 400;
    text_area.createCaption().innerHTML = "HITs";
    var col_heads = ['Requester','Title','Reward','HITs Available','TO pay',"Accept HIT"];
    var row = text_area.createTHead().insertRow(0);
    text_area.caption.style.fontWeight = 800;
    text_area.caption.style.color = BROWN;
    // Larger text gets more cell padding (1..5).
    if (default_text_size > 10)
      text_area.cellPadding=Math.min(Math.max(1,Math.floor((default_text_size-10)/2)),5);
    text_area.caption.style.fontSize = 28;
    text_area.rows[0].style.fontWeight = 800;
    text_area.rows[0].style.color = BROWN;
    for (var i=0; i<col_heads.length; i++)
    {
      var this_cell = row.insertCell(i);
      this_cell.innerHTML = col_heads[i];
      this_cell.style.fontSize = 14;
      if (i > 1)
        this_cell.style.textAlign = 'center';
    }
    control_panel.appendChild(text_area);
  }
  838.  
  /********HIT EXPORT ADDITIONS*****/

  var EDIT = false;
  var HIT;

  var TO_BASE = "http://turkopticon.ucsd.edu/";
  var API_BASE = "https://api.turkopticon.istrack.in/";
  var API_URL = API_BASE + "multi-attrs.php?ids=";

  // vBulletin (BBCode) export template; {placeholders} name fields of a HIT.
  // Declared with var: it used to be created as an implicit global.
  var DEFAULT_TEMPLATE = '[table][tr][td][b]Title:[/b] [url={prev_link}][COLOR=blue]{title}[/COLOR][/url]\n';
  DEFAULT_TEMPLATE += '[b]Requester:[/b] [url=https://www.mturk.com/mturk/searchbar?selectedSearchType=hitgroups&requesterId={rid}][COLOR=blue]{requester}[/COLOR][/url]';
  DEFAULT_TEMPLATE += ' [{rid}] ([url='+TO_BASE+'{rid}][COLOR=blue]TO[/COLOR][/url])';
  DEFAULT_TEMPLATE += '\n[b]TO Ratings:[/b]{to_stuff}';
  DEFAULT_TEMPLATE += '\n[b]Description:[/b] {description}';
  DEFAULT_TEMPLATE += '\n[b]Time:[/b] {time}';
  DEFAULT_TEMPLATE += '\n[b]Reward:[/b] [COLOR=green][b]{reward}[/b][/COLOR]';
  DEFAULT_TEMPLATE += '\n[b]Qualifications:[/b] {quals}[/td][/tr][/table]';

  // User-customized template / easy-link setting, when script storage is available.
  var TEMPLATE;
  var EASYLINK;

  if (typeof GM_getValue === 'undefined')
    TEMPLATE = null;
  else {
    TEMPLATE = GM_getValue('HITScraper Template');
    EASYLINK = GM_getValue('HITScraper Easylink');
  }
  // Nothing stored (null or undefined): fall back to the built-in template.
  if (TEMPLATE == null) {
    TEMPLATE = DEFAULT_TEMPLATE;
  }
  868.  
  /**
   * Build the Turkopticon ratings-API URL for a requester.
   *
   * @param rai  value appended verbatim to API_URL (callers in this file pass
   *             a requester id).
   * @returns {string} API_URL followed by `rai`.
   */
  function buildXhrUrl(rai) {
    return API_URL + rai;
  }
  875.  
  /**
   * Fetch `url` with a synchronous GET and parse the body as JSON.
   *
   * @param {string} url  address to request.
   * @returns the parsed JSON value, or the sentinel string "TO DOWN" when
   *          opening, sending or parsing throws.
   */
  function makeXhrQuery(url) {
    var request = new XMLHttpRequest();
    var outcome;
    try {
      // Third argument `false` makes the request blocking.
      request.open('GET', url, false);
      request.send(null);
      outcome = $.parseJSON(request.response);
    }
    catch (failure) {
      outcome = "TO DOWN";
    }
    return outcome;
  }
  887.  
  /**
   * Fill in a `{name: ...}` placeholder for every requester whose entry in the
   * response is empty.
   *
   * @param {Object} rai   map of requester id -> array-like whose first element
   *                       exposes the requester name as `innerHTML`.
   * @param {Object} resp  response map keyed by requester id; mutated in place.
   * @returns {Object} the same `resp` object.
   */
  function getNamesForEmptyResponses(rai, resp) {
    for (var rid in rai) {
      // Loose `== ""` is kept on purpose so the set of values treated as
      // "empty" stays exactly what it was.
      if (Object.prototype.hasOwnProperty.call(rai, rid) && resp[rid] == "") {
        // Build the object directly. The previous version spliced the name
        // into a JSON string and parsed it, which threw (or mis-parsed) for
        // names containing a double quote or a backslash.
        resp[rid] = { name: rai[rid][0].innerHTML };
      }
    }
    return resp;
  }
  896.  
  /**
   * List the enumerable property names of `obj` in for-in order.
   * Inherited enumerable properties are included (no own-property filter).
   *
   * @param obj  value to enumerate; null/undefined yield an empty list.
   * @returns {string[]} the collected names.
   */
  function getKeys(obj) {
    var names = [];
    var name;
    for (name in obj) {
      names[names.length] = name;
    }
    return names;
  }
  904.  
  /**
   * Two-argument adapter around export_func.
   *
   * @param item   HIT record forwarded to export_func.
   * @param index  accepted but ignored.
   */
  function export_func_deleg(item, index) {
    export_func(item);
  }
  909.  
  /**
   * Two-argument adapter around block.
   *
   * @param item   HIT record forwarded to block.
   * @param index  accepted but ignored.
   */
  function block_deleg(item, index) {
    block(item);
  }
  914.  
  /**
   * Add a HIT's requester to the ignore list and persist the list.
   *
   * The ignore list is documented at the top of the script as case
   * insensitive, so a requester that is already listed (ignoring case) is not
   * appended again; previously every click added another copy to the stored
   * list.
   *
   * @param {Object} hit  HIT record; only `hit["requester"]` is read.
   */
  function block(hit) {
    var requester = hit["requester"];
    var wanted = String(requester).toLowerCase();

    var alreadyIgnored = false;
    for (var i = 0; i < ignore_list.length; i++) {
      if (String(ignore_list[i]).toLowerCase() === wanted) {
        alreadyIgnored = true;
        break;
      }
    }

    if (!alreadyIgnored) {
      ignore_list.push(requester);
      GM_setValue("scraper_ignore_list", ignore_list);
    }
    console.log(GM_getValue("scraper_ignore_list"));
    // The new entry only takes effect on the next scrape.
    alert(requester + " ignored. Re-scrape");
  }
  922.  
  /**
   * Open the export dialog for a HIT.
   *
   * Remembers the HIT in the shared `HIT` variable, resets the edit button
   * label, renders the template into the textarea, shows the dialog and
   * selects the text.
   *
   * @param item  HIT record to export.
   */
  function export_func(item) {
    HIT = item;
    edit_button.textContent = 'Edit Template';

    apply_template(HIT);

    div.style.display = 'block';
    textarea.select();
  }
  930.  
  /**
   * Render the current TEMPLATE for one HIT and place the result in the export
   * textarea.
   *
   * Supported placeholders: {title} {requester} {rid} {description} {reward}
   * {quals} {prev_link} {time} {hits} {to_stuff} {to_text}.
   * The Turkopticon (TO) API is only queried when the template contains
   * {to_stuff} or {to_text}.
   *
   * Differences from the previous implementation:
   *  - placeholders are substituted in a single pass with a replacer function,
   *    so `$`-sequences in HIT data (e.g. "$$", "$&") are inserted literally
   *    and placeholder-looking text inside a value is not expanded again;
   *  - a TO response with no entry for this requester is reported as "No TO"
   *    instead of throwing;
   *  - no implicit global is created for the placeholder regex;
   *  - the branch for `acc_link`, which was never in the placeholder list and
   *    therefore unreachable, is gone.
   *
   * @param {Object} hit_data  HIT record; properties are read by placeholder
   *                           name, plus `rid` and `requester` for TO links.
   */
  function apply_template(hit_data) {
    var txt = TEMPLATE;
    var rid = hit_data["rid"];
    var wantsStuff = txt.indexOf('{to_stuff}') >= 0;
    var wantsText = txt.indexOf('{to_text}') >= 0;

    // One rating symbol, and the opening tag of the "submit a review" link
    // that every TO variant below shares.
    var star = "[b]" + symbol + "[/b]";
    var reportTag = "[URL=\"" + TO_BASE + "report?requester[amzn_id]=" + rid +
      "&requester[amzn_name]=" + hit_data['requester'] + "\"]";

    // Render one TO attribute as a five-symbol bar followed by its value and label.
    function ratingLine(key, rawValue) {
      var labels = { comm: "Communicativity", pay: "Generosity", fast: "Promptness", fair: "Fairness" };
      var colors = { 0: "red", 1: "red", 2: "orange", 3: "yellow" };
      var label = Object.prototype.hasOwnProperty.call(labels, key) ? labels[key] : key;
      var num = Math.floor(rawValue);
      var color = Object.prototype.hasOwnProperty.call(colors, num) ? colors[num] : "green";

      var line = (num > 0 ? "\n[color=" + color + "]" : "\n");
      var i = 0;
      for (; i < num; i++) {
        line += star;
      }
      line += (num > 0 ? "[/color]" : "");
      // Pad the bar to five symbols with white (blank-looking) ones.
      if (i < 5) {
        line += "[color=white]";
        for (; i < 5; i++) {
          line += star;
        }
        line += "[/color]";
      }
      return line + " " + Number(rawValue).toFixed(2) + " " + label;
    }

    var resp = null;
    if (wantsText || wantsStuff) {
      resp = makeXhrQuery(buildXhrUrl(rid));
    }

    var toText = "";
    var toStuff = "";
    // This requester's record in the TO response, if there is a usable response.
    var entry = (resp == null || resp === "TO DOWN") ? null : resp[rid];

    if (resp === "TO DOWN") {
      toStuff = " [URL=\"" + TO_BASE + rid + "\"]TO down.[/URL]";
      toText = toStuff;
    }
    else if (entry == null || entry.attrs == null) {
      toText = " No TO ";
      toStuff = " No TO " + reportTag + "(Submit a new TO rating for this requester)[/URL]";
    }
    else {
      var numResp = entry.reviews;
      var toData = [];
      for (var key in entry.attrs) {
        toText += ratingLine(key, entry.attrs[key]);
        toData.push(Number(entry.attrs[key]).toFixed(2));
      }
      // {to_text} on its own carries the review count and the report link;
      // when {to_stuff} is present those are emitted there instead.
      if (!wantsStuff) {
        toText += "\nNumber of Reviews: " + numResp + "\n" + reportTag +
          "(Submit a new TO rating for this requester)[/URL]";
      }
      toStuff = '\n[img]http://data.istrack.in/to/' + toData.join(",") + '.png[/img]';
      // Fold the text bars into {to_stuff} only when {to_text} is not also used.
      if (wantsStuff && !wantsText) {
        toStuff += toText;
      }
      toStuff += "\nNumber of Reviews: " + numResp;
      toStuff += reportTag;
      toStuff += "\n(Submit a new TO rating for this requester)[/URL]";
    }

    var placeholder = /\{(title|requester|rid|description|reward|quals|prev_link|time|hits|to_stuff|to_text)\}/g;
    textarea.value = txt.replace(placeholder, function (match, name) {
      if (name === "to_stuff") {
        return toStuff;
      }
      if (name === "to_text") {
        return toText;
      }
      if (name === "prev_link") {
        // Stored links are site-relative.
        return "https://www.mturk.com" + hit_data[name];
      }
      return String(hit_data[name]);
    });
  }
  1037.  
  /**
   * Hide the given element unless the template is being edited.
   *
   * @param div  element whose `style.display` is set to 'none'.
   */
  function hide_func(div) {
    // Loose comparison kept: anything that `== false` counts as "not editing".
    var editing = !(EDIT == false);
    if (editing) {
      return;
    }
    div.style.display = 'none';
  }
  1042.  
  /**
   * Toggle the export dialog between "rendered output" and "edit template".
   *
   * Entering edit mode shows the raw TEMPLATE in the textarea and enables the
   * save button. Leaving it adopts the textarea content as the new TEMPLATE
   * and re-renders the current HIT.
   */
  function edit_func() {
    if (!(EDIT == true)) {
      // Not editing yet: switch the textarea over to the raw template.
      console.log("Editing");
      EDIT = true;
      edit_button.textContent = 'Show Changes';
      save_button.disabled = false;
      textarea.value = TEMPLATE;
      return;
    }

    // Currently editing: take over the edited text and show the result.
    EDIT = false;
    TEMPLATE = textarea.value;
    edit_button.textContent = 'Edit Template';
    apply_template(HIT);
  }
  1058.  
  /**
   * Revert to the built-in template: delete the stored template, leave edit
   * mode, reset the edit button label and re-render the current HIT.
   */
  function default_func() {
    GM_deleteValue('HITScraper Template');

    EDIT = false;
    TEMPLATE = DEFAULT_TEMPLATE;
    edit_button.textContent = 'Edit Template';

    apply_template(HIT);
  }
  1066.  
  /**
   * Persist the template under the 'HITScraper Template' key.
   * While editing, the textarea content becomes the template first.
   */
  function save_func() {
    TEMPLATE = EDIT ? textarea.value : TEMPLATE;
    GM_setValue('HITScraper Template', TEMPLATE);
  }
  1072.  
  // ---- Export dialog: a fixed-position panel holding the rendered template
  // ---- and the template edit/save/default buttons.
  var div = document.createElement('div');
  var textarea = document.createElement('textarea');
  var div2 = document.createElement('label');

  div.style.position = 'fixed';
  div.style.width = '500px';
  div.style.height = '235px';
  div.style.left = '50%';
  div.style.right = '50%';
  div.style.margin = '-250px 0px 0px -250px';
  div.style.top = '300px';
  div.style.padding = '5px';
  div.style.border = '2px';
  div.style.backgroundColor = 'black';
  div.style.color = 'white';
  div.style.zIndex = '100';

  textarea.style.padding = '2px';
  textarea.style.width = '500px';
  textarea.style.height = '200px';
  // Tooltip listing the placeholders apply_template understands. The entry
  // used to read {hit}, but the placeholder that is actually substituted is
  // {hits}.
  textarea.title = '{title}\n{requester}\n{rid}\n{description}\n{reward}\n{quals}\n{prev_link}\n{time}\n{hits}\n{to_stuff}\n{to_text}';

  // The text is set before the children are appended, so it stays above them.
  div.textContent = 'Press Ctrl+C to copy to clipboard. Click textarea to close';
  div.style.fontSize = '12px';
  div.appendChild(textarea);

  var edit_button = document.createElement('button');
  var save_button = document.createElement('button');
  var default_button = document.createElement('button');
  var easy_button = document.createElement('button');

  /**
   * Apply the label, id and the styling shared by the dialog buttons.
   *
   * @param button  button element to configure.
   * @param id      value for the element's id attribute.
   * @param label   text shown on the button.
   * @param width   CSS width.
   * @param extras  additional style properties to set (own keys only).
   */
  function style_export_button(button, id, label, width, extras) {
    button.textContent = label;
    button.setAttribute('id', id);
    button.style.height = '18px';
    button.style.width = width;
    button.style.fontSize = '10px';
    button.style.paddingLeft = '3px';
    for (var prop in extras) {
      if (Object.prototype.hasOwnProperty.call(extras, prop)) {
        button.style[prop] = extras[prop];
      }
    }
  }

  style_export_button(edit_button, 'edit_button', 'Edit Template', '100px',
    { paddingRight: '3px', backgroundColor: 'white' });
  style_export_button(save_button, 'save_button', 'Save Template', '100px',
    { paddingRight: '3px', backgroundColor: 'white', marginLeft: '5px' });
  // easy_button gets only the shared styling; no click handler is attached to
  // it in this section.
  style_export_button(easy_button, 'easy_button', 'Change Adfly Url', '100px', {});
  style_export_button(default_button, 'default_button', ' D ', '20px',
    { paddingRight: '3px', backgroundColor: 'white', marginLeft: '5px' });
  default_button.title = 'Return default template';

  div.appendChild(edit_button);
  div.appendChild(save_button);
  div.appendChild(default_button);
  div.appendChild(easy_button);
  // Saving is enabled once the user enters edit mode (see edit_func).
  save_button.disabled = true;

  // The dialog starts hidden; export_func reveals it.
  div.style.display = 'none';
  textarea.addEventListener("click", function() {hide_func(div);}, false);
  edit_button.addEventListener("click", function() {edit_func();}, false);
  save_button.addEventListener("click", function() {save_func();}, false);
  default_button.addEventListener("click", function() {default_func();}, false);
  document.body.insertBefore(div, document.body.firstChild);