CH Block Using HIT Scraper's Blocklist

Block requesters and HITs on regular MTurk search results pages using your blocklist from 'HIT Scraper With Export'. Also highlights favorite requesters from your includelist.

  1. // ==UserScript==
  2. // @name CH Block Using HIT Scraper's Blocklist
  3. // @description Block requesters and HITs on regular MTurk search results pages using your blocklist from 'HIT Scraper With Export'. Also highlights favorite requesters from your includelist.
  4. // @version 3.0c
  5. // @author clickhappier
  6. // @namespace clickhappier
  7. // @include https://www.mturk.com/mturk/findhits*
  8. // @include https://www.mturk.com/mturk/viewhits*
  9. // @include https://www.mturk.com/mturk/sorthits*
  10. // @include https://www.mturk.com/mturk/searchbar*selectedSearchType=hitgroups*
  11. // @include https://www.mturk.com/mturk/viewsearchbar*selectedSearchType=hitgroups*
  12. // @include https://www.mturk.com/mturk/sortsearchbar*HITGroup*
  13. // @include https://www.mturk.com/mturk/preview*
  14. // @include https://www.mturk.com/mturk/accept*
  15. // @include https://www.mturk.com/mturk/return*
  16. // @include https://www.mturk.com/mturk/submit*
  17. // @exclude https://www.mturk.com/*hit_scraper*
  18. // @require http://code.jquery.com/jquery-latest.min.js
  19. // @grant GM_log
  20. // ==/UserScript==
  21.  
  22.  
  23. // adaptations from Kerek+Tjololo's 'HIT Scraper WITH EXPORT': https://greasyfork.org/en/scripts/2002-hit-scraper-with-export
  24.  
  25.  
  26. // use localStorage instead of GM's storage
  27. //if (!this.GM_getValue || (this.GM_getValue.toString && this.GM_getValue.toString().indexOf("not supported")>-1)) { // these grants aren't declared, so the answer's always no
  // localStorage-backed stand-ins for the GM_* storage functions (these grants
  // are not declared in the metadata block, so the native versions are unavailable).

  // Read a stored value; falls back to `fallback` when nothing truthy is stored.
  this.GM_getValue = function (name, fallback) {
    var stored = localStorage[name];
    return stored ? stored : fallback;
  };

  // Store a value under `name` and hand the same value back to the caller.
  this.GM_setValue = function (name, data) {
    localStorage[name] = data;
    return data;
  };

  // Remove the entry stored under `name`.
  this.GM_deleteValue = function (name) {
    var outcome = localStorage.removeItem(name);
    return outcome;
  };
  37. //}
  38.  
  39.  
  // load ignore (block) list
  console.log("blocklist script loaded");
  // Start from an empty array, exactly like include_list, so ignore_check()
  // can always call ignore_list.map() even if nothing could be read back.
  var ignore_list = [];
  // Seed storage with a placeholder entry the first time the script runs;
  // GM_getValue treats an empty stored string as "missing".
  if ( !GM_getValue("scraper_ignore_list") )
  {
    GM_setValue("scraper_ignore_list","nothing blocked yet");
  }
  // Entries are stored as one string separated by the ^ character.
  if ( GM_getValue("scraper_ignore_list") )
  {
    ignore_list = GM_getValue("scraper_ignore_list").split('^');
  }
  52.  
  /**
   * Check a requester name and a HIT title against the blocklist
   * (wildcard support from feihtality).
   *
   * Comparison is case-insensitive and treats any run of whitespace as a single
   * space. Blocklist entries without an asterisk must equal the whole requester
   * name or title. Entries containing an asterisk are glob patterns: a single
   * `*` matches any run of characters, two or more consecutive asterisks are
   * matched literally, and the pattern must cover the whole string. An entry
   * that is only "*" is ignored so it cannot block everything.
   *
   * Reads the global `ignore_list` array.
   *
   * @param {string} r - requester name
   * @param {string} t - HIT title
   * @returns {boolean} false if the HIT should be blocked (so
   *   `!ignore_check(r, t)` is true), true if it should not be blocked
   */
  function ignore_check(r,t){
    var normalize = function(text) { return text.toLowerCase().replace(/\s+/g," "); };

    // Turn one normalized glob entry into an anchored regular expression.
    var globToRegExp = function(glob) {
      var escaped = glob.replace(/([+${}[\](\)^|?.\\])/g, "\\$1"); // escape special characters
      var body = escaped
        .replace(/([^*]|^)[*](?!\*)/g, "$1.*") // lone asterisk -> wildcard
        .replace(/\*{2,}/g, function(s) { return s.replace(/\*/g, "\\*"); }); // consecutive asterisks stay literal
      return new RegExp("^" + body + "$");
    };

    // separate glob patterns from literal strings
    var blockExact = [];
    var blockWilds = [];
    ignore_list.forEach(function(entry) {
      var item = normalize(entry);
      if (item.indexOf("*") === -1) {
        blockExact.push(item);
      } else if (item.length > 1) {
        blockWilds.push(item);
      }
    });

    // normalize the candidates once instead of once per pattern
    var requester = normalize(r);
    var title = normalize(t);

    // run default matching first
    if (blockExact.indexOf(requester) !== -1 || blockExact.indexOf(title) !== -1) {
      return false;
    }

    // if no match, try globs
    for (var i = 0; i < blockWilds.length; i++) {
      var pattern = globToRegExp(blockWilds[i]);
      if (pattern.test(requester) || pattern.test(title)) {
        return false;
      }
    }
    return true;
  }
  79.  
  80.  
  // load include list: seed storage with a placeholder on first run, then
  // split the stored ^-separated string into an array (empty array otherwise)
  var include_list = (function () {
    var storageKey = "scraper_include_list";
    if ( !GM_getValue(storageKey) ) {
      GM_setValue(storageKey, "nothing includelisted yet");
    }
    var saved = GM_getValue(storageKey);
    return saved ? saved.split('^') : [];
  })();
  92.  
  /**
   * Check the include list for a requester name and HIT title.
   * Matching is exact, ignoring case and collapsing whitespace runs to one space.
   * Reads the global `include_list` array.
   *
   * @param {string} r - requester name
   * @param {string} t - HIT title
   * @returns {boolean} false if the HIT should be highlighted (so
   *   `!include_check(r, t)` is true), true if it should not be highlighted
   */
  function include_check(r,t)
  {
    var squash = function(text) { return text.toLowerCase().replace(/\s+/g," "); };
    var favorites = include_list.map(function(entry) { return squash(entry); });
    var requesterListed = favorites.indexOf(squash(r)) !== -1;
    var titleListed = favorites.indexOf(squash(t)) !== -1;
    return !(requesterListed || titleListed);
  }
  104.  
  105.  
  // identify HITs, requesters, and titles
  // requester links and title links are looked up page-wide; hideBlocked() pairs them by position
  var $title = $('a[class="capsulelink"]');
  var $requester = $('a[href^="/mturk/searchbar?selectedSearchType=hitgroups&requester"]');
  // Use the parent td of each capsule table for compatibility with
  // 'mmmturkeybacon Color-Coded Search' / 'mmmturkeybacon Color-Coded Search with Checkpoints',
  // which hides/shows the table inside the parent td.
  var $hitcapsule = $("table[width='100%'][cellspacing='0'][cellpadding='0'][border='0'][height='100%']");
  $hitcapsule = $hitcapsule.parent();
  console.log("HIT capsules identified: " + $hitcapsule.length);

  // hide blocked hits: running totals maintained by hideBlocked()
  var blockedcount = 0;
  var blockednames = "";
  /**
   * Re-read both lists from storage and walk every HIT found on the page:
   * blocked HITs get a solid red border and are hidden, includelisted HITs get
   * a dashed green border and are shown, everything else is shown borderless.
   * Updates the globals `blockedcount` / `blockednames` and the text and
   * mouseover title of the #showblocked link.
   */
  function hideBlocked()
  {
    // reload lists
    var storedBlocks = GM_getValue("scraper_ignore_list");
    if ( storedBlocks ) { ignore_list = storedBlocks.split('^'); }
    var storedFavorites = GM_getValue("scraper_include_list");
    if ( storedFavorites ) { include_list = storedFavorites.split('^'); }

    var total = $requester.length;
    console.log("starting to block, total HITs to check: " + total);
    blockedcount = 0;
    blockednames = "";
    var hiddenRequesters = [];

    var idx = 0;
    while (idx < total)
    {
      var position = idx + 1; // 1-based number used in the log lines
      var requester_name = $requester.eq(idx).text().trim();
      var title = $title.eq(idx).text().trim();
      console.log("HIT " + position + " detected. Requester: " + requester_name + ", Title: " + title);
      var hitcapsule = $hitcapsule.eq(idx);

      var isBlocked = !ignore_check(requester_name,title);
      if (isBlocked)
      {
        // requester name or hit title is in your blocklist
        hitcapsule.css('border','red solid thick');
        hitcapsule.hide();
        blockedcount += 1;
        hiddenRequesters.push(requester_name);
        console.log("blocked HIT " + position );
      }
      else if (!include_check(requester_name,title))
      {
        // favorite hit from the includelist: highlight (green outline)
        hitcapsule.css('border','green dashed thick');
        hitcapsule.show();
        console.log("highlighted HIT " + position );
      }
      else
      {
        // reset display for hits no longer on blocklist or includelist
        hitcapsule.css('border','none');
        hitcapsule.show();
      }
      idx += 1;
    }

    console.log("Total HITs blocked: " + blockedcount);
    blockednames = hiddenRequesters.join(", "); // comma-separated, no trailing separator
    var $toggleLink = $('#showblocked');
    $toggleLink.prop('title', blockednames); // update displayed list in show/hide link's mouseover text
    $toggleLink.text("Show " + blockedcount + " Blocked"); // update displayed block count
  }
  157.  
  // Initiate hiding the first time, as soon as the script runs.
  // This used to be written $(document).ready(hideBlocked()): the call
  // parentheses ran hideBlocked() on the spot and handed its undefined return
  // value to ready(), which registered nothing. Calling it directly states what
  // actually happens and keeps blockedcount/blockednames filled in before the
  // "Show N Blocked" link further down is built from them.
  hideBlocked();
  159.  
  // unhide blocked hits: show the capsule of every HIT position on the page
  // (one capsule per requester link, paired by index)
  function showBlocked(){
    console.log("starting to un-hide");
    var total = $requester.length;
    var pos = 0;
    while (pos < total) {
      $hitcapsule.eq(pos).show();
      pos += 1;
    }
  }
  168.  
  // open blocklist editor
  // Span wrapping the "Edit Blocklist" link; clicking it refreshes ignore_list
  // from storage, fills the editor textarea with the ^-separated entries and
  // reveals the editor dialog (#blocklist_div).
  var edit_blocks = document.createElement("span");
  edit_blocks.innerHTML = '<a href="#" class="footer_links" id="blocklist_edit_link" title="Blocklist = Disliked requester names and HIT titles to be hidden/ignored, and displayed with a red solid border when unhidden.">Edit Blocklist</a>';
  edit_blocks.onclick = function(){
    // console.log("opened blocklist editor");
    ignore_list = GM_getValue("scraper_ignore_list").split('^');
    $("#blocklist_text").val(ignore_list.join('^'));
    $("#blocklist_div").show();
    // cancel the link's default "#" navigation so the page does not jump to the top
    return false;
  };
  183.  
  // show/hide blocked hits
  // Span wrapping the "Show N Blocked" / "Hide N Blocked" toggle link.
  var showAllBlocked = document.createElement("span");
  showAllBlocked.innerHTML = '<a href="#" class="footer_links" id="showblocked"></a>';
  // blockednames is built from requester names found on the page, so set it
  // through DOM properties rather than splicing it into markup: a name
  // containing a quote or angle bracket can no longer break or inject HTML.
  showAllBlocked.firstChild.title = blockednames;
  showAllBlocked.firstChild.textContent = "Show " + blockedcount + " Blocked";
  showAllBlocked.onclick = function(){
    var toggleLink = document.getElementById('showblocked');
    var label = toggleLink.innerHTML;
    if ( label.indexOf("Show") > -1 ) {
      console.log("Un-hiding blocked hits - " + label );
      showBlocked();
      toggleLink.innerHTML = "Hide " + blockedcount + " Blocked";
    }
    else if ( label.indexOf("Hide") > -1 ) {
      console.log("Re-hiding blocked hits - " + label );
      hideBlocked();
      toggleLink.innerHTML = "Show " + blockedcount + " Blocked";
    }
    // cancel the link's default "#" navigation so the page does not jump to the top
    return false;
  };
  199.  
  // open includelist editor
  // Span wrapping the "Edit Includelist" link; clicking it refreshes
  // include_list from storage, fills the editor textarea with the ^-separated
  // entries and reveals the editor dialog (#includelist_div).
  var edit_includes = document.createElement("span");
  edit_includes.innerHTML = '<a href="#" class="footer_links" id="includelist_edit_link" title="Includelist = Favorite requester names and HIT titles to be displayed with a green dashed border to make them easy to spot.">Edit Includelist</a>';
  edit_includes.onclick = function(){
    // console.log("opened includelist editor");
    include_list = GM_getValue("scraper_include_list").split('^');
    $("#includelist_text").val(include_list.join('^'));
    $("#includelist_div").show();
    // cancel the link's default "#" navigation so the page does not jump to the top
    return false;
  };
  214.  
  // Insert the three links (edit blocklist | show/hide blocked | edit includelist)
  // into the page, separated by a light-blue pipe.
  var blocklinksDivider = '&nbsp;&nbsp;<font color="#9ab8ef">|</font>&nbsp;&nbsp;';
  if ( document.location.href.indexOf('?last_hits_previewed') >= 0 ) {
    // last_hits_previewed page: place the links right after the page heading
    $("h1:contains('Last HITs Previewed')").eq(0).after(edit_blocks, blocklinksDivider, showAllBlocked, blocklinksDivider, edit_includes, "<br><br>");
  }
  else {
    // regular search results pages: place the links on a new line after #collapseall
    $('#collapseall').eq(0).after("<br>", edit_blocks, blocklinksDivider, showAllBlocked, blocklinksDivider, edit_includes);
  }
  226.  
  227.  
  // For editing the blocklist: a hidden dialog holding an explanatory note,
  // a textarea and Save / Cancel buttons, inserted at the top of <body>.
  var blocklistdiv = document.createElement('div');
  var blocklisttextarea = document.createElement('textarea');
  var save_BLbutton = document.createElement('button');
  var cancel_BLbutton = document.createElement('button');

  (function buildBlocklistEditor() {
    // copy every entry of a {property: value} map onto a node's inline style
    function applyStyle(node, rules) {
      Object.keys(rules).forEach(function (prop) {
        node.style[prop] = rules[prop];
      });
    }

    // dialog container: fixed position, horizontally centred (left 50% with a
    // negative left margin of half the width), not displayed until opened
    blocklistdiv.setAttribute('id','blocklist_div');
    applyStyle(blocklistdiv, {
      position: 'fixed',
      width: '500px',
      height: '255px',
      left: '50%',
      right: '50%',
      margin: '-250px 0px 0px -250px',
      top: '300px',
      padding: '5px',
      border: '2px',
      backgroundColor: 'black',
      color: 'white',
      zIndex: '100',
      display: 'none',
      fontSize: '12px'
    });

    // textarea that holds the ^-separated list
    blocklisttextarea.title = 'Block list';
    blocklisttextarea.setAttribute('id','blocklist_text');
    applyStyle(blocklisttextarea, {
      padding: '2px',
      width: '500px',
      height: '180px'
    });

    // the note must be set before children are appended: textContent replaces all children
    blocklistdiv.textContent = 'This BLOCKLIST (ignored requesters/HITs) is shared with HIT Scraper With Export. Separate requester names and HIT titles with the ^ character. After clicking "Save", changes will be immediately applied in this tab (for other tabs to reflect the changes, refresh them or click their show/hide links twice).';
    blocklistdiv.appendChild(blocklisttextarea);

    // both buttons share one look and differ only in caption and id
    save_BLbutton.textContent = 'Save';
    save_BLbutton.setAttribute('id', 'save_BLblocklist');
    cancel_BLbutton.textContent = 'Cancel';
    cancel_BLbutton.setAttribute('id', 'cancel_BLblocklist');
    [save_BLbutton, cancel_BLbutton].forEach(function (button) {
      applyStyle(button, {
        height: '18px',
        width: '100px',
        fontSize: '10px',
        paddingLeft: '3px',
        paddingRight: '3px',
        backgroundColor: 'white',
        marginLeft: '5px'
      });
      blocklistdiv.appendChild(button);
    });

    document.body.insertBefore(blocklistdiv, document.body.firstChild);
  })();
  283.  
  // save and cancel for blocklist
  /**
   * Save the blocklist editor's contents.
   *
   * Splits the textarea text on ^, drops blank entries, lower-cases and trims
   * the rest, stores the result under "scraper_ignore_list", refreshes the
   * global `ignore_list`, closes the editor and re-applies blocking to the
   * current page via hideBlocked().
   *
   * If every entry is blank, the placeholder "nothing blocked yet" (the same
   * one the loader seeds on first run) is stored instead: GM_getValue treats an
   * empty stored string as missing, so storing "" would make later
   * GM_getValue(...).split('^') calls throw.
   */
  function save_BLblocklist() {
    // console.log("Save blocklist");
    var entries = $("#blocklist_text").val().split("^");
    var trimmed_list = [];
    // index loop, not for...in: for...in would also visit enumerable
    // properties that anything has added to Array.prototype
    for (var i = 0; i < entries.length; i++) {
      if (entries[i].trim().length !== 0) {
        trimmed_list.push(entries[i].toLowerCase().trim());
      }
    }
    // console.log(trimmed_list);
    var serialized = trimmed_list.length !== 0 ? trimmed_list.join('^') : "nothing blocked yet";
    GM_setValue("scraper_ignore_list", serialized);
    ignore_list = serialized.split('^');
    $("#blocklist_div").hide();
    // apply changes to current page
    hideBlocked();
  }
  // Save: delegate to save_BLblocklist() (wrapped so the click event is not passed along)
  save_BLbutton.addEventListener("click", function(){ save_BLblocklist(); }, false);

  // Cancel: discard edits by refilling the textarea from storage, then close the editor
  cancel_BLbutton.addEventListener("click", function(){
    ignore_list = GM_getValue("scraper_ignore_list").split('^');
    $("#blocklist_text").val(ignore_list.join('^'));
    $("#blocklist_div").hide();
  }, false);
  317.  
  318.  
  // For editing the includelist: a hidden dialog holding an explanatory note,
  // a textarea and Save / Cancel buttons, inserted at the top of <body>.
  var includelistdiv = document.createElement('div');
  var includelisttextarea = document.createElement('textarea');
  var save_ILbutton = document.createElement('button');
  var cancel_ILbutton = document.createElement('button');

  (function buildIncludelistEditor() {
    // copy every entry of a {property: value} map onto a node's inline style
    function applyStyle(node, rules) {
      Object.keys(rules).forEach(function (prop) {
        node.style[prop] = rules[prop];
      });
    }

    // dialog container: fixed position, horizontally centred (left 50% with a
    // negative left margin of half the width), not displayed until opened
    includelistdiv.setAttribute('id','includelist_div');
    applyStyle(includelistdiv, {
      position: 'fixed',
      width: '500px',
      height: '255px',
      left: '50%',
      right: '50%',
      margin: '-250px 0px 0px -250px',
      top: '300px',
      padding: '5px',
      border: '2px',
      backgroundColor: 'black',
      color: 'white',
      zIndex: '100',
      display: 'none',
      fontSize: '12px'
    });

    // textarea that holds the ^-separated list
    includelisttextarea.title = 'Include list';
    includelisttextarea.setAttribute('id','includelist_text');
    applyStyle(includelisttextarea, {
      padding: '2px',
      width: '500px',
      height: '180px'
    });

    // the note must be set before children are appended: textContent replaces all children
    includelistdiv.textContent = 'This INCLUDELIST (favorite requesters/HITs) is shared with HIT Scraper With Export. Separate requester names and HIT titles with the ^ character. After clicking "Save", changes will be immediately applied in this tab (for other tabs to reflect the changes, refresh them or click their show/hide links twice).';
    includelistdiv.appendChild(includelisttextarea);

    // both buttons share one look and differ only in caption and id
    save_ILbutton.textContent = 'Save';
    save_ILbutton.setAttribute('id', 'save_ILincludelist');
    cancel_ILbutton.textContent = 'Cancel';
    cancel_ILbutton.setAttribute('id', 'cancel_ILincludelist');
    [save_ILbutton, cancel_ILbutton].forEach(function (button) {
      applyStyle(button, {
        height: '18px',
        width: '100px',
        fontSize: '10px',
        paddingLeft: '3px',
        paddingRight: '3px',
        backgroundColor: 'white',
        marginLeft: '5px'
      });
      includelistdiv.appendChild(button);
    });

    document.body.insertBefore(includelistdiv, document.body.firstChild);
  })();
  374.  
  // save and cancel for includelist
  /**
   * Save the includelist editor's contents.
   *
   * Splits the textarea text on ^, drops blank entries, lower-cases and trims
   * the rest, stores the result under "scraper_include_list", refreshes the
   * global `include_list`, closes the editor and re-applies highlighting to the
   * current page via hideBlocked().
   *
   * If every entry is blank, the placeholder "nothing includelisted yet" (the
   * same one the loader seeds on first run) is stored instead: GM_getValue
   * treats an empty stored string as missing, so storing "" would make later
   * GM_getValue(...).split('^') calls throw.
   */
  function save_ILincludelist() {
    // console.log("Save includelist");
    var entries = $("#includelist_text").val().split("^");
    var trimmed_list = [];
    // index loop, not for...in: for...in would also visit enumerable
    // properties that anything has added to Array.prototype
    for (var i = 0; i < entries.length; i++) {
      if (entries[i].trim().length !== 0) {
        trimmed_list.push(entries[i].toLowerCase().trim());
      }
    }
    // console.log(trimmed_list);
    var serialized = trimmed_list.length !== 0 ? trimmed_list.join('^') : "nothing includelisted yet";
    GM_setValue("scraper_include_list", serialized);
    include_list = serialized.split('^');
    $("#includelist_div").hide();
    // apply changes to current page
    hideBlocked();
  }
  // Save: delegate to save_ILincludelist() (wrapped so the click event is not passed along)
  save_ILbutton.addEventListener("click", function(){ save_ILincludelist(); }, false);

  // Cancel: discard edits by refilling the textarea from storage, then close the editor
  cancel_ILbutton.addEventListener("click", function(){
    include_list = GM_getValue("scraper_include_list").split('^');
    $("#includelist_text").val(include_list.join('^'));
    $("#includelist_div").hide();
  }, false);
  408.  
  409.  
  // Buttons - with help from kadauchi
  // Every HIT capsule gets two small buttons right after its title link:
  // "X Req" (block the requester name) followed by "X Title" (block the HIT title).

  /**
   * Build one blocklist button.
   * @param {string} caption - button label
   * @param {string} tooltip - mouseover text
   * @param {string} entry - text the button adds to the blocklist (kept in button.value)
   * @param {boolean} isTitle - true for a HIT title button, false for a requester name button
   * @returns {HTMLButtonElement} the configured button, not yet inserted into the page
   */
  function makeBlockButton(caption, tooltip, entry, isTitle) {
    var kind = isTitle ? "HIT title" : "requester name";
    var button = document.createElement("button");
    button.innerHTML = caption;
    button.title = tooltip;
    button.value = entry;

    var look = {
      width: "44px",
      height: "16px",
      fontSize: "10px",
      fontWeight: "bolder",
      border: "2px solid",
      marginLeft: "5px",
      padding: "0px",
      backgroundColor: "transparent"
    };
    Object.keys(look).forEach(function (prop) {
      button.style[prop] = look[prop];
    });

    button.addEventListener("click",function(){
      var wanted = $(this).val().toLowerCase();
      // the placeholder fills the slot that is not being tested
      var stillAllowed = isTitle ? ignore_check("placeholderxyz",wanted) : ignore_check(wanted,"placeholderxyz");
      if (!stillAllowed) // if already on blocklist
      {
        window.alert("This " + kind + " \""+wanted+"\" is already in your blocklist. To unblock it, use 'Edit Blocklist'.");
        return;
      }
      var agreed = confirm("Do you really want to block HITs matching " + kind + " \""+wanted+"\"?");
      if (agreed)
      {
        GM_setValue("scraper_ignore_list", GM_getValue("scraper_ignore_list")+"^"+wanted);
        hideBlocked();
      }
    });
    return button;
  }

  for ( var i = 0; i < ($hitcapsule.length); i++ )
  {
    var ButtonXTitle = makeBlockButton(
      "X Title",
      "Add HIT title to blocklist.",
      $("a[class='capsulelink']").eq(i).text().trim(),
      true
    );
    $("a[class='capsulelink']").eq(i).after(ButtonXTitle);

    var ButtonXReq = makeBlockButton(
      "X Req",
      "Add requester name to blocklist.",
      $("span[class='requesterIdentity']").eq(i).text().trim(),
      false
    );
    // also inserted directly after the title link, so it lands before "X Title"
    $("a[class='capsulelink']").eq(i).after(ButtonXReq);
  }