pakku.user.js

GreaseMonkey version of pakku.js.

// ==UserScript==
// @name        pakku.user.js
// @description GreaseMonkey version of pakku.js.
// @include     *://*.bilibili.com/*
// @version     1.1
// @grant       none
// @namespace me.fts.pakkujs
// ==/UserScript==

(function(){

// Configuration
// I am too lazy to add configuration UI :)

THRESHOLD=20;
TRIM_ENDING=true;
TAOLUS=[[/^23{2,}$/,"233..."],[/^6{3,}$/,"666..."],[/^[fF]+$/,"FFF..."],[/^[hH]+$/,"hhh..."]];
REMOVE_SEEK=true;
PROC_TYPE7=true;
MAX_COSINE=80;
MAX_DIST=3;
TRIM_ENDING=true;
TRIM_SPACE=true;
WHITELIST=[];
DANMU_MARK='suffix';
ENLARGE=true;

//Unused in script:
/*
DANMU_BADGE
FLASH_NOTIF
POPUP_BADGE
//*/

function fromholyjson(txt) {
	var item=JSON.parse(txt);
	for(var i in item)
		item[i][0]=RegExp(item[i][0]);
	return item;
}
function toholyjson(obj) {
	var item=[];
	for(var i in obj)
		item.push([obj[i][0].source,obj[i][1]]);
	return JSON.stringify(item);
}




// Copied from edit_distance.js

var ed_counts = new Int16Array (0x10ffff);
var ed_a = new Int16Array (1048576);
var ed_b = new Int16Array (1048576);
var ed_t = new Int32Array (2048);

var MIN_DANMU_SIZE=10;

function hash(a, b) {
	return ((a<<10)^b)&1048575;
}

function edit_distance (P, Q) {
	'use strict';
	// TODO: Make this less hacky

	if (P.length + Q.length < MIN_DANMU_SIZE)
		return (MAX_DIST + 1) * +(P != Q);

	for (var i = 0; i < P.length; i ++) ed_counts [P.charCodeAt (i)] ++;
	for (var i = 0; i < Q.length; i ++) ed_counts [Q.charCodeAt (i)] --;

	var ans = 0;

	for (var i = 0; i < P.length; i ++) {
		ans += Math.abs (ed_counts[P.charCodeAt (i)]);
		ed_counts[P.charCodeAt (i)] = 0;
	}

	for (var i = 0; i < Q.length; i ++) {
		ans += Math.abs (ed_counts[Q.charCodeAt (i)]);
		ed_counts[Q.charCodeAt (i)] = 0;
	}

	return ans;
}

function cosine_distance (P, Q) {

	'use strict';

	var ed_t_p = 0;

	ed_a[hash(P.charCodeAt(P.length - 1), P.charCodeAt(0))] = 1;
	ed_b[hash(Q.charCodeAt(Q.length - 1), Q.charCodeAt(0))] = 1;

	ed_t[ed_t_p++] = hash(P.charCodeAt(P.length - 1), P.charCodeAt(0));
	ed_t[ed_t_p++] = hash(Q.charCodeAt(Q.length - 1), Q.charCodeAt(0));
	for (var i = 0; i < P.length - 1; i++) {
		var h1 = hash(P.charCodeAt(i), P.charCodeAt(i + 1));
		ed_t[ed_t_p++] = h1;
		ed_a[h1] += 1;
	}
	for (var i = 0; i < Q.length - 1; i++) {
		var h1 = hash(Q.charCodeAt(i), Q.charCodeAt(i + 1));
		ed_t[ed_t_p++] = h1;
		ed_b[h1] += 1;
	}
	var data = Array();

	var x = 0, y = 0, z = 0;

	for (var i = 0; i < ed_t_p; i++) {
		var h1 = ed_t[i];
		if (ed_a[h1]) {
			y += ed_a[h1] * ed_a[h1];
			if (ed_b[h1]) {
                x += ed_a[h1] * ed_b[h1];
                z += ed_b[h1] * ed_b[h1];
				ed_b[h1] = 0;
			}
			ed_a[h1] = 0;
		}
		else {
			if (ed_b[h1]) {
                z += ed_b[h1] * ed_b[h1];
				ed_b[h1] = 0;
			};
			
		}
	}
	//console.log(x,y,z);
	return x * x / y / z;
}

function similar(P,Q) {
	return edit_distance(P,Q)<=MAX_DIST || cosine_distance(P,Q)*100>=MAX_COSINE;
}

// Copied from core.js

var trim_ending_re=/^(.+?)[\.。,,/\??!!~~@\^、+=\-_♂♀ ]*$/;
var trim_space_re=/[  ]/g;
var LOG_VERBOSE=true;

function parse(old_dom,tabid) {
	TAOLUS_len=TAOLUS.length;
	WHITELIST_len=WHITELIST.length;
	
	  //chrome.browserAction.setTitle({
	  //		title: '正在处理弹幕…', // if u can see this, pakku might not be working correctly :)
		//	  tabId: tabid
		//});
	
	console.time('parse');
	
	function enlarge(size,count) {
		return count<=10 ? size : Math.floor(size*Math.log10(count));
	}

	
	function make_mark(txt,cnt) {
		return DANMU_MARK=='suffix' ? txt+' [x'+cnt+']' :
			   DANMU_MARK=='prefix' ? '[x'+cnt+'] '+txt : txt;
	}
	
	function detaolu(text) {
		for(var i=0;i<TAOLUS_len;i++)
			if(TAOLUS[i][0].test(text))
				return TAOLUS[i][1];
		text = TRIM_ENDING ? text.replace(trim_ending_re,'$1') : text;
		text = TRIM_SPACE ? text.replace(trim_space_re,'') : text;
		return text;
	}
	
	function whitelisted(text) {
		for(var i=0;i<WHITELIST_len;i++)
			if(WHITELIST[i][0].test(text))
				return true;
		return false;
	}
	
	function ext_special_danmu(text) {
		try {
			return JSON.parse(text)[4];
		} catch(e) {
			return text;
		}
	}
	
	function build_text(elem) {
		var count=elem.count;
		var dumped=null;
		if(elem.mode=='7') // special danmu, need more parsing
			try {
				dumped=JSON.parse(elem.orig_str);
			} catch(e) {}
		
		if(dumped) {
			dumped[4]=count==1?dumped[4]:make_mark(elem.str,count);
			return JSON.stringify(dumped);
		} else // normal case
			return count==1?elem.orig_str:make_mark(elem.str,count);
	}

	var parser=new DOMParser();
	var dom=parser.parseFromString(old_dom,'text/xml');
	var new_dom=parser.parseFromString('<i></i>','text/xml');
	var i_elem=new_dom.childNodes[0];

	var danmus=[];
	[].slice.call(dom.childNodes[0].children).forEach(function(elem) {
		if(elem.tagName=='d') { // danmu
			var attr=elem.attributes['p'].value.split(',');
			var str=elem.childNodes[0] ? elem.childNodes[0].data : '';

			if(!PROC_TYPE7 && attr[1]=='7') // special danmu
				i_elem.appendChild(elem);
			else if(attr[1]=='8') { // code danmu
				if(REMOVE_SEEK && str.indexOf('Player.seek(')!=-1)
					elem.childNodes[0].data='/* player.seek filtered by pakku */';
				i_elem.appendChild(elem);
			} else if(whitelisted(str)) {
				i_elem.appendChild(elem);
			} else
				danmus.push({
					attr: attr, // thus we can build it into new_dom again
					str: attr[1]=='7' ? detaolu(ext_special_danmu(str)) : detaolu(str),
					time: parseFloat(attr[0]),
					orig_str: str,
					mode: attr[1],
					count: 1,
				});
		} else
			i_elem.appendChild(elem);
	});
	danmus.sort(function(x,y) {return x.time-y.time;});

	var danmu_chunk=Array();
	var last_time=0;
	var counter=0;

	function apply_item(dm) {
		counter+=dm.count-1;
		var d=new_dom.createElement('d');
		var tn=new_dom.createTextNode(build_text(dm));

		d.appendChild(tn);
		if(ENLARGE)
			dm.attr[2]=''+enlarge(parseInt(dm.attr[2]),dm.count);
		d.setAttribute('p',dm.attr.join(','));
		i_elem.appendChild(d);
	}
	
	danmus.forEach(function(dm) {
		while(danmu_chunk.length && dm.time-danmu_chunk[0].time>THRESHOLD)
			apply_item(danmu_chunk.shift());
		
		for(var i=0;i<danmu_chunk.length;i++) {
			if(similar(dm.str,danmu_chunk[i].str)) {
				if(LOG_VERBOSE) {
					if(edit_distance(dm.str,danmu_chunk[i].str)>MAX_DIST)
						console.log('cosine_dis',dm.str,'to',danmu_chunk[i].str);
					else
						console.log('edit_dis',dm.str,'to',danmu_chunk[i].str);
				}
				danmu_chunk[i].count++;
				return; // aka continue
			}
		}
		danmu_chunk.push(dm);
	});
	for(var i=0;i<danmu_chunk.length;i++)
		apply_item(danmu_chunk[i]);

	//setbadge((
	//        POPUP_BADGE=='count' ? ''+counter :
	//        POPUP_BADGE=='percent' ? (danmus.length ? (counter*100/danmus.length).toFixed(0)+'%' : '0%') :
	//        ''
	//    ),SUCCESS_COLOR,tabid
	//);
	//chrome.browserAction.setTitle({
	//    title: '已过滤 '+counter+'/'+danmus.length+' 弹幕',
	//    tabId: tabid
	//});
	var serializer=new XMLSerializer();
	console.timeEnd('parse');
	return [serializer.serializeToString(new_dom), new_dom];
}

// Hijacking XMLHttpRequest to make this script work

WINDOW = window;
if(WINDOW._fts_xhr)return;
WINDOW._fts_xhr = WINDOW.XMLHttpRequest;
WINDOW.XMLHttpRequest = function() {
	//cannot use apply directly since we want a 'new' version
	var wrapped = new(Function.prototype.bind.apply(WINDOW._fts_xhr, arguments));

	wrapped.addEventListener("readystatechange", function () {
		var r;
		try{
			if(wrapped.readyState == 4) {
				//console.log(wrapped);
				if( wrapped.responseURL.match(/^(https:|http:|)?(\/\/)?comment.bilibili.com\/.*\.xml/)) {
					console.log("processing danmoku", wrapped);
						r = parse(wrapped.response, 0);
						Object.defineProperty(wrapped, 'responseText', {
							writable: true
						});
						Object.defineProperty(wrapped, 'responseXML', {
							writable: true
						});
						Object.defineProperty(wrapped, 'response', {
							writable: true
						});
						wrapped.responseXML = r[1];
						wrapped.responseText = r[0];
						wrapped.response = r[0];
				}
			}
		} catch(e) {
			console.log(e, r);
		}
	});
	//*/

	return wrapped;
};
console.log("r1 loaded");


})();