// ==UserScript==
// @name PaperDownload & titleAsPDFName
// @namespace http://tampermonkey.net/
// @version 2.3
// @description 下载pdf论文并自动用文章标题重命名文件,点击左上角的save按钮或按 Ctrl+S 触发下载(按键下载有bug暂不可用)。Automatially download a pdf paper and rename it with the paper title. This script works on opened pdf pages, e.g. arxiv.org/pdf/xxx.pdf. Click save button on the top left corner or press Ctrl+S to trigger. Currently support pdf pages in arxiv.org, aclweb.org/anthology/, proceedings.mlr.press,openaccess.thecvf.com, openreview.net, and ieeexplore.ieee.org.
// @author LTGuo
// @include https://arxiv.org/pdf/*
// @include https://browse.arxiv.org/pdf/*
// @include https://aclweb.org/anthology/*
// @include https://www.aclweb.org/anthology/*
// @include http://proceedings.mlr.press/*.pdf
// @include http://openaccess.thecvf.com/*.pdf
// @include https://openreview.net/pdf?*
// @include https://ieeexplore.ieee.org/*
// @include https://papers.nips.cc/paper/*
// @grant GM_xmlhttpRequest
// @grant GM_download
// @grant GM_setClipboard
// @grant GM_notification
// @require https://greasyfork.org/scripts/383707-hotkeys-min-js/code/hotkeysminjs.js?version=702670
// ==/UserScript==
var filename = '';
function is_pdf(url){
return /pdf/i.test(url.slice(-3)) || document.querySelectorAll("embed")[0].type === "application/pdf";
}
function clean_text(text){
text = text.replace(/(\r\n\t|\n|\r\t)/gm,"");
return text;
}
function clean_title(title){
title = title.replace(/[<>:"/\\|?*]/g, " ");
title = title.replace(/\s\s+/g, ' ').trim();
return title;
}
// fectch pdf title for arxiv papers http://arxiv.org
// paper pdf url: http://arxiv.org/pdf/1411.4555.pdf
// paper info url: https://arxiv.org/abs/1411.4555
// match title: dc:title="Show and Tell: A Neural Image Caption Generator"
function get_title_arxiv(){
var url = window.location.href;
if (is_pdf(url)) {
var abs_url = url.replace('pdf', 'abs');
var url_ = url.split('/');
var yearmon = url_[url_.length-1].split('.')[0];
var year = yearmon.slice(0,2);
var mon = yearmon.slice(2,4);
console.log("year: "+year+", month: "+mon);
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /dc:title=(.*)trackback:ping/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
console.log("title: "+title);
filename = '['+year+'.'+mon+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for browse.arxiv papers http://browse.arxiv.org
// paper pdf url: http://browse.arxiv.org/pdf/1411.4555.pdf
// paper info url: https://browse.arxiv.org/abs/1411.4555
// match title: dc:title="Show and Tell: A Neural Image Caption Generator"
function get_title_browse_arxiv(){
var url = window.location.href;
if (is_pdf(url)) {
var abs_url = url.replace('pdf', 'abs');
var url_ = url.split('/');
var yearmon = url_[url_.length-1].split('.')[0];
var year = yearmon.slice(0,2);
var mon = yearmon.slice(2,4);
console.log("year: "+year+", month: "+mon);
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /dc:title=(.*)trackback:ping/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
console.log("title: "+title);
filename = '['+year+'.'+mon+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for ACL Anthology papers https://aclanthology.info/
// paper pdf url: https://aclweb.org/anthology/P18-1008, https://www.aclweb.org/anthology/D18-1049
// paper info url: https://aclanthology.info/papers/P18-1008/p18-1008 --> https://aclweb.org/anthology/papers/P/P18/P18-1008/
// match title: <meta content="The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation" name="citation_title" >
function get_title_acl(){
var url = window.location.href;
if (is_pdf(url)) {
var url_ = url.split('/');
var pid = url_[url_.length-1].split('.pdf')[0];
var abs_url = "https://aclanthology.info/papers/"+pid+'/'+pid.toLowerCase();
var year = pid.split('-')[0].slice(1,3);
console.log("Abs url: " + abs_url);
console.log("year: "+year);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /<meta content=\"(.*?)\" name=citation_title>/gm;
var conf_name_regex = /<a href=\/anthology\/venues\/.*?>(.*?)<\/a>/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = conf_name_regex.exec(text);
var conf_name = "ACL Anthology";
if (match !== null){
conf_name = match[1].trim();
}
console.log("title: "+title);
console.log("conf_name: "+conf_name);
filename = '['+conf_name+year+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for PMLR papers http://proceedings.mlr.press
// paper pdf url: http://proceedings.mlr.press/v70/jaderberg17a/jaderberg17a.pdf
// paper info url: http://proceedings.mlr.press/v70/jaderberg17a.html
// match title: <meta name="citation_title" content="Decoupled Neural Interfaces using Synthetic Gradients"/>
function get_title_pmlr(){
var url = window.location.href;
if (is_pdf(url)) {
var url_ = url.split('/');
var pid = url_[url_.length-1].split('.pdf')[0];
var abs_url = url.replace('/'+ url_[url_.length-1], '.html');
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /<meta name="citation_title" content="(.*?)"\/>/gm;
var conf_year_regex = /<meta name="citation_publication_date" content="(.*?)">/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = conf_year_regex.exec(text);
var year = "";
if (match !== null){
year = match[1].split('/')[0].trim();
}
console.log("title: "+title);
console.log("year: "+year);
filename = '[PMLR'+year+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for thecvf papers http://openaccess.thecvf.com
// paper pdf url: http://openaccess.thecvf.com/content_cvpr_2018/papers/Bai_Finding_Tiny_Faces_CVPR_2018_paper.pdf
// paper info url: http://openaccess.thecvf.com/content_cvpr_2018/html/Bai_Finding_Tiny_Faces_CVPR_2018_paper.html
// match title: <meta name="citation_title" content="Finding Tiny Faces in the Wild With Generative Adversarial Network"><meta name="citation_author"
function get_title_thecvf(){
var url = window.location.href;
if (is_pdf(url)) {
var url_ = url.split('/');
var pid = url_[url_.length-1].split('.pdf')[0];
var abs_url = url.replace('/papers/','/html/').replace('.pdf','.html');
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /<meta name="citation_title" content="(.*?)"><meta name="citation_author"/gm;
var conf_name_regex = /<div id="header_title"><a href=".*?">(.*?)<\/a>/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = conf_name_regex.exec(text);
var conf_name = "thecvf";
if (match !== null){
conf_name = match[1].trim();
}
console.log("title: "+title);
console.log("conf_name: "+conf_name);
filename = '['+conf_name+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for OpenReview papers https://openreview.net/
// paper pdf url: https://openreview.net/pdf?id=ryQu7f-RZ
// paper info url: https://openreview.net/forum?id=ryQu7f-RZ
function get_title_openreview(){
var url = window.location.href;
if (is_pdf(url)) {
var abs_url = url.replace('/pdf?','/forum?');
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /<meta name="citation_title" content="(.*?)"\/>/gm;
var conf_name_regex = /<h3><span><span>(.*?)Conference Submission/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = conf_name_regex.exec(text);
var conf_name = "openreview";
if (match !== null){
conf_name = match[1].trim();
}
console.log("title: "+title);
console.log("conf_name: "+conf_name);
filename = '['+conf_name+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for IEEE papers https://ieeexplore.ieee.org
// paper pdf url: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8031355
// real pdf url: https://ieeexplore.ieee.org/ielx7/6046/8291714/08031355.pdf?tp=&arnumber=8031355&isnumber=8291714
// paper info url: https://ieeexplore.ieee.org/document/8031355
// match title: "title":"GLA: Global–Local Attention for Image Description",
// match publication name: "doi":"10.1109/TMM.2017.2751140"
function get_title_ieee(){
var url = window.location.href;
if (true) {
var url_ = url.split('arnumber=');
var pid = url_[url_.length-1].split('&')[0];
var abs_url = "https://ieeexplore.ieee.org/document/"+pid;
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /"title":"(.*?)",/gm;
var conf_name_regex = /"doi":".*?\/(.*?\..*?)\..*?"/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = conf_name_regex.exec(text);
var conf_name = "IEEE";
if (match !== null){
conf_name = match[1].trim();
}
console.log("title: "+title);
console.log("conf_name: "+conf_name);
filename = '['+conf_name+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
// fectch pdf title for OpenReview papers https://openreview.net/
// paper pdf url: https://papers.nips.cc/paper/8137-coordinate-descent-with-bandit-sampling.pdf
// paper info url: https://papers.nips.cc/paper/8137-coordinate-descent-with-bandit-sampling
// match title: <meta name="citation_title" content="Coordinate Descent with Bandit Sampling">
function get_title_nips(){
var url = window.location.href;
if (is_pdf(url)) {
var abs_url = url.replace('.pdf','');
console.log("Abs url: " + abs_url);
GM_xmlhttpRequest({
method: "GET",
url: abs_url,
onload: function(res) {
if (res.status === 200) {
var text = res.responseText;
text = clean_text(text);
var regex = /<meta name="citation_title" content="(.*?)">/gm;
var year_regex = /<meta name="citation_publication_date" content="(.*?)">/gm;
var match = regex.exec(text);
if (match !== null){
var title = match[1];
title = clean_title(title);
match = year_regex.exec(text);
var year = "";
if (match !== null){
year = match[1].trim();
}
console.log("title: "+title);
console.log("year: "+year);
filename = '[NIPS '+year+'] '+title+'.pdf';
console.log("filename: "+filename);
}
}
}
});
console.log("send");
}
}
var url = window.location.href;
// redirect ieee page to real pdf url
if (/ieeexplore.ieee.org\/stamp\/stamp.jsp/i.test(url)){
var target = document.getElementsByTagName("iframe")[0].src;
window.location.replace(target);
}
// get paper title
if (/browse\.arxiv\.org\/pdf/i.test(url)){
get_title_browse_arxiv();
}
else if (/arxiv\.org\/pdf/i.test(url)){
get_title_arxiv();
}
else if (/aclweb\.org\/anthology\//i.test(url)){
get_title_acl();
}
else if (/proceedings.mlr.press\/.*\.pdf/i.test(url)){
get_title_pmlr();
}
else if (/openaccess.thecvf.com\/.*\.pdf/i.test(url)){
get_title_thecvf();
}
else if (/openreview.net\/pdf/i.test(url)){
get_title_openreview();
}
else if (/ieeexplore.ieee.org.*pdf.*/i.test(url)){
get_title_ieee();
}
else if (/papers.nips.cc\/paper\/.*/i.test(url)){
get_title_nips();
}
else if (/arxivfellow\.com\/pdf/i.test(url)){
get_title_arxivFellow();
}
// automatically download paper
function download(){
// GM_setClipboard(filename);
var xhr = new XMLHttpRequest();
// load document from local cache
xhr.open("GET", '', true);
xhr.responseType = "blob";
xhr.onload = function (e) {
if (xhr.status === 200) {
var file = window.URL.createObjectURL(xhr.response);
var a = document.createElement("a");
a.href = file;
a.download = filename;
document.body.appendChild(a);
a.click();
}
};
xhr.send();
}
function redirect_arixv_to_mirror(){
if (window.location.href.includes('browse')){
var mirror_url = window.location.href.replace('browse.arxiv', 'arxiv');
}
else {
var mirror_url = window.location.href.replace('arxiv', 'browse.arxiv');
}
window.location.href = mirror_url;
}
function redirect_arixv_to_abs(){
var abs_url = url.replace('pdf', 'abs');
window.open(abs_url);
}
// download by clicking on the button
var btn = document.createElement("button");
btn.innerText = "save";
btn.setAttribute('style', "position:absolute;z-index:1000; left: 12px; top: 12px; height: 28px; padding-left: 20px; padding-right: 20px; background-color: #424649; border: none; color: white; font-size: 16px; cursor: pointer;");
btn.setAttribute('id', "btn");
document.body.appendChild(btn);
btn.onclick=download;
btn.onmouseover = function() {
this.style.backgroundColor="#424649"
};
btn.onmouseout = function() {
this.style.backgroundColor="#323639"
};
// download with hot-key (cannot work now)
// hotkeys('ctrl+s', function(event,handler) {
// switch(handler.key){
// case "ctrl+s": console.log('you pressed ctrl+s!'); event.preventDefault(); download(); break;
// }
// });
// add extra buttons for arxiv
if (/arxiv\.org\/pdf/i.test(url)){
// add a button to redirect arxiv pdf to abs
var arxiv_abs_btn = document.createElement("button");
arxiv_abs_btn.innerText = "abs";
arxiv_abs_btn.setAttribute('style', "position:absolute;z-index:1000; left: 80px; top: 12px; height: 28px; padding-left: 25px; padding-right: 25px; background-color: #424649; border: none; color: white; font-size: 16px; cursor: pointer;");
arxiv_abs_btn.setAttribute('id', "arxiv_abs_btn");
document.body.appendChild(arxiv_abs_btn);
arxiv_abs_btn.onclick=redirect_arixv_to_abs;
arxiv_abs_btn.onmouseover = function() {
this.style.backgroundColor="#424649"
};
arxiv_abs_btn.onmouseout = function() {
this.style.backgroundColor="#323639"
};
// add a button to redirect arxiv to browse.arxiv (or browse.arxiv to arxiv)
var arxiv_mirror_btn = document.createElement("button");
arxiv_mirror_btn.innerText = "mirror";
arxiv_mirror_btn.setAttribute('style', "position:absolute;z-index:1000; left: 150px; top: 12px; height: 28px; padding-left: 10px; padding-right: 10px; background-color: #424649; border: none; color: white; font-size: 16px; cursor: pointer;");
arxiv_mirror_btn.setAttribute('id', "arxiv_mirror_btn");
document.body.appendChild(arxiv_mirror_btn);
arxiv_mirror_btn.onclick=redirect_arixv_to_mirror;
arxiv_mirror_btn.onmouseover = function() {
this.style.backgroundColor="#424649"
};
arxiv_mirror_btn.onmouseout = function() {
this.style.backgroundColor="#323639"
};
}