mmmturkeybacon Export Mturk History (Classic Version)

Exports Mturk history to tab separated values so you can easily save the HITs you've worked on in a spreadsheet.

当前为 2016-09-16 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name        mmmturkeybacon Export Mturk History (Classic Version)
// @author      mmmturkeybacon
// @description Exports Mturk history to tab separated values so you can easily save the HITs you've worked on in a spreadsheet.
// @namespace   http://userscripts.org/users/523367
// @include     https://www.mturk.com/mturk/status
// @require     http://code.jquery.com/jquery-latest.min.js
// @version     1.1.1c
// @grant       none
// @grant       GM_log
// ==/UserScript==

// Background colour of the control panel that is injected at the top of the page.
var BACKGROUND_COLOR = "#FFFFFF";
// Milliseconds between polls of date_list_loop while it waits for the current date to finish.
var DATE_LIST_DELAY = 500;
// Milliseconds to wait before requesting the next status-detail page of a date.
var STATUSDETAIL_DELAY = 500;
// Milliseconds to wait before retrying after the "maximum allowed page request rate" error page.
var MPRE_DELAY = 2000;

// Markup of the (initially empty) container that holds the whole interface.
var control_panel_HTML = '<div id="control_panel" style="margin: 0 auto 0 auto;' +
                         'border-bottom: 1px solid #000000; margin-bottom: 5px; ' +
                         'background-color: ' + BACKGROUND_COLOR + ';"></div>';

// Insert the container as the first child of <body>, then create the widgets.
// Only the button is attached here; the rest is attached by show_interface().
$('body').prepend(control_panel_HTML);
var control_panel = document.getElementById("control_panel");
var date_input = document.createElement("INPUT");
var big_red_button = document.createElement("BUTTON");
var progress_report = document.createTextNode("Stopped");
var p = document.createElement("P");
var text_area = document.createElement("TEXTAREA");

// The button first reveals the interface; show_interface() later turns it
// into the Start/Stop button.
big_red_button.textContent = "Show Interface";
big_red_button.onclick =  function(){show_interface();};
control_panel.appendChild(big_red_button);

// Shared state read and written by the functions below.
// global_run: true while an export is running; set to false to stop the loops.
var global_run = false;
// statusdetail_loop_finished: true once every page of the current date has been processed and written out.
var statusdetail_loop_finished = false;
// page_num: number of status-detail pages processed so far for the current date.
var page_num = 0;
// date_header: heading text of the date currently being processed, written above its rows.
var date_header = "";
// history: per-date tallies filled by scrape(), keyed by requester id + title + reward.
var history = {};

// Shows `text` in the progress indicator.
// text:  message to display.
// force: pass true to update the indicator even while no export is running;
//        otherwise the message is dropped unless global_run is true.
function set_progress_report(text, force)
{
    var update_allowed = (global_run == true) || (force == true);
    if (!update_allowed)
    {
        return;
    }
    progress_report.textContent = text;
}

// Extends the date links found on the status page with up to 15 additional,
// hand-built status-detail URLs for the days before the last listed date.
//
// $date_URLs: jQuery collection of the status page's statusdetail links. The
//             last link's encodedDate value is read as mmddyyyy.
// Returns:    array of the original links plus the generated URL strings, in
//             reverse order. Whenever the extension cannot be completed (no
//             links, no encodedDate, failed or rate-limited request, header
//             that cannot be parsed) the original links are returned reversed,
//             which is what the "45" option exports.
function last_sixty_days($date_URLs)
{
    var date_URLs = $.makeArray($date_URLs);
    if (date_URLs.length == 0)
    {
        return date_URLs; // no date links at all, nothing to extend
    }

    var date_URL_parts = date_URLs[date_URLs.length-1].toString().split("encodedDate=");
    if (date_URL_parts.length < 2)
    {
        return date_URLs.reverse(); // last link carries no encodedDate, cannot extend
    }

    var url_prefix = date_URL_parts[0]+"encodedDate=";
    var last_date = date_URL_parts[1];
    var month = last_date.substring(0,2);
    var day = parseInt(last_date.substring(2,4), 10);
    var year = last_date.substring(4,8);

    // Step back one day at a time inside the same month, down to and including
    // day "00".
    // NOTE(review): day 00 looks intentional - the page requested for it below
    // is used to learn the previous month's last day, so presumably the server
    // resolves day 00 to that date. Confirm against the live site.
    var i = 1;
    while (i <= day && i <= 15)
    {
        date_URLs.push(url_prefix+month+("0" + (day - i)).slice(-2)+year);
        i++;
    }

    if (i < 16)
    {
        // Fewer than 15 days fit in the month: fetch the oldest generated URL
        // (synchronously) and read the date shown in its heading.
        var $src;
        jQuery.ajax({
            url:    date_URLs[date_URLs.length-1],
            success: function(data) {$src = $(data);},
            async:   false
        });

        // $src stays undefined when the request fails; treat that, the rate
        // limit page and an unparsable heading all as "cannot extend".
        var heading_day = NaN;
        if ($src && $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")').length == 0)
        {
            last_date = $src.find('td[class="white_text_14_bold"]:contains("HITs You Worked On For")').clone().children().remove().end().text().trim().substring(22);
            heading_day = parseInt(last_date.split(",")[0].slice(-2), 10);
        }

        if (!isNaN(heading_day))
        {
            month = (month == "01") ? "12" : ("0" + (month - 1)).slice(-2);
            year = last_date.slice(-4); // year comes from the heading, so a January -> December step picks up the heading's year
            var j = 1;
            while (i <= 15)
            {
                date_URLs.push(url_prefix+month+("0" + (heading_day - j)).slice(-2)+year);
                i++;
                j++;
            }
        }
        else
        {
            date_URLs = $.makeArray($date_URLs); // error, output last 45 days.
        }
    }
    return date_URLs.reverse();
}

// Called once every date has been handed to statusdetail_loop: polls every
// 500 ms until the last date has been written out, then resets the button to
// "Start" and reports "Finished". Does nothing when the export was stopped.
function wait_until_stopped()
{
    if (global_run != true)
    {
        return;
    }

    if (statusdetail_loop_finished != true)
    {
        // Last date still in progress: check again later.
        setTimeout(function(){wait_until_stopped();}, 500);
        return;
    }

    big_red_button.textContent = "Start";
    set_progress_report("Finished", false);
}

// Adds one occurrence of `feedback` to the tally object `counts`.
// Empty feedback is not counted. Own-property lookup is used so feedback text
// that happens to match an inherited name (e.g. "constructor") is counted
// correctly.
function scrape_count_feedback(counts, feedback)
{
    if (feedback == "")
    {
        return;
    }
    if (Object.prototype.hasOwnProperty.call(counts, feedback))
    {
        counts[feedback] = counts[feedback]+1;
    }
    else
    {
        counts[feedback] = 1;
    }
}

// Reads every HIT row of one status-detail page and adds it to `history`.
//
// $src: jQuery-wrapped page. Rows are matched up by position: the j-th contact
//       link, title, reward, status and feedback cell are taken to belong to
//       the same HIT.
// HITs are grouped under the key requester id + title + reward; each group
// counts submitted/approved/pending/rejected HITs and tallies the distinct
// feedback texts of approved and rejected HITs.
function scrape($src)
{
    var $requester = $src.find('a[href^="/mturk/contact?"]');
    var $title = $src.find('td[class="statusdetailTitleColumnValue"]');
    var $reward = $src.find('td[class="statusdetailAmountColumnValue"]');
    var $approval = $src.find('td[class="statusdetailStatusColumnValue"]');
    var $feedback = $src.find('td[class="statusdetailRequesterFeedbackColumnValue"]');

    for (var j = 0; j < $requester.length; j++)
    {
        var $contact = $requester.eq(j);
        var requester_name = $contact.text();
        var contact_href = $contact.attr("href");
        // Take the requesterId parameter wherever it sits in the query string;
        // fall back to the older split for links without a recognisable one.
        var id_match = /[?&]requesterId=([^&]*)/.exec(contact_href);
        var requester_id = id_match ? id_match[1] : contact_href.split(/requesterId=|&/)[1];
        var title = $title.eq(j).text();
        var reward = $reward.eq(j).text();
        var approval = $approval.eq(j).text();
        // Feedback ends up in a tab separated row, so line breaks and tabs are flattened.
        var feedback = $feedback.eq(j).text().trim().replace(/\r\n|\n|\r|\t/g, ' ');

        // Declared locally: the original assigned an undeclared `key`, which
        // leaks a global and throws in strict mode.
        var key = requester_id+title+reward;
        if (history[key] === undefined)
        {
            // Property order matters: rows are written out in this order.
            history[key] = {submitted:0, approved:0, pending:0, rejected:0, requester:"", title:"", reward:"", approved_feedback:{}, rejected_feedback:{}};
            history[key].requester = requester_name + " (" + requester_id + ")";
            history[key].title = title;
            history[key].reward = reward;
        }

        var entry = history[key];
        entry.submitted = entry.submitted+1;

        if (approval == "Rejected")
        {
            entry.rejected = entry.rejected+1;
            scrape_count_feedback(entry.rejected_feedback, feedback);
        }
        else if (approval == "Pending Approval")
        {
            entry.pending = entry.pending+1;
        }
        else // Approved or Paid
        {
            entry.approved = entry.approved+1;
            scrape_count_feedback(entry.approved_feedback, feedback);
        }
    }
}

// Formats every entry of `history` as one tab separated row (each value is
// followed by a tab, each row by a newline). Values are written in the
// property order of the entry; the two feedback tallies are rendered as
// "text | (count) text | ..." with the count shown only when above one.
function statusdetail_format_history()
{
    var rows = "";
    for (var key in history)
    {
        var obj = history[key];
        for (var prop in obj)
        {
            // Skip anything inherited from the prototype chain.
            if (!obj.hasOwnProperty(prop))
            {
                continue;
            }

            if (prop == "approved_feedback" || prop == "rejected_feedback")
            {
                var feedback_obj = obj[prop];
                var feedback_parts = [];
                for (var feedback in feedback_obj)
                {
                    var count = feedback_obj[feedback];
                    feedback_parts.push((count > 1) ? "("+count+") "+feedback : feedback);
                }
                rows += feedback_parts.join(" | ")+"\t";
            }
            else
            {
                rows += obj[prop]+"\t";
            }
        }
        rows += "\n";
    }
    return rows;
}

// Processes one date: fetches the status-detail page at next_URL, scrapes it,
// then schedules itself for that page's "Next" link. When there is no further
// page (next_URL has length 0) the collected rows are appended to the text
// area under the date heading and statusdetail_loop_finished is set.
//
// next_URL: link or URL string of the page to fetch, or "" when the date is done.
// Does nothing once global_run is false, which is how "Stop" ends the chain.
function statusdetail_loop(next_URL)
{
    if (global_run != true)
    {
        return;
    }

    if (next_URL.length == 0)
    {
        text_area.value += date_header + "\n" + statusdetail_format_history();
        statusdetail_loop_finished = true;
        return;
    }

    $.get(next_URL, function(data)
    {
        var $src = $(data);
        var maxpagerate = $src.find('td[class="error_title"]:contains("You have exceeded the maximum allowed page request rate for this website.")');
        if (maxpagerate.length != 0)
        {
            // Rate limited: request the same page again after a longer pause.
            setTimeout(function(){statusdetail_loop(next_URL);}, MPRE_DELAY);
            return;
        }

        date_header = $src.find('td[class="white_text_14_bold"]:contains("HITs You Worked On For")').clone().children().remove().end().text().trim();
        page_num++;
        set_progress_report("Processing " + date_header + " page " + page_num, false);
        console.log(progress_report.textContent);
        scrape($src);

        // Kept local: the original assigned an undeclared $next_URL global.
        var $next_link = $src.find('a[href^="/mturk/statusdetail"]:contains("Next")');
        var following_URL = ($next_link.length != 0) ? $next_link.attr("href") : "";
        setTimeout(function(){statusdetail_loop(following_URL);}, STATUSDETAIL_DELAY);
    }).fail(function()
    {
        // Without this a failed request would leave the export waiting
        // forever; retry like the rate limit case (Stop still ends it).
        set_progress_report("Request failed, retrying page " + (page_num + 1), false);
        setTimeout(function(){statusdetail_loop(next_URL);}, MPRE_DELAY);
    });
}

// Works through the dates one at a time. Whenever the previous date has been
// written out, the per-date state is reset and the next entry is popped from
// the END of date_URLs and handed to statusdetail_loop. The function then
// re-schedules itself every DATE_LIST_DELAY ms until the list is empty, at
// which point wait_until_stopped() takes over. Does nothing once global_run
// is false.
//
// date_URLs: array of links/URL strings (consumed in place), or "" for nothing to do.
function date_list_loop(date_URLs)
{
    if (global_run != true)
    {
        return;
    }

    if (date_URLs.length == 0)
    {
        wait_until_stopped();
        return;
    }

    if (statusdetail_loop_finished == true)
    {
        // Fresh state for the next date.
        page_num = 0;
        statusdetail_loop_finished = false;
        history = {};
        statusdetail_loop(date_URLs.pop());
    }

    // Poll again, whether a new date was just started or one is still running.
    setTimeout(function(){date_list_loop(date_URLs);}, DATE_LIST_DELAY);
}

// Turns the text typed into the date box into the list of dates to export.
//
// input:      raw text of the date box.
// $date_URLs: jQuery collection of all statusdetail links on the status page.
// Returns:    array ordered for date_list_loop (which pops from the end), or
//             null when the input is not understood.
// Accepted input: blank or anything starting with "t" (first link), anything
// starting with "y" (second link), a date or date prefix made of digits,
// "45", "60", or "date:date" for a range.
function start_running_resolve_dates(input, $date_URLs)
{
    if (input == "")
    {
        return $.makeArray($date_URLs.eq(0)); // today
    }

    // Dates are only ever put into a selector when they consist of digits, so
    // stray quotes or brackets in the input cannot break the selector.
    var digits_only = /^\d*$/;
    var date_input_parts = input.split(":");

    if (date_input_parts.length == 1)
    {
        var first_letter = input.substring(0,1).toLowerCase();
        if (first_letter == "t")
        {
            return $.makeArray($date_URLs.eq(0)); // today
        }
        if (first_letter == "y")
        {
            return $.makeArray($date_URLs.eq(1)); // yesterday
        }
        if (digits_only.test(input))
        {
            var $single = $('a[href^="/mturk/statusdetail?encodedDate='+input+'"]');
            if ($single.length != 0)
            {
                return $.makeArray($single); // single date
            }
        }
        if (input == "45")
        {
            return $.makeArray($date_URLs).reverse(); // last 45 days
        }
        if (input == "60")
        {
            return last_sixty_days($date_URLs); // last 60 days
        }
        return null;
    }

    if (date_input_parts.length == 2 && digits_only.test(date_input_parts[0]) && digits_only.test(date_input_parts[1]))
    {
        var date1_URL = $('a[href^="/mturk/statusdetail?encodedDate='+date_input_parts[0]+'"]');
        var date2_URL = $('a[href^="/mturk/statusdetail?encodedDate='+date_input_parts[1]+'"]');
        var date1_idx = $date_URLs.index(date1_URL);
        var date2_idx = $date_URLs.index(date2_URL);
        if (date1_idx == -1 || date2_idx == -1)
        {
            return null;
        }
        if (date1_idx > date2_idx)
        { // ascending
            return $.makeArray($date_URLs.slice(date2_idx, date1_idx+1));
        }
        // descending
        return $.makeArray($date_URLs.slice(date1_idx, date2_idx+1)).reverse();
    }

    return null;
}

// Click/Enter handler of the Start/Stop button.
// While the button reads "Start": resets the output area to the header row,
// resolves the date box into a list of dates and starts date_list_loop.
// Otherwise: stops the running export.
// When the date box cannot be understood the export is not started and
// "Input Error!" stays visible (previously the empty run finished right away
// and replaced the message with "Finished").
function start_running()
{
    if (big_red_button.textContent != "Start")
    {
        global_run = false;
        big_red_button.textContent = "Start";
        set_progress_report("Stopped", true);
        return;
    }

    global_run = true;
    statusdetail_loop_finished = true; // lets date_list_loop pick up the first date immediately
    big_red_button.textContent = "Stop";
    set_progress_report("Running", false);
    text_area.value = "";
    text_area.value +=  "Submitted\tApproved\tPending\tRejected\tRequester Name (Requester ID)\tTitle\tReward\tApproved Feedback\tRejected Feedback\n";

    var date_URLs = start_running_resolve_dates(date_input.value, $('a[href^="/mturk/statusdetail"]'));
    if (date_URLs === null)
    {
        // Back to the idle state, with the error left on screen.
        global_run = false;
        big_red_button.textContent = "Start";
        set_progress_report("Input Error!", true);
        return;
    }

    date_list_loop(date_URLs);
}

// Replaces the initial "Show Interface" button with the full control panel:
// a labelled date box, the Start/Stop button, the progress text and the
// output text area. Pressing Enter in the date box acts like the button.
function show_interface()
{
    control_panel.removeChild(big_red_button);

    control_panel.appendChild(document.createTextNode("Date: "));
    date_input.onkeydown = function(event){if (event.keyCode === 13){start_running();}}; // 13 = Enter
    // "60" is still accepted by start_running but deliberately not advertised
    // here: going back beyond 45 days stopped working for this script.
    date_input.title = "Leave blank to get today's completed HITs\n"
                      +"Enter 'yesterday' to get yesterday's HITs\n"
                      +"To get a single day use mmddyyyy\n"
                      +"To get a range of days use mmddyyyy:mmddyyyy\n"
                      +"Enter 45 to get the last 45 days\n";
    control_panel.appendChild(date_input);

    // The same button element is reused as the Start/Stop button.
    big_red_button.textContent = "Start";
    big_red_button.onclick = function(){start_running();};
    control_panel.appendChild(big_red_button);

    control_panel.appendChild(document.createTextNode(" "));
    control_panel.appendChild(progress_report);

    control_panel.appendChild(p);

    // CSS lengths need a unit; a bare 200 is not a valid height value.
    text_area.style.height = "200px";
    text_area.style.width = "100%";
    control_panel.appendChild(text_area);
}