Pending Earnings

Adds a pending earnings item to mturk dashboard

// ==UserScript==
// @name           Pending Earnings
// @version        1.0
// @namespace      http://bluesky.software.com/turkscripts
// @description    Adds a pending earnings item to mturk dashboard
// @include        https://www.mturk.com/mturk/dashboard
// ==/UserScript==

//
// We are on the dashboard page. We want to go to the status_detail
// pages for the last 30 days and total the dollar amount of the
// HITs still pending and add it to the dashboard page like the today's projected earnings script
// does. We will use the XMLHttpRequest Object to get the pages and then
// process them one by one until we have done them all. An enhancement I just
// thought of is that we don't have to do all 30 days if we read the number of
// HITs pending and just process dates that have pending HITs.
//
// This will only be run if someone clicks on it as it is going to be really slow if they
// have a lot of HITs pending from a long time ago.
//
//
// 08/22/2011 Coding started
//
// 08/23/2011 Beta version ready but I only have 1 HIT pending so no way to do
//            much testing, will have to do some turking and then test
//
// 08/25/2011 There is the potential for a race condition: while I'm
//            adding up pending HITs to reach the total that was shown on the
//            status page, some of them may be approved before I've added them all up.
//            So I need to add some new logic, otherwise it would be possible to
//            end up in an infinite loop trying to add in some pending HITs that
//            are no longer pending. I'm going to implement this by keeping track
//            of the number of HITs submitted and, when reading detailed status
//            pages, abort trying to add up x pending HITs if we exceed the number
//            of HITs submitted for that day.
//
// 08/30/2011 Not working for some so changed the way getNumberOfPending() works
//            and added an error message alert if this function can't find the
//            number of pending HITs
//
// 08/31/2011 Made a test version, the problem is located in  
//            process_status_page() 
//
// 09/01/2011 Found the problem: If there was no link in a row then trying
//            to work on links[0], which doesn't exist, would cause the script to blow
//            up. I put in a check that links has a length before trying to work with
//            it.
//
// 10/03/2011 Modified to save a new cookie which is a history of the previous time we
//            summed up the pendingEarnings and we use that to skip processing dates where
//            the pendingHits hasn't changed since the last time we were invoked. This should
//            minimize the times the page request exceeded error is encountered.
//
// 10/29/2011 Changed the expiration date of the pendingEarnings cookie to 30 days.
//
// 11/03/2011 Moved the setting of the pendingEarnings cookie outside the if loop to fix the
//            change of 10/29.
//
// The normal functioning of the script is to just get the cookie
// of PendingEarnings and display the link and total.
//
// If we are clicked on then processPendingHits is called and we will
// add up the pending HITs. Note there could still be no pending HITs
// the user just clicked on it anyway so we can't ignore the case of
// zero HITs to process. The total is saved in a cookie so it is
// available for all the times we haven't been clicked on.
//

// Insert the Pending Earnings in the dashboard.
// Copied from current_earnings script - Copyright (c) 2008, Mr. Berserk
// 
// Modified to suit
//

var allTds, thisTd;
allTds = document.getElementsByTagName('td');
for (var i = 0; i < allTds.length; i++)
{
   thisTd = allTds[i];

   // Only the "Total Earnings" label cell of the metrics table is of interest;
   // every other cell is skipped.
   if ( !thisTd.innerHTML.match(/Total Earnings/) || !thisTd.className.match(/metrics\-table\-first\-value/) )
   {
      continue;
   }

   // Left cell: a link back to the dashboard whose click handler recomputes
   // the pending total before the page reloads.
   var pendingEarningsLink = document.createElement('a');
   pendingEarningsLink.href =  "https://www.mturk.com/mturk/dashboard";
   pendingEarningsLink.innerHTML = "Pending HITs";
   pendingEarningsLink.addEventListener('click',processPendingHits,false);

   var labelCell = document.createElement('td');
   labelCell.className = "metrics-table-first-value";
   labelCell.appendChild(pendingEarningsLink);

   // Right cell: the total saved by the last click, or a placeholder when
   // no total has been saved yet.
   var amountCell = document.createElement('td');
   var savedTotal = getCookie("MturkPendingEarnings");
   amountCell.innerHTML = savedTotal ? "$" + parseFloat(savedTotal).toFixed(2) : "$?.??";

   var newRow = document.createElement('tr');
   newRow.className = "even";
   newRow.appendChild(labelCell);
   newRow.appendChild(amountCell);

   // Insert the new row directly after the row holding the matched cell.
   thisTd.parentNode.parentNode.insertBefore(newRow,thisTd.parentNode.nextSibling);
}

//
// Functions
//

//
// User has clicked on us, so add up all the pending HITs. The first thing
// we do is get a copy of the status page, this contains the summary of the
// past 30 days. We scan through this looking at the Pending HITs column and
// saving the dates that have pending HITs which we will subsequently use to
// access the appropriate detailed status pages :)
//

//
// Click handler: total the dollar value of all pending HITs and save it in
// the "MturkPendingEarnings" cookie (30 days).
//
// Each entry returned by process_status_page() has the form
//    <8-char encoded date><pending HITs>$<submitted HITs>
// After a date has been totalled, '%<subtotal>' is appended and the whole
// list is saved, comma-joined, in the "MturkPendingDates" cookie (1 day).
// On the next click any date whose pending and submitted counts are both
// unchanged reuses the saved subtotal instead of re-fetching its detail pages.
// The submitted count is part of the comparison because, for today, the
// pending count can stay the same while the HITs themselves differ.
//
function processPendingHits()
{
   var pendingEarnings = 0;                        // dollar amount of pending HITs
   var totalPendingHits = getNumberOfPending();

   if(totalPendingHits > 0)   // only process pages if there is at least one pending HIT
   {
      // process_status_page() yields nothing when the status table can't be
      // found; treat that as "no dates" rather than crashing on .length.
      var datesToDo = process_status_page() || [];

      // History of the previous click, if any.
      var oldDatesToDo = [];
      var savedDates = getCookie("MturkPendingDates");
      if(savedDates)
      {
         oldDatesToDo = savedDates.split(",");
      }

      for(var n = 0; n < datesToDo.length; n++)
      {
         var encodedDate = datesToDo[n].substr(0,8);
         var dollarIndex = datesToDo[n].indexOf('$');
         var pendingHits = datesToDo[n].substring(8,dollarIndex);    // between the date and the $
         var submittedHits = datesToDo[n].substring(dollarIndex+1);  // everything after the $

         // Reuse the saved subtotal when the counts are unchanged, otherwise
         // walk the detailed status pages for this date.
         var subtotal = lookupSavedSubtotal(oldDatesToDo, encodedDate, pendingHits, submittedHits);
         if(subtotal === null)
         {
            subtotal = process_detailed_status_pages(encodedDate, pendingHits, submittedHits);
         }

         datesToDo[n] = datesToDo[n] + '%' + subtotal;   // remember the subtotal for next time
         pendingEarnings += subtotal;
      }

      // Overwrite (or create) the history cookie with the new list.
      setCookie("MturkPendingDates",datesToDo.join(","),1);
   }

   // Saved outside the if so the total is reset to zero when no HITs are pending.
   // NOTE(review): a failed lookup (getNumberOfPending() returning -1) also
   // lands here and stores 0.
   setCookie("MturkPendingEarnings",pendingEarnings,30);
}

//
// Private helper for processPendingHits: look through the saved history for an
// entry with the same date, pending count and submitted count, and return its
// subtotal as a number. Returns null when there is no usable entry; malformed
// entries (missing '$' or '%', or a non-numeric subtotal) are ignored.
//
function lookupSavedSubtotal(oldDatesToDo, encodedDate, pendingHits, submittedHits)
{
   for(var m = 0; m < oldDatesToDo.length; m++)
   {
      var entry = oldDatesToDo[m];
      if(entry.substr(0,8) != encodedDate)
      {
         continue;
      }

      var dollarIndex = entry.indexOf('$');
      var percentIndex = entry.indexOf('%');
      if(dollarIndex < 0 || percentIndex < dollarIndex)
      {
         continue;   // not in date+pending$submitted%subtotal form
      }

      var old_pendingHits = entry.substring(8,dollarIndex);
      var old_submittedHits = entry.substring(dollarIndex+1,percentIndex);
      if(old_pendingHits == pendingHits && old_submittedHits == submittedHits)
      {
         var old_subtotal = parseFloat(entry.substring(percentIndex+1));
         if(!isNaN(old_subtotal))
         {
            return old_subtotal;
         }
      }
   }
   return null;
}


//
// Get total pending HITs
//
// As of now there are 8 'metrics-table's on the dashboard
// but the last two are subsets of table 6 which has 26 td's
// but again we have subsets within subsets so that was the confusion
// So when you get the td's for table 5 you are also getting the td's
// for table 7 which is what we want. The confusion is when I try to 
// match on innerHTML for ... Pending, I get a match for the superset
// td's not on the individual td. So td 14 contains ... Pending but it
// also contains all the other td's that are part of the second column
// 
// To handle the recursive tables I'm now checking that the td is 
// numeric, if it isn't we continue to scan tds.
//
//
// Read the total number of pending HITs from the dashboard.
//
// Scans every td of every 'metrics-table' for the "... Pending" label and
// returns the innerHTML of the td that follows it, provided that td is
// numeric. Because the tables are nested, an outer td can contain the label
// too; its follower is not numeric, so the scan simply continues.
//
// Returns the count as found in the page (a string), or -1 after alerting
// the user when no count could be found.
//
function getNumberOfPending()
{
   var tables = document.getElementsByClassName('metrics-table');
   for (var m = 0; m < tables.length; m++)
   {
      var table_data = tables[m].getElementsByTagName('td');
      // Stop one short of the end: a label in the very last td has no
      // following value cell to read.
      for (var n = 0; n + 1 < table_data.length; n++)
      {
         // A string passed to match() is compiled as a regular expression,
         // so each '.' here matches any character.
         if(table_data[n].innerHTML.match('... Pending'))
         {
            var candidate = table_data[n+1].innerHTML;
            // isNumber() accepts an empty/blank string (it coerces to 0),
            // so reject blank cells explicitly.
            if(candidate.replace(/^\s+|\s+$/g,"") !== "" && isNumber(candidate))
            {
               return candidate;
            }
         }
      }
   }
   alert("Script Bug: Can't find how many pending HITs you have");
   return -1;           // didn't find it - This is a bug exit!
}

//
// Process the status page by getting all those dates that have pending HITs
//
// The first status page contains 30 days worth of HITs so there is no need to 
// check the second page as there should be no pending HITS on that page
//
// Note: If the person is a total newbie then maybe the status page doesn't even
// have 30 days worth of data so don't hard code the 30 bozo :)
//
//
// We need to process the DOM in a recursive manner so that I can associate the
// correct date with the correct pending HIT count. The days listed aren't necessarily
// in sequential order; if the worker took some days off there will be missing days.
//
// This main function just grabs the status page and then calls the function
// pending_dates to do the actual work
//

//
// Fetch the status summary page and return whatever pending_dates() extracts
// from its HTML.
//
function process_status_page()
{
   // The third argument to open() is false, so this is a synchronous
   // request: send() returns only once the response is available.
   var request = getHTTPObject();
   request.open("GET",'https://www.mturk.com/mturk/status',false);
   request.send(null);

   var statusHtml = request.responseText;
   return pending_dates(statusHtml);
}

//
// First of all we have to turn the grabbed status page back into a DOM
// object so we can process it with javascript. We do this with the 
// create div trick.
//
// Now get all the tables
// Some of these tables won't be what we are looking for but there is no
// way to distinguish at the table level; we have to look at the table data to
// know if this table is of interest to us.
//
// There is a problem that the table we are interested in is embedded in another
// table so we are processing the table we want twice, how to avoid this?
//

//
// Extract the dates that still have pending HITs from the status page HTML.
//
// page_text - the HTML of the status page
//
// Returns an array with one string per date that has pending HITs, each in
// the form
//    <8-char encoded date><pending HITs>$<submitted HITs>
// The array is empty when no date has pending HITs. If no table with
// statusdetail links is found at all, the user is alerted and an empty array
// is returned so callers can still iterate the result.
//
function pending_dates(page_text)
{
   // Turn the fetched text back into DOM nodes with the create-div trick.
   var page_html = document.createElement('div');
   page_html.innerHTML = page_text;

   var activeDates = [];

   var tables = page_html.getElementsByTagName('table');
   for (var m = 0; m < tables.length; m++) // process each table
   {
      var foundDateRow = false;
      var table_rows = tables[m].getElementsByTagName('tr');
      for (var n = 0; n < table_rows.length; n++) // process each row
      {
         // A row of interest has, as its first link, a link to a detailed
         // status page.
         var links = table_rows[n].getElementsByTagName('a');
         if(links.length === 0 || links[0].href.substr(0,40) != 'https://www.mturk.com/mturk/statusdetail')
         {
            continue;
         }
         foundDateRow = true;

         // Skip rows that lack the expected cells or date parameter instead
         // of throwing on a missing element.
         var pendingCells = table_rows[n].getElementsByClassName('statusPendingColumnValue');
         var submittedCells = table_rows[n].getElementsByClassName('statusSubmittedColumnValue');
         var dateIndex = links[0].href.search(/Date=/);
         if(pendingCells.length === 0 || submittedCells.length === 0 || dateIndex < 0)
         {
            continue;
         }

         // Trim so stray whitespace in the markup doesn't end up inside the
         // encoded string that is parsed again later.
         var pendingHits = pendingCells[0].innerHTML.replace(/^\s+|\s+$/g,"");
         var submittedHits = submittedCells[0].innerHTML.replace(/^\s+|\s+$/g,"");
         if(pendingHits > 0)
         {
            var encodedDate = links[0].href.substr(dateIndex+5,8);
            // Both counts ride along with the date in one string; the '$'
            // separates them so they can be split apart again.
            activeDates.push(encodedDate + pendingHits + '$' + submittedHits);
         }
      }
      // The table of interest is nested inside another table; return after
      // the first table that had date rows so it isn't processed twice.
      if(foundDateRow)
      {
         return activeDates;
      }
   }
   alert("Script Bug: Couldn't find any dates to process");
   return activeDates;
}

//
// Process the detailed status pages associated with this date until we have
// found all the pending HITs for this date
//

//
// Total the dollar amount of the pending HITs for one date by walking that
// date's detailed status pages.
//
// encodedDate   - the 8-character date code used in the statusdetail URL
// pendingHits   - how many pending HITs the status page reported for the date
// submittedHits - how many HITs were submitted on that date
//
// Pages are fetched (synchronously) until either all pending HITs have been
// found or every submitted HIT has been covered. The second condition guards
// against HITs being approved while we are counting, which would otherwise
// leave the pending count above zero forever.
//
// Returns the subtotal in dollars as a number.
//
function process_detailed_status_pages(encodedDate, pendingHits, submittedHits)
{
   var subtotal = 0;
   var pagenum = 1;                              // detailed status page number, we start with page 1
   var pendingLeft = Number(pendingHits);        // counts arrive as strings
   var submittedLeft = Number(submittedHits);

   while (pendingLeft > 0)
   {
      // The third argument to open() is false: a synchronous request.
      var page = getHTTPObject();
      var link = "https://www.mturk.com/mturk/statusdetail?sortType=All&pageNumber=" + pagenum + "&encodedDate=" + encodedDate;
      page.open("GET",link,false);
      page.send(null);

      var page_html = document.createElement('div');
      page_html.innerHTML = page.responseText;
      var amounts = page_html.getElementsByClassName('statusdetailAmountColumnValue');
      var statuses = page_html.getElementsByClassName('statusdetailStatusColumnValue');
      for(var k = 0; k < amounts.length; k++)
      {
         // Guard against a page with fewer status cells than amount cells.
         if(statuses[k] && statuses[k].innerHTML == 'Pending Approval')
         {
            pendingLeft--;
            var amountText = amounts[k].innerHTML;
            var amount = parseFloat(amountText.substring(amountText.indexOf('$')+1));
            if(!isNaN(amount))   // an unparsable amount must not turn the subtotal into NaN
            {
               subtotal += amount;
            }
         }
      }

      submittedLeft -= 25;                       // 25 HITs to a page
      // Written as !(x > 0) so a non-numeric submitted count (NaN) also
      // stops the loop instead of requesting pages forever.
      if (!(submittedLeft > 0))
      {
         return subtotal;   // all submitted HITs covered; any pending still
                            // uncounted must have changed state meanwhile
      }
      pagenum++; // go do another page if necessary
   }
   return subtotal; // This is the dollar amount of pending HITs for this date
}

//
// XMLHttpRequest wrapper from web
//

//
// Create an HTTP request object: a native XMLHttpRequest when the browser
// has one, otherwise the first ActiveX variant that can be constructed.
// Returns false when none of them is available.
//
function getHTTPObject()
{
   if (typeof XMLHttpRequest != 'undefined')
   {
      return new XMLHttpRequest();
   }

   // Older IE fallbacks, tried in this order.
   var progIds = ["Msxml2.XMLHTTP", "Microsoft.XMLHTTP"];
   for (var i = 0; i < progIds.length; i++)
   {
      try
      {
         return new ActiveXObject(progIds[i]);
      }
      catch (e)
      {
         // this variant isn't available, fall through to the next one
      }
   }
   return false;
}

//
// Is the variable a number or a string that parses to a number?
//

function isNumber (o)
{
   // Subtracting zero coerces the argument to a number; the result is NaN
   // exactly when the argument doesn't parse as one.
   var coerced = o - 0;
   return coerced === coerced;   // NaN is the only value not equal to itself
}


//
//  Cookie functions copied from http://www.w3schools.com/JS/js_cookies.asp
//

//
// Store a cookie.
//
// cookie_name - name of the cookie
// value       - value to store; it is passed through escape()
// exdays      - lifetime in days from now; when null or undefined no
//               expires attribute is written
//
function setCookie(cookie_name,value,exdays)
{
   var cookie_text = cookie_name + "=" + escape(value);
   if (exdays != null)
   {
      var expiry = new Date();
      expiry.setDate(expiry.getDate() + exdays);
      cookie_text += "; expires=" + expiry.toUTCString();
   }
   document.cookie = cookie_text;
}


//
// Look up a cookie by name.
//
// Returns the unescape()d value of the first cookie whose name matches
// cookie_name, or undefined when there is no such cookie.
//
function getCookie(cookie_name)
{
   var pairs = document.cookie.split(";");
   for (var idx = 0; idx < pairs.length; idx++)
   {
      var pair = pairs[idx];
      var eq = pair.indexOf("=");

      // Pairs after the first are preceded by a space, so strip surrounding
      // whitespace from the name before comparing.
      var name = pair.substr(0,eq).replace(/^\s+|\s+$/g,"");
      if (name == cookie_name)
      {
         return unescape(pair.substr(eq+1));
      }
   }
}