Wednesday, December 7, 2011

YouTube Comments Sorter


// ==UserScript==
// @name           YouTube Comments Sorter
// @namespace      http://griffeltavla.wordpress.com/2011/07/04/youtube-comments-sorting-and-threading/
// @description    Provides comment threading, aggregation and sorting in chronological and reverse order.
// @version        1.0.1
// @author         tinjon@gmail.com
// @include        http://www.youtube.com/all_comments?v=*
// @include        http://www.youtube.com/watch?v=*
// @require        http://code.jquery.com/jquery-1.6.2.min.js
// ==/UserScript==

// ============================================================================
// Global Helpers
// ============================================================================
/**
 * Forwards all of its arguments to the `console.log` found on `unsafeWindow`.
 *
 * Does nothing when `unsafeWindow` is not defined or carries no usable
 * console, so that logging can never abort the script.
 */
function log(){
  if(typeof unsafeWindow === 'undefined') return;
  var pageConsole = unsafeWindow.console;
  if(pageConsole && typeof pageConsole.log === 'function') {
    // Invoke log with the console itself as receiver; some console
    // implementations reject calls made with any other `this`.
    pageConsole.log.apply(pageConsole, arguments);
  }
}
/**
 * Reads the text of the `.comments-section-stat` element(s), strips every
 * non-digit character and parses what is left as a base-10 integer.
 *
 * @return {number} The parsed number, or NaN when the text has no digits.
 */
function getTotalComments(){
  var statText   = $('.comments-section-stat').text();
  var digitsOnly = statText.replace(/[\D]/g,'');
  return parseInt(digitsOnly, 10);
}
/**
 * Selects the `.comment` elements inside the last `.comment-list`.
 *
 * @param {Object} [doc] Context to search in; `window.document` is used
 *        when omitted (or falsy).
 * @return {Object} The jQuery selection.
 */
function getComments(doc){
  var context = doc ? doc : window.document;
  return $('.comment-list:last .comment', context);
}
/**
 * Tells whether the current location is an "all comments" page, i.e. whether
 * its URL contains "/all_comments?".
 *
 * @return {boolean}
 */
function isAllPage() {
  var allCommentsPath = /\/all_comments\?/;
  var currentUrl = window.location.href;
  return allCommentsPath.test(currentUrl);
}
/**
 * Stores a sequence number on an element in its `oidx` attribute.
 *
 * The stored value is the negated one-based position, so index 0 becomes -1,
 * index 1 becomes -2 and so on (never zero).
 *
 * @param {Object} el Element to tag.
 * @param {number} i  Zero-based position of the element.
 */
function setSeq(el,i){
  var seqNo = -(i+1);
  $(el).attr('oidx', seqNo);
}
/**
 * Reads back the sequence number kept in an element's `oidx` attribute.
 *
 * @param {Object} el Element to inspect.
 * @return {number|undefined} The attribute parsed as a base-10 integer, or
 *         undefined when the attribute is missing or empty.
 */
function getSeq(el){
  var raw = $(el).attr('oidx');
  if(!raw) return;
  return parseInt(raw,10);
}
/**
 * Runs `f` once and logs how many milliseconds the call took.
 *
 * @param {Function} f     Function to run; called without arguments.
 * @param {string}   [msg] Label for the log line; "duration" when omitted.
 */
function timeit(f,msg) {
  var startedAt = new Date();
  f();
  var elapsedMs = (new Date()) - startedAt;
  var label = msg || "duration";
  log(label, " :", elapsedMs);
}

// ============================================================================
// Classes (object prototypes)
// ============================================================================
/**
 * Counter used to detect when a group of started operations has finished.
 *
 * @param {Function} doneCallback Called (without arguments) from `dec()`
 *        every time the counter is zero or below after decrementing.
 * @return {{inc: Function, dec: Function}} `inc()` registers one started
 *         operation, `dec()` registers one finished operation.
 */
function Synchronizer(doneCallback){
  var pending = 0;

  function inc(){
    pending += 1;
  }
  function dec(){
    pending -= 1;
    if(pending > 0) return;   // Still waiting for other operations.
    doneCallback();
  }

  return { inc: inc, dec: dec };
}

/**
 * Minimal textual progress indicator: a `div.simple-progressbar` appended to
 * `parentNode` whose content is replaced on every update.
 *
 * @param {number} max              Value that corresponds to 100 %.
 * @param {Object} parentNode       Node (or anything `$()` accepts) that
 *                                  receives the indicator element.
 * @param {string} updatePrefixText Template shown by `update`; the first
 *                                  "{VALUE}" is replaced by the rounded
 *                                  percentage.
 * @param {string} donePrefixText   Template shown by `done`; the first
 *                                  "{VALUE}" is replaced by the given count.
 * @return {Object} Object with `update(count)`, `done(count)` and `delete()`.
 */
function ProgressBar(max,parentNode, updatePrefixText, donePrefixText) {
  var bar = $('<div class="simple-progressbar"></div>');
  bar.appendTo($(parentNode));

  // Fills the template with the value and shows it inside the indicator.
  function render(template, value){
    bar.html( template.replace("{VALUE}", ""+ value) );
  }

  return {
    delete: function(){
      bar.remove();
    },
    update: function(processedCount){
      var percent = Math.round( processedCount * 100 / max );
      render(updatePrefixText, percent);
    },
    done: function(processedCount){
      render(donePrefixText, processedCount);
      bar.css('color','green');
    }
  };
}

/**
 * Fetches all comment pages and concatenates the result onto the page from
 * which this script was triggered.
 *
 * The fetching occurs in parallel and once all pages have been retrieved,
 * the comments are re-ordered according to the page number they were found on,
 * so as to appear in the correct order (reverse chronological which Google in
 * its infinite wisdom decided was the optimal order to follow a discussion !?)
 *
 * @param {Function} [pagesFetchedCb] Called without arguments once every
 *        started page request has completed and the fetched comments have
 *        been merged into the last comment list.
 * @return {Object} Controller with `start()`, `applicable()` and
 *         `setProgressBar(bar)`.
 */
function CommentFetcher(pagesFetchedCb) {
  // Runs when the number of in-flight requests drops back to zero.
  var sync = new Synchronizer(function(){
        $('.comments-pagination').parent().remove();
        sortPages();
        var comments = $('.comments-section').not( $('.comments-section h4:contains(Top Comments)').parent() ).find('.comment');
        comments.remove().appendTo('.comment-list:last'); // Ensure all comments are in the same container for later sorting.
        progressBar.done(getComments().size());
        if(pagesFetchedCb) pagesFetchedCb();
      }),
      // No-op stand-in used until setProgressBar() supplies a real one.
      progressBar = {
        update: function(){},
        done: function(){}
      },
      pageUrls = [],            // Every page URL discovered so far.
      processedPageUrls = [];   // URLs already fetched or scheduled for fetching.

  function getEl(filter,doc) {
    doc = doc || window.document;
    return $(filter,doc);
  }
  // Returns the href of the first element matching the filter, if any.
  function getUrlFromButton(filter,doc){
    var el = getEl(filter,doc);
    if(el[0]) return el.attr('href');
  }
  function getNextPageUrl(doc){
    return getUrlFromButton('.yt-uix-button:contains(Next)',doc);
  }
  // Page number of the document, taken from the toggled pagination button.
  function getPageNumber(doc) {
    var s = $('.comments-pagination .yt-uix-button-toggled',doc).attr('data-page');
    return parseInt(s,10);
  }
  // Tags the last comment list with its page number so sortPages() can order it.
  function setPageNumberForLastPageSection(doc) {
    $('.comment-list:last',doc).attr('page',getPageNumber(doc));
  }
  // Moves the fetched document's last comment list into the current page.
  function addPageSection(doc){
    setPageNumberForLastPageSection(doc);
    $('.comment-list:last',doc).insertAfter('.comment-list:last');
  }
  function markProcessed(url){
    if(url) processedPageUrls.push(url);
    return url;
  }
  // Returns the next URL that has not been scheduled yet (marking it as
  // processed), or undefined when the queue is exhausted.
  function nextUrl(){
    var unprocessedUrls = pageUrls.filter(function(url) {
      return processedPageUrls.indexOf(url) < 0;
    });
    return markProcessed(unprocessedUrls.shift());
  }
  // Queues the URLs of all pagination buttons found in the given document.
  function addNewPageUrls(doc){
    $('.comments-pagination button',doc).each(function(){
      var url = $(this).attr('href');
      // A button without a link must not be queued: an undefined entry can
      // never be marked as processed, so nextUrl() would keep returning it
      // and fetchPages() would stop before reaching the real URLs behind it.
      if( !url ) return;
      if( pageUrls.indexOf(url) === -1 ) {
        pageUrls.push(url);
        log("Added new url: "+ url);
      }
    });
  }
  // Re-inserts all page-tagged elements in ascending page order.
  function sortPages(){
    var pages  = $('*[page]');
    var parent = $(pages[0]).parent();
    var sorted = $('*[page]').sort(function(a,b){
      a = parseInt( $(a).attr('page'),10 );
      b = parseInt( $(b).attr('page'),10 );
      return a > b ? 1 : a < b ? -1 : 0;
    });
    sorted.remove().appendTo(parent);
  }
  // Triggers parallel fetching of all queued pages which haven't yet been
  // scheduled for retrieval.
  function fetchPages(){
    var url;
    while(url = nextUrl()) {
      fetchCommentsFromPage(url);
    }
  }

  function fetchCommentsFromPage(url) {
    $.ajax({
      url: url,
      success: function(html){
        var doc = $(html);
        addNewPageUrls(doc);   // A fetched page may reveal further pages.
        addPageSection(doc);
        updateProgress();
        fetchPages();
      },
      error: function(xhr, textStatus, errorThrown) {
        log("failed to get: "+ url);
      },
      // Count in-flight requests so the merge step runs after the last one.
      beforeSend: sync.inc,
      complete: sync.dec
    });
  }
  function updateProgress(){
    var fetchedCommentCount = getComments().size();
    if( fetchedCommentCount < getTotalComments() ) {
      progressBar.update(fetchedCommentCount);
    }
  }

  return {
    start: function(){
      var startPageUrl = getUrlFromButton('.comments-pagination .yt-uix-button-toggled');
      if(isAllPage()){
        markProcessed(startPageUrl);     // So we can skip re-fetching this one. (only on the all-page as the normal page has different behavior)
        addNewPageUrls(window.document); // Add all page urls we can "see" in the navigation button list.
      } else {
        var allPage1 = $('.comments-pagination button:nth(0)').attr('href');
        if(allPage1) pageUrls.push(allPage1);
      }
      if(pageUrls.length === 0) return;  // Unless there are more pages, no need to show progress messages or try to fetch pages.
      setPageNumberForLastPageSection(window.document);
      if(!isAllPage()) $('.comment-list:last').html("");  // Empty the first partially filled comments list. Only applicable on the video page.
      updateProgress();
      fetchPages();
    },
    // True when the current page offers a "Next" pagination button with a link.
    applicable: function(){
      return !!getNextPageUrl(window.document);
    },
    setProgressBar: function(bar){
      progressBar = bar;
    }
  };
}

/**
 * Sorts the comments returned by getComments() by their sequence number
 * (see setSeq/getSeq) and can flip between the two directions.
 *
 * The initial direction is "not chronological", i.e. descending sequence
 * numbers.
 *
 * @return {Object} Object with `sort()`, `reverse()`, `applicable()` and
 *         `isChronological()`.
 */
function CommentSorter(){
  var chronological = false;

  // Gives every comment that has no sequence number yet one that is based on
  // its position in the passed collection.
  function numberUnnumbered(comments){
    comments.each(function(position){
      if( getSeq(this) ) return;
      setSeq(this, position);
    });
  }

  function sortComments(comments){
    var container = $(comments[0]).parent();
    comments.remove();                // Take the comments out of the DOM while they are being re-ordered.
    numberUnnumbered(comments);       // Comments may have been added since the last run.

    // The direction cannot change while sorting, so resolve it once up front.
    var whenGreater = chronological ? 1 : -1;
    var whenSmaller = -whenGreater;

    comments.sort(function(left,right){
      var leftSeq = getSeq(left), rightSeq = getSeq(right);
      if(leftSeq > rightSeq) return whenGreater;
      if(leftSeq < rightSeq) return whenSmaller;
      return 0;
    }).appendTo(container);           // Put the comments back, now in sorted order.
  }

  return {
    reverse: function(){
      chronological = !chronological;
      this.sort();
    },
    sort: function(){
      var comments = getComments();
      numberUnnumbered(comments);
      sortComments(comments);
    },
    applicable: function(){
      return getComments().size() > 0;
    },
    isChronological: function(){
      return chronological;
    }
  };
}

// Own implementation of a map whose keys may be arbitrary values, including
// objects. Keys are looked up with Array.prototype.indexOf, i.e. compared by
// strict equality (objects by identity). Lookups are linear in the number of
// stored keys.
//
// Returns an object with:
//   put(k, v) - stores v under k, replacing any value previously stored for k
//   get(k)    - returns the value stored under k, or undefined if there is none
//   clear()   - removes all entries
function Map(){
  // Must be local: as undeclared variables these would be shared by every
  // instance (and throw a ReferenceError in strict mode).
  var keys = [];
  var values = [];   // values[i] belongs to keys[i]
  return {
    put: function(k,v){
      var ki = keys.indexOf(k);
      if(ki === -1) {
        ki = keys.push(k) - 1;
      }
      values[ki] = v;
    },
    get: function(k){
      var ki = keys.indexOf(k);
      if(ki === -1) {
        return;
      }
      return values[ki];
    },
    clear: function(){
      keys = [];
      values = [];
    }
  };
}

/**
 * Groups replies underneath the comment they answer.
 *
 * A comment counts as a reply when it contains an "@author" link
 * (`.watch-comment-atlink`). Its parent is the closest comment by that author
 * which was posted before the reply. Every reply is moved, together with its
 * chain of ancestors, directly below the top-most ancestor and indented by
 * 2em per nesting level.
 *
 * Relies on the sequence numbers read through getSeq(): newer comments have
 * higher sequence numbers than older ones. Comments without a sequence number
 * are never threaded.
 */
function threadComments() {
  var postsByAuthor = {};   // authorKey(name) -> that author's comments, oldest first
  var cache = Map();        // comment element -> its sequence number

  // Prefixing keeps author names such as "constructor" from colliding with
  // properties every object inherits from Object.prototype.
  function authorKey(name){
    return '@' + name;
  }
  function getAuthor(comment){
    return $.trim($(".metadata .author",comment).text());
  }
  // Name the comment is addressed to (without the leading "@"), if any.
  function getAt(comment){
    var el = $(".watch-comment-atlink",comment);
    var s = el.text().replace(/^@/,'');
    if(s.length > 0) return s;
  }
  // Returns the closest comment by the addressed author that is strictly
  // older than the given comment, or undefined when there is none.
  function getParent(comment){
    var at = getAt(comment);
    if(! at) return;
    var atAuthorPosts = postsByAuthor[ authorKey(at) ];
    if(! atAuthorPosts) return;
    var refSeq = cache.get(comment), parent, i;

    for(i=0; i < atAuthorPosts.length; i++) {            // Oldest first, so the last candidate that passes is the closest one.
      if( !(cache.get(atAuthorPosts[i]) < refSeq) ) {    // A parent must have a lower seq-no than *this* comment. Written
        break;                                           // negated so that missing sequence numbers also end the search.
      }
      parent = atAuthorPosts[i];
    }
    // Parents are strictly older than their children, so walking up the
    // ancestor chain always terminates (a self-addressed comment cannot
    // become its own parent).
    return parent;
  }

  var comments = getComments().sort(function(a,b){   // Sort chronological
        a = getSeq(a); b = getSeq(b);
        return a > b ?  1 : a < b ? -1 : 0;
      });

  // Index all comments by author and remember their sequence numbers.
  comments.each(function(){
    cache.put(this,getSeq(this));
    var author = getAuthor(this);
    if(! author) return;
    var key = authorKey(author);
    if(! postsByAuthor[key] ) postsByAuthor[key] = [];
    postsByAuthor[key].push(this);
  });

  // build tree
  comments.each(function(){
    var chain = [], ancestor = getParent(this), node, level;
    if(! ancestor) return;              // Not a reply, or the addressed comment was not found.

    chain.push(this);
    while(ancestor) {
      chain.push(ancestor);
      ancestor = getParent(ancestor);
    }
    chain.reverse();                    // Top ancestor first, *this* comment last.
    node = chain.shift();               // The top ancestor stays where it is.
    level = chain.length;               // Nesting depth of *this* comment.
    $(chain).remove();                  // Take the descendants out of the DOM ...
    while(chain.length) {               // ... and re-insert them one after the other below the top ancestor.
      $(chain[0]).insertAfter(node);
      node = chain.shift();
    }
    $(this).css('margin-left',''+ level*2 +'em');
  });
}

// ============================================================================
// Wiring function
// ============================================================================
/**
 * Builds the control box (inserted before `#comments-view`) and wires its
 * buttons:
 *   - "Reverse comment ordering" and "Thread comments", when the sorter
 *     reports itself applicable;
 *   - "Fetch all comments", when the fetcher reports itself applicable.
 */
function createUI(){
  var uiAnchor = $('<div class="custom-controls"></div>').insertBefore('#comments-view');
  var sorter   = new CommentSorter();
  var fetcher  = new CommentFetcher(function(){
    // Nothing is done once all pages have been fetched.
  });

  // Adds a push button with the given caption to the control box.
  function addButton(caption, onClick){
    var button = $('<input type="button" value="'+ caption +'"></input>');
    button.appendTo(uiAnchor).click(onClick);
    return button;
  }

  $('<style type="text/css">'+
    '.custom-controls { border:1px solid #BBBBBB; margin-top:0.5em; margin-bottom:0.5em; padding:0.5em; }'+
  '</style>').appendTo('head');

  if( sorter.applicable() ) {
    addButton("Reverse comment ordering", function(){
      sorter.reverse();
    });
  }
  if( sorter.applicable() ) {
    addButton("Thread comments", function(){
      sorter.sort();
      threadComments();
    });
  }
  if( fetcher.applicable() ) {
    var fetchButton = addButton("Fetch all comments", function(){
      var bar = new ProgressBar(
            getTotalComments(),
            uiAnchor,
            "Comment fetching in progress: {VALUE} %",
            "Comments found: {VALUE}"
          );
      fetchButton.remove();        // Fetching can only be started once.
      fetcher.setProgressBar(bar);
      fetcher.start();
    });
  }
}

// Entry point: build the controls as soon as the script is evaluated.
createUI();


0 comments:

Post a Comment