Saturday, July 16, 2011

Sort HTML table rows using a Perl script

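Do you have an HTML table that you want to quickly sort by the values in one column?
This script does just that: it sorts the rows by the text of the second column (the first column is assumed to be a serial number).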


Input:
  1. A file containing only table elements (remove everything else).
  2. The first column must be a serial number; it is ignored. Rows are compared on the second column only, and all remaining columns are ignored as well.

The Perl Script

#Mandatory: the first column must be a serial number; it is ignored. Only the second column is compared; all remaining columns are ignored as well.
#Also, keep only tables in the HTML file.

#This assumes:
# any HTML file containing tables, which may have a tbody. But th must be used to denote the column header.
use strict;
use feature "switch";

use Common;

use HTML::Element;

use HTML::TreeBuilder;

# Path of the HTML file to read. Hard-coded: edit this to point at your file.
my $filename = 'F:/tmp/t1.html';

# Sort direction flag read by autoincrement(): when true, the sorted rows are
# reversed before being put back into the table.
my $reverse = 1;

# Declared but not used anywhere in the script as shown.
my $toc;

# Sorts the data rows of one HTML table in place.
#
# Despite its name, this routine does not number anything: it reorders rows.
#
# Argument:
#   $_[0] - the HTML::Element of a <table>. If the table has a <tbody> child,
#           the rows of the first <tbody> are sorted; otherwise the rows
#           directly under the table are sorted.
#
# Behaviour:
#   * A "data row" is a <tr> whose first child is a <td>. Rows starting with
#     anything else (e.g. <th> header rows) are left where they are.
#   * Data rows are compared case-insensitively on the trimmed text of their
#     second cell; a row with no second cell sorts as the empty string.
#   * If the file-level $reverse flag is true the sorted order is reversed.
#   * The data rows are detached and re-appended to the end of their parent,
#     so any non-data rows end up before them.
#   * A table without data rows is left untouched.
#
# Returns nothing.
sub autoincrement
{
    my ($table) = @_;

    # content_list() may hand back plain text strings as well as element
    # objects, so every node is checked with ref() before a method call.
    return unless ref $table;

    # See if a tbody is present; if so, its rows are the ones to sort.
    foreach my $item ($table->content_list())
    {
        if (ref($item) and $item->tag() eq "tbody")
        {
            $table = $item;
            last;
        }
    }

    # Collect the data rows: <tr> elements whose first child is a <td>.
    my @unsortedrows = grep {
        my $row   = $_;
        my $first = (ref($row) and $row->tag() eq "tr")
            ? ($row->content_list())[0]
            : undef;
        ref($first) and $first->tag() eq "td";
    } $table->content_list();

    # Nothing sortable (empty table, header-only table): leave it alone.
    return unless @unsortedrows;

    # Compute each row's sort key once, sort on the keys, then drop the keys.
    my @sortedrows =
        map  { $_->[1] }
        sort { $a->[0] cmp $b->[0] }
        map  {
            my $cell = ($_->content_list())[1];
            [ (ref($cell) ? uc($cell->as_trimmed_text()) : ""), $_ ];
        } @unsortedrows;

    @sortedrows = reverse @sortedrows if $reverse;

    # Every data row came from $table->content_list(), so $table is their
    # common parent. Detach them all first, then re-append in sorted order.
    $_->detach() for @unsortedrows;
    $table->push_content(@sortedrows);

    return;
}

# Main script: parse the HTML file, sort every table that is a direct child
# of <body>, then print the body's child elements to STDOUT.

die "File $filename not found" if !-r $filename;

my $tree = HTML::TreeBuilder->new();

$tree->parse_file($filename);

# Look the body up by tag name instead of assuming it is the second child of
# the root element.
my $body = $tree->find_by_tag_name("body")
    or die "No <body> element found in $filename";

# Sorting only rearranges rows inside each table, so this list of the body's
# children is valid both for sorting and for printing afterwards.
my @body_children = $body->content_list();

foreach my $item (@body_children)
{
    # content_list() returns plain strings for text nodes; only element
    # objects have a tag().
    autoincrement($item) if ref($item) and $item->tag() eq "table";
}

foreach my $node (@body_children)
{
    if (ref $node)
    {
        print $node->as_HTML();
    }
    else
    {
        # The input is expected to contain only tables. Stray text cannot be
        # serialised with as_HTML(), so report it and carry on instead of
        # dying half-way through the output.
        warn "Skipping text found outside of any table in $filename\n";
    }
}

# Release the parse tree explicitly, as the HTML::TreeBuilder documentation
# recommends once a tree is no longer needed.
$tree->delete();

# Finally:

 

No comments:

Post a Comment