# Last update: 2003/12/07
#
# HTML Table to CSV
#
# (p) Jerry Nagasaki
#
#
# Converts "simple" n*m HTML tables (w/o merged cells or nested
# tables) to a csv file.
#
# INPUT: Extracted HTML table (<table> ... </table>) named "table.html"
# OUTPUT: Generated CSV File (;) named "table.csv"
#
# (optional 2nd output: "Plain Table" named "table2.html")
#
#
# Notes:
# - New-line breaks in <td>-texts are NOT converted to a "simple space"
# - Empty <td> entries should be filled with a character in the source file
#
# --- Read the input table and flatten it into a single string -----------
#
# Slurp "table.html" into @infile, one element per input line.  The script
# has nothing to work on without its input, so a failed open is fatal and
# reports the OS error instead of silently continuing with no data.
open(my $in_fh, '<', 'table.html')
    or die "Cannot open table.html for reading: $!\n";
@infile = <$in_fh>;
close($in_fh);

# Working buffers.  Each one is seeded with a single empty string (not an
# empty list), so after the push below @dummy holds ("", <whole file>).
# NOTE(review): the leading empty element looks unintentional, but code
# further down may rely on these indices -- confirm before changing it.
@dummy  = ("");
@dummy2 = ("");
@table2 = ("");

# Join all input lines into one long string so that tags which span
# several lines can be matched by a single substitution.  Newlines are
# removed outright (not replaced by a space); @infile is modified in place.
s/\n//g foreach @infile;
$newelem = join('', @infile);

push(@dummy, $newelem);
$i = 0;
foreach (@dummy)
{
print "$i $_";
$i++;
$_ =~ s/\ / /g; # turn   to "simple space"
$_ =~ tr/ //s; # remove multiple spaces
$_ =~ s/\t//g; # remove tabs
$_ =~ s// |