Kato's Perl CSV parser
#!/usr/local/bin/perl -w
# FILE: convertcsvtosybbcp
# AUTHOR: Kato Haws 12/12/2005
# PURPOSE: Convert .csv files (e.g. Excel export files) to my own bcp format.
# PARAMETERS: None
### Enable strictures so misspelled variable names are caught at compile time.
use strict;
use warnings;

### Set up usage message.
my $USAGE="convertcsvtosybbcp
Converts csv files (e.g. Excell export files) to ASU's own special Sybase bcp format.
cat xxxxx.csv | convertcsvtosybbcp > xxxxx.dta";

### See if parameter error or request for information.
### The script only filters stdin to stdout, so any argument at all gets the usage text.
die "$USAGE\n" if @ARGV;

### Set up the literals for output.
my $bcp_ft = "|<ft>|";    # Output field terminator.
my $bcp_rt = "|<RT>|";    # Output row terminator.

### _convert_line($line, $qc)
###   Translate one input line (line ending already removed) into output text.
###   $line - the text to translate.
###   $qc   - double quote count carried over from the previous line; non-zero
###           only while a quoted field that began on an earlier line is still open.
###   Returns the list ($text, $qc): the translated text (without any row
###   terminator) and the updated quote count.  An odd count on return means
###   the line ended inside an open quoted field.
sub _convert_line {
    my ($line, $qc) = @_;
    my $text = '';

    for my $char (split //, $line) {
        if ($char eq '"') {
            $qc++;
            # Within a field, the 1st quote opens it and every even-numbered
            # quote either closes it or starts a doubled ("") pair.  Only an
            # odd-numbered quote after the first is data: it is the second
            # half of a doubled pair and stands for one literal quote.
            $text .= $char if $qc > 1 && $qc % 2 == 1;
        }
        elsif ($char eq ',' && $qc % 2 == 0) {
            # A comma outside of quotes separates fields; start counting
            # quotes afresh for the next field.
            $text .= $bcp_ft;
            $qc = 0;
        }
        else {
            # Ordinary data, including commas inside a quoted field.
            $text .= $char;
        }
        # These three branches cover every character, so no trailing sanity
        # check is needed.  (An earlier check written as  ! $c eq '"'  parsed
        # as (!$c) eq '"' and therefore could never fire.)
    }

    return ($text, $qc);
}

### Process the file.
my $qc = 0;                # Double quote counter (in .csv files the quote of concern is the double quote).
my $bcp_rt_printed = 1;    # Whether the last output line was properly terminated.
                           # (Starts on so that empty input is not reported as an error.)

while (defined(my $line = <>)) {
    ### Do more powerful version of "chomp" on line. (In case ftp was run in non-ascii mode).
    $line =~ s/[\n\r\f]+\z//;    # Strip trailing newlines, carriage returns, and form feeds.

    ### Assume the row is unterminated until proven otherwise.
    $bcp_rt_printed = 0;

    my $text;
    ($text, $qc) = _convert_line($line, $qc);

    if ($qc % 2 == 0) {
        ### Quotes are balanced: this is a valid end of a data row.
        print "$text$bcp_rt\n";
        $bcp_rt_printed = 1;
        $qc = 0;
    }
    else {
        ### Still inside a quoted field: keep the embedded line break and
        ### continue the same row with the next input line.
        print "$text\n";
    }
}

die "*** convertcsvtosybbcp Warning: last output line was not properly terminated, check input file.\n"
    unless $bcp_rt_printed;
Created by admin. Last Modification: Sunday 17 of June, 2007 01:31:40 UTC by admin.






