Print

Kato's Perl CSV parser

#!/usr/local/bin/perl -w

# FILE:          convertcsvtosybbcp
# AUTHOR:        Kato Haws 12/12/2005
# PURPOSE:       Convert .csv files (e.g. Excel export files) to my own bcp format.
# PARAMETERS:    None. Input is read from STDIN and output is written to STDOUT;
#                passing any command-line argument prints the usage text and exits.
#
# OUTPUT FORMAT: Every field-separating comma becomes "|<ft>|" and every
#                completed data row is followed by "|<RT>|" plus a newline.
#                The double quotes that merely enclose a field are dropped, and
#                a doubled quote ("") inside a field is written as a single
#                quote. A row whose quoted field continues on the next input
#                line gets a plain newline instead of the row terminator.

use strict;
use warnings;

### Set up usage message.
# Built line by line so the blank and whitespace-only lines of the message
# are explicit instead of hiding inside a multi-line string literal.
my $USAGE = join "\n",
    'convertcsvtosybbcp',
    '',
    "    Converts csv files (e.g. Excell export files) to ASU's own special Sybase bcp format.",
    '',
    '       ',
    '',
    '        cat xxxxx.csv | convertcsvtosybbcp > xxxxx.dta';

### See if parameter error or request for information.
die "$USAGE\n" if @ARGV;

### Set up the literals for output and the quote counter.
my $bcp_ft = "|<ft>|";    # Output field terminator.
my $bcp_rt = "|<RT>|";    # Output row terminator.

# Double quote counter (in .csv files the quote of concern is the double
# quote, not the single quote). It is reset at every field separator and at
# every completed row, and deliberately carried across input lines so that a
# quoted field may span several lines.
my $qc = 0;

# Flag to keep track of whether the last line was terminated properly on
# output. (Set it on to keep from printing an error if there are no lines.)
my $bcp_rt_printed = 1;

### Process the file.
while (defined(my $line = <>)) {

    ### Do more powerful version of "chomp" on line. (In case ftp was run in non-ascii mode).
    $line =~ s/[\n\r\f]+\z//;    # Trailing newlines, carriage returns, and form feeds.

    ### Keep track of whether last line is properly terminated.
    $bcp_rt_printed = 0;

    ### Parse the line one character at a time, collecting the converted text.
    # Every character falls into exactly one of the three branches below, so
    # no separate "internal error" check is needed.
    my $out = '';
    for my $ch (split //, $line) {
        if ($ch eq '"') {
            $qc++;
            # Quote number 1 opens the field and even-numbered quotes either
            # close it or start an escaped pair: none of those are printed.
            # An odd-numbered quote after the first is the second half of an
            # escaped pair ("") and stands for one literal quote.
            $out .= $ch if $qc > 1 && $qc % 2 == 1;
        } elsif ($ch eq ',' && $qc % 2 == 0) {
            # A comma outside of quotes is a field separator.
            $out .= $bcp_ft;
            $qc = 0;
        } else {
            # Ordinary data, including commas inside a quoted field.
            $out .= $ch;
        }
    }

    ### We came to the end of this line.
    if ($qc % 2 == 0) {
        # Quotes are balanced, so this is a valid end of data row.
        print "$out$bcp_rt\n";
        $bcp_rt_printed = 1;
        $qc = 0;
    } else {
        # Still inside a quoted field: keep the line break as data and
        # continue the same row with the next input line.
        print "$out\n";
    }
}

# The input ended inside a quoted field, so the final row never received its
# row terminator.
die "*** convertcsvtosybbcp Warning: last output line was not properly terminated, check input file.\n"
    if !$bcp_rt_printed;


Created by admin. Last Modification: Sunday 17 of June, 2007 01:31:40 UTC by admin.