csvUpdate.pl

#!/usr/bin/perl

# csvUpdate.pl

# This perl script will be used to update
# customer data in a database.  The script reads in a comma
# separated value file.  Each line of the file contains
# all the info to update 1 entry in a database.
# Each line will have 3 types of fields:
# 1) Some fields in the input field will contain
# data to update.  For example, a field to update
# might contain the amount of money due.
# 2) Other fields in the line will contain data
# that allows you to determine which entry to update
# These fields don't get updated, and are used just
# to find the entry in the entry in the database.
# An example of it would be a person's name, or
# id number.
# 3) The last type of field is just extra data presented
# by the customer that is neither updated nor used to
# identify the entry in the database.  This information
# is ignored by us.

# The xml file will tell us which columns (fields) contain
# data to update, and which contain columns (fields) to
# use to get to the right entry in the database.

# The csv file's 1st line will have the column headers.
# The same names will be in the xml file.

# - Jim Maher

# james.maher@ironmountain.com

use lib "/nc/apps/perlgcc/drcopslib";
use strict;
use warnings;
use XML::LibXML;
use Log::Rolling;

# USAGE
# ./csvUpdate.pl -f csvUpdate.xml

my $AppGroup;
my $Folder;
my $arsquery;      # Holds the string we send to arsdoc that lets us id a unique line in the table.
my $ArsUpdate;     # Holds the string we send to arsdoc that has the updates.
my $cmdLine;
my $DryRun;
my $InputFile;
my $OutputFile;
my $Instance;
my $LogFile;
my $recordsIn = 0;
my $recordsDeleted = 0;
my $UserName;
my $UserPassword;
my $logCounter;
my $DRYRUN = 0;
my $parser = XML::LibXML->new();
my ($arg,$cfgfile) = ();
my $parseline;
my $DOC_NAME;
my $DOC_OFF;
my $DOC_LEN;
my $COMP_OFF;
my $COMP_LEN;
my $Column;        # Used in gathering the various columns from the xml file.
my %KeyName;       # Holds the fields that allow us to identify a specific entry
                  # in a database.
my %MetaDataName;  # A hash table for holding the Meta Data Column names, and 
                  # whether they are integers,or characters.
my @Headers;       # Array to hold the columns titles obtained from they
                  # 1st line of the csv file.
my $ACounter = 0;  # Increments index in TableColumns
my @OneLineOfData; # Holds each line of data broken down by the split command.
my %HashOfOneLine; # Holds one line of data from the csv file as the values,
                  # and the name of each colum as the keys.
my $HKey;          # Variable the allows you to go through the hash tables 1 key at a time.
my $TempKey;       # Used to temporarily store key names.
my $TempValue;     # Used to temporarily store key values.
my $TotalKeyNames; # Total number of entries in the KeyName Hash.
my $TotalMetaDataKeysNames;  # Total number of entries in the hash table "MetaDataName"
my $Counter;       # Just a loop counter.

####################################################
# Get the -f (filename) argument
####################################################
while (defined $ARGV[0] and $ARGV[0] =~ /^-/)
{
 my $arg = shift @ARGV;
 if (lc $arg eq  "-f")
 {
   $cfgfile = shift @ARGV;
 }
}

#
# Setup xml parser structure
#
my $retrieveParms = $parser->parse_file($cfgfile);
# Get the number of seconds to wait between monitoring cycles.
$Instance = (($retrieveParms->findnodes('/UpdateConfig/InstanceName'))->to_literal);
$AppGroup = (($retrieveParms->findnodes('/UpdateConfig/AppGroup'))->to_literal);
$Folder = (($retrieveParms->findnodes('/UpdateConfig/Folder'))->to_literal);
$UserName = (($retrieveParms->findnodes('/UpdateConfig/UserName'))->to_literal);
$UserPassword = (($retrieveParms->findnodes('/UpdateConfig/UserPassword'))->to_literal);
$DryRun = (($retrieveParms->findnodes('/UpdateConfig/DryRun'))->to_literal);
$LogFile = (($retrieveParms->findnodes('/UpdateConfig/LogFile'))->to_literal);
$InputFile = (($retrieveParms->findnodes('/UpdateConfig/InputFile'))->to_literal);
$OutputFile = (($retrieveParms->findnodes('/UpdateConfig/OutputFile'))->to_literal);

# Each table that could be updated will have different columns.
# The xml file will tell you which columns this particular table
# holds.  

# First, find the key columns which contain the data for finding
# the entry in the database.

# Reset the array counter to 0.
$ACounter = 0;

foreach $Column ($retrieveParms->findnodes('/UpdateConfig/Columns/KeyColumns/Column'))
{
 # The Column is now a key in a hash.
 # The Data Type is now the value to that key.
 # Populate the key columns arrays, and increment the counter.
 $TempKey=(($Column->findnodes('./Name'))->to_literal);
 $TempValue=(($Column->findnodes('./DataType'))->to_literal);
 $KeyName{$TempKey} = $TempValue;
}

# Calculate the total number of keys in KeyName.
$TotalKeyNames = keys(%KeyName);

# Now gather the name of the columns that will be updated.

foreach $Column ($retrieveParms->findnodes('/UpdateConfig/Columns/MetaDataColumns/Column'))
{
 # The Column is now a key in a hash.
 # The Data Type is now the value to that key.
 # Populate the key columns arrays, and increment the counter.
 $TempKey=(($Column->findnodes('./Name'))->to_literal);
 $TempValue=(($Column->findnodes('./DataType'))->to_literal);
 $MetaDataName{$TempKey} = $TempValue;
}

# Calculate the total number of keys in MetaDataName
$TotalMetaDataKeysNames = keys(%MetaDataName);

if ( $DryRun eq "true" ) { $DRYRUN = 1 };

unless ( $LogFile =~ m/[a-zA-Z0-9]/ ) { die "FATAL ERROR: No  defined."}



#
# Setup logging
#

my $log = Log::Rolling->new(log_file => $LogFile,
                                max_size => 0,                 # No limit on log rcords
                                wait_attempts => 30,
                                wait_interval => 1,
                                mode => 0600,
                                pid => 0);

sub closeLog { print "\n"; $log->commit; exit 0 }               # Capture log entries in the event of an interrupt
$SIG{INT} = \&closeLog;
$SIG{TERM} = \&closeLog;
$SIG{QUIT} = \&closeLog;

# Document parameter values in the logfile

&logit (" ****** Starting recap of xml parameters ****** ");
&logit (" InstanceName: $Instance");
&logit (" AppGroup: $AppGroup");
&logit (" Folder: $Folder");
&logit (" DryRun: $DryRun");
&logit (" LogFile: $LogFile");
&logit (" InputFile: $InputFile");
&logit (" OutputFile: $OutputFile");
&logit (" ****** Ending recap of xml parameters ****** ");
&logit (" ****** Start of processing ****** ");

open (FILEIN,"$InputFile") || die "Cannot open $InputFile \n";
&logit (" Opened input file: $InputFile ");
open (FILEOUT,">$OutputFile") || die "Cannot open $OutputFile \n";
&logit (" Opened output file: $OutputFile ");

#
# Main processing section.
#
while (defined ($parseline=))
{
 ++$recordsIn;
 chomp($parseline);
 $_ = $parseline;
 #
 @OneLineOfData = split /,/;

 # The very 1st line of the csv file holds the
 # Column titles.  We need to save this information off
 # into a special array.

 if ( $recordsIn == 1 )
 {
   @Headers = @OneLineOfData;

   #We don't want to do any more processing on
   # this line.  Go to the 2nd line of the input file.
   next;
 }

 # At this point, you have an array with header
 # names in it, and an array with customer data in
 # it.  Merge the 2 together to create a hash
 # table because it is easier to work with to
 # pull data.

 foreach $ACounter ( 0 .. $#Headers )
 {
   $HashOfOneLine{$Headers[$ACounter]} = $OneLineOfData[$ACounter];
 }

 # Build the arsquery string.

 # Reset the arsquery string.
 $arsquery = "\\\"where ";

 # Copy the number of entries in the keys hash.
 $Counter = $TotalKeyNames - 1;

 # Grab the keys that from the xml file that tell us which
 # columns are used for finding the entry in the table.
 foreach $HKey ( keys(%KeyName) )
 {
   $arsquery = $arsquery . "$HKey  = ";

   # If the key is a character type, we need to put a back slash, single quotes around the data.
   if ( "$KeyName{$HKey}" eq "CHARACTER" )
   {
     $arsquery = $arsquery . "\\'$HashOfOneLine{$HKey}\\'";
   }
   else
   {
    # Hold numbers.  Don't add single quotes.
     $arsquery = $arsquery . "$HashOfOneLine{$HKey}";
   }

   # Add an " AND " if not the last entry to be processed.
   if ( $Counter > 0)
   {
     $arsquery = $arsquery . " AND ";
     # Decrease counter by 1 so you know when you
     # have reached the end of the hash table.
     $Counter--;
   }
 }
 $arsquery = $arsquery . "\\\"";

 &logit(" Processing records $arsquery\n");
 #&logit (" Processing record with the following fields: $DOC_NAME, $DOC_OFF, $DOC_LEN, $COMP_OFF, $COMP_LEN");

 # We now have a way to get a unique line in the table.
 # We now need to construct the string that will do the actual updating.

 # Reset $ArsUpdate to nothing each time through the loop.
 $ArsUpdate = "";

##########

 # Grab the keys that from the xml file that tell us which
 # columns are used for updating the entry in the table.
 foreach $HKey ( keys(%MetaDataName) )
 {
   $ArsUpdate = $ArsUpdate . " -n \\\"$HKey  = ";

   # If the key is a character type, we need to put a back slash, single quotes around the data.
   if ( "$MetaDataName{$HKey}" eq "CHARACTER" )
   {
     $ArsUpdate = $ArsUpdate . "\\'$HashOfOneLine{$HKey}\\'\\\"";
   }
   else
   {
    # Hold numbers.  Don't add single quotes.
     $ArsUpdate = $ArsUpdate . "$HashOfOneLine{$HKey}\\\"";
   }
 }

 # Log the $ArsUpdate string.
 &logit("Constructed update string: $ArsUpdate");

 &logit (" Constructed query: /usr/lpp/ars/bin/arsdoc update -h $Instance -G $AppGroup -f \"$Folder\" -u admin -p ********  -i $arsquery  $ArsUpdate -v ");
 $cmdLine = "/usr/lpp/ars/bin/arsdoc update -h $Instance -G $AppGroup -f \"$Folder\" -u admin -p $UserPassword -i $arsquery $ArsUpdate -v";		

 unless ($DRYRUN)
 {
   ##system "$cmdLine";
   if ( `echo $?`== 0 ) 
   { 
     ++$recordsDeleted;
     &logit (" Successful processing for: $DOC_NAME, $DOC_OFF, $DOC_LEN ");
   }
   else 
   {
     &logit (" UNSUCCESSFUL PROCESSING: $arsquery ");
     print FILEOUT ("UNSUCCESSFUL PROCESSING FOR:$arsquery\n");	
   }	

 }

}


&logit ("Total input records processed: $recordsIn ");
&logit ("Total successful deletions: $recordsDeleted ");


close (FILEIN);
&logit (" Closed input file: $InputFile ");
close (FILEOUT);
&logit (" Closed output file: $OutputFile ");



sub logit
{
 ++$logCounter;
 $log->entry("$_[0]");

 # Commit every record to the logfile.
 unless ( $logCounter % 1 )
 {
   $log->commit;
 }
}