csvUpdate.pl
#!/usr/bin/perl
# csvUpdate.pl
# This perl script will be used to update
# customer data in a database. The script reads in a comma
# separated value file. Each line of the file contains
# all the info to update 1 entry in a database.
# Each line will have 3 types of fields:
# 1) Some fields in the input field will contain
# data to update. For example, a field to update
# might contain the amount of money due.
# 2) Other fields in the line will contain data
# that allows you to determine which entry to update
# These fields don't get updated, and are used just
# to find the entry in the entry in the database.
# An example of it would be a person's name, or
# id number.
# 3) The last type of field is just extra data presented
# by the customer that is neither updated nor used to
# identify the entry in the database. This information
# is ignored by us.
# The xml file will tell us which columns (fields) contain
# data to update, and which contain columns (fields) to
# use to get to the right entry in the database.
# The csv file's 1st line will have the column headers.
# The same names will be in the xml file.
# - Jim Maher
# james.maher@ironmountain.com
use lib "/nc/apps/perlgcc/drcopslib";
use strict;
use warnings;
use XML::LibXML;
use Log::Rolling;
# USAGE
# ./csvUpdate.pl -f csvUpdate.xml
my $AppGroup;
my $Folder;
my $arsquery; # Holds the string we send to arsdoc that lets us id a unique line in the table.
my $ArsUpdate; # Holds the string we send to arsdoc that has the updates.
my $cmdLine;
my $DryRun;
my $InputFile;
my $OutputFile;
my $Instance;
my $LogFile;
my $recordsIn = 0;
my $recordsDeleted = 0;
my $UserName;
my $UserPassword;
my $logCounter;
my $DRYRUN = 0;
my $parser = XML::LibXML->new();
my ($arg,$cfgfile) = ();
my $parseline;
my $DOC_NAME;
my $DOC_OFF;
my $DOC_LEN;
my $COMP_OFF;
my $COMP_LEN;
my $Column; # Used in gathering the various columns from the xml file.
my %KeyName; # Holds the fields that allow us to identify a specific entry
# in a database.
my %MetaDataName; # A hash table for holding the Meta Data Column names, and
# whether they are integers,or characters.
my @Headers; # Array to hold the columns titles obtained from they
# 1st line of the csv file.
my $ACounter = 0; # Increments index in TableColumns
my @OneLineOfData; # Holds each line of data broken down by the split command.
my %HashOfOneLine; # Holds one line of data from the csv file as the values,
# and the name of each colum as the keys.
my $HKey; # Variable the allows you to go through the hash tables 1 key at a time.
my $TempKey; # Used to temporarily store key names.
my $TempValue; # Used to temporarily store key values.
my $TotalKeyNames; # Total number of entries in the KeyName Hash.
my $TotalMetaDataKeysNames; # Total number of entries in the hash table "MetaDataName"
my $Counter; # Just a loop counter.
####################################################
# Get the -f (filename) argument
####################################################
while (defined $ARGV[0] and $ARGV[0] =~ /^-/)
{
my $arg = shift @ARGV;
if (lc $arg eq "-f")
{
$cfgfile = shift @ARGV;
}
}
#
# Setup xml parser structure
#
my $retrieveParms = $parser->parse_file($cfgfile);
# Get the number of seconds to wait between monitoring cycles.
$Instance = (($retrieveParms->findnodes('/UpdateConfig/InstanceName'))->to_literal);
$AppGroup = (($retrieveParms->findnodes('/UpdateConfig/AppGroup'))->to_literal);
$Folder = (($retrieveParms->findnodes('/UpdateConfig/Folder'))->to_literal);
$UserName = (($retrieveParms->findnodes('/UpdateConfig/UserName'))->to_literal);
$UserPassword = (($retrieveParms->findnodes('/UpdateConfig/UserPassword'))->to_literal);
$DryRun = (($retrieveParms->findnodes('/UpdateConfig/DryRun'))->to_literal);
$LogFile = (($retrieveParms->findnodes('/UpdateConfig/LogFile'))->to_literal);
$InputFile = (($retrieveParms->findnodes('/UpdateConfig/InputFile'))->to_literal);
$OutputFile = (($retrieveParms->findnodes('/UpdateConfig/OutputFile'))->to_literal);
# Each table that could be updated will have different columns.
# The xml file will tell you which columns this particular table
# holds.
# First, find the key columns which contain the data for finding
# the entry in the database.
# Reset the array counter to 0.
$ACounter = 0;
foreach $Column ($retrieveParms->findnodes('/UpdateConfig/Columns/KeyColumns/Column'))
{
# The Column is now a key in a hash.
# The Data Type is now the value to that key.
# Populate the key columns arrays, and increment the counter.
$TempKey=(($Column->findnodes('./Name'))->to_literal);
$TempValue=(($Column->findnodes('./DataType'))->to_literal);
$KeyName{$TempKey} = $TempValue;
}
# Calculate the total number of keys in KeyName.
$TotalKeyNames = keys(%KeyName);
# Now gather the name of the columns that will be updated.
foreach $Column ($retrieveParms->findnodes('/UpdateConfig/Columns/MetaDataColumns/Column'))
{
# The Column is now a key in a hash.
# The Data Type is now the value to that key.
# Populate the key columns arrays, and increment the counter.
$TempKey=(($Column->findnodes('./Name'))->to_literal);
$TempValue=(($Column->findnodes('./DataType'))->to_literal);
$MetaDataName{$TempKey} = $TempValue;
}
# Calculate the total number of keys in MetaDataName
$TotalMetaDataKeysNames = keys(%MetaDataName);
if ( $DryRun eq "true" ) { $DRYRUN = 1 };
unless ( $LogFile =~ m/[a-zA-Z0-9]/ ) { die "FATAL ERROR: No defined."}
#
# Setup logging
#
my $log = Log::Rolling->new(log_file => $LogFile,
max_size => 0, # No limit on log rcords
wait_attempts => 30,
wait_interval => 1,
mode => 0600,
pid => 0);
sub closeLog { print "\n"; $log->commit; exit 0 } # Capture log entries in the event of an interrupt
$SIG{INT} = \&closeLog;
$SIG{TERM} = \&closeLog;
$SIG{QUIT} = \&closeLog;
# Document parameter values in the logfile
&logit (" ****** Starting recap of xml parameters ****** ");
&logit (" InstanceName: $Instance");
&logit (" AppGroup: $AppGroup");
&logit (" Folder: $Folder");
&logit (" DryRun: $DryRun");
&logit (" LogFile: $LogFile");
&logit (" InputFile: $InputFile");
&logit (" OutputFile: $OutputFile");
&logit (" ****** Ending recap of xml parameters ****** ");
&logit (" ****** Start of processing ****** ");
open (FILEIN,"$InputFile") || die "Cannot open $InputFile \n";
&logit (" Opened input file: $InputFile ");
open (FILEOUT,">$OutputFile") || die "Cannot open $OutputFile \n";
&logit (" Opened output file: $OutputFile ");
#
# Main processing section.
#
while (defined ($parseline=))
{
++$recordsIn;
chomp($parseline);
$_ = $parseline;
#
@OneLineOfData = split /,/;
# The very 1st line of the csv file holds the
# Column titles. We need to save this information off
# into a special array.
if ( $recordsIn == 1 )
{
@Headers = @OneLineOfData;
#We don't want to do any more processing on
# this line. Go to the 2nd line of the input file.
next;
}
# At this point, you have an array with header
# names in it, and an array with customer data in
# it. Merge the 2 together to create a hash
# table because it is easier to work with to
# pull data.
foreach $ACounter ( 0 .. $#Headers )
{
$HashOfOneLine{$Headers[$ACounter]} = $OneLineOfData[$ACounter];
}
# Build the arsquery string.
# Reset the arsquery string.
$arsquery = "\\\"where ";
# Copy the number of entries in the keys hash.
$Counter = $TotalKeyNames - 1;
# Grab the keys that from the xml file that tell us which
# columns are used for finding the entry in the table.
foreach $HKey ( keys(%KeyName) )
{
$arsquery = $arsquery . "$HKey = ";
# If the key is a character type, we need to put a back slash, single quotes around the data.
if ( "$KeyName{$HKey}" eq "CHARACTER" )
{
$arsquery = $arsquery . "\\'$HashOfOneLine{$HKey}\\'";
}
else
{
# Hold numbers. Don't add single quotes.
$arsquery = $arsquery . "$HashOfOneLine{$HKey}";
}
# Add an " AND " if not the last entry to be processed.
if ( $Counter > 0)
{
$arsquery = $arsquery . " AND ";
# Decrease counter by 1 so you know when you
# have reached the end of the hash table.
$Counter--;
}
}
$arsquery = $arsquery . "\\\"";
&logit(" Processing records $arsquery\n");
#&logit (" Processing record with the following fields: $DOC_NAME, $DOC_OFF, $DOC_LEN, $COMP_OFF, $COMP_LEN");
# We now have a way to get a unique line in the table.
# We now need to construct the string that will do the actual updating.
# Reset $ArsUpdate to nothing each time through the loop.
$ArsUpdate = "";
##########
# Grab the keys that from the xml file that tell us which
# columns are used for updating the entry in the table.
foreach $HKey ( keys(%MetaDataName) )
{
$ArsUpdate = $ArsUpdate . " -n \\\"$HKey = ";
# If the key is a character type, we need to put a back slash, single quotes around the data.
if ( "$MetaDataName{$HKey}" eq "CHARACTER" )
{
$ArsUpdate = $ArsUpdate . "\\'$HashOfOneLine{$HKey}\\'\\\"";
}
else
{
# Hold numbers. Don't add single quotes.
$ArsUpdate = $ArsUpdate . "$HashOfOneLine{$HKey}\\\"";
}
}
# Log the $ArsUpdate string.
&logit("Constructed update string: $ArsUpdate");
&logit (" Constructed query: /usr/lpp/ars/bin/arsdoc update -h $Instance -G $AppGroup -f \"$Folder\" -u admin -p ******** -i $arsquery $ArsUpdate -v ");
$cmdLine = "/usr/lpp/ars/bin/arsdoc update -h $Instance -G $AppGroup -f \"$Folder\" -u admin -p $UserPassword -i $arsquery $ArsUpdate -v";
unless ($DRYRUN)
{
##system "$cmdLine";
if ( `echo $?`== 0 )
{
++$recordsDeleted;
&logit (" Successful processing for: $DOC_NAME, $DOC_OFF, $DOC_LEN ");
}
else
{
&logit (" UNSUCCESSFUL PROCESSING: $arsquery ");
print FILEOUT ("UNSUCCESSFUL PROCESSING FOR:$arsquery\n");
}
}
}
&logit ("Total input records processed: $recordsIn ");
&logit ("Total successful deletions: $recordsDeleted ");
close (FILEIN);
&logit (" Closed input file: $InputFile ");
close (FILEOUT);
&logit (" Closed output file: $OutputFile ");
sub logit
{
++$logCounter;
$log->entry("$_[0]");
# Commit every record to the logfile.
unless ( $logCounter % 1 )
{
$log->commit;
}
}