#!/usr/bin/perl
#
# checkLogs.pl - check that every chunk of input files produced a log file,
# and scan the log files that do exist for reported errors.
#
# Usage: ./checkLogs.pl input_dir log_dir missed_dir chunk_size
#
#   input_dir   directory holding the original input files
#   log_dir     directory expected to hold one "subset<N>.log" per chunk
#   missed_dir  directory that receives the input files of missing chunks
#   chunk_size  number of input files per chunk (positive integer)
#
# Chunk N covers entries [N * chunk_size, (N + 1) * chunk_size) of the
# input directory listing as produced by `ls`.
#
# Reports go to STDOUT; debug tracing goes to STDERR while $DEBUG_ME is 1.

use strict;
use warnings;
use POSIX;

die "Usage: ./checkLogs.pl input_dir log_dir missed_dir chunk_size\n"
    unless @ARGV == 4;

my ($inputDir, $outputDir, $missedDir, $numFilesPerChunk) = @ARGV;

# A zero or non-numeric chunk size would make the division below blow up
# with "Illegal division by zero", so reject it up front.
die "chunk_size must be a positive integer (got '$numFilesPerChunk')\n"
    unless $numFilesPerChunk =~ /\A\d+\z/ && $numFilesPerChunk > 0;

my $DEBUG_ME = 1;

# Count the original files in the input directory; the listing order defines
# which files belong to which chunk.
my @fileList = _listDirectory($inputDir);

# Build a lookup of the log files present, keyed by "$outputDir/<name>" so
# the keys have the same form as the expected log paths built below.
my %logHash = map { $_ => 1 } _findLogFiles($outputDir);

if (1 == $DEBUG_ME) {
    print STDERR "filelist is @fileList...\n";
    print STDERR "loglist is @{[ sort keys %logHash ]}...\n";
}

my $numFiles        = scalar @fileList;
my $expectedNumLogs = ceil($numFiles / $numFilesPerChunk);

if (1 == $DEBUG_ME) {
    print STDERR "Num files: $numFiles; numFilesPerChunk: $numFilesPerChunk;\n";
    print STDERR "Expected number of log files: $expectedNumLogs\n";
}

for my $i (0 .. $expectedNumLogs - 1) {
    # Check that the log file exists.
    # Very important to coordinate the log filename pattern with
    # processAcquaintData: the format is "subset$subsetIndex.log".
    my $fName = "$outputDir/subset$i.log";

    if (1 == $DEBUG_ME) {
        print STDERR "checking for log file $fName...\n";
    }

    if (!exists $logHash{$fName}) {
        print "MISSED CHUNK $i.\n";
        copyMissedFiles($i);
    }
    else {
        # Scan the log file for errors.
        checkLogForErrors($fName);
    }
}

# _listDirectory($dir)
#
# Returns the chomped lines printed by `ls -- $dir`. The command is run
# through a list-form pipe open, so $dir never passes through a shell.
# Dies if ls cannot be started or exits with a non-zero status.
sub _listDirectory {
    my ($dir) = @_;

    open(my $ls, '-|', 'ls', '--', $dir)
        or die "Can't run ls on $dir: $!\n";
    chomp(my @names = <$ls>);

    # For a pipe, close() returns false when the child exited non-zero.
    close($ls)
        or die "ls failed for $dir (exit status " . ($? >> 8) . ")\n";

    return @names;
}

# _findLogFiles($dir)
#
# Returns "$dir/<name>" for every entry of $dir that the shell pattern
# "*.log" would match (name ends in ".log" and does not start with a dot).
# An unreadable directory produces a warning and an empty list, so every
# chunk is then reported as missed.
sub _findLogFiles {
    my ($dir) = @_;

    my $dh;
    if (!opendir($dh, $dir)) {
        warn "Can't read log directory $dir: $!\n";
        return;
    }
    my @logs = map { "$dir/$_" } grep { /\A[^.].*\.log\z/s } readdir $dh;
    closedir $dh;

    return @logs;
}

# copyMissedFiles($index)
#
# Copies the input files belonging to chunk $index from $inputDir into
# $missedDir using cp. A failed copy is reported with a warning and the
# remaining files are still attempted.
sub copyMissedFiles {
    my ($index) = @_;

    my $start = $index * $numFilesPerChunk;
    my $end   = $start + $numFilesPerChunk;

    # There may be fewer files in the last chunk.
    $end = @fileList if $end > @fileList;

    for my $file (@fileList[$start .. $end - 1]) {
        next if $file eq '';

        if (1 == $DEBUG_ME) {
            print STDERR "copy source name is $file...\n";
            print STDERR "Calling cp $inputDir/$file $missedDir...\n";
        }

        # List-form system: file names are passed to cp verbatim, without
        # shell word-splitting or metacharacter expansion.
        system('cp', '--', "$inputDir/$file", $missedDir) == 0
            or warn "cp $inputDir/$file $missedDir failed (status $?)\n";
    }

    return;
}

# checkLogForErrors($fileName)
#
# Reads the log file $fileName and prints one report line to STDOUT for each
# kind of error found: lines matching "null sentence" and lines matching
# "mismatched number". Dies if the file cannot be opened.
sub checkLogForErrors {
    my ($fileName) = @_;

    open(my $in, '<', $fileName)
        or die "Can't open $fileName: $!\n";

    my $emptySentenceError = 0;
    my $numSentenceError   = 0;

    while (my $line = <$in>) {
        # A line containing both messages counts only as an empty-sentence
        # error, hence the elsif.
        if ($line =~ m/null sentence/) {
            $emptySentenceError = 1;
        }
        elsif ($line =~ m/mismatched number/) {
            $numSentenceError = 1;
        }
    }
    close($in);

    if ($emptySentenceError) {
        print "ERROR reported in $fileName: empty sentence.\n";
    }
    if ($numSentenceError) {
        print "ERROR reported in $fileName:"
            . " number of sentences in vs. out doesn't match.\n";
    }

    return;
}