#!/usr/local/bin/perl ################################################################################ # # httpd log splitter for pwebstats # # Martin Gleeson, gleeson@unimelb.edu.au # # (c) Copyright the University of Melbourne, 1995-7 # # This program is provided free of charge provided the Copyright # notice remains intact. No warranty is made, either expressed or # implied. USE AT YOUR OWN RISK. # # Version 1.1.1 # Last Updated 21st August 1997 # - changed call to unix date command to call to sub time_now # # v1.1 30th April 1997 # - added -b option # sub usage { $usagescreen = < [-i ] [-b ] [-v verbose] * The server log file must be in Common Log Format, in a file specified by the -l option * The interval must be 'daily', 'weekly' or 'monthly', specified by the -i option. Default is weekly. * Places "httpd-log.YYYY-MM-DD" files in the current directory. where YYYY-MM-DD is the date of the last access in each file (the filename has the date in the order "year, month, day" to make pattern matching listings have the files in chronological order). * The -b option specifies a different basename for the filenames produced by log-splitter.pl. e.g. -b apache will produce files names "apache.YYYY-MM-DD" * -v verbose for verbose output, including progress bar. [optional] USAGE print STDERR $usagescreen; exit(0); } ################################################################################ # use newgetopt library to parse the options @options = ("l:s","v:s","i:s","b:s"); if (! &NGetOpt(@options)) { &usage; }; ################################################################################ # Check the options from the conf file and/or the command line if( ! $opt_l ){ &usage; } if( ! -e $opt_l ) { print STDERR "Could not find log file: $opt_l\n"; &usage; } $logfile = $opt_l; $base = "httpd-log"; $base = "$opt_b" if($opt_b); if( $opt_i ) { $interval = $opt_i; } else { $interval = "weekly"; } if( $opt_v ) { $verbose = 1; } %months = ( 'Jan','01', 'Feb','02', 'Mar','03', 'Apr','04', 'May','05', 'Jun','06', 'Jul','07', 'Aug','08', 'Sep','09', 'Oct','10', 'Nov','11', 'Dec','12',); %m_days = ( 'Jan','31', 'Feb','28', 'Mar','31', 'Apr','30', 'May','31', 'Jun','30', 'Jul','31', 'Aug','31', 'Sep','30', 'Oct','31', 'Nov','30', 'Dec','31',); %longmonths = ( '0','January', '1','February', '2','March', '3','April', '4','May', '5','June', '6','July', '7','August', '8','September', '9','October', '10','November', '11','December'); ################################################################################ # # # Main Program # # # ################################################################################ # Initialise counters, etc. $totaldates = 0; $totalhours = 0; $currDateStart = 9999999999; $currDateFinish = 0; $date_now=&time_now(); #`date +"%I:%M %p, %A %B %e %Y"`; chop($date_now); if( $verbose ) { printf STDERR "===================================================================\n"; printf STDERR "log-splitter.pl started at $date_now.\n"; printf STDERR "log-splitter.pl splitting logfile on a $interval basis.\n"; printf STDERR "===================================================================\n\n"; } &read_and_write_logfile($logfile); $date=&time_now(); #`date +"%I:%M %p, %A %B %e %Y"`;chop($date); if( $verbose ) { printf STDERR "===================================================================\n"; printf STDERR "log-splitter.pl finished at $date_now.\n"; printf STDERR "The log file started on $dateStart and ended on $dateFinish.\n"; printf STDERR "===================================================================\n"; exit(0); } ################################################################################ # End of Main Program # ################################################################################ ################################################################################ # Subroutines # ################################################################################ # Subroutine for reading input files and sorting sub read_and_write_logfile { $file=pop(@_); open(LOG_FILE,"$file"); if( $verbose ) { open(COUNT,"/bin/wc -l $file |"); while( ){ chop; ($line_count) = /^\s+(\d+)\s+\S+$/; } close(COUNT); $inc = sprintf "%d", ( $line_count / 50 ); print STDERR "The logfile has $line_count entries.\n"; print STDERR "Processing...\n"; print STDERR "0% 50% 100%\n"; print STDERR "|-----------------------|------------------------|\n"; $counter=0; } while() { if( $verbose ) { $counter++; if( $counter >= $inc ) { $counter = 0; if( $verbose ) { printf STDERR "\#"; } } } # split the input line into its various components $line=$_; ( $host, $rfc931, $user, $longtime, $request, $type, $size) = /^(\S+) (\S+) (\S+) \[(.+)\] \"(.*)\" (\S+) (\S+)\s/; ($datetime) = split(/ /,$longtime,1); ( $date , $time) = split(/:/,$datetime,2); ( $hour , $minute , $second ) = split (/:/,$time); $timeshort = "$hour:$minute"; ( $dd, $mm, $yy ) = split (/\//,$date); $month = $months{$mm}; $datelong = "$yy$month$dd"; $timelong = "$datelong$hour"; if ( ($datelong =~ m/\[/i) || ( $datelong !~ m/^\d?\d\d\d\d\d\d\d$/i )){ next; } if($interval ne "daily") { $day = &day_of_interval($longtime); $ext = ( 6 - $day ) if( $interval eq "weekly" ); if( $interval eq "monthly" ) { $ext = ( $m_days{$mm} - $day ); $ext++ if( ($year % 4 == 0) && ($month eq "Feb")); } if( ($ext + $dd) > $m_days{$mm} ) { $ndays = ($ext + $dd) - $m_days{$mm}; $month++; $yy++ if( $month > 12 ); $month = "01" if( $month > 12 ); $logfile_extension = "$yy" . "$month" . "0" . "$ndays"; } else { $logfile_extension = sprintf "%s", ($datelong + $ext); } } else { $logfile_extension = sprintf "%s", ($datelong); } if ( $logfile_extension eq $current_logfile_extension ) { print OUT $line; $interval_number++; } else { close(OUT); $real_extension = $logfile_extension; $real_extension =~ s/(\d\d\d\d)(\d\d)(\d\d)/${1}-${2}-${3}/; open(OUT,">> $base.$real_extension"); print OUT $line; $current_logfile_extension = $logfile_extension; } #find start & finish dates of the logging period if( $datelong < $currDateStart ) { $currDateStart = $datelong; $dateStart = $date; $dateStart =~ s/\//\ /g; } elsif( $datelong > $currDateFinish ) { $currDateFinish = $datelong; $dateFinish = $date; $dateFinish =~ s/\//\ /g; } } close(OUT); printf STDERR "\n\n"; } sub day_of_interval { $daten = pop(@_); @MONTHSTART=( 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334); %MMONTHS=( Jan, 0, Feb, 1, Mar, 2, Apr, 3, May, 4, Jun, 5, Jul, 6, Aug, 7, Sep, 8, Oct, 9, Nov, 10, Dec, 11 ); %YYEARS=( 1992, 2, 1994, 4, 1994, 5, 1995, 6, 1996, 0, 1997, 2, 1998, 3, 1999, 4, 2000, 5, 2001, 0, 2002, 1, 2003, 2, 2004, 4 ); $daten=~s#\[|\/|\:# #g; ($day, $monthname, $year, $hour)=split( ' ', $daten ); $doy=$MONTHSTART[$MMONTHS{$monthname}]+$day; $doy++ if( ($year % 4 ==0) && ($MMONTHS{$monthname} > $MMONTHS{'Feb'})); $dow=(($doy+$YYEARS{$year}) % 7); if( $interval eq "monthly" ) { $day; } elsif ( $interval eq "weekly" ) { $dow; } } ### ----------------------------------------------------------------------- ### sub time_now { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst); ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; $hour = "0" . $hour if($hour < 10); $min = "0" . $min if($min < 10); $sec = "0" . $sec if($sec < 10); $now = "$hour:$min:$sec on $mday $longmonths{$mon} $year"; $now; } ### ----------------------------------------------------------------------- ### # newgetopt.pl -- new options parsing # # # Included here for bug fix purposes - Martin Gleeson # # # SCCS Status : @(#)@ newgetopt.pl 1.13 # Author : Johan Vromans # Created On : Tue Sep 11 15:00:12 1990 # Last Modified By: Johan Vromans # Last Modified On: Tue Jun 2 11:24:03 1992 # Update Count : 75 # Status : Okay # This package implements a new getopt function. This function adheres # to the new syntax (long option names, no bundling). # # Arguments to the function are: # # - a list of possible options. These should designate valid perl # identifiers, optionally followed by an argument specifier ("=" # for mandatory arguments or ":" for optional arguments) and an # argument type specifier: "n" or "i" for integer numbers, "f" for # real (fix) numbers or "s" for strings. # If an "@" sign is appended, the option is treated as an array. # Value(s) are not set, but pushed. # # - if the first option of the list consists of non-alphanumeric # characters only, it is interpreted as a generic option starter. # Everything starting with one of the characters from the starter # will be considered an option. # Likewise, a double occurrence (e.g. "--") signals end of # the options list. # The default value for the starter is "-", "--" or "+". # # Upon return, the option variables, prefixed with "opt_", are defined # and set to the respective option arguments, if any. # Options that do not take an argument are set to 1. Note that an # option with an optional argument will be defined, but set to '' if # no actual argument has been supplied. # A return status of 0 (false) indicates that the function detected # one or more errors. # # Special care is taken to give a correct treatment to optional arguments. # # E.g. if option "one:i" (i.e. takes an optional integer argument), # then the following situations are handled: # # -one -two -> $opt_one = '', -two is next option # -one -2 -> $opt_one = -2 # # Also, assume "foo=s" and "bar:s" : # # -bar -xxx -> $opt_bar = '', '-xxx' is next option # -foo -bar -> $opt_foo = '-bar' # -foo -- -> $opt_foo = '--' # # HISTORY # 2-Jun-1992 Johan Vromans # Do not use //o to allow multiple NGetOpt calls with different delimeters. # Prevent typeless option from using previous $array state. # Prevent empty option from being eaten as a (negative) number. # 25-May-1992 Johan Vromans # Add array options. "foo=s@" will return an array @opt_foo that # contains all values that were supplied. E.g. "-foo one -foo -two" will # return @opt_foo = ("one", "-two"); # Correct bug in handling options that allow for a argument when followed # by another option. # 4-May-1992 Johan Vromans # Add $ignorecase to match options in either case. # Allow '' option. # 19-Mar-1992 Johan Vromans # Allow require from packages. # NGetOpt is now defined in the package that requires it. # @ARGV and $opt_... are taken from the package that calls it. # Use standard (?) option prefixes: -, -- and +. # 20-Sep-1990 Johan Vromans # Set options w/o argument to 1. # Correct the dreadful semicolon/require bug. #{ package newgetopt; # $debug = 0; # for debugging # $ignorecase = 0; # ignore case when matching options #} sub NGetOpt { @newgetopt'optionlist = @_; *newgetopt'ARGV = *ARGV; package newgetopt; local ($[) = 0; local ($genprefix) = "(--|-|\\+)"; local ($argend) = "--"; local ($error) = 0; local ($opt, $optx, $arg, $type, $mand, %opctl); local ($pkg) = (caller)[0]; print STDERR "NGetOpt 1.13 -- called from $pkg\n" if $debug; # See if the first element of the optionlist contains option # starter characters. if ( $optionlist[0] =~ /^\W+$/ ) { $genprefix = shift (@optionlist); # Turn into regexp. $genprefix =~ s/(\W)/\\\1/g; $genprefix = "[" . $genprefix . "]"; undef $argend; } # Verify correctness of optionlist. %opctl = (); foreach $opt ( @optionlist ) { $opt =~ tr/A-Z/a-z/ if $ignorecase; if ( $opt !~ /^(\w*)([=:][infse]@?)?$/ ) { print STDERR ("Error in option spec: \"", $opt, "\"\n"); $error++; next; } $opctl{$1} = defined $2 ? $2 : ""; } return 0 if $error; if ( $debug ) { local ($arrow, $k, $v); $arrow = "=> "; while ( ($k,$v) = each(%opctl) ) { print STDERR ($arrow, "\$opctl{\"$k\"} = \"$v\"\n"); $arrow = " "; } } # Process argument list while ( $#ARGV >= 0 ) { # >>> See also the continue block <<< # Get next argument $opt = shift (@ARGV); print STDERR ("=> option \"", $opt, "\"\n") if $debug; $arg = undef; # Check for exhausted list. if ( $opt =~ /^$genprefix/ ) { # Double occurrence is terminator return ($error == 0) if ($opt eq "$+$+") || ((defined $argend) && $opt eq $argend); $opt = $'; # option name (w/o prefix) } else { # Apparently not an option - push back and exit. unshift (@ARGV, $opt); return ($error == 0); } # Look it up. $opt =~ tr/A-Z/a-z/ if $ignorecase; unless ( defined ( $type = $opctl{$opt} ) ) { print STDERR ("Unknown option: ", $opt, "\n"); $error++; next; } # Determine argument status. print STDERR ("=> found \"$type\" for ", $opt, "\n") if $debug; # If it is an option w/o argument, we're almost finished with it. if ( $type eq "" ) { $arg = 1; # supply explicit value $array = 0; next; } # Get mandatory status and type info. ($mand, $type, $array) = $type =~ /^(.)(.)(@?)$/; # Check if the argument list is exhausted. if ( $#ARGV < 0 ) { # Complain if this option needs an argument. if ( $mand eq "=" ) { print STDERR ("Option ", $opt, " requires an argument\n"); $error++; } if ( $mand eq ":" ) { $arg = $type eq "s" ? "" : 0; } next; } # Get (possibly optional) argument. $arg = shift (@ARGV); # Check if it is a valid argument. A mandatory string takes # anything. if ( "$mand$type" ne "=s" && $arg =~ /^$genprefix/ ) { # Check for option list terminator. if ( $arg eq "$+$+" || ((defined $argend) && $arg eq $argend)) { # Push back so the outer loop will terminate. unshift (@ARGV, $arg); # Complain if an argument is required. if ($mand eq "=") { print STDERR ("Option ", $opt, " requires an argument\n"); $error++; undef $arg; # don't assign it } else { # Supply empty value. $arg = $type eq "s" ? "" : 0; } next; } # Maybe the optional argument is the next option? if ( $mand eq ":" && ($' eq "" || $' =~ /[a-zA-Z_]/) ) { # Yep. Push back. unshift (@ARGV, $arg); $arg = $type eq "s" ? "" : 0; next; } } if ( $type eq "n" || $type eq "i" ) { # numeric/integer if ( $arg !~ /^-?[0-9]+$/ ) { print STDERR ("Value \"", $arg, "\" invalid for option ", $opt, " (number expected)\n"); $error++; undef $arg; # don't assign it } next; } if ( $type eq "f" ) { # fixed real number, int is also ok if ( $arg !~ /^-?[0-9.]+$/ ) { print STDERR ("Value \"", $arg, "\" invalid for option ", $opt, " (real number expected)\n"); $error++; undef $arg; # don't assign it } next; } if ( $type eq "s" ) { # string next; } } continue { if ( defined $arg ) { if ( $array ) { print STDERR ('=> push (@', $pkg, '\'opt_', $opt, ", \"$arg\")\n") if $debug; eval ('push(@' . $pkg . '\'opt_' . $opt . ", \$arg);"); } else { print STDERR ('=> $', $pkg, '\'opt_', $opt, " = \"$arg\"\n") if $debug; # eval ('$' . $pkg . '\'opt_' . $opt . " = \$arg;"); # here is the error!!! eval ('$' . $pkg . '\'opt_' . $opt . " = \"$arg\";"); } } } return ($error == 0); } 1;