+#! /usr/bin/perl -w
+
+# Copyright (c) 2003 Henk Penning, all rights reserved.
+# penning@cs.uu.nl, http://www.cs.uu.nl/staff/henkp.html
+# Version 1.1 was donated to the Apache Software Foundation 2003 Jan 28
+# $Id: mirmon,v 1.36 2004/12/28 17:54:10 henkp Exp $
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+my $PRG = 'mirmon' ;
+my $VER = '$Id: mirmon,v 1.36 2004/12/28 17:54:10 henkp Exp $' ;
+
+use strict ;
+use IO::Pipe ;
+use IO::Select ;
+use Net::hostent ;
+
+my $DEF_CNF = "/etc/$PRG.conf" ;
+
+my %CNF =
+ qw( timeout 300
+ max_probes 25
+ min_poll 1h
+ max_poll 4h
+ min_sync 1d
+ max_sync 2d
+ list_style plain
+ put_histo top
+ randomize 1
+ ) ;
+
+my @REQ_KEYS =
+ qw( web_page state countries mirror_list probe
+ project_name project_url icons
+ ) ;
+my @OPT_KEYS =
+ qw( project_logo min_poll min_sync max_sync list_style htm_top htm_foot
+ put_histo
+ ) ;
+my %CNF_KEYS ; for ( @REQ_KEYS, @OPT_KEYS, keys %CNF )
+ { $CNF_KEYS { $_ } ++ ; }
+
+my $TIM_PAT = '^(\d+)([smhd])$' ;
+my @LIST_STYLE = qw(plain apache) ;
+my @GET_OPTS = qw(all update) ;
+my @PUT_HGRAM = qw(top bottom nowhere) ;
+my $HIST = 14 ;
+my %APA_TYPES = () ; for ( qw(backup ftp http) ) { $APA_TYPES { $_ } ++ ; }
+
+my $prog = substr($0,rindex($0,'/')+1) ;
+my $Usage = <<USAGE ;
+Usage: $prog [ -v ] [ -q ] [ -t timeout ] [ -get opt ] [ -c conf ]
+option v : be verbose
+option q : be quiet
+option t : set timeout [ default $CNF{timeout} ] ;
+option get : 'all' : probe all sites
+ : 'update' : probe a selection of the sites (see doc)
+option c : configuration file [ default $DEF_CNF ]
+-------------------------------------------------------------------
+Documentation : the program contains 'pod' style documentation.
+Extract the doc with 'pod2text $prog' or 'pod2html $prog OUT', etc.
+-------------------------------------------------------------------
+USAGE
+sub Usage { die "$_[0]$Usage" ; }
+sub Error { die "$prog: $_[0]\n" ; }
+sub Warn { warn "$prog: $_[0]\n" ; }
+
+# usage: &GetOptions(ARG,ARG,..) defines $opt_ID as 1 or user spec'ed value
+# usage: &GetOptions(\%opt,ARG,ARG,..) defines $opt{ID} as 1 or user value
+# ARG = 'ID' | 'ID=SPC' | 'ID:SPC' for no-arg, required-arg or optional-arg
+# ID = perl identifier
+# SPC = i|f|s for integer, fixedpoint real or string argument
+
+use Getopt::Long ;
+Getopt::Long::config('no_ignore_case') ;
+# Usage() unless GetOptions() ;
+my %opt = () ; Usage() unless GetOptions (\%opt,'v','q','t=i','get=s','c=s') ;
+Usage("Arg count\n") unless @ARGV >= 0 ;
+
+my %WGT ;
+my $GET = IO::Select -> new () ;
+my %URL ;
+my %RES ;
+my %OLD ;
+my %LST ;
+my %CCS ;
+my %HREF ;
+
+# <META HTTP-EQUIV=Expires CONTENT="Tue, 04 Dec 1993 21:29:02 GMT">
+sub exp_date
+ { my @day = qw(Sun Mon Tue Wed Thu Fri Sat) ;
+ my @mon = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) ;
+ my @gmt = gmtime time + 3600 ;
+ sprintf "%s, %02d %s %4d %02d:%02d:%02d GMT"
+ , $day [ $gmt [ 6 ] ]
+ , $gmt [ 3 ]
+ , $mon [ $gmt [ 4 ] ]
+ , $gmt [ 5 ] + 1900
+ , @gmt [ 2, 1, 0 ]
+ ;
+ }
+
+sub find_conf
+ { return $opt{c} if $opt{c} ;
+ my $HOME = ( getpwuid $< ) [ 7 ] or Error "can get homedir '$<' ($!)" ;
+ my @LIST = ( "$PRG.conf" , "$HOME/.$PRG.conf" , $DEF_CNF ) ;
+ for my $conf ( @LIST ) { return $conf if -f $conf ; }
+ Error sprintf "can't find a config file :\n %s" , join "\n ", @LIST ;
+ }
+
+sub show_conf
+ { print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n" ;
+ for my $key ( sort keys %CNF )
+ { next if $key =~ m/^_/ ;
+ print "show_conf : $key = '$CNF{$key}'\n" ;
+ }
+ for my $key ( sort keys %HREF )
+ { printf "show_conf : for site '%s' use instead\n '%s'\n",
+ $key, $HREF { $key } if $opt{v} ;
+ }
+ printf "show_conf : included '%s'\n", join "', '", @{ $CNF{_include} } ;
+ print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n" ;
+ }
+
+sub get_conf ;
+
+sub get_conf
+ { my $FILE = shift ;
+
+ if ( grep $_ eq $FILE, @{ $CNF {_include} } )
+ { Error "already included : '$FILE'" ; }
+ else
+ { push @{ $CNF {_include} }, $FILE ; }
+
+ open FILE, $FILE or Error "can't open '$FILE' ($!)" ;
+ my $CONF = join "\n", grep /./, <FILE> ;
+ close FILE ;
+
+ $CONF =~ s/\t/ /g ; # replace tabs
+ $CONF =~ s/^[+ ]+// ; # delete leading space, plus
+ $CONF =~ s/\n\n\s+/ /g ; # glue continuation lines
+ $CONF =~ s/\n\n\+\s+//g ; # glue concatenation lines
+ $CONF =~ s/\n\n\./\n/g ; # glue concatenation lines
+
+ chop $CONF ;
+ print "--$CONF--\n" if $opt{d} ;
+ for ( grep ! /^#/, split /\n\n/, $CONF )
+ { my ($key,$val) = split ' ', $_, 2 ;
+ $val = '' unless defined $val ;
+ print "conf '$FILE' : key '$key', val '$val'\n" if $opt{d} ;
+ if ( exists $CNF_KEYS { $key } )
+ { $CNF { $key } = $val ; }
+ elsif ( $key eq 'site_url' )
+ { my ( $site, $url ) = split ' ' , $val ;
+ $url .= '/' unless $url =~ m!/$! ;
+ $HREF { lc $site } = $url ;
+ printf "config : for site '%s' use instead\n '%s'\n",
+ $site, $url if $opt{v} ;
+ }
+ elsif ( $key eq 'env' )
+ { my ( $x, $y ) = split ' ' , $val ;
+ $ENV { $x } = $y ;
+ printf "config : setenv '%s'\n '%s'\n", $x, $y if $opt{v} ;
+ }
+ elsif ( $key eq 'no_randomize' )
+ { $CNF { randomize } = 0 ; }
+ elsif ( $key eq 'include' )
+ { get_conf $val ; }
+ elsif ( $key eq 'show' )
+ { show_conf unless $opt{q} ; }
+ elsif ( $key eq 'exit' )
+ { Error 'exit per config directive' ; }
+ elsif ( $key eq 'max_age' )
+ { $CNF { max_sync } = $val ; }
+ else
+ { show_conf ;
+ Error "unknown keyword '$key' (value '$val')" ;
+ }
+ }
+ }
+
+sub get_conf_opt
+ { my $err = '' ;
+ get_conf find_conf ;
+ $CNF { timeout } = $opt{t} if $opt{t} ;
+ for my $key ( @REQ_KEYS )
+ { unless ( exists $CNF { $key } )
+ { $err .= "$prog error: missing config for '$key'\n" ; }
+ }
+ for my $key ( qw(min_poll max_poll max_sync min_sync) )
+ { my $max = $CNF { $key } ;
+ unless ( $max =~ /$TIM_PAT/o )
+ { $err .= "$prog error: $key ($max) doesn't match /$TIM_PAT/\n" ; }
+ }
+ unless ( grep $CNF { list_style } eq $_, @LIST_STYLE )
+ { $err .= sprintf "%s : error: unknown 'list_style' '%s'\n",
+ $prog, $CNF { list_style } ;
+ }
+ unless ( grep $CNF { put_histo } eq $_, @PUT_HGRAM )
+ { $err .= sprintf "%s : error: unknown 'put_histo' '%s'\n",
+ $prog, $CNF { put_histo } ;
+ }
+ if ( $opt { get } and not grep $opt { get } eq $_, @GET_OPTS )
+ { $err .= sprintf "%s : error: unknown 'get option' '%s'\n",
+ $prog, $opt { get } ;
+ }
+ Error $err if $err ;
+ $opt{q} = 0 if $opt{v} ;
+ }
+
+sub tim_to_s
+ { my $tim = shift ;
+ my %tab = ( 's' => 1, 'm' => 60, 'h' => 60 * 60, 'd' => 60 * 60 * 24 ) ;
+ Error "wrong time '$tim'" unless $tim =~ /$TIM_PAT/o ;
+ my $m = $1 ; my $u = $2 ;
+ return $m * $tab { $u } ;
+ }
+
+sub aprx_eq { my ( $t1, $t2 ) = @_ ; abs ( $t1 - $t2 ) < 60 ; }
+sub aprx_ge { my ( $t1, $t2 ) = @_ ; $t1 > $t2 or aprx_eq $t1, $t2 ; }
+sub aprx_le { my ( $t1, $t2 ) = @_ ; $t1 < $t2 or aprx_eq $t1, $t2 ; }
+sub aprx_gt { my ( $t1, $t2 ) = @_ ; $t1 > $t2 and not aprx_eq $t1, $t2 ; }
+sub aprx_lt { my ( $t1, $t2 ) = @_ ; $t1 < $t2 and not aprx_eq $t1, $t2 ; }
+
+sub pr_interval
+ { my $s = shift ;
+ my ( $magn, $unit ) ;
+ my $mins = $s / 60 ; my $m = int ( $mins + 0.5 ) ;
+ my $hours = $s / ( 60 * 60 ) ; my $h = int ( $hours + 0.5 ) ;
+
+ if ( $s < 50 )
+ { $magn = $s ; $unit = 'second' ; }
+ elsif ( $m < 50 )
+ { $magn = $m ; $unit = 'minute' ; }
+ elsif ( $h < 36 )
+ { $magn = $h ; $unit = 'hour' ; }
+ else
+ { $magn = sprintf "%.1f", $hours / 24 ; $unit = 'day' ; }
+
+ $unit .= 's' unless $magn == 1 ;
+
+ return "$magn $unit" ;
+ }
+
+sub max_age1
+ { ( tim_to_s $CNF { min_sync } ) + ( tim_to_s $CNF { max_poll } ) ; }
+sub max_age2
+ { ( tim_to_s $CNF { max_sync } ) + ( tim_to_s $CNF { max_poll } ) ; }
+
+sub max_vrfy
+ { ( tim_to_s $CNF { min_poll } ) + ( tim_to_s $CNF { max_poll } ) ; }
+
+sub age_code
+ { my $time = shift ;
+ return 'z' unless $time =~ /^\d+$/ ;
+ return
+ ( ( aprx_ge ( $time, $^T - max_age1 ) )
+ ? 's'
+ : ( aprx_ge ( $time, $^T - max_age2 ) ? 'b' : 'f' )
+ ) ;
+ }
+
+sub err
+ { my $url = shift ;
+ my $stat = shift ;
+ printf "*** %-10s %s\n", $stat, $url unless $opt{q} ;
+ my ( $time, $vrfy, $hstp, $hsts ) ;
+ if ( exists $OLD { $url } )
+ { $time = $OLD { $url } [ 0 ] ;
+ $vrfy = $OLD { $url } [ 2 ] ;
+ $hstp = substr $OLD { $url } [ 3 ], 1 - $HIST ;
+ $hsts = $OLD { $url } [ 4 ] ;
+ }
+ else
+ { $time = 'undef' ;
+ $vrfy = 'undef' ;
+ $hstp = '' ;
+ $hsts = '' ;
+ }
+ $RES { $url } = [ $time, $stat, $vrfy, $hstp . 'f', $hsts, $^T ] ;
+ }
+
+sub res
+ { my $url = shift ;
+ my $time = shift ;
+ my $stat = shift ;
+ my $hstp =
+ ( exists $OLD { $url }
+ ? substr ( $OLD { $url } [ 3 ], 1 - $HIST )
+ : ''
+ ) ;
+ my $hsts = ( exists $OLD { $url } ? $OLD { $url } [ 4 ] : '') ;
+ printf "result %d %s\n", $time, $url if $opt{v} ;
+ $RES { $url } = [ $time, $stat, $^T, $hstp . 's', $hsts, $^T ] ;
+ }
+
+sub get_state
+ { my $STT = shift ;
+ open STT, $STT or Error "can't open '$STT' ($!)" ;
+ while ( <STT> )
+ { chop ;
+ my ( $url, $time, $stat, $vrfy, $hstp, $hsts, $lprb ) = split ' ' ;
+ $stat =~ s/_/ /g ;
+ $hstp = '' unless defined $hstp ;
+ $hsts = '' unless defined $hsts ;
+ $hsts = '' if $hsts eq 'undef' ;
+ $lprb = 'undef' unless defined $lprb ;
+ $OLD { $url } = [ $time, $stat, $vrfy, $hstp, $hsts, $lprb ] ;
+ }
+ close STT ;
+ }
+
+sub check_hist
+ { my $time = shift ;
+ my $hsts = shift ;
+ printf "check_hist: last '$time' hsts '$hsts'\n" if $opt{d} ;
+
+ my $res = $hsts ;
+ my ( $stmp, $hist ) ;
+
+ if ( $hsts eq '' )
+ { $stmp = 0 ; $hist = '' ; }
+ else
+ { ( $stmp, $hist ) = split '-', $hsts ; }
+
+ if ( aprx_le $stmp, $^T - tim_to_s '1d' )
+ { $res = sprintf "%s-%s%s"
+ , $^T
+ , substr ( $hist, 1 - $HIST )
+ , age_code ( $time )
+ ;
+ }
+ return $res ;
+ }
+
+sub put_state
+ { my $STT = shift ;
+ my $TMP = "$STT.tmp" ;
+ open TMP, ">$TMP" or Error "can't write '$TMP' ($!)" ;
+ for my $url ( sort keys %RES )
+ { $RES { $url } [ 4 ]
+ = check_hist $RES { $url } [ 0 ], $RES { $url } [ 4 ] ;
+ my @OUT = @{ $RES { $url } } ;
+ $OUT [ 1 ] =~ s/\s/_/g ;
+ printf TMP "%s %s\n", $url, join ' ', @OUT
+ or Error "can't print to $TMP ($!)" ;
+ }
+ close TMP ;
+ if ( -z $TMP )
+ { Warn "wrote empty state file; keeping previous version" ; }
+ else
+ { rename $TMP, $STT or Error "can't rename '$TMP', '$STT' ($!)" ; }
+ }
+
+sub get_ccs
+ { my $CCS = shift ;
+ open CCS, $CCS or Error "can't open '$CCS' ($!)" ;
+ while ( <CCS> )
+ { chop ;
+ next if /^#/ ;
+ my ( $code, $dash, $reg ) = split ' ', $_, 3 ;
+ $CCS { lc $code } = lc $reg ;
+ }
+ close CCS ;
+ }
+
+sub type_site
+ { my $url = shift ;
+ my ( $type, $site, $home ) ;
+ if ( $url =~ m!^(ftp|http)://([^/:]+)(:\d+)?/! )
+ { $type = $1 ; $site = $2 ; $home = $& ; }
+ return $type, $site, $home ;
+ }
+
+sub type { my ( $t, $s, $h) = type_site $_[0] ; $t ; }
+sub site { my ( $t, $s, $h) = type_site $_[0] ; $s ; }
+sub home { my ( $t, $s, $h) = type_site $_[0] ; $h ; }
+
+sub get_list
+ { my $LST = shift ;
+ my ( $reg, $url ) ;
+ open LST, $LST or Error "can't open '$LST' ($!)" ;
+ while ( <LST> )
+ { chop ;
+ next if /^#/ ;
+ next if /^\s*$/ ;
+ if ( $CNF { list_style } eq 'plain' )
+ { ( $reg, $url ) = split ' ' ;
+ unless ( $url =~ m!/$! )
+ { print "*** mirmon appended '/' to $url\n" unless $opt{q} ;
+ $url .= '/' ;
+ }
+ }
+ elsif ( $CNF { list_style } eq 'apache' )
+ { my $apache_type ;
+ ( $apache_type, $reg, $url ) = split ' ' ;
+ unless ( defined $APA_TYPES { $apache_type } )
+ { print "*** strange type : $apache_type\n" unless $opt{q} ;
+ next ;
+ }
+ unless ( $url =~ m!/$! )
+ { print "*** missing '/' in $url\n" unless $opt{q} ;
+ $url .= '/' ;
+ }
+ }
+
+ my $site = site $url ;
+ my $type = type $url ;
+
+ unless ( defined $site )
+ { print "*** strange url : '$url'\n" unless $opt{q} ; next ; }
+
+ $LST { $url } = [ $type , $site, $reg ] ;
+ }
+ }
+
+sub url { sprintf '<A HREF="%s">%s</A>', $_[0], $_[1] ; }
+sub nam { sprintf '<A NAME="%s">%s</A>', $_[0], $_[1] ; }
+sub SMA { sprintf "<FONT SIZE=\"-1\">%s</FONT>", $_[0] ; }
+sub BLD { sprintf "<B>%s</B>", $_[0] ; }
+sub NSS { sprintf SMA('%s site%s'), $_[0], ( $_[0] == 1 ? '' : 's' ) ; }
+sub TAB { sprintf "<TABLE BORDER=2 CELLPADDING=3>%s</TABLE>", $_[0] ; }
+sub TR { sprintf "<TR>%s</TR>\n", $_[0] ; }
+sub TH { sprintf "<TH>%s</TH>\n", $_[0] ; }
+sub TD { sprintf "<TD>%s</TD>\n", $_[0] ; }
+sub TDr { sprintf "<TD ALIGN=\"RIGHT\">%s</TD>\n", $_[0] ; }
+sub RED { sprintf "<FONT COLOR=\"RED\">%s</FONT>", $_[0] ; }
+sub GRN { sprintf '<FONT COLOR="GREEN">%s</FONT>', $_[0] ; }
+
+sub htmlquote
+ { my $x = shift ;
+ $x =~ s/&/&/g ;
+ $x =~ s/</</g ;
+ $x =~ s/>/>/g ;
+ return $x ;
+ }
+
+sub diff
+ { my $time = shift ;
+ my $max = shift ;
+ my $res ;
+
+ if ( $time == $^T )
+ { $res = BLD 'renewed' ; }
+ else
+ { $res = pr_interval $^T - $time ;
+ $res = BLD RED $res if aprx_lt $time, $max ;
+ }
+ return $res ;
+ }
+
+sub img_sf_cnt
+ { sprintf '<IMG BORDER=1 SRC="%s/mm%s%02d.gif" ALT="">'
+ , $CNF { icons }, $_[0], $_[1] ;
+ }
+
+sub img_sf { img_sf_cnt $_[0], 1 ; }
+
+sub show_hist
+ { my $hst = shift ;
+ return '' unless $hst =~ m/^[sbfz]+$/ ;
+ if ( length $hst == $HIST and $hst =~ /^(s*b)s*$/ )
+ { return img_sf_cnt 'sb', length $1 ; }
+ elsif ( length $hst == $HIST and $hst =~ /^(s*f)s*$/ )
+ { return img_sf_cnt 'sf', length $1 ; }
+ elsif ( length $hst == $HIST and $hst =~ /^(s*b)fs*$/ )
+ { return img_sf_cnt 'sbf', length $1 ; }
+ my $res = '' ;
+ my $cnt = 1 ;
+ my $prf = substr $hst, 0, 1 ;
+ $hst = substr $hst, 1 ;
+ while ( $hst ne '' )
+ { if ( substr ( $prf, 0, 1 ) eq substr ( $hst, 0, 1 ) )
+ { $cnt ++ ;
+ $hst = substr $hst, 1 ;
+ }
+ else
+ { $res .= img_sf_cnt $prf, $cnt ;
+ $prf = substr $hst, 0, 1 ;
+ $hst = substr $hst, 1 ;
+ $cnt = 1 ;
+ }
+ }
+ $res .= img_sf_cnt $prf, $cnt if $cnt ;
+ return $res ;
+ }
+
+sub show_hist_age
+ { my $hsts = shift ;
+ my $time = shift ;
+ return '' if $hsts eq '' ;
+ my ( $t, $h ) = split '-', $hsts ;
+ if ( aprx_lt $t, $^T ) { $h .= age_code $time ; }
+ return show_hist substr $h, - $HIST ;
+ }
+
+sub gen_histogram_probes
+ { my ( $time, $stat, $vrfy, $hstp, $hsts, $lprb ) ;
+ my %tab = () ;
+ my %bad = () ;
+ my $res = '' ;
+ my $s_cnt = 0 ;
+ my $f_cnt = 0 ;
+ my $hr_min ;
+ my $hr_max ;
+ return '' unless scalar keys %RES ;
+ for my $url ( keys %RES )
+ { ( $time, $stat, $vrfy, $hstp, $hsts, $lprb ) = @{ $RES { $url } } ;
+ my $hr = int ( ( $^T - $lprb ) / 3600 + 0.5 ) ;
+ $hr_min = $hr if ! defined $hr_min or $hr < $hr_min ;
+ $hr_max = $hr if ! defined $hr_max or $hr > $hr_max ;
+ if ( $stat eq 'ok' )
+ { $tab { $hr } ++ ; $s_cnt ++ ; }
+ else
+ { $bad { $hr } ++ ; $f_cnt ++ ; }
+ }
+ $res = TR
+ ( TH ( 'hours ago' )
+ . TH ( 'succ' )
+ . TH ( 'fail' )
+ . TH sprintf
+ ( '%s %s, %s %s'
+ , $s_cnt , GRN ( 'successful' )
+ , $f_cnt , RED ( 'failed' )
+ )
+ ) ;
+
+ my $max = 0 ;
+ for my $x ( keys %tab )
+ { my $tot = $tab { $x } + ( $bad { $x } || 0 ) ;
+ $max = $tot if $max < $tot ;
+ } ;
+
+ return "<BLOCKQUOTE>\nnothing yet\n</BLOCKQUOTE>\n" unless $max ;
+
+ for my $hr ( $hr_min .. $hr_max )
+ { my $x = $tab { $hr } || 0 ;
+ my $y = $bad { $hr } || 0 ;
+ my $n = int ( $x / $max * $HIST ) ;
+ my $b = int ( $y / $max * $HIST ) ;
+ $res .= TR
+ ( TDr ( $hr )
+ . TDr ( $x )
+ . TDr ( $y )
+ . TD
+ ( ( $n ? img_sf_cnt ( 's', $n ) : '' )
+ . ( $b ? img_sf_cnt ( 'f', $b ) : '' )
+ . ( ( $n + $b ) ? '' : ' ' )
+ )
+ ) ;
+ }
+ return "<BLOCKQUOTE>\n" . TAB ( $res ) . "</BLOCKQUOTE>\n" ;
+ }
+
+sub gen_histogram
+ { my $MAX_H = max_age1 ;
+ my $MAX_h = 1 +
+ ( ( 20 * 3600 <= $MAX_H and $MAX_H <= 36 * 3600 )
+ ? int ( $MAX_H / 3600 )
+ : 25
+ ) ;
+ my $MAX_O = max_age2 ;
+ my $MAX_o = int ( $MAX_O / 3600 + 0.5 ) ;
+ my $H = 18 ;
+ my %W = ( 'old' => 1, 'ded' => 1, 'bad' => 1 ) ;
+ my %Wmx = ( 'old' => 5, 'ded' => 3, 'bad' => 3 ) ;
+ my %tab ;
+ my %hst ;
+ my $res ;
+ for ( my $x = 0 ; $x < $MAX_h ; $x ++ ) { $tab { $x } = 0 ; }
+ $tab { old } = 0 ; $tab { ded } = 0 ; $tab { bad } = 0 ;
+ for my $url ( keys %RES )
+ { my $time = $RES { $url } [ 0 ] ;
+ if ( $time =~ /^\d+$/ )
+ { my $s = $^T - $time ;
+ my $hr = int ( $s / $MAX_H * ( $MAX_h - 1 ) + 0.5 ) ;
+ if ( $s <= $MAX_H ) { $tab { $hr } ++ ; }
+ elsif ( $s <= $MAX_O ) { $tab { old } ++ ; }
+ else { $tab { ded } ++ ; }
+ }
+ else
+ { $tab { bad } ++ ; }
+ }
+ my $max = 0 ;
+ for ( grep ! exists $Wmx { $_ }, keys %tab )
+ { $max = $tab { $_ } if $tab { $_ } > $max ; }
+
+ my %bad ;
+
+ for my $aux ( keys %Wmx )
+ { $bad { $aux } = $tab { $aux } ;
+ if ( $bad { $aux } > $max )
+ { $W { $aux } = $Wmx { $aux } ;
+ my $d = int ( $bad { $aux } / $W { $aux } ) ;
+ for ( my $i = 1 ; $i < $W { $aux } ; $i++ )
+ { $tab { $aux . $i } = $d ;
+ if ( $bad { $aux } % $Wmx { $aux } > $i )
+ { $tab { $aux . $i } ++ ;
+ $tab { $aux } -- ;
+ }
+ }
+ $tab { $aux } -= ( $W { $aux } - 1 ) * $d ;
+ $max = $tab { $aux } if $max < $tab { $aux } ;
+ }
+ }
+
+# if ( $opt{v} )
+# { for my $hr ( keys %tab )
+# { printf "tab '%s' = '%s'\n", $hr, $tab { $hr } ; }
+# }
+
+ return 'nothing yet' unless $max ;
+ $H = $max if 8 <= $max and $max <= 26 ;
+ for ( keys %tab )
+ { $hst { $_ } = int ( $H * $tab { $_ } / $max + 0.5 ) ; }
+ my @keys = sort { $a <=> $b } grep /^\d+$/, keys %hst ;
+ my $tab_hr = 0 ;
+ for my $hr ( @keys ) { $tab_hr += $tab { $hr } ; }
+ push @keys
+ , grep ( m/^old/, sort keys %tab )
+ , grep ( m/^ded/, sort keys %tab )
+ , grep ( m/^bad/, sort keys %tab )
+ ;
+ for ( my $h = $H ; $h > 0 ; $h -- )
+ { $res .= "<TR>\n" ;
+ $res .= sprintf "<TH ROWSPAN=3 VALIGN=\"TOP\">↑</TH>\n"
+ if $h == $H ;
+ $res .= sprintf '<TD ROWSPAN=%d ALIGN="CENTER">%s</TD>' . "\n"
+ , $H-6, NSS ( $max ) if $h == $H - 3 ;
+ $res .= sprintf "<TH ROWSPAN=3 VALIGN=\"BOTTOM\">↓</TH>\n"
+ if $h == 3 ;
+ for my $x ( @keys )
+ { $res .= sprintf "<TH>%s</TH>\n"
+ , ( ( $hst { $x } >= $h )
+ ? img_sf
+ ( $x =~ /^\d+$/
+ ? 's'
+ : ( $x =~ /^old/ ? 'b' : ( $x =~ /^ded/ ? 'f' : 'z' ) )
+ )
+ : ( ( $h == 1 and $hst { $x } == 0 )
+ ? sprintf
+ ( '<IMG SRC="%s/bar.gif" ALT="" BORDER=0>'
+ , $CNF { icons }
+ )
+ : ''
+ )
+ ) ;
+ }
+ $res .= "</TR>\n" ;
+ }
+
+ my $HR = '<HR SIZE=2 WIDTH="95%%" NOSHADE>' ;
+
+ $res .= "<TR>\n" ;
+ $res .= sprintf "<TD COLSPAN=%d>$HR</TD>\n", 1 ;
+ $res .= sprintf "<TD COLSPAN=%d>$HR</TD>\n", $MAX_h ;
+ $res .= sprintf "<TD COLSPAN=%d>$HR</TD>\n", $W { old } ;
+ $res .= sprintf "<TD COLSPAN=%d>$HR</TD>\n", $W { ded } ;
+ $res .= sprintf "<TD COLSPAN=%d>$HR</TD>\n", $W { bad } ;
+ $res .= "</TR>\n" ;
+
+ $res .= "<TR>\n" ;
+ $res .= '<TD ALIGN="CENTER"> <B>age</B> → </TD>' ;
+
+ $res .= "<TH>|</TH>\n" ;
+ $res .= sprintf
+ ( '<TD COLSPAN=%d ALIGN="CENTER">'
+ . '← 0 ≤ <B>age</B> ≤ %s →'
+ . "</TD>\n"
+ , $MAX_h - 2, pr_interval ( $MAX_H )
+ )
+ ;
+ $res .= "<TH>|</TH>\n" ;
+ $res .= sprintf
+ ( '<TD ALIGN="CENTER" COLSPAN=%d>'
+ . ' %sh < %s ≤ %sh '
+ . "</TD>\n"
+ , $W { old }, int($MAX_H/60/60) , BLD ( 'age' ), $MAX_o
+ ) ;
+ $res .= sprintf
+ ( '<TD ALIGN="CENTER" COLSPAN=%d>'
+ . ' <FONT COLOR="RED">old</FONT> '
+ . "</TD>\n"
+ , $W { ded }
+ ) ;
+ $res .= sprintf
+ ( '<TD ALIGN="CENTER" COLSPAN=%d>'
+ . ' <FONT COLOR="RED">bad</FONT> '
+ . "</TD>\n"
+ , $W { bad }
+ ) ;
+ $res .= "</TR>\n" ;
+
+ my $FRMT = '<TD ALIGN="CENTER" COLSPAN=%d> %s </TD>' ;
+
+ $res .= "<TR>\n" ;
+ $res .= sprintf "$FRMT\n", 1, NSS scalar keys %RES ;
+ $res .= "<TH>|</TH>\n" ;
+ $res .= sprintf "$FRMT\n", $MAX_h - 2, NSS $tab_hr ;
+ $res .= "<TH>|</TH>\n" ;
+ $res .= sprintf "$FRMT\n", $W { old }, NSS $bad { old } ;
+ $res .= sprintf "$FRMT\n", $W { ded }, NSS $bad { ded } ;
+ $res .= sprintf "$FRMT\n", $W { bad }, NSS $bad { bad } ;
+ $res .= "</TR>\n" ;
+
+ $res = "<TABLE CELLSPACING=0 CELLPADDING=1 BORDER=0>\n$res\n</TABLE>\n" ;
+ $res = sprintf "<TABLE CELLPADDING=5 BORDER=4>%s</TABLE>\n"
+ , "<TR><TH>\n$res\n</TH></TR>" ;
+ if ( $max == $H )
+ { $res .= sprintf "<BR>units %s %s %s %s represent one mirror site.\n"
+ , img_sf ( 's' ), img_sf ( 'f' ), img_sf ( 'b' ), img_sf ( 'z' ) ;
+ }
+ else
+ { $res .= sprintf
+ "<BR>each %s %s %s %s unit represents %s mirror sites.\n"
+ , img_sf ( 's' ) , img_sf ( 'f' ), img_sf ( 'b' ) , img_sf ( 'z' )
+ , sprintf ( "%.1f", $max / $H )
+ }
+ return $res ;
+ }
+
+sub revdom { my $dom = shift ; join '.', reverse split /\./, $dom ; }
+
+sub by_type_site
+ { my $a_type = $a -> [ 0 ] ;
+ my $b_type = $b -> [ 0 ] ;
+ my $a_site = $a -> [ 2 ] ;
+ my $b_site = $b -> [ 2 ] ;
+ ( revdom $a_site ) cmp ( revdom $b_site )
+ or
+ $a_type cmp $b_type
+ ;
+ }
+
+sub by_CCS { ( $CCS { $a } || $a ) cmp ( $CCS { $b } || $b ) ; }
+
+sub legend ;
+
+sub gen_page
+ { my $PPP = shift ;
+ my $TMP = "$PPP.tmp" ;
+ my %tab ;
+ my $refs ;
+ for my $url ( keys %LST )
+ { my ( $type , $site, $reg ) = @{ $LST { $url } } ;
+ push @{ $tab { $reg } }, [ $type, $url, $site ] ;
+ }
+
+ my $bad = 0 ; my $old = 0 ; my $unr = 0 ;
+ my %stats ;
+ my @stats ;
+ my $ok = 0 ;
+
+ for my $url ( keys %RES )
+ { my ( $time, $stat, $vrfy ) = @{ $RES { $url } } ;
+ if ( $stat eq 'ok' ) { $ok ++ ; } else { $stats { $stat } ++ ; }
+ if ( $time eq 'undef' )
+ { $bad ++ ; }
+ elsif ( 'f' eq age_code $time )
+ { $old ++ ; }
+ if ( $vrfy eq 'undef' or aprx_lt $vrfy, $^T - max_vrfy )
+ { $unr ++ ; }
+ }
+
+ my $STAT = sprintf
+ "%d bad -- %d older than %s -- %s unreachable for more than %s"
+ , $bad
+ , $old
+ , pr_interval ( max_age2 )
+ , $unr
+ , pr_interval ( max_vrfy )
+ ;
+
+ my $PROB = 'last probes : ' ;
+ push @stats, "$ok were ok" if $ok ;
+ for my $stat ( sort keys %stats )
+ { push @stats, sprintf "%s had %s", $stats { $stat }, RED $stat ; }
+ $PROB .= join ', ', @stats ;
+
+ for my $reg ( sort keys %tab )
+ { $refs .= sprintf " %s \n"
+ , url "#$reg"
+ , "<FONT SIZE=\"+1\">$reg</FONT>"
+ ;
+ }
+
+ my $COLS = 5 ;
+ my $LOGO = $CNF { project_logo }
+ ? url
+ ( $CNF { project_url }
+ , sprintf
+ ( '<IMG SRC="%s" ALT="%s" ALIGN="RIGHT" BORDER=0>'
+ , $CNF { project_logo }
+ , $CNF { project_name }
+ )
+ )
+ : ''
+ ;
+ my $HTOP = $CNF{htm_top} ? $CNF{htm_top} . "\n" : '' ;
+ my $FOOT = $CNF{htm_foot} ? $CNF{htm_foot} . "\n" : '' ;
+ my $TITL = url $CNF{project_url}, $CNF{project_name} ;
+ my $EXPD = exp_date ;
+
+ open PPP, ">$TMP" or Error "can't write $TMP ($!)" ;
+ print PPP '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01'
+ . ' Transitional//EN"'
+# . ' "http://www.w3.org/TR/html4/loose.dtd"'
+ . '>' ;
+ print PPP "<HTML>\n" ;
+ print PPP "<HEAD>\n" ;
+ print PPP "<TITLE>the status of $CNF{project_name} mirrors</TITLE>\n" ;
+ printf PPP "%s\n", '<meta HTTP-EQUIV="content-type" '
+ . 'CONTENT="text/html; charset=ISO-8859-1">' ;
+ print PPP "<META HTTP-EQUIV=\"refresh\" CONTENT=\"3600\">\n" ;
+ print PPP "<META HTTP-EQUIV=\"Expires\" CONTENT=\"$EXPD\">\n" ;
+ print PPP "</HEAD>\n" ;
+ print PPP "<BODY BGCOLOR=\"#FFFFFF\">\n" ;
+ print PPP $LOGO ;
+
+ print PPP "<H2>the status of $TITL mirrors</H2>\n" ;
+
+ print PPP "<TABLE BORDER=0 CELLPADDING=2>\n" ;
+ printf PPP "<TR><TD>date</TD><TD>:</TD><TD>%s (GMT)</TD></TR>\n"
+ , scalar gmtime $^T ;
+ printf PPP "<TR><TD>last check</TD><TD>:</TD><TD>%s (GMT)</TD></TR>\n"
+ , scalar gmtime ( $opt{get} ? $^T : ( stat $CNF { state } ) [9] ) ;
+ print PPP "</TABLE>\n" ;
+
+ printf PPP "<P>%s</P>\n", $HTOP if $HTOP ;
+
+ if ( $CNF { put_histo } eq 'top' )
+ { print PPP "<H2>age histogram</H2>\n" ;
+
+ print PPP "<BLOCKQUOTE>\n" ;
+ print PPP gen_histogram ;
+ print PPP "</BLOCKQUOTE>\n" ;
+ }
+
+ print PPP "<H2>regions</H2>\n" ;
+
+ print PPP "<BLOCKQUOTE>\n" ;
+ print PPP "<CENTER>\n" ;
+ printf PPP "%s\n", $refs ;
+ print PPP "</CENTER>\n" ;
+ print PPP "</BLOCKQUOTE>\n" ;
+
+ print PPP "<H2>report</H2>\n" ;
+
+ my $attr1 = "COLSPAN=$COLS BGCOLOR=\"LIME\"" ;
+ my $attr2 = 'BGCOLOR="AQUA"' ;
+
+ print PPP "<BLOCKQUOTE>\n" ;
+ print PPP "<TABLE BORDER=2 CELLPADDING=5>\n" ;
+ printf PPP "<TR><TH $attr1>%d sites in %d regions</TH></TR>\n"
+ , scalar keys %LST
+ , scalar keys %tab
+ ;
+ printf PPP "<TR><TH $attr1>%s</TH></TR>\n", $STAT ;
+ printf PPP "<TR><TH $attr1>%s</TH></TR>\n", $PROB ;
+ print PPP "<TR>\n" ;
+ printf PPP " <TH $attr2>%s site -- home</TH>\n"
+ , $CNF { project_name } ;
+ printf PPP " <TH $attr2>%s</TH>\n", 'type' ;
+ printf PPP " <TH $attr2>%s</TH>\n", 'mirror age,<BR>daily stats' ;
+ printf PPP " <TH $attr2>%s</TH>\n", 'last probe,<BR>probe stats' ;
+ printf PPP " <TH $attr2>%s</TH>\n", 'last stat' ;
+ print PPP "</TR>\n" ;
+ for my $reg ( sort by_CCS keys %tab )
+ { my $itms = $tab { $reg } ;
+
+ my $ccs = exists $CCS { $reg } ? $CCS { $reg } : $reg ;
+ $ccs = nam $reg,
+ ( scalar @{ $itms } > 6
+ ? sprintf "%s - %d sites"
+ , $ccs, scalar @{ $itms }
+ : $ccs
+ ) ;
+
+ my $attr3 = "COLSPAN=$COLS BGCOLOR=\"YELLOW\"" ;
+ printf PPP "<TR><TH $attr3>$ccs</TH></TR>\n" ;
+
+ for my $itm ( sort by_type_site @{ $itms } )
+ { my ( $type, $url, $site ) = @{ $itm } ;
+ my ( $time, $stat, $hstp, $hsts, $vrfy ) ;
+ my ( $pr_time, $pr_last, $pr_hstp, $pr_hsts ) ;
+
+ print PPP "<TR>\n" ;
+ printf PPP
+ " <TD ALIGN=\"RIGHT\">%s %s</TD>\n"
+ . " <TD>%s</TD>\n"
+ , url ( $url , $site )
+ , url ( home ( $url ), '@' )
+ , $type
+ ;
+
+ if ( exists $RES { $url } )
+ { ( $time, $stat, $vrfy, $hstp, $hsts ) = @{ $RES { $url } } ;
+ $pr_time = $time =~ /^\d+$/
+ ? diff $time, $^T - max_age2 : ' ' ;
+ $pr_last = $vrfy =~ /^\d+$/
+ ? diff $vrfy, $^T - max_vrfy : ' ' ;
+ $pr_hstp = show_hist $hstp ;
+ $pr_hsts = show_hist_age $hsts, $time ;
+
+ }
+ else
+ { ( $pr_time, $pr_last, $pr_hstp, $pr_hsts, $stat ) =
+ ( ' ', ' ', '', '', ' ' ) ;
+ }
+
+ $stat = RED $stat if $stat ne 'ok' ;
+ printf PPP " <TD ALIGN=\"RIGHT\">%s<BR>%s</TD>\n"
+ , $pr_time, $pr_hsts ;
+ printf PPP " <TD ALIGN=\"RIGHT\">%s<BR>%s</TD>\n"
+ , $pr_last, $pr_hstp ;
+ printf PPP " <TD>%s</TD>\n", $stat ;
+ print PPP "</TR>\n" ;
+ }
+ }
+ print PPP "</TABLE>\n" ;
+ print PPP "</BLOCKQUOTE>\n" ;
+
+ if ( $CNF { put_histo } eq 'bottom' )
+ { print PPP "<H2>age histogram</H2>\n" ;
+
+ print PPP "<BLOCKQUOTE>\n" ;
+ print PPP gen_histogram ;
+ print PPP "</BLOCKQUOTE>\n" ;
+ }
+
+ print PPP legend ;
+
+ print PPP "<H3>probe results</H3>\n" ;
+ print PPP gen_histogram_probes ;
+
+ print PPP "<H3>software</H3>\n" ;
+
+ print PPP "<BLOCKQUOTE><TABLE><TR>\n" ;
+ my $MIR_IMG = sprintf
+ '<IMG BORDER=2 ALT="mirmon" SRC="%s/mirmon.gif">' , $CNF { icons } ;
+ print PPP sprintf "<TH><A HREF=\"%s\">%s</A></TH>\n"
+ , 'http://www.cs.uu.nl/people/henkp/mirmon/', $MIR_IMG ;
+ print PPP "<TD>$VER</TD>\n" ;
+ print PPP "</TR></TABLE></BLOCKQUOTE>\n" ;
+ print PPP $FOOT ;
+ print PPP "</BODY>\n" ;
+ print PPP "</HTML>" ;
+
+ if ( print PPP "\n" )
+ { close PPP ;
+ if ( -z $TMP )
+ { Warn "wrote empty html file; keeping previous version" ; }
+ else
+ { rename $TMP, $PPP or Error "can't rename $TMP, $PPP ($!)" ; }
+ }
+ else
+ { Error "can't print to $TMP ($!)" ; }
+ }
+
+sub legend
+ { return <<LEGENDA ;
+<H3>legend</H3>
+
+<H4><I>project</I> site -- home</H4>
+
+<BLOCKQUOTE>
+<B><I>project</I> site</B> is an url.
+The <B>href</B> is the href for the site in the list of mirrors,
+usually the root of the mirrored file tree.
+The <B>text</B> is the <I>site</I> of that url.
+<P>
+<B>home</B> (represented by the <B>@</B>-symbol) is an url
+pointing to the document root of the site. This pointer is
+useful if the <B><I>project</I> site</B> url is invalid,
+possibly because the mirror site moved the archive.
+</BLOCKQUOTE>
+
+<H4>type</H4>
+
+<BLOCKQUOTE>
+Indicates the type (<B>ftp</B> or <B>http</B>) of
+the <B><I>project</I> site</B> and <B>home</B> urls.
+</BLOCKQUOTE>
+
+<H4>mirror age, daily stats</H4>
+
+<BLOCKQUOTE>
+The <B>mirror age</B> is based upon the last successful probe.
+<P>
+Once a day the status of a mirror site is determined.
+The status (represented by a colored block) is appended
+to the <B>right</B> of the status history (<I>right</I>
+is <I>recent</I>). More precise, the status block is appended
+if the last status block was appended 24 (or more) hours ago.
+<P>The status of a mirror depends on its age and a few
+configuration parameters :
+<BLOCKQUOTE>
+<TABLE BORDER=1 CELLPADDING=5>
+<TR>
+ <TH ROWSPAN=3>status</TH>
+ <TH COLSPAN=4>age</TH>
+</TR>
+<TR>
+ <TH COLSPAN=2 BGCOLOR="YELLOW">this project</TH>
+ <TH COLSPAN=2 BGCOLOR="AQUA">in general</TH>
+</TR>
+<TR>
+ <TH BGCOLOR="YELLOW">min</TH>
+ <TH BGCOLOR="YELLOW">max</TH>
+ <TH BGCOLOR="AQUA">min</TH>
+ <TH BGCOLOR="AQUA">max</TH>
+</TR>
+<TR>
+ <TH><FONT COLOR="GREEN">fresh</FONT></TH>
+
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">0</TD>
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">
+ @{[$CNF{min_sync}]} + @{[$CNF{max_poll}]}</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">0</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">min_sync + max_poll</TD>
+</TR>
+<TR>
+ <TH><FONT COLOR="BLUE">oldish</FONT></TH>
+
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">
+ @{[$CNF{min_sync}]} + @{[$CNF{max_poll}]}</TD>
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">
+ @{[$CNF{max_sync}]} + @{[$CNF{max_poll}]}</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">min_sync + max_poll</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">max_sync + max_poll</TD>
+</TR>
+<TR>
+ <TH><FONT COLOR="RED">old</FONT></TH>
+
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">
+ @{[$CNF{max_sync}]} + @{[$CNF{max_poll}]}</TD>
+ <TD BGCOLOR="YELLOW" ALIGN="CENTER">∞</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">max_sync + max_poll</TD>
+ <TD BGCOLOR="AQUA" ALIGN="CENTER">∞</TD>
+</TR>
+<TR>
+ <TH><FONT COLOR="BLACK">bad</FONT></TH>
+ <TH COLSPAN=4 BGCOLOR="BLACK">
+ <FONT COLOR="WHITE">the site or mirror tree was never found</FONT></TH>
+</TR>
+</TABLE>
+</BLOCKQUOTE>
+</BLOCKQUOTE>
+
+<H4>last probe, probe stats</H4>
+
+<BLOCKQUOTE>
+<B>Last probe</B> indicates when the last successful probe was made.
+<B>Probe stats</B> gives the probe history (<I>right</I> is <I>recent</I>).
+A probe is either a
+<FONT COLOR="GREEN"><B>success</B></FONT> or a
+<FONT COLOR="RED"><B>failure</B></FONT>.
+</BLOCKQUOTE>
+
+<H4>last stat</H4>
+
+<BLOCKQUOTE>
+<B>Last stat</B> gives the status of the last probe.
+</BLOCKQUOTE>
+
+LEGENDA
+ }
+
+sub start_date
+ { my $url = shift ;
+ my $CMD = shift ;
+ my $TIMEOUT = $CNF { timeout } ;
+ my $src = $HREF { lc site $url } || $url ;
+ $CMD =~ s/%TIMEOUT%/$TIMEOUT/g ;
+ $CMD =~ s/%URL%/$src/g ;
+ printf "*** SUBSTITUTE site %s\n+ url %s\n+ %s\n",
+ site($url), $HREF { lc site $url }, $CMD
+ if $HREF { lc site $url } and $opt{v} ;
+ my $WGT = new IO::Pipe ;
+ my $res = $WGT -> reader ( split ' ', $CMD ) ;
+ if ( $res )
+ { $WGT -> blocking ( 0 ) ;
+ $GET -> add ( $WGT ) ;
+ $URL { $WGT } = $url ;
+ }
+ else
+ { err $url, 'no pipe' ; }
+ }
+
+sub get_date
+ { my $WGT = shift ;
+ my $url = $URL { $WGT } ;
+ my $time = undef ;
+
+ $WGT -> blocking ( 1 ) ;
+ unless ( $WGT -> eof () ) { $time = $WGT -> getline () ; }
+ $GET -> remove ( $WGT ) ;
+ $WGT -> flush ;
+ $WGT -> close ;
+
+ unless ( defined $time ) { return err $url, 'no time' ; }
+
+ $time = ( split ' ', $time ) [ 0 ] ;
+
+ if ( $time eq '' )
+ { err $url, "empty" ; }
+ elsif ( $time !~ /^\d+$/ )
+ { $time = htmlquote $time ;
+ $time = substr ( $time, 0, 15 ) . '..' if length $time > 15 ;
+ err $url, "'$time'" ;
+ }
+ else
+ { res $url, $time, 'ok' ; }
+ }
+
+sub get_dates
+ { my $CMD = shift ;
+ my @QUE ;
+ my $PAR = $CNF { max_probes } ;
+ my $cnt_LST = scalar keys %LST ;
+ for my $url ( sort keys %LST )
+ { if ( $opt{get} eq 'all' or ! exists $OLD { $url } )
+ { push @QUE, $url ; }
+ elsif ( $opt{get} eq 'update' )
+ { my $stat = $OLD { $url } [ 1 ] ;
+ my $vrfy = $OLD { $url } [ 2 ] ;
+ my $lprb = $OLD { $url } [ 5 ] ;
+ if ( ( $lprb eq 'undef'
+ or aprx_le $lprb, $^T - tim_to_s $CNF { min_poll }
+ )
+ and ( $stat ne 'ok'
+ or aprx_le $vrfy, $^T - tim_to_s $CNF { max_poll }
+ )
+ )
+ { push @QUE, $url ; }
+ elsif ( $CNF { randomize } and 0 == int rand $cnt_LST )
+ { push @QUE, $url ; }
+ else
+ { $RES { $url } = $OLD { $url } ; }
+ }
+ else
+ { Error "unknown opt_get '$opt{get}'" ; }
+ }
+
+ while ( @QUE )
+ { while ( $GET -> count () < $PAR and @QUE )
+ { my $url = shift @QUE ;
+ if ( gethost site $url )
+ { start_date $url, $CMD ; }
+ else
+ { err $url, 'site not found' ; }
+ }
+
+ my @can_read = $GET -> can_read ( 0 ) ;
+
+ printf "que %d, get %d, can %d\n",
+ scalar @QUE, $GET -> count (), scalar @can_read
+ if $opt{v} ;
+
+ for my $can_read ( @can_read )
+ { get_date $can_read ; }
+
+ sleep 1 ;
+ }
+
+ my $stop = time + $CNF { timeout } + 10 ;
+
+ while ( $GET -> count () and time < $stop )
+ { sleep 1 ;
+
+ my @can_read = $GET -> can_read ( 0 ) ;
+
+ printf "wait %2d, get %d, can %d\n",
+ $stop - scalar time, $GET -> count (), scalar @can_read
+ if $opt{v} ;
+
+ for my $can_read ( @can_read )
+ { get_date $can_read ; }
+ }
+
+ for my $WGT ( $GET -> handles () )
+ { my $url = $URL { $WGT } ;
+ err $url, 'hangs' ;
+ }
+ }
+
+get_conf_opt ;
+get_ccs $CNF { countries } ;
+get_state $CNF { state } ;
+get_list $CNF { mirror_list } ;
+
+if ( $opt{get} )
+ { get_dates $CNF { probe } ;
+ put_state $CNF { state } ;
+ }
+else
+ { %RES = %OLD }
+
+gen_page $CNF { web_page } ;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ mirmon - monitor the state of mirrors
+
+=head1 SYNOPSIS
+
+ mirmon [ -v ] [ -q ] [ -t timeout ] [ -get opt ] [ -c conf ]
+
+=head1 OPTIONS
+
+ option v : be verbose
+ option q : be quiet
+ option t : set timeout [ default 300 ] ;
+ option get : 'all' : probe all sites
+ : 'update' : probe a selection of the sites (see doc)
+ option c : configuration file ; default list :
+ ./mirmon.conf $HOME/.mirmon.conf /etc/mirmon.conf
+ -------------------------------------------------------------------
+ Documentation : the program contains 'pod' style documentation.
+ Extract the doc with 'pod2text mirmon' or 'pod2html mirmon OUT', etc.
+ -------------------------------------------------------------------
+
+=head1 USAGE
+
+ The program is intended to be run by cron every hour.
+
+ 42 * * * * perl /path/to/mirmon -q -get update
+
+ It quietly probes a subset of the sites in a given list,
+ writes the results in the 'state' file and generates a web page
+ with the results. The subset contains the sites that are new, bad
+ and/or not probed for a specified time.
+
+ When no 'get' option is specified, the program just generates a
+ new web page from the last known state.
+
+ The program checks the mirrors by running a (user specified)
+ program on a pipe. A (user specified) number of probes is
+ run in parallel using nonblocking IO. When something can be
+ read from the pipe, it switches the pipe to blocking IO and
+ reads one line from the pipe. Then it flushes and closes the
+ pipe. No attempt is made to kill the probe.
+
+ The probe should return something that looks like "1043625600\n",
+ that is, a timestamp followed by a newline. The exit status of
+ the probe is ignored.
+
+=head1 CONFIG FILE
+
+=head2 location
+
+ A config file can be specified with the -c option.
+ If -c is not used, the program looks for a config file in
+ -- ./mirmon.conf
+ -- $HOME/.mirmon.conf
+ -- /etc/mirmon.conf
+
+=head2 syntax
+
+ A config file looks like this :
+
+ +--------------------------------------------------
+ |# lines that start with '#' are comment
+ |# blank lines are ignored too
+ |# tabs are replaced by a space
+ |
+ |# the config entries are 'key' and 'value' pairs
+ |# a 'key' begins in column 1
+ |# the 'value' is the rest of the line
+ |somekey A_val B_val ...
+ |otherkey X_val Y_val ...
+ |
+ |# indented lines are glued
+ |# the next three lines mean 'somekey part1 part2 part3'
+ |somekey part1
+ | part2
+ | part3
+ |
+ |# lines starting with a '+' are concatenated
+ |# the next three lines mean 'somekey part1part2part3'
+ |somekey part1
+ |+ part2
+ |+ part3
+ |
+ |# lines starting with a '.' are glued too
+ |# don't use a '.' on a line by itself
+ |# 'somekey' gets the value "part1\n part2\n part3"
+ |somekey part1
+ |. part2
+ |. part3
+ +--------------------------------------------------
+
+=head1 CONFIG FILE : required entries
+
+=head2 project_name <name>
+
+ Specify a short plaintext name for the project.
+
+ project_name Apache
+ project_name CTAN
+
+=head2 project_url <url>
+
+ Specify an url pointing to the 'home' of the project.
+
+ project_url http://www.apache.org/
+
+=head2 mirror_list <file name>
+
+ Specify the file containing the mirrors to probe.
+ Two formats are supported :
+
+ -- plain : lines like
+
+ us http://www.tux.org/
+ nl http://apache.cs.uu.nl/dist/
+
+ -- apache : lines like those in the apache mirrors.list
+
+ ftp us ftp://ftp.tux.org/pub/net/apache/dist/ user@tux.org
+ http nl http://apache.cs.uu.nl/dist/ user@cs.uu.nl
+
+ Specify the required format with 'list_style' (see below).
+ The default style is 'plain'.
+
+ If the url part of a line doesn't end in a slash ('/'), mirmon
+ adds a slash and issues a warning unless it is in quiet mode.
+
+=head2 web_page <file name>
+
+ Specify where the html report page is written.
+
+=head2 icons <directory name>
+
+ Specify the directory where the icons can be found.
+
+=head2 probe <program + arguments>
+
+ Specify the program+args to probe the mirrors. Example:
+
+ probe /sw/bin/wget -q -O - -T %TIMEOUT% -t 1 %URL%TIME
+
+ Before the program is started, %TIMEOUT% and %URL% are
+ substituted with the proper timeout and url values.
+
+ Here it is assumed that each hour the root server writes
+ a timestamp in /path/to/archive/TIME, for instance with
+ a crontab entry like
+
+ 42 * * * * perl -e 'printf "%s\n", time' > /path/to/archive/TIME
+
+ Mirmon reads one line of output from the probe and interprets
+ the first word on that line as a timestamp ; for example :
+
+ 1043625600
+ 1043625600 Mon Jan 27 00:00:00 2003
+ 1043625600 www.apache.org Mon Jan 27 00:00:00 2003
+
+=head2 state <file name>
+
+ Specify where the file containing the state is written.
+ The program reads this file on startup and writes the
+ file when mirrors are probed (-get is specified).
+
+=head2 countries <file name>
+
+ Specify the file containing the country codes;
+ The file should contain lines like
+
+ us - united states
+ nl - netherlands
+
+ The mirmon package contains a recent ISO list.
+
+=head1 CONFIG FILE : optional entries
+
+=head2 max_probes <number>
+
+ Optionally specify the number of parallel probes (default 25).
+
+=head2 timeout <seconds>
+
+ Optionally specify the timeout for the probes (default 300).
+ After the last probe is started, the program waits for
+ <timeout> + 10 seconds, cleans up and exits.
+
+=head2 project_logo <logo>
+
+ Optionally specify (the SRC of the IMG of) a logo to be placed
+ top right on the page.
+
+ project_logo /icons/apache.gif
+ project_logo http://www.apache.org/icons/...
+
+=head2 htm_foot <html>
+
+ Optionally specify HTML to be placed near the bottom of the page.
+
+ htm_foot
+ <HR>
+ <A HREF="..."><IMG SRC="..." BORDER=0></A>
+ <HR>
+
+=head2 htm_top <html>
+
+ Optionally specify some HTML to be placed near the top of the page.
+ The supplied text is placed between <P> and </P>.
+
+ htm_top testing 1, 2, 3
+
+=head2 put_histo top|bottom|nowhere
+
+ Optionally specify where the age histogram must be placed.
+ The default is 'top'.
+
+=head2 min_poll <time spec>
+
+ For 'min_poll' see next item. A <time spec> is a number followed by
+ a unit 's' (seconds), or 'm' (minutes), or 'h' (hours), or 'd' (days).
+ For example '3d' (three days) or '36h' (36 hours).
+
+=head2 max_poll <time spec>
+
+ Optionally specify the maximum probe interval. When the program is
+ called with option '-get update', all sites are probed which are :
+ -- new : the site appears in the list, but there is no known state
+ -- bad : the last probe of the site was unsuccessful
+ -- old : the last probe was more than 'max_poll' ago.
+ Sites are not probed if the last probe was less than 'min_poll' ago.
+
+ So, if you specify
+
+ min_poll 4h
+ max_poll 12h
+
+ the 'reachable' sites are probed twice daily and the 'unreachable'
+ sites are probed at most six times a day.
+
+ The default 'min_poll' is '1h' (1 hour).
+ The default 'max_poll' is '4h' (4 hours).
+
+=head2 min_sync <time spec>
+
+ Optionally specify how often the mirrors are required to
+ make an update. The default 'min_sync' is '1d' (1 day).
+
+=head2 max_sync <time spec>
+
+ Optionally specify the maximum allowable sync interval.
+ Sites exceeding the limit will be considered 'old'.
+ The default 'max_sync' is '2d' (2 days).
+
+=head2 no_randomize
+
+ With a low probablility, mirmon probes mirrors that would
+ otherwise not be probed. In the long run, this balances
+ the number of mirror probes over the hourly mirmon runs.
+ Specifically, if there are N mirrors in the list and some
+ mirmon run would probe K sites, on average (N-K)/N extra
+ sites will be probed.
+
+ If you don't want this behaviour, use 'no_randomize'.
+
+=head2 list_style plain|apache
+
+ Optionally specify the format ('plain' or 'apache') of the
+ mirror-list. See the description of 'mirror_list' above.
+ The default list_style is 'plain'.
+
+=head2 site_url <site> <url>
+
+ Optionally specify a substitute url for a site. When access to
+ a site is restricted (in Australia, for instance), another
+ (sometimes secret) url can be used to probe the site. The <site>
+ of an url is the part between '://' and the first '/'.
+
+=head2 env <key> <value>
+
+ Optionally specify an environment variable.
+
+=head2 include <file name>
+
+ Optionally specify a file to include. The specified file is processed
+ 'in situ'. After the specified file is read and processed, config
+ processing is resumed in the file where the 'include' was encountered.
+ The 'include' depth is unlimited. However, it is a fatal error to
+ include a file twice under the same name.
+
+=head2 show
+
+ When the config processor encounters the 'show' command, it
+ dumps the content of the current config to standout, if option
+ -v is specified. This is intented for debugging.
+
+=head2 exit
+
+ When the config processor encounters the 'exit' command, it
+ terminates the program. This is intented for debugging.
+
+=head1 STATE FILE FORMAT
+
+ The state file consists of lines; one line per site.
+ Each line consists of white space separated fields.
+ The seven fields are :
+
+=head2 field 1 : url
+
+ The url as given in the mirror list.
+
+=head2 field 2 : age
+
+ The age of the site, or 'undef' if no probe was ever successful.
+
+=head2 field 3 : status last probe
+
+ The status of the last probe.
+
+=head2 field 4 : time last succesful probe
+
+ The timestamp of the last succesful probe or 'undef'
+ if the site was never successfully probed.
+
+=head2 field 5 : probe history
+
+ The probe history is a list of 's' (for success) and 'f' (for failure)
+ characters indicating the result of the probe. New results are appended
+ whenever the site is probed.
+
+=head2 field 6 : state history
+
+ The state history consists of a timestamp, a '-' char, and a list of
+ chars indicating a past status: 's' (fresh), 'b' (oldish), 'f' (old)
+ or 'z' (bad). The timestamp indicates when the state history was last
+ updated. The state history is updated when the state file is updated
+ and the last update of the history state was 24 (or more) hours ago.
+ The status is determined by the site's age and a few configuration
+ parameters. The details are explained in the legend of the report page.
+
+=head2 field 7 : last probe
+
+ The timestamp of the last probe.
+
+=head1 INSTALLATION
+
+=over
+
+=item *
+
+ The '#!' path for perl is probably wrong.
+
+=back
+
+=head1 AUTHOR
+
+=begin html
+
+<BLOCKQUOTE>
+ © 2003
+ <A HREF="http://www.cs.uu.nl/staff/henkp.html">Henk P. Penning</A>,
+ <A HREF="http://www.cs.uu.nl/">Computer Science Department</A>,
+ <A HREF="http://www.uu.nl/">Utrecht University</A>
+ <BR>
+ $Id: mirmon,v 1.36 2004/12/28 17:54:10 henkp Exp $
+</BLOCKQUOTE>
+
+=end html
+
+=begin text
+
+ (c) 2003 Henk P. Penning, Computer Science Department, Utrecht University
+ http://www.cs.uu.nl/staff/henkp.html -- penning@cs.uu.nl
+
+=end text
+
+=cut
+