#!/usr/bin/perl -w
use strict;
# Copyright (c) 2002/2003 Eike Frost
# This is Free Software under the terms of the General Public License (GPL).

# This program parses a BitTorrent tracker logfile and extracts various
# data from it. It's neither perfect nor pretty.

# Oh, and it's sufficiently slow, too.

# $Id: trackerlyze.pl,v 1.11 2003/02/23 10:15:31 eike Exp $

use Time::Local; #FOLD00
use POSIX qw(strftime);
use Date::Parse;
use Storable;

# use Data::Dumper; # for Debugging purposes

# Some configuration settings #FOLD01
my $reannounce_interval_max = 30*60; # This is the reannounce interval set; controls backdrift
my $checkpoint_interval = 5*60;      # How often the graph is updated
my $timeout_downloaders_interval = 45*60; # after this much time, we stop considering peers
my $drop_downloaders_interval = 800*60; # after this much time, we drop peers
my $drop_overdue_stats = 1;          # If stats don't fit into writestats_time_back, drop previous data
my $writestats_time_back = 30*60;    # only generate stats for data this old
my $reannounce_interval_avg_samples = 5000; # do the stats on the last X sampls
my $do_reannounce_stats = 1;         # do reannounce stats. SLOW.
my $reannounce_age = 1;              # 0 = consider ALL reannounces, 1 = only past $checkpoint_interval
my $display_file_stats = 0;          # display statistics while running
my $verbose = 1;                     # Be verbose on errors & current status
my $always_do_filestats = 0;         # do filestats even if the data isn't current. SLOW !

my $statefilename = 'scary.state';

#my $io = new trackalyze::IO::file;  # Object to fetch/save stats to files
#my $io = new trackalyze::IO::sql;   # use this for SQL, also take care 
                                     # of the settings in that object         
#my $io = new trackalyze::IO::multiplex  # Of course you can also use more
#        (new trackalyze::IO::file,      # than one module if your heart so
#         new trackalyze::IO::sql);      # desires.
my $graph = new trackalyze::Graph::rrdtool # Object to graph "stuff"

my (%peers, %files);

my ($inittime,                     # date/time of first read line
    $upped,                        # graphed data (usually < transferred data)
    $lasttime,                     # date of last read line
    $totalmaxconnected,            # maximum of connected users of all time
    $terminate,                    # terminate the loop ?
    $proclines,                    # Processed lines in this run
    $parts,                        # counter of intervals processed
    $finishedpeers,                # peers we have served
    $totalconnected)               # currently connected peers
    = (0,0,0,0,0,0,0,0,0);

# Timeslots to consider
my $nslots = int($reannounce_interval_max / $checkpoint_interval);
my @slots = ();

# for reannounce interval stats
my %reannounce;
$reannounce{number} = 0;
$reannounce{current} = 0;

# Slots start out empty
for (my $i = $nslots-1; $i >=0 ; $i--) {
    $slots[$i]{timestamp} = 0;
    $slots[$i]{upped} = 0;
}

loadruntime ();

# Should we catch a sigint, stop after the current line
$SIG{INT} = sub { $terminate++; };

my %regex;
my %r; # Result from regexes with code for named params

$regex{line} = qr/
               ((?:\d{1,3}\.){3}\d{1,3})\s # IP                  $1
               [^ ]+\s [^ ]+\s             # Ident, Username
               \[([^\]]+)\]\s              # Timestamp           $2
               \"([^"\s]+)\s               # Method              $3
                 ([^"\s]+)\s               # Request             $4
                 ([^"]+)"\s                # Parameters          $5
               (\d)+\s                     # Status Code         $6
               (\d)+                       # Size returned       $7
              /x;

my $begin = time;


while ((! $terminate) and (my $line = <STDIN>)) { #FOLD00
    # Process a whole line
    ($line =~ m/$regex{line}/o) or next; #FOLD01

    my ($ip, $timestamp, $getmethod, $getrequest, $getparam,
        $statuscode, $size) = ($1, str2time($2), $3, $4, $5, $6, $7);
    
    if ($timestamp < $lasttime) {
    	next;
    }

    # we only process announces in the further lines. If any other other
    # lines are to be processed, process them here.
    $getrequest =~ m/announce\?(.*)$/o or next;
    my @r = map { split '=' } split '&', $1;
    if (scalar @r % 2 == 0) { %r = @r; } else { next; }

    # test whether needed parameters have been given
    (defined $r{peer_id}) and (defined $r{uploaded}) and 
    (defined $r{downloaded}) and (defined $r{left}) and
    (defined $r{left}) and (defined $r{info_hash}) or next;

    # Convert hashes into readable form
    $r{info_hash} =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; 
    $r{info_hash} = unpack ("H*", $r{info_hash});
    $r{peer_id} =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; 
    $r{peer_id} = unpack ("H*", $r{peer_id});
    
    # Sometimes, the client submits its own IP instead of the originating host
    if (defined $r{$ip}) {
      $ip = $r{$ip};
    }    
    
    # If we don't really know this peer, create some empty hashes #FOLD01
    (! exists $peers{$r{peer_id}}) and createpeerentry ($r{peer_id});
    (! exists $files{$r{info_hash}}) and createfileentry ($r{info_hash});

    # Set some data about the peer from the current line
    $peers{$r{peer_id}}{ip} = $ip;
    $peers{$r{peer_id}}{hash} = $r{info_hash};
    $peers{$r{peer_id}}{left} = $r{left};        

    # Have we seen this peer before ?
    if (! defined $peers{$r{peer_id}}{firstseen}) {
  	    $peers{$r{peer_id}}{firstseen} = $timestamp;
    }

    # Did this announce contain an event ? If so, handle it accordingly #FOLD01
    if (defined $r{event}) {
        if ($r{event} eq 'started') {
            if (! defined $files{$r{info_hash}}{started}) {
                $files{$r{info_hash}}{started} = 0;
            }
            $files{$r{info_hash}}{started}++; 
            $peers{$r{peer_id}}{started} = $timestamp;
        } elsif ($r{event} eq 'stopped') {
            $peers{$r{peer_id}}{stopped} = $timestamp;
        } elsif ($r{event} eq 'completed') {
            if (! defined $files{$r{info_hash}}{completed}) { 
                $files{$r{info_hash}}{completed} = 0;
            }
            $files{$r{info_hash}}{completed}++; 
            $peers{$r{peer_id}}{completedat} = $timestamp;
        }
    }

    # Grab some info about files/torrents from the line and take note of them
    $files{$r{info_hash}}{lastseen} = $timestamp;
    if (!defined $files{$r{info_hash}}{firstseen}) {
  	    $files{$r{info_hash}}{firstseen} = $timestamp;
    }
       
    # So, how do the transfers look ? #FOLD01
    if (defined $peers{$r{peer_id}}{up}) {
        my $diff = $r{uploaded} - $peers{$r{peer_id}}{up};
       
        # some sanity limits
        if (($diff > 0) and ($diff < 2**32*$nslots)) {
            # The traffic difference is over this long a timespan ...
            my $timediff = $timestamp - $peers{$r{peer_id}}{lastseen};

            # ... so we distribute it over these many timeslots ...
            my $takeslots = int ($timediff / $checkpoint_interval);
            ($takeslots < 1) and $takeslots = 1;
	    ($takeslots > $nslots) and (! $drop_overdue_stats) and $takeslots = $nslots;

            # ... which gives us this much per timeslot
	    my $taketraffic = int ($diff / $takeslots);
            # sanity-check, again        
            if (! ($taketraffic > 2**32)) { 
                # increment each slot
                for (my $i = $nslots-1 ; ($i > $nslots-1-$takeslots) and ($i >= 0); $i--) {
                    $slots[$i]{upped} += $taketraffic;
                }
                # Note the difference for the file
                $files{$r{info_hash}}{up} += $diff;
            } else {
                # Speed is way higher than anticipated; either fake or errors
                if (($diff > 0) and ($verbose)) {
                    print "HUGE traffic : diff : $diff, peer : $r{peer_id}, " .
                           "ip : $ip, time : $timestamp; " .
                           "Logline : \n $line \n";
                }
            }
        }
    } else {
        # This is a new peer -- or too much data.
        if (($r{uploaded} < 2**32) and ($r{uploaded} > 0)) {
            $slots[$nslots-1]{upped} += $r{uploaded};
            $files{$r{info_hash}}{up} += $r{uploaded};
        } else {
            if (($r{uploaded} > 0) and ($verbose)) {
                print "HUGE traffic : diff : $r{uploaded}, " .
                       "peer : $r{peer_id}, ip : $ip, time : " .
                       "$timestamp; Logline : \n $line \n";
            }
        }
    }
    
    # If we have seen this peer before, it's a reannounce #FOLD01
    if (exists $peers{$r{peer_id}}{lastseen}) {
	my $announce_interval = $timestamp - $peers{$r{peer_id}}{lastseen};

        # These are done over the trackers life or over the past
        # writeout-interval
        # Create new average
	if ($do_reannounce_stats) {
          $reannounce{current} = ($reannounce{current} * $reannounce{number} 
                                  + $announce_interval);
          $reannounce{current} /= ++$reannounce{number}; #/
        }

        # If the client has been seen < 1 second prior to this, avoid
        # divisions by zero
	if ($announce_interval == 0) { $announce_interval++; }

        # Current upload speed of this peer
	$peers{$r{peer_id}}{speedup} = (($r{uploaded} - $peers{$r{peer_id}}{up}) / $announce_interval);
        # Current download speed of this peer
	$peers{$r{peer_id}}{speeddown} = (($r{downloaded} - $peers{$r{peer_id}}{down}) / $announce_interval);
    } else {
        # We don't know anything about speeds, yet.
	$peers{$r{peer_id}}{speedup} = 0;
	$peers{$r{peer_id}}{speeddown} = 0;
    }
    
    # Some more data we might be interested in, later.
    $peers{$r{peer_id}}{lastseen} = $timestamp;        
    $peers{$r{peer_id}}{up} = $r{uploaded};
    $peers{$r{peer_id}}{down} = $r{downloaded};    
    
    # The graphing magic. Ouch. #FOLD01

    # if we are just starting, set up some stuff
    if ($inittime == 0) {
        $inittime = $timestamp; 

        # slots for before the big bang
        for (my $i = 0; $i < $nslots; $i++) {
            $slots[$i]{timestamp} = $timestamp - (($nslots-$i)*$checkpoint_interval);
        }

        $graph->setup ($slots[0]{timestamp} - $checkpoint_interval);
    } else { # else, we may have stuff to graph
        # but only if a checkpint_interval has passed ...
        if ($inittime + $checkpoint_interval-1 < $timestamp) {
            # ... and as often as is needed to get to the current interval
            while ($inittime + $checkpoint_interval-1 < $timestamp) {
	            $inittime += $checkpoint_interval;
                
                # move slots back one
                my %curslot;
                $curslot{timestamp} = $slots[0]{timestamp};
                $curslot{upped} = $slots[0]{upped};
                for (my $i = 0; $i < $nslots-1; $i++) {
                    $slots[$i]{timestamp} = $slots[$i+1]{timestamp}; 
                    $slots[$i]{upped} = $slots[$i+1]{upped}; 
                }
                $slots[$nslots-1]{timestamp} = $inittime;
                $slots[$nslots-1]{upped} = 0;

                # increase $upped with value from moved-out slot
                $upped += $curslot{upped};
                
                # update the difference in the database
                $graph->update ($curslot{timestamp}, $upped, $totalconnected);

                # If there's need for graphing, graph.
                if (time - $timestamp < $writestats_time_back) {
                    $graph->graph ($timestamp - $reannounce_interval_max); 
                }
                # Give some state information if so instructed
                if (((time - $timestamp < $writestats_time_back) or 
                    ($parts % 72 == 0)) and $verbose) {
                    print strftime ("%a %b %e %H:%M:%S %Y", 
                          localtime ($curslot{timestamp})), "\n";
                    print "Total transferred so far : ", readable($upped), 
                          "\n";
                }
                # Just in case, save state information.
    	        if ($parts++ % 36 == 0) {
                    saveruntime ();
                }
            }

            # Clean up after dead & dropped peers
            peerclean ();

            # If used, calculate filespeeds & write file  stats
            if ((time - $timestamp < $writestats_time_back) or 
                ($always_do_filestats)) {
               filespeeds ();
               filestats ();
            }

            # set back reannounce-stats
            if ($reannounce_age) {
               $reannounce{number} = 0;
               $reannounce{current} = 0;
            }
        }
    }
    
    $proclines++; #FOLD01

    # Let's give the user some feedback how far along we are in parsing
    if ($proclines % 50000 == 0) { print "processed $proclines lines ... \n"; }

    # This line is done for, save the time
    $lasttime = $timestamp;
}

# The loop ended; now let's see what final stats we have for this run, #FOLD01
# and clean up after ourselves

print "Total uploaded and graphed   : " . readable($upped) . " ($upped)\n";
print "Final reannounce avg         : " . $reannounce{current} . "\n";
print "Connected clients            : " . $totalconnected . "\n";
print "Clients in recent memory     : " . scalar(keys %peers) . "\n";
print "Log lines in this run        : " . $proclines . "\n";
print "Time taken                   : " . (time () - $begin) . "\n";
print "Lines/sec                    : " . ($proclines / (time - $begin + 1)) . "\n";

filestats ();
saveruntime ();

exit 0;

# Taking care of the hashes #FOLD00

# This writes out statistics for files. Clunky, slow.
sub filestats { #FOLD01

    # Gathering of filenames/sizes
    if ($display_file_stats) {
        foreach my $file (keys %files) {
            if ($files{$file}{filename} eq '') { 
                $files{$file}{filename} = $io->getfilename($file);
            }
            if ($files{$file}{filesize} == 0) { 
                $files{$file}{filesize} = $io->getfilesize($file);
            }
        }
    }

    my @recs;
    # Fetch / deduce statistics for each file 
    foreach my $file (keys %files) {   	    
        my %record;

        $record{hash} = $file;

        # If we know a filename, use that. If not, use the hash.    
        if ($files{$file}{filename} ne '') {
            $record{name} = $files{$file}{filename};
        } else {	
    	    $record{name} = $file;
	}
	    
        # Prepare some information from the hashes
        $record{up} = $files{$file}{up};
  	$record{upfmt} = sprintf("%10s", readable($files{$file}{up}));

        $record{dl} = $files{$file}{completed};
        $record{dlfmt} = sprintf ("%6s", $files{$file}{completed});

	$record{firstseen} = $files{$file}{firstseen};
        $record{firstseenfmt} = strftime ("%m%d %H:%M", localtime ($files{$file}{firstseen}));

	$record{lastseen} = $files{$file}{lastseen};	
	$record{lastseenfmt} = strftime ("%m%d %H:%M", localtime ($files{$file}{lastseen}));

	$record{span} = $files{$file}{lastseen} - $files{$file}{firstseen};
	$record{spanfmt} = readablespan ($files{$file}{lastseen} - $files{$file}{firstseen});

        $record{peercompleteavgfmt} = readablespan ($files{$file}{peercomplduraverage});
        $record{peercompletemaxfmt} = readablespan ($files{$file}{peercompldurmax});
        $record{peercompleteminfmt} = readablespan ($files{$file}{peercompldurmin});

        $record{peerconnavgfmt} = readablespan ($files{$file}{peerconnduraverage});
        $record{peerconnmaxfmt} = readablespan ($files{$file}{peerconndurmax});

        $record{peercompleteavg} = $files{$file}{peercomplduraverage};
        $record{peercompletemax} = $files{$file}{peercompldurmax};
        $record{peercompletemin} = $files{$file}{peercompldurmin};

        $record{peerconnavg} = $files{$file}{peerconnduraverage};
        $record{peerconnmax} = $files{$file}{peerconndurmax};

        $record{sourcesfmt} = pad(3, $files{$file}{sources}) . "/" . 
                              pad(3, $files{$file}{connected});

        $record{sources} = $files{$file}{sources};
        $record{connected} = $files{$file}{connected};
        $record{leechers} = $files{$file}{connected} - $files{$file}{sources};
	
        # only do peer statistics if it's actually requested by IO
        if ($io->dopeerstats($file)) {    
            $record{peerstats} = "";
     	    for (my $i = 0; $i < 2; $i++) {
  	        foreach my $peer (@{$files{$file}{peers}}) {
		    if ((defined $peers{$peer}{killed}) and ($i != 1)) {
		        next;
     		    }
	            if ((! defined $peers{$peer}{killed}) and ($i == 1)) {
		        next;
		    }  
		
     	            $record{peerstats} .= "IP : " . sprintf ("%15s", $peers{$peer}{ip});
	            (defined $peers{$peer}{killed}) and $record{peerstats}.='D';
	            $record{peerstats} .= " :: ";
	            $record{peerstats} .= "speedup : " .    sprintf ("%9s", readable ($peers{$peer}{speedup}))  . "/s :: " ;
	            $record{peerstats} .= "speeddown : " .  sprintf ("%9s", readable ($peers{$peer}{speeddown})) . "/s :: ";
    	            $record{peerstats} .= "left : " .       sprintf ("%8s", readable ($peers{$peer}{left})) . 
		                          " : (" . sprintf ("%5s", sprintf ("%.1f", $peers{$peer}{left}/($peers{$peer}{down} + $peers{$peer}{left} + 1)*100)) . "%) :: "; 
  	            $record{peerstats} .= "uploaded : ".    sprintf ("%8s", readable ($peers{$peer}{up})) . " :: ";
	            $record{peerstats} .= "downloaded : " . sprintf ("%8s", readable ($peers{$peer}{down})) . " :: ";
	            $record{peerstats} .= "firstseen : ".   $peers{$peer}{firstseen} . " :: ";
	            $record{peerstats} .= "lastseen : " .   $peers{$peer}{lastseen} . " :: ";
		    $record{peerstats} .= "duration : " .   readablespan ($peers{$peer}{lastseen} - $peers{$peer}{firstseen}) . " :: ";
		    if (defined $peers{$peer}{completedat}) {
		        my $duration = $peers{$peer}{completedat} - $peers{$peer}{firstseen};
  		        $record{peerstats} .= "completed : " . readable ($peers{$peer}{down}) .
		                              " in " . readablespan ($duration) . " @ " . 
					      sprintf ("%9s", readable ($peers{$peer}{down} / ($duration+1))) .
  					      "/s :: ";
    		    }
	        $record{peerstats} .= "\n";
                }
            }
	}  
        # Use the max speed as speed indicator (figures should be close,
        # anyway.
	$record{peerspeed} = readablemetric ($files{$file}{speedup} > 
                               $files{$file}{speeddown} ? 
                               $files{$file}{speedup} : 
                               $files{$file}{speeddown}) . "/s";
	$record{speed} = $files{$file}{speedup} > 
                               $files{$file}{speeddown} ? 
                               $files{$file}{speedup} : 
                               $files{$file}{speeddown};

        push @recs, \%record;
    }

    # if we want to display stats on screen, sort them first, then give columns
    if ($display_file_stats) { 
        @recs = sort {${$a}{up} <=> ${$b}{up}} @recs; 
        print "Hash/Filename                                 ".
              "upped    dls  firstseen  timespan src/con\n";
    }
    
    foreach my $re (@recs) {
        my %r = %{$re};
        # If stats are to be displayed, shrink name, print out
        if ($display_file_stats) {
	    my $fname;
	    if (length ($r{name}) > 40) {
		$fname = substr ($r{name},0,20) . '...' . 
                         substr ($r{name}, -17);
	    } else {
	       $fname = $r{name};
	    }

            print $fname . " " . $r{upfmt} . " ".  $r{dlfmt} . " " . 
                  $r{firstseenfmt} . " " . $r{spanfmt} . " " . 
                  $r{sourcesfmt} . "\n";
        }

        # either way, hand the data over to IO	      
        if ((time - $r{lastseen} < $writestats_time_back) or 
            $always_do_filestats) {
            $io->save (\%r);
        }
    }

    # Some global stats that might be of interest
    $io->savemisc (totalconnected => $totalconnected,
         totalmaxconnected        => $totalmaxconnected,
         reannounce_interval_avg  => $reannounce{current});
}

# calculates current up/down speeds
sub filespeeds { #FOLD01
    foreach my $file (keys %files) {
         $files{$file}{speedup} = 0;
         $files{$file}{speeddown} = 0;
    }
    foreach my $peer (keys %peers) {
         $files{$peers{$peer}{hash}}{speedup} += $peers{$peer}{speedup};
         $files{$peers{$peer}{hash}}{speeddown} += $peers{$peer}{speeddown};
    }
}

# calculates the current connections/file
sub connectionsperfile { #FOLD01
    foreach my $file (keys %files) {
       $files{$file}{connected} = 0;
       $files{$file}{sources} = 0;
       $files{$file}{peers} = ();
    }

    foreach my $peer (keys %peers) {
        if (! defined $peers{$peer}{killed}) {
            $files{$peers{$peer}{hash}}{connected}++;
	    if ((defined $peers{$peer}{left}) and ($peers{$peer}{left} == 0)) {
                $files{$peers{$peer}{hash}}{sources}++;
	    }
	}

        push @{$files{$peers{$peer}{hash}}{peers}}, $peer;
    }
}

# cleans up the peers hash (kill dead peers, tally up stats)
sub peerclean { #FOLD01
    $totalconnected = 0;

    foreach my $peer (keys %peers) {
        if (($peers{$peer}{lastseen} + $timeout_downloaders_interval > $lasttime) and 
	    (defined $peers{$peer}{killed})) {
	    # client came back to life after timeout
	    delete $peers{$peer}{killed};
	    $peers{$peer}{nostat} = 1;
	    $finishedpeers--;
	} elsif (($peers{$peer}{lastseen} + $drop_downloaders_interval < $lasttime) and
	         (defined $peers{$peer}{killed})) {
            # drop time has been reached
	    delete $peers{$peer};
	} elsif ((! defined $peers{$peer}{killed}) and 
	         ($peers{$peer}{lastseen} + $timeout_downloaders_interval < $lasttime) or 
                 (defined $peers{$peer}{stopped})) {
	    # client exceeded timeout
            $finishedpeers++;
            # Has this peer been looked at before and come back from the dead ?
	    if (! defined $peers{$peer}{nostat}) {
                # How long have we tracked this peer
                my $peerlifetime = $peers{$peer}{lastseen} - 
                                   $peers{$peer}{firstseen};
                # What's the current average for the current torrent ?
                my $avg = $files{$peers{$peer}{hash}}{peerconnduraverage};
                # How many peers have finished this torrent before ?
                my $fpeers = $files{$peers{$peer}{hash}}{finishedpeers}++;

                # Calculate new average
                $files{$peers{$peer}{hash}}{peerconnduraverage} = 
                  int (($fpeers * $avg + $peerlifetime) / ($fpeers + 1));
                # Is this the new king for longest lifetime ?
                ($files{$peers{$peer}{hash}}{peerconndurmax} < $peerlifetime) 
                   and $files{$peers{$peer}{hash}}{peerconndurmax} = $peerlifetime;

                # if we have seen both start and complet events, we can do some more stuff
                if (defined $peers{$peer}{completedat} and defined $peers{$peer}{started}) {
                    # if we know the filesize of this torrent, we can deduce
                    # whether more than 90% were transferred. If so, proceed,
                    # otherwise just drop it
		    if (($files{$peers{$peer}{hash}}{filesize} == 0) or 
		        (($files{$peers{$peer}{hash}}{filesize} != 0) and 
			 (abs ($peers{$peer}{down} - $files{$peers{$peer}{hash}}{filesize}) <
			   $files{$peers{$peer}{hash}}{filesize}*0.1))) {

                        # Calculate new average for duration for completion
                        $files{$peers{$peer}{hash}}{peercomplduraverage} =
                            int (($files{$peers{$peer}{hash}}{completedpeers} 
                                * $files{$peers{$peer}{hash}}{peercomplduraverage} 
                                + $peerlifetime) / (++$files{$peers{$peer}{hash}}{completedpeers}));

                        # new winner for maximum ?
                        ($files{$peers{$peer}{hash}}{peercompldurmax} < $peerlifetime) and
                            $files{$peers{$peer}{hash}}{peercompldurmax} = $peerlifetime;

                        # and what about the minimum ?
                        (($files{$peers{$peer}{hash}}{peercompldurmin} > $peerlifetime) or
                         ($files{$peers{$peer}{hash}}{peercompldurmin} == 0)) and
                            $files{$peers{$peer}{hash}}{peercompldurmin} = $peerlifetime;
		    }			    
                }
	    }	
            # Do not consider this peer for anything anymore; keep it around
            # in case it comes back

	    $peers{$peer}{killed} = 1;

	    if (defined $peers{$peer}{stopped}) {
	        # if a "stopped" event was received, no need to keep the
                # peer around
		delete $peers{$peer};
	    }	

        } else {
            $totalconnected++;
        }
    }

    if ($totalconnected > $totalmaxconnected) {
        $totalmaxconnected = $totalconnected;
    }
    
    connectionsperfile ();        
}

# initialize fields for a new/unknown torrent/file
sub createfileentry { #FOLD01
    my $info_hash = shift;
    $files{$info_hash} = {};
    $files{$info_hash}{peerconnduraverage} = 0;  # peer connection duration average
    $files{$info_hash}{peerconndurmax} = 0;      # peer connection duration maximum
    $files{$info_hash}{finishedpeers} = 0;       # peers that have stopped coming back
    $files{$info_hash}{completedpeers} = 0;      # peers that have downloaded the whole file
    $files{$info_hash}{peercomplduraverage} = 0; # peer completion duration average
    $files{$info_hash}{peercompldurmax} = 0;     # peer completion duration maximum 
    $files{$info_hash}{peercompldurmin} = 0;     # peer completion duration minimum (>90%)
    $files{$info_hash}{filename} = '';           # filename associated with this torrent
    $files{$info_hash}{filesize} = 0;            # filesize associated with this torrent
    $files{$info_hash}{completed} = 0;           # number of completed downloads
    $files{$info_hash}{up} = 0;                  # number of transferred bytes
    $files{$info_hash}{speedup} = 0;             # current upload-speed
    $files{$info_hash}{speeddown} = 0;           # current download-speed
}

# initialize fields for a new/unknown peer
sub createpeerentry { #FOLD01
    my $peer_id = shift;
    $peers{$peer_id} = {};
}

# Auxiliary Routines #fold00

# Saves runtime data for picking up later
sub saveruntime { #fold01
    my %save = (
             'peers' => \%peers,
	     'files' => \%files,
	     'lasttime' => $lasttime,
	     'upped' => $upped,
	     'inittime' => $inittime,
             'slots' => \@slots,
             'totalmaxconnected' => $totalmaxconnected,
             'totalconnected' => $totalconnected,
             'finishedpeers' => $finishedpeers,
             'reannounce' => \%reannounce);

    store \%save, $statefilename;
}

# Loads runtime data from previous runs
sub loadruntime { #fold01
    if (-e $statefilename) {
        my $retrieve = retrieve('scary.state');
        my %retrieve = %$retrieve;
        %peers = %{$retrieve{peers}};
        %files = %{$retrieve{files}};
        %reannounce = %{$retrieve{reannounce}};
        $lasttime = $retrieve{lasttime};
        $inittime = $retrieve{inittime};
        $totalmaxconnected = $retrieve{totalmaxconnected};
        $totalconnected = $retrieve{totalconnected};
        $upped = $retrieve{upped};
        $finishedpeers = $retrieve{finishedpeers};
        @slots = @{$retrieve{slots}};
    }
}

# spits out some more readable numbers for byte figures
sub readable  { #fold01
    my $d = shift;    
    ($d > 1024**5) and return (sprintf ("%.2f", $d/(1024**5))) . "PiB";
    ($d > 1024**4) and return (sprintf ("%.2f", $d/(1024**4))) . "TiB";
    ($d > 1024**3) and return (sprintf ("%.2f", $d/(1024**3))) . "GiB";
    ($d > 1024**2) and return (sprintf ("%.2f", $d/(1024**2))) . "MiB";
    ($d > 1024)    and return (sprintf ("%.2f", $d/(1024)))    . "KiB";        
    return sprintf ("%.2f", $d) . "b";
}

# Spits out some more readable numbers for metric bit figures
sub readablemetric  { #fold01
    my $d = shift;    
    ($d > 1000**5) and return (sprintf ("%.2f", $d/(1000**5))) . "Pb";
    ($d > 1000**4) and return (sprintf ("%.2f", $d/(1000**4))) . "Tb";
    ($d > 1000**3) and return (sprintf ("%.2f", $d/(1000**3))) . "Gb";
    ($d > 1000**2) and return (sprintf ("%.2f", $d/(1000**2))) . "Mb";
    ($d > 1000)    and return (sprintf ("%.2f", $d/(1000)))    . "Kb";        
    return sprintf ("%.2f", $d) . "b";
}

# 0-pad numbers to x columns
sub pad { #fold01
    my $cols = shift;
    my $num = my $temp = shift;            
    while ($temp >= 10) {
        $temp /= 10;
        $cols--;
    }    
    return $cols > 0 ? ('0' x --$cols).$num : $num;
}

# a more readable timespan
sub readablespan { #fold01
    my $span = shift;
    my $days = int ($span / 86400);
    my $hours = int (($span % 86400) / 3600);
    my $minutes = int ((($span % 86400) % 3600) / 60);
    return pad(2, $days) . 'd' . pad(2, $hours) . 'h' . 
           pad(2, $minutes) . 'm';
}

# Handle IO (with files)
package trackalyze::IO::file; #FOLD00

no strict 'refs';

sub new { #FOLD01
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = {statusdir => '/home/eike/torrent/status',
                miscsuffix => ''
               };

    bless ($self, $class);
    return $self;
}

# returns filename for torrent-hash
sub getfilename ($) { #FOLD01
    my $self = shift;
    my $file = shift;
    if (-e "${$self}{statusdir}/$file.filename") {
        return `cat ${$self}{statusdir}/$file.filename`;
    }
    return '';
}

# returns filesize for torrent-hash
sub getfilesize ($) { #FOLD01
    my $self = shift;
    my $file = shift;
    if (-e "${$self}{statusdir}/$file.filename") {
        return `cat ${$self}{statusdir}/$file.filesize`;
    }    
    return 0;
}

# saves data about one torrent
sub save ($) { #FOLD01
    my $self = shift;
    my $rrecord = shift;
    my %r = %{$rrecord};
    my $statusdir = ${$self}{statusdir};

    open TR, ">$statusdir/$r{hash}.transferred";
    print TR $r{upfmt};
    close TR;

    open TR, ">$statusdir/$r{hash}.completed";
    print TR $r{dl};
    close TR;

    open TR, ">$statusdir/$r{hash}.duration";
    print TR $r{spanfmt};
    close TR;

    open TR, ">$statusdir/$r{hash}.peercompleteavg";
    print TR $r{peercompleteavgfmt};
    close TR;

    open TR, ">$statusdir/$r{hash}.peercompletemax";
    print TR $r{peercompletemaxfmt};
    close TR;

    open TR, ">$statusdir/$r{hash}.peercompletemin";
    print TR $r{peercompleteminfmt};
    close TR;
    
    open TR, ">$statusdir/$r{hash}.peerconnectionaverage";
    print TR $r{peerconnavgfmt};
    close TR;
    
    open TR, ">$statusdir/$r{hash}.peerconnectiomax";
    print TR $r{peerconnmaxfmt};
    close TR;

    open TR, ">$statusdir/$r{hash}.connected";
    print TR $r{connected};
    close TR;

    open TR, ">$statusdir/$r{hash}.sources";
    print TR $r{sources};
    close TR;
    
    open TR, ">$statusdir/$r{hash}.peerstats";
    print TR $r{peerstats};
    close TR;

    open TR, ">$statusdir/$r{hash}.speed";
    print TR $r{peerspeed};
    close TR;
}

# saves miscellaneous data
sub savemisc ($) { #FOLD01
    my $self = shift;
    my %r = @_;
    my $statusdir = ${$self}{statusdir};

    open TR, ">$statusdir/total.connected${$self}{miscsuffix}";
    print TR $r{totalconnected};
    close TR;        
    
    open TR, ">$statusdir/total.maxconnected${$self}{miscsuffix}";
    print TR $r{totalmaxconnected};
    close TR;        

    open TR, ">$statusdir/announce_interval_avg${$self}{miscsuffix}";
    print TR $r{reannounce_interval_avg};
    close TR;
}

# should peerstats be generated for file X ?
sub dopeerstats ($) { #FOLD01
    return 1;
}

package trackalyze::IO::sql; #FOLD00
# Provisions for saving all this data to SQL tables

# This isn't optimized very much. It gets the job done, though.

# If you want peerstats to be generated for a specific hash,
# insert a line into the peerstats table with the hash specified.
# there is also a table for metadata, which isn't touched by this
# code; you can store metadata there that may be used here, though.

no strict 'refs';

use DBI;
use DBD::mysql;

sub new { #FOLD01
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = { 
                 dbname => 'DATABASE',
                 dbuser => 'USERNAME',
                 dbpass => 'PASSWORD',
                 statstable => 'torrents',
                 misctable => 'misc',
                 peerstatstable => 'peerstats',
                 metadatatable => 'metadata',
                
               };
    my %self = %{$self};
    ${$self}{sqlhandle} = DBI->connect ("dbi:mysql:" . 
                            $self{dbname},
                            $self{dbuser}, 
                            $self{dbpass}) 
                            || die 
                            ('Could not establish connection to database');
    %self = %{$self};
    my $st_handle = $self{sqlhandle}->prepare 
                    ('show tables from ' . $self{dbname});
    my $result = $st_handle->execute ();
    my $arrayref = $st_handle->fetchall_arrayref;

    my @rows = @$arrayref;
    $result = 0;
    foreach my $key (@rows) {
        if (@$key[0] eq $self{statstable}) { $result++; }
    }
    
    if ($result eq 0) {
        my $st_handle = $self{sqlhandle}->prepare 
           ('create table ' . $self{statstable} . 
            '(info_hash CHAR(40) not null unique,
              time INT unsigned,
              transferred BIGINT unsigned,
              completed INT unsigned,
              duration INT unsigned,
              peercompletemin INT unsigned,
              peercompleteaverage INT unsigned,
              peercompletemax INT unsigned,
              peerconnectionaverage INT unsigned,
              peerconnectionmax INT unsigned,
              connected INT unsigned,
              sources INT unsigned,
              speed FLOAT unsigned,
              primary key (info_hash)
             )');
        my $result = $st_handle->execute ();
        $st_handle = $self{sqlhandle}->prepare 
           ('create table ' . $self{misctable} . 
            '(name VARCHAR(255) not null unique,              
              value VARCHAR(255),
              primary key (name)
             )');
        $result = $st_handle->execute ();
        $st_handle = $self{sqlhandle}->prepare 
           ('create table ' . $self{peerstatstable} . 
            '(info_hash CHAR(40) not null unique,
              peerstats mediumblob,
              primary key (info_hash)
             )');
        $result = $st_handle->execute ();
        $st_handle = $self{sqlhandle}->prepare 
           ('create table ' . $self{metadatatable} . 
            '(info_hash CHAR(40) not null unique,
              filename VARCHAR(255),
              filesize BIGINT unsigned,
              tracker VARCHAR(255),
              piecesize INT unsigned,
              primary key (info_hash)
             )');
        $result = $st_handle->execute ();

    }
 
    bless ($self, $class);
    return $self;
}

# returns filename for torrent-hash
sub getfilename ($) { #FOLD01
    my $self = shift;
    my $info_hash = shift;

    my $st_handle = ${$self}{sqlhandle}->prepare 
                   ('select filename from ' . ${$self}{metadatatable} . 
                    ' where info_hash="' . $info_hash . '"');
    my $result = $st_handle->execute () or die;
    my $arrayref = $st_handle->fetchall_arrayref;

    my @rows = @$arrayref;
    return scalar @rows != 0 ? ${$rows[0]}[0] : ''
}

# returns filesize for torrent-hash
sub getfilesize ($) { #FOLD01
    my $self = shift;
    my $info_hash = shift;

    my $st_handle = ${$self}{sqlhandle}->prepare 
                   ('select filesize from ' . ${$self}{metadatatable} . 
                    ' where info_hash="' . $info_hash . '"');
    my $result = $st_handle->execute ()  or die;
    my $arrayref = $st_handle->fetchall_arrayref;

    my @rows = @$arrayref;
    return scalar @rows != 0 ? ${$rows[0]}[0] : ''
}

# saves data about one torrent
sub save ($) { #FOLD01
    my $self = shift;
    my $rrecord = shift;
    my %r = %{$rrecord};

    my $st_handle = ${$self}{sqlhandle}->prepare 
       ('insert ignore into ' . ${$self}{statstable} . 
        '(info_hash,time,transferred,completed,duration,peercompletemin,'.
        'peercompleteaverage,peercompletemax,peerconnectionaverage,'.
        'peerconnectionmax,connected,sources,speed) 
         values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
   
    my $result = $st_handle->execute ($r{hash}, $r{lastseen}, $r{up},
                              $r{dl}, $r{span}, $r{peercompletemin},
                              $r{peercompleteavg}, $r{peercompletemax},
                              $r{peerconnavg}, $r{peerconnmax}, 
                              $r{connected}, $r{sources}, $r{speed}) 
                 or die $!;

    if ($result eq '0E0') {
        $st_handle = ${$self}{sqlhandle}->prepare 
           ('update ' . ${$self}{statstable} . 
            ' set time=?, transferred=?, completed=?, duration=?,' .
            ' peercompletemin=?, peercompleteaverage=?, peercompletemax=?,' .
            ' peerconnectionaverage=?, peerconnectionmax=?, connected=?,' .
            ' sources=?, speed=? ' . 
            ' where info_hash=?;');

        $result = $st_handle->execute ($r{lastseen}, $r{up},
                              $r{dl}, $r{span}, $r{peercompletemin},
                              $r{peercompleteavg}, $r{peercompletemax},
                              $r{peerconnavg}, $r{peerconnmax}, 
                              $r{connected}, $r{sources}, $r{speed}, $r{hash})
                  or die $!;
    }

    if ($self->dopeerstats ($r{hash})) {
        $st_handle = ${$self}{sqlhandle}->prepare 
           ('update ' . ${$self}{peerstatstable} . 
            ' set peerstats=? where info_hash=?;');
        $result = $st_handle->execute ($r{peerstats}, $r{hash})
                  or die $!;
    }
}

# saves miscellaneous data
sub savemisc ($) { #FOLD01
    my $self = shift;
    my %r = @_;
    my $statusdir = ${$self}{statusdir};

    my $st_handle = ${$self}{sqlhandle}->prepare 
       ('insert ignore into ' . ${$self}{misctable} . 
        '(name, value) values (?, ?)');
   
    my $result = $st_handle->execute ('totalconnected', $r{totalconnected})
                 or die $!;
    $result = $st_handle->execute ('totalmaxconnected', $r{totalmaxconnected})
                 or die $!;
    $result = $st_handle->execute ('reannounce_interval_avg', $r{reannounce_interval_avg})
                 or die $!;

    if ($result eq '0E0') {
        $st_handle = ${$self}{sqlhandle}->prepare 
           ('update ' . ${$self}{misctable} . 
            ' set value=? where name=?;');

        $result = $st_handle->execute ('totalconnected', $r{totalconnected})
                 or die $!;
        $result = $st_handle->execute ('totalmaxconnected', $r{totalmaxconnected})
                 or die $!;
        $result = $st_handle->execute ('reannounce_interval_avg', $r{reannounce_interval_avg})
                 or die $!;
    }
}

# should peerstats be generated for file X ?
sub dopeerstats ($) { #FOLD01
    my $self = shift;
    my $info_hash = shift;

    my $st_handle = ${$self}{sqlhandle}->prepare 
                   ('select peerstats from ' . ${$self}{peerstatstable} . 
                    ' where info_hash="' . $info_hash . '"');
    my $result = $st_handle->execute ();
    my $arrayref = $st_handle->fetchall_arrayref;

    my @rows = @$arrayref;
    return (scalar @rows != 0);
}

sub destroy {
    my $self = shift;
    ${$self}{sqlhandle}->disconnect () || 
                         die ('Could not disconnect from database');
}

package trackalyze::IO::multiplex; #fold00
# multiplexes two or more IO:: modules.

no strict 'refs';

sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = { 
                 objects => \@_
               }; 

    bless ($self, $class);
    return $self;
}

# returns filename for torrent-hash; use first object.
sub getfilename ($) {
    my $self = shift;
    return ${${$self}{objects}}[0]->getfilename (@_);
}

# returns filesize for torrent-hash
sub getfilesize ($) {
    my $self = shift;
    return ${${$self}{objects}}[0]->getfilesize (@_);
}

# saves data about one torrent
sub save ($) {
    my $self = shift;
    foreach my $object (@{${$self}{objects}}) {
        $object->save (@_);
    }
}

# saves miscellaneous data
sub savemisc ($) {
    my $self = shift;
    foreach my $object (@{${$self}{objects}}) {
        $object->savemisc (@_);
    }
}

# should peerstats be generated for file X ?
sub dopeerstats ($) {
    my $self = shift;
    return ${${$self}{objects}}[0]->dopeerstats (@_);
}

sub destroy {
    my $self = shift;
    foreach my $object (@{${$self}{objects}}) {
        $object->destroy (@_);
    }
}

package trackalyze::Graph::rrdtool; #FOLD00
# Provides Graphing via external rrdtool

no strict 'refs';

sub new { #fold01
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = {rrd => 'torrent.rrd',                  # Database to use
                outputdir => '~/public_html/torrent',  # Output directory
                useexternal => './graph.sh'};          # Use external script
                                                       # (useful for multi-
                                                       # graphs)

    bless ($self, $class);
    return $self;
}

sub setup { #fold01
    my $self = shift;
    my $ts = shift;

    if (! -e ${$self}{rrd}) {
        my $a  = `rrdtool create ${$self}{rrd} --start $ts \\
                    DS:torrent:COUNTER:600:0:U \\
                    DS:users:GAUGE:600:0:U \\
                    --step 300 \\
                    RRA:AVERAGE:0.5:1:2000 \\
                    RRA:AVERAGE:0.5:6:2000 \\
                    RRA:AVERAGE:0.5:24:2000 \\
                    RRA:AVERAGE:0.5:288:2000 \\
                    RRA:MAX:0.5:1:2000 \\
                    RRA:MAX:0.5:6:2000 \\
                    RRA:MAX:0.5:24:2000 \\
                    RRA:MAX:0.5:288:2000`;
    }
}

sub update { #fold01
    my $self = shift;
    my $ts = shift;
    my $upped = shift;
    my $users = shift;
 
    my $a = `rrdtool update ${$self}{rrd} $ts:$upped:$users`;
}

sub graph { #FOLD01
    my $self = shift;
    my $endtime = shift;

    unless (${$self}{useexternal}) {
 
        my $daystart = $endtime - 86400;
        my $weekstart = $endtime - 604800;
        my $monthstart = $endtime - 2592000;
        my $yearstart = $endtime - 31536000;

        my $title = 'BitTorrent Tracker $tracker network traffic';
        my $date = `date +'%b %d %H:%M:%S %Y'`;
        chomp $date;
 
        my $a = `rrdtool graph ${$self}{outputdir}/torrent_day.gif \\
                     --start $daystart -e $endtime \\
                     DEF:torrent_in_bytes=${$self}{rrd}:torrent:AVERAGE \\
                     "CDEF:torrent_in_bits=torrent_in_bytes,8,*" \\
                     "CDEF:torrent_bytes_in=torrent_in_bytes,0,1250000000,LIMIT,UN,0,torrent_in_bytes,IF,86400,*" \\
                     AREA:torrent_in_bits#00dd00:torrent \\
                     COMMENT:"                                        +--------------------------\\n" \\
                     COMMENT:"            maximum       average       current " \\
                     COMMENT:" | $date\\n" \\
                     COMMENT:"in    " \\
                     GPRINT:torrent_in_bits:MAX:'%7.2lf %sb/s' \\
                     GPRINT:torrent_in_bits:AVERAGE:"%7.2lf %Sb/s" \\
                     GPRINT:torrent_in_bits:LAST:"%7.2lf %Sb/s" \\
                     COMMENT:" |\\n" \\
                     GPRINT:torrent_bytes_in:AVERAGE:"ROUGHLY %7.2lf %sb total" \\
                     COMMENT:"                         |" \\
                     -v "bits/sec" \\
                     -t "$title (day) 5 min avg" -h 100 -w 392 \\
                     -x "HOUR:1:HOUR:6:HOUR:2:0:%H" -l 0`;

        $a = `rrdtool graph ${$self}{outputdir}/torrent_week.gif \\
                     --start $weekstart -e $endtime \\
                     DEF:torrent_in_bytes=${$self}{rrd}:torrent:AVERAGE \\
                     "CDEF:torrent_in_bits=torrent_in_bytes,8,*" \\
                     "CDEF:torrent_bytes_in=torrent_in_bytes,0,1250000000,LIMIT,UN,0,torrent_in_bytes,IF,604800,*" \\
                     AREA:torrent_in_bits#00dd00:torrent \\
                     COMMENT:"                                        +--------------------------\\n" \\
                     COMMENT:"            maximum       average       current " \\
                     COMMENT:" | $date\\n" \\
                     COMMENT:"in    " \\
                     GPRINT:torrent_in_bits:MAX:'%7.2lf %sb/s' \\
                     GPRINT:torrent_in_bits:AVERAGE:"%7.2lf %Sb/s" \\
                     GPRINT:torrent_in_bits:LAST:"%7.2lf %Sb/s" \\
                     COMMENT:" |\\n" \\
                     GPRINT:torrent_bytes_in:AVERAGE:"ROUGHLY %7.2lf %sb total" \\
                     COMMENT:"                         |" \\
                     -v "bits/sec" \\
                     -t "$title (week) 30 min avg" -h 100 -w 392 \\
                     -x "HOUR:6:DAY:1:DAY:1:0:%a" -l 0`;

        $a = `rrdtool graph ${$self}{outputdir}/torrent_month.gif \\
                     --start $monthstart -e $endtime \\
                     DEF:torrent_in_bytes=${$self}{rrd}:torrent:AVERAGE \\
                     "CDEF:torrent_in_bits=torrent_in_bytes,8,*" \\
                     "CDEF:torrent_bytes_in=torrent_in_bytes,0,1250000000,LIMIT,UN,0,torrent_in_bytes,IF,2592000,*" \\
                     AREA:torrent_in_bits#00dd00:torrent \\
                     COMMENT:"                                        +--------------------------\\n" \\
                     COMMENT:"            maximum       average       current " \\
                     COMMENT:" | $date\\n" \\
                     COMMENT:"in    " \\
                     GPRINT:torrent_in_bits:MAX:'%7.2lf %sb/s' \\
                     GPRINT:torrent_in_bits:AVERAGE:"%7.2lf %Sb/s" \\
                     GPRINT:torrent_in_bits:LAST:"%7.2lf %Sb/s" \\
                     COMMENT:" |\\n" \\
                     GPRINT:torrent_bytes_in:AVERAGE:"ROUGHLY %7.2lf %sb total" \\
                     COMMENT:"                         |" \\
                     -v "bits/sec" \\
                     -t "$title (month) 2 hour avg" -h 100 -w 392 \\
                     -x "DAY:1:WEEK:1:WEEK:1:0:Week %W" -l 0`;

        $a = `rrdtool graph ${$self}{outputdir}/torrent_year.gif \\
                     --start $yearstart -e $endtime \\
                     DEF:torrent_in_bytes=${$self}{rrd}:torrent:AVERAGE \\
                     "CDEF:torrent_in_bits=torrent_in_bytes,8,*" \\
                     "CDEF:torrent_bytes_in=torrent_in_bytes,0,1250000000,LIMIT,UN,0,torrent_in_bytes,IF,31536000,*" \\
                     AREA:torrent_in_bits#00dd00:torrent \\
                     COMMENT:"                                        +--------------------------\\n" \\
                     COMMENT:"            maximum       average       current " \\
                     COMMENT:" | $date\\n" \\
                     COMMENT:"in    " \\
                     GPRINT:torrent_in_bits:MAX:'%7.2lf %sb/s' \\
                     GPRINT:torrent_in_bits:AVERAGE:"%7.2lf %Sb/s" \\
                     GPRINT:torrent_in_bits:LAST:"%7.2lf %Sb/s" \\
                     COMMENT:" |\\n" \\
                     GPRINT:torrent_bytes_in:AVERAGE:"ROUGHLY %7.2lf %sb total" \\
                     COMMENT:"                         |" \\
                     -v "bits/sec" \\
                     -t "$title (year) 1 day avg" -h 100 -w 392 \\
                     -x "MONTH:1:MONTH:1:MONTH:1:0:%b" -l 0`;
    } else {
       my $a = `${$self}{useexternal}`;
    }
}