#!/usr/bin/perl -w
#
# phpWebLogAnalyzer - powerful weblog and analyzer
#
# utils/sessions_split.pl - Perl script used for spliting
# accesses in database into sessions according to timeout
# ____________________________________________________________
#
# Developed by Ondrej Jombik <nepto@platon.sk>
# Copyright (c) 2001-2005 Platon SDG, http://platon.sk/
# All rights reserved.
#
# See README file for more information about this software.
# See COPYING file for license information.
#
# Download the latest version from
# http://platon.sk/projects/phpWebLogAnalyzer/
#
# $Platon: phpWebLogAnalyzer/utils/sessions-split.pl,v 1.8 2003/01/27 00:57:49 nepto Exp $
use strict;
use DBI;
#
# Configuration.
#
#use sessions_split_config;
use config;
#
# Variables.
#
my @ids = ();
my @tmp_ar = ();
my $i;
my $query;
my $max_session_id;
my $total_records;
my $total_sessions;
my $dbh;
my $sth;
#
# Database connection.
#
$dbh = DBI->connect(
'dbi:' . $config::db{driver}
. ':database=' . $config::db{name}
. ';host=' . $config::db{host}
. ';port=' . $config::db{port},
$config::db{username},
$config::db{password} )
|| die "can't connect to database: $DBI::errstr";
#
# Main group by session split.
#
$query = q{
SELECT remote_addr_id AS ra_id, http_user_agent_id AS ua_id
FROM weblog_accesses
GROUP BY remote_addr_id, http_user_agent_id };
$sth = $dbh->prepare($query)
|| die "can't prepare statement: $DBI::errstr";
$sth->execute
|| die "can't execute statement: $DBI::errstr";
for ( $i = 0 ; $ids[$i] = $sth->fetchrow_hashref() ; $i++ ) { }
die $sth->errstr if $sth->err;
pop (@ids);
#
# Counting max_session_id.
#
if ( 0 && scalar(@ids) > 0 ) {
$query = q{
SELECT MAX(session_id) AS max_session_id
FROM weblog_accesses };
$sth = $dbh->prepare($query)
|| die "can't prepare statement: $DBI::errstr";
$sth->execute
|| die "can't execute statement: $DBI::errstr";
($max_session_id) = $sth->fetchrow_array();
print "Maximal session ID is $max_session_id\n";
}
else {
$max_session_id = 1;
}
#
# Main processing.
#
$total_records = $total_sessions = 0;
foreach (@ids) {
print 'Processing ('
. ( $_->{ra_id} ? $_->{ra_id} : 'NULL' ) . ', '
. ( $_->{ua_id} ? $_->{ua_id} : 'NULL' ) . '): ';
#
# Fetching all click for current remote_addr and http_user_agent.
#
$query = sprintf(
'SELECT id, UNIX_TIMESTAMP(datetime) AS time'
. ' FROM weblog_accesses'
. ' WHERE remote_addr_id %s'
. ' AND http_user_agent_id %s'
. ' ORDER BY time',
$_->{ra_id} ? '= ' . $_->{ra_id} : 'IS NULL',
$_->{ua_id} ? '= ' . $_->{ua_id} : 'IS NULL' );
$sth = $dbh->prepare($query)
|| die "can't prepare statement: $DBI::errstr";
$sth->execute
|| die "can't execute statement: $DBI::errstr";
my @data = ();
my $start_session_id = $max_session_id;
for ( $i = 0 ; $data[$i] = $sth->fetchrow_hashref() ; $i++ ) {
$data[$i]->{time} = 0 if not defined $data[$i]->{time};
}
die $sth->errstr if $sth->err;
pop(@data);
$total_records += scalar(@data);
print scalar(@data) . ' records, ';
#
# Split fetched data into sessions according to $session_timeout.
#
for ( $i = 0 ; $i < scalar(@data) ; $i++ ) {
#print "$i | " .$data[$i]->{time}." | ".$data[ $i - 1 ]->{time}."\n";
if ($i) {
if ( $data[$i]->{time} - $data[ $i - 1 ]->{time}
> $config::session_timeout ) {
$max_session_id++;
}
}
$query = sprintf( 'UPDATE weblog_accesses'
. ' SET session_id = %d WHERE id = %d',
$max_session_id, $data[$i]->{id} );
$sth = $dbh->prepare($query)
|| die "can't prepare statement: $DBI::errstr";
$sth->execute()
|| die "can't execute statement: $DBI::errstr";
}
$max_session_id++ if ( scalar(@data) > 0 );
$total_sessions += $max_session_id - $start_session_id;
print $max_session_id - $start_session_id . " sessions.\n";
}
#
# Closing database connection.
#
$dbh->disconnect();
#
# Total statistics print.
#
print '-' x 60 . "\n";
print "Total records = $total_records\n";
print "Total sessions = $total_sessions\n";
Platon Group <platon@platon.org> http://platon.org/
|