#NAME

DBD::SQLite::Cookbook - The DBD::SQLite Cookbook

#DESCRIPTION

This is the DBD::SQLite cookbook.

It is intended to provide a place to keep a variety of functions and formals for use in callback APIs in DBD::SQLite.

#AGGREGATE FUNCTIONS

#Variance

This is a simple aggregate function which returns a variance. It is adapted from an example implementation in pysqlite.

package variance;

sub new { bless [], shift; }

sub step {
    my ( $self, $value ) = @_;

    push @$self, $value;
}

sub finalize {
    my $self = $_[0];

    my $n = @$self;

    # Variance is NULL unless there is more than one row
    return undef unless $n || $n == 1;

    my $mu = 0;
    foreach my $v ( @$self ) {
        $mu += $v;
    }
    $mu /= $n;

    my $sigma = 0;
    foreach my $v ( @$self ) {
        $sigma += ($v - $mu)**2;
    }
    $sigma = $sigma / ($n - 1);

    return $sigma;
}

# NOTE: If you use an older DBI (< 1.608),
# use $dbh->func(..., "create_aggregate") instead.
$dbh->sqlite_create_aggregate( "variance", 1, 'variance' );

The function can then be used as:

SELECT group_name, variance(score)
FROM results
GROUP BY group_name;

#Variance (Memory Efficient)

A more efficient variance function, optimized for memory usage at the expense of precision:

package variance2;

sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }

sub step {
    my ( $self, $value ) = @_;
    my $hash = $self->{hash};

    # by truncating and hashing, we can comsume many more data points
    $value = int($value); # change depending on need for precision
                          # use sprintf for arbitrary fp precision
    if (exists $hash->{$value}) {
        $hash->{$value}++;
    } else {
        $hash->{$value} = 1;
    }
    $self->{sum} += $value;
    $self->{count}++;
}

sub finalize {
    my $self = $_[0];

    # Variance is NULL unless there is more than one row
    return undef unless $self->{count} > 1;

    # calculate avg
    my $mu = $self->{sum} / $self->{count};

    my $sigma = 0;
    while (my ($h, $v) = each %{$self->{hash}}) {
        $sigma += (($h - $mu)**2) * $v;
    }
    $sigma = $sigma / ($self->{count} - 1);

    return $sigma;
}

The function can then be used as:

SELECT group_name, variance2(score)
FROM results
GROUP BY group_name;

#Variance (Highly Scalable)

A third variable implementation, designed for arbitrarily large data sets:

package variance3;

sub new { bless {mu=>0, count=>0, S=>0}, shift; }

sub step {
    my ( $self, $value ) = @_;
    $self->{count}++;
    my $delta = $value - $self->{mu};
    $self->{mu} += $delta/$self->{count};
    $self->{S} += $delta*($value - $self->{mu});
}

sub finalize {
    my $self = $_[0];
    return $self->{S} / ($self->{count} - 1);
}

The function can then be used as:

SELECT group_name, variance3(score)
FROM results
GROUP BY group_name;

#SUPPORT

Bugs should be reported via the CPAN bug tracker at

http://rt.cpan.org/NoAuth/ReportBug.html?Queue=DBD-SQLite

#TO DO

Add more and varied cookbook recipes, until we have enough to turn them into a separate CPAN distribution.
Create a series of tests scripts that validate the cookbook recipes.

#AUTHOR

Adam Kennedy <adamk@cpan.org>

#COPYRIGHT

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the LICENSE file included with this module.

CONTENTS

#NAME

#DESCRIPTION

#AGGREGATE FUNCTIONS

#Variance

#Variance (Memory Efficient)

#Variance (Highly Scalable)

#SUPPORT

#TO DO

#AUTHOR

#COPYRIGHT