#====================================================================== package DBD::SQLite::VirtualTable::FileContent; #====================================================================== use strict; use warnings; use base 'DBD::SQLite::VirtualTable'; my %option_ok = map {($_ => 1)} qw/source content_col path_col expose root get_content/; my %defaults = ( content_col => "content", path_col => "path", expose => "*", get_content => "DBD::SQLite::VirtualTable::FileContent::get_content", ); #---------------------------------------------------------------------- # object instanciation #---------------------------------------------------------------------- sub NEW { my $class = shift; my $self = $class->_PREPARE_SELF(@_); local $" = ", "; # for array interpolation in strings # initial parameter check !@{$self->{columns}} or die "${class}->NEW(): illegal options: @{$self->{columns}}"; $self->{options}{source} or die "${class}->NEW(): missing (source=...)"; my @bad_options = grep {!$option_ok{$_}} keys %{$self->{options}}; !@bad_options or die "${class}->NEW(): bad options: @bad_options"; # defaults ... tempted to use //= but we still want to support perl 5.8 :-( foreach my $k (keys %defaults) { defined $self->{options}{$k} or $self->{options}{$k} = $defaults{$k}; } # get list of columns from the source table my $src_table = $self->{options}{source}; my $sql = "PRAGMA table_info($src_table)"; my $dbh = ${$self->{dbh_ref}}; # can't use method ->dbh, not blessed yet my $src_info = $dbh->selectall_arrayref($sql, {Slice => [1, 2]}); @$src_info or die "${class}->NEW(source=$src_table): no such table in database"; # associate each source colname with its type info or " " (should eval true) my %src_col = map { ($_->[0] => $_->[1] || " ") } @$src_info; # check / complete the exposed columns my @exposed_cols; if ($self->{options}{expose} eq '*') { @exposed_cols = map {$_->[0]} @$src_info; } else { @exposed_cols = split /\s*,\s*/, $self->{options}{expose}; my @bad_cols = grep { !$src_col{$_} } @exposed_cols; die "table $src_table has no column named @bad_cols" if @bad_cols; } for (@exposed_cols) { die "$class: $self->{options}{content_col} cannot be both the " . "content_col and an exposed col" if $_ eq $self->{options}{content_col}; } # build the list of columns for this table $self->{columns} = [ "$self->{options}{content_col} TEXT", map {"$_ $src_col{$_}"} @exposed_cols ]; # acquire a coderef to the get_content() implementation, which # was given as a symbolic reference in %options no strict 'refs'; $self->{get_content} = \ &{$self->{options}{get_content}}; bless $self, $class; } sub _build_headers { my $self = shift; my $cols = $self->sqlite_table_info; # headers : names of columns, without type information $self->{headers} = [ map {$_->{name}} @$cols ]; } #---------------------------------------------------------------------- # method for initiating a search #---------------------------------------------------------------------- sub BEST_INDEX { my ($self, $constraints, $order_by) = @_; $self->_build_headers if !$self->{headers}; my @conditions; my $ix = 0; foreach my $constraint (grep {$_->{usable}} @$constraints) { my $col = $constraint->{col}; # if this is the content column, skip because we can't filter on it next if $col == 0; # for other columns, build a fragment for SQL WHERE on the underlying table my $colname = $col == -1 ? "rowid" : $self->{headers}[$col]; push @conditions, "$colname $constraint->{op} ?"; $constraint->{argvIndex} = $ix++; $constraint->{omit} = 1; # SQLite doesn't need to re-check the op } # TODO : exploit $order_by to add ordering clauses within idxStr my $outputs = { idxNum => 1, idxStr => join(" AND ", @conditions), orderByConsumed => 0, estimatedCost => 1.0, estimatedRows => undef, }; return $outputs; } #---------------------------------------------------------------------- # method for preventing updates #---------------------------------------------------------------------- sub _SQLITE_UPDATE { my ($self, $old_rowid, $new_rowid, @values) = @_; die "attempt to update a readonly virtual table"; } #---------------------------------------------------------------------- # file slurping function (not a method!) #---------------------------------------------------------------------- sub get_content { my ($path, $root) = @_; $path = "$root/$path" if $root; my $content = ""; if (open my $fh, "<", $path) { local $/; # slurp the whole file into a scalar $content = <$fh>; close $fh; } else { warn "can't open $path"; } return $content; } #====================================================================== package DBD::SQLite::VirtualTable::FileContent::Cursor; #====================================================================== use strict; use warnings; use base "DBD::SQLite::VirtualTable::Cursor"; sub FILTER { my ($self, $idxNum, $idxStr, @values) = @_; my $vtable = $self->{vtable}; # build SQL local $" = ", "; my @cols = @{$vtable->{headers}}; $cols[0] = 'rowid'; # replace the content column by the rowid push @cols, $vtable->{options}{path_col}; # path col in last position my $sql = "SELECT @cols FROM $vtable->{options}{source}"; $sql .= " WHERE $idxStr" if $idxStr; # request on the index table my $dbh = $vtable->dbh; $self->{sth} = $dbh->prepare($sql) or die DBI->errstr; $self->{sth}->execute(@values); $self->{row} = $self->{sth}->fetchrow_arrayref; return; } sub EOF { my ($self) = @_; return !$self->{row}; } sub NEXT { my ($self) = @_; $self->{row} = $self->{sth}->fetchrow_arrayref; } sub COLUMN { my ($self, $idxCol) = @_; return $idxCol == 0 ? $self->file_content : $self->{row}[$idxCol]; } sub ROWID { my ($self) = @_; return $self->{row}[0]; } sub file_content { my ($self) = @_; my $root = $self->{vtable}{options}{root}; my $path = $self->{row}[-1]; my $get_content_func = $self->{vtable}{get_content}; return $get_content_func->($path, $root); } 1; __END__ =head1 NAME DBD::SQLite::VirtualTable::FileContent -- virtual table for viewing file contents =head1 SYNOPSIS Within Perl : $dbh->sqlite_create_module(fcontent => "DBD::SQLite::VirtualTable::FileContent"); Then, within SQL : CREATE VIRTUAL TABLE tbl USING fcontent( source = src_table, content_col = content, path_col = path, expose = "path, col1, col2, col3", -- or "*" root = "/foo/bar" get_content = Foo::Bar::read_from_file ); SELECT col1, path, content FROM tbl WHERE ...; =head1 DESCRIPTION A "FileContent" virtual table is bound to some underlying I, which has a column containing paths to files. The virtual table behaves like a database view on the source table, with an added column which exposes the content from those files. This is especially useful as an "external content" to some fulltext table (see L) : the index table stores some metadata about files, and then the fulltext engine can index both the metadata and the file contents. =head1 PARAMETERS Parameters for creating a C virtual table are specified within the C statement, just like regular column declarations, but with an '=' sign. Authorized parameters are : =over =item C The name of the I. This parameter is mandatory. All other parameters are optional. =item C The name of the virtual column exposing file contents. The default is C. =item C The name of the column in C that contains paths to files. The default is C. =item C A comma-separated list (within double quotes) of source column names to be exposed by the virtual table. The default is C<"*">, which means all source columns. =item C An optional root directory that will be prepended to the I column when opening files. =item C Fully qualified name of a Perl function for reading file contents. The default implementation just slurps the entire file into a string; but this hook can point to more sophisticated implementations, like for example a function that would remove html tags. The hooked function is called like this : $file_content = $get_content->($path, $root); =back =head1 AUTHOR Laurent Dami Edami@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright Laurent Dami, 2014. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut