package CHI::Driver::File; $CHI::Driver::File::VERSION = '0.61'; use Carp; use Cwd qw(realpath cwd); use CHI::Util qw(fast_catdir fast_catfile unique_id read_dir read_file write_file); use Digest::JHash qw(jhash); use File::Basename qw(basename dirname); use File::Find qw(find); use File::Path qw(mkpath rmtree); use File::Spec::Functions qw(catdir catfile splitdir tmpdir); use Log::Any qw($log); use Moo; use MooX::Types::MooseLike::Base qw(:all); use strict; use warnings; extends 'CHI::Driver'; has '+max_key_length' => ( default => sub { 248 } ); has 'depth' => ( is => 'ro', isa => Int, default => sub { 2 } ); has 'dir_create_mode' => ( is => 'ro', isa => Int, default => sub { oct(775) } ); has 'file_create_mode' => ( is => 'ro', isa => Int, default => sub { oct(666) } ); has 'file_extension' => ( is => 'ro', isa => Str, default => sub { '.dat' } ); has 'path_to_namespace' => ( is => 'lazy' ); has 'root_dir' => ( is => 'ro', isa => Str, default => sub { catdir( tmpdir(), 'chi-driver-file' ) } ); sub BUILDARGS { my ( $class, %params ) = @_; # Backward compat # if ( defined( $params{key_digest} ) ) { $params{key_digester} = $params{key_digest}; $params{max_key_length} = 0; } return \%params; } sub _build_path_to_namespace { my $self = shift; my $namespace = $self->escape_for_filename( $self->namespace ); $namespace = $self->digest_key($namespace) if length($namespace) > $self->max_key_length; return catdir( $self->root_dir, $namespace ); } # Escape key to make safe for filesystem; if it then grows larger than # max_key_length, digest it. # sub escape_key { my ( $self, $key ) = @_; my $new_key = $self->escape_for_filename($key); if ( length($new_key) > length($key) && length($new_key) > $self->max_key_length() ) { $new_key = $self->digest_key($new_key); } return $new_key; } sub unescape_key { my ( $self, $key ) = @_; return $self->unescape_for_filename($key); } sub fetch { my ( $self, $key ) = @_; my $file = $self->path_to_key($key); if ( defined $file && -f $file ) { return read_file($file); } else { return undef; } } sub store { my ( $self, $key, $data ) = @_; my $dir; my $file = $self->path_to_key( $key, \$dir ) or return undef; mkpath( $dir, 0, $self->{dir_create_mode} ) if !-d $dir; # Possibly generate a temporary file - if generate_temporary_filename returns undef, # store to the destination file directly # my $temp_file = $self->generate_temporary_filename( $dir, $file ); my $store_file = defined($temp_file) ? $temp_file : $file; write_file( $store_file, $data, $self->{file_create_mode} ); if ( defined($temp_file) ) { # Rename can fail in rare race conditions...try multiple times # for ( my $try = 0 ; $try < 3 ; $try++ ) { last if ( rename( $temp_file, $file ) ); } if ( -f $temp_file ) { my $error = $!; unlink($temp_file); die "could not rename '$temp_file' to '$file': $error"; } } } sub remove { my ( $self, $key ) = @_; my $file = $self->path_to_key($key) or return undef; unlink($file); } sub clear { my ($self) = @_; my $namespace_dir = $self->path_to_namespace; return if !-d $namespace_dir; my $renamed_dir = join( ".", $namespace_dir, unique_id() ); rename( $namespace_dir, $renamed_dir ); rmtree($renamed_dir); die "could not remove '$renamed_dir'" if -d $renamed_dir; } sub get_keys { my ($self) = @_; my @filepaths; my $re = quotemeta( $self->file_extension ); my $wanted = sub { push( @filepaths, $_ ) if -f && /${re}$/ }; my @keys = $self->_collect_keys_via_file_find( \@filepaths, $wanted ); return @keys; } sub _collect_keys_via_file_find { my ( $self, $filepaths, $wanted ) = @_; my $namespace_dir = $self->path_to_namespace; return () if !-d $namespace_dir; find( { wanted => $wanted, no_chdir => 1 }, $namespace_dir ); my @keys; my $key_start = length($namespace_dir) + 1 + $self->depth * 2; my $subtract = -1 * length( $self->file_extension ); foreach my $filepath (@$filepaths) { my $key = substr( $filepath, $key_start, $subtract ); $key = $self->unescape_key( join( "", splitdir($key) ) ); push( @keys, $key ); } return @keys; } sub generate_temporary_filename { my ( $self, $dir, $file ) = @_; # Generate a temporary filename using unique_id - faster than tempfile, as long as # we don't need automatic removal. # Note: $file not used here, but might be used in an override. # return fast_catfile( $dir, unique_id() ); } sub get_namespaces { my ($self) = @_; my $root_dir = $self->root_dir(); return () if !-d $root_dir; my @contents = read_dir($root_dir); my @namespaces = map { $self->unescape_for_filename($_) } grep { $self->is_escaped_for_filename($_) } grep { -d fast_catdir( $root_dir, $_ ) } @contents; return @namespaces; } my %hex_strings = map { ( $_, sprintf( "%x", $_ ) ) } ( 0x0 .. 0xf ); sub path_to_key { my ( $self, $key, $dir_ref ) = @_; return undef if !defined($key); my @paths = ( $self->path_to_namespace ); my $orig_key = $key; $key = $self->escape_key($key); # Hack: If key is exactly 32 hex chars, assume it's an md5 digest and # take a prefix of it for bucket. Digesting will usually happen in # transform_key and there's no good way for us to know it occurred. # if ( $key =~ /^[0-9a-f]{32}$/ ) { push( @paths, map { substr( $key, $_, 1 ) } ( 0 .. $self->{depth} - 1 ) ); } else { # Hash key to a 32-bit integer (using non-escaped key for back compat) # my $bucket = jhash($orig_key); # Create $self->{depth} subdirectories, containing a maximum of 64 # subdirectories each, by successively shifting 4 bits off the # bucket and converting to hex. # for ( my $d = $self->{depth} ; $d > 0 ; $d-- ) { push( @paths, $hex_strings{ $bucket & 0xf } ); $bucket >>= 4; } } # Join paths together, computing dir separately if $dir_ref was passed. # my $filename = $key . $self->file_extension; my $filepath; if ( defined $dir_ref && ref($dir_ref) ) { my $dir = fast_catdir(@paths); $filepath = fast_catfile( $dir, $filename ); $$dir_ref = $dir; } else { $filepath = fast_catfile( @paths, $filename ); } return $filepath; } 1; __END__ =pod =head1 NAME CHI::Driver::File - File-based cache using one file per entry in a multi-level directory structure =head1 VERSION version 0.61 =head1 SYNOPSIS use CHI; my $cache = CHI->new( driver => 'File', root_dir => '/path/to/cache/root', depth => 3, max_key_length => 64 ); =head1 DESCRIPTION This cache driver stores data on the filesystem, so that it can be shared between processes on a single machine, or even on multiple machines if using NFS. Each item is stored in its own file. By default, during a set, a temporary file is created and then atomically renamed to the proper file. While not the most efficient, it eliminates the need for locking (with multiple overlapping sets, the last one "wins") and makes this cache usable in environments like NFS where locking might normally be undesirable. By default, the base filename is the key itself, with unsafe characters escaped similar to URL escaping. If the escaped key is larger than L (default 248 characters), it will be L. You may want to lower L if you are storing a lot of items as long filenames can be more expensive to work with. The files are evenly distributed within a multi-level directory structure with a customizable L, to minimize the time needed to search for a given entry. =head1 CONSTRUCTOR OPTIONS When using this driver, the following options can be passed to CHI->new() in addition to the L. =over =item root_dir The location in the filesystem that will hold the root of the cache. Defaults to a directory called 'chi-driver-file' under the OS default temp directory (e.g. '/tmp' on UNIX). This directory will be created as needed on the first cache set. =item depth The number of subdirectories deep to place cache files. Defaults to 2. This should be large enough that no leaf directory has more than a few hundred files. Each non-leaf directory contains up to 16 subdirectories (0-9, A-F). =item dir_create_mode Permissions mode to use when creating directories. Defaults to 0775. =item file_create_mode Permissions mode to use when creating files, modified by the current umask. Defaults to 0666. =item file_extension Extension to append to filename. Default is C<.dat>. =back =head1 METHODS =over =item path_to_key ( $key ) Returns the full path to the cache file representing $key, whether or not that entry exists. Returns the empty list if a valid path cannot be computed, for example if the key is too long. =item path_to_namespace Returns the full path to the directory representing this cache's namespace, whether or not it has any entries. =back =head1 TEMPORARY FILE RENAME By default, during a set, a temporary file is created and then atomically renamed to the proper file. This eliminates the need for locking. You can subclass and override method I to either change the path of the temporary filename, or skip the temporary file and rename altogether by having it return undef. =head1 SEE ALSO L =head1 AUTHOR Jonathan Swartz =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2021 by Jonathan Swartz. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut