# vim: ts=8 sw=4 expandtab:
##########################################################
# This script is part of the Devel::NYTProf distribution
#
# Copyright, contact and other information can be found
# at the bottom of this file, or by going to:
# http://metacpan.org/release/Devel-NYTProf/
#
###########################################################
package Devel::NYTProf::Data;

=head1 NAME

Devel::NYTProf::Data - L<Devel::NYTProf> data loading and manipulation

=head1 SYNOPSIS

  use Devel::NYTProf::Data;

  $profile = Devel::NYTProf::Data->new( { filename => 'nytprof.out' } );

  $profile->dump_profile_data();

=head1 DESCRIPTION

Reads a profile data file written by L<Devel::NYTProf>, aggregates the
contents, and returns the results as a blessed data structure.

Access to the data should be via methods in this class to avoid breaking
encapsulation (and thus breaking your code when the data structures change in
future versions).

B<NOTE:> the documentation is out of date and may not be updated soon.
It's also likely that the API will change drastically in future.
It's possible, for example, that the data model will switch to use SQLite
and the http://metacpan.org/pod/ORLite ORM.

Let me know if you come to depend on a particular API and I'll try to preserve
it if practical.

=head1 METHODS

=cut

use warnings;
use strict;

use Carp qw(carp croak cluck);
use Cwd qw(getcwd);
use Scalar::Util qw(blessed);

use Devel::NYTProf::Core;
use Devel::NYTProf::FileInfo;
use Devel::NYTProf::SubInfo;
use Devel::NYTProf::Util qw( trace_level _dumper );

our $VERSION = '6.14';

=head2 new

  $profile = Devel::NYTProf::Data->new( );

  $profile = Devel::NYTProf::Data->new( {
    filename => 'nytprof.out', # default
    quiet    => 0,             # default, 1 to silence message
  } );

Reads the specified file containing profile data written by L<Devel::NYTProf>,
aggregates the contents, and returns the results as a blessed data structure.

=cut

# Constructor.
#
# Arguments: an optional hash ref with these keys (all optional):
#   filename            - profile file to read (default 'nytprof.out')
#   quiet               - true to suppress the progress messages on STDOUT
#   callback            - passed straight through to the loader; when given,
#                         no profile object is built and undef is returned
#   skip_collapse_evals - true to skip the eval-collapsing pass
#
# Croaks if the file does not exist.  Returns the blessed profile object.
sub new {
    my $class = shift;
    my $args = shift || { };

    my $file = $args->{filename} ||= 'nytprof.out';
    croak "Devel::NYTProf::new() could not locate file for processing"
        unless -f $file;

    print "Reading $file\n" unless $args->{quiet};

    my $profile = load_profile_data_from_file(
        $file,
        $args->{callback},
    );

    # in callback mode the caller gets nothing back
    return undef if $args->{callback};

    print "Processing $file data\n" unless $args->{quiet};

    bless $profile => $class;

    my $fid_fileinfo = $profile->{fid_fileinfo};
    my $sub_subinfo  = $profile->{sub_subinfo};

    # add profile ref so fidinfo & subinfo objects
    # XXX circular ref, add weaken
    for (@$fid_fileinfo) {
        $_ and $_->[7] = $profile;
    }
    $_->[7] = $profile for values %$sub_subinfo;

    # bless sub_subinfo data into the sibling ...::SubInfo class
    (my $sub_class = $class) =~ s/\w+$/SubInfo/;
    $_ and bless $_ => $sub_class for values %$sub_subinfo;

    # create profiler_active attribute by subtracting from profiler_duration
    # currently we only subtract cumulative_overhead_ticks
    my $attribute = $profile->{attribute};
    my $overhead_time = $attribute->{cumulative_overhead_ticks} / $attribute->{ticks_per_sec};
    $attribute->{profiler_active} = $attribute->{profiler_duration} - $overhead_time;

    # find subs that have calls but no fid
    my @homeless_subs = grep { $_->calls and not $_->fid } values %$sub_subinfo;
    if (@homeless_subs) { # give them a home...
        # currently just the first existing fileinfo
        # XXX ought to create a new dummy fileinfo for them
        my $new_fi = $profile->fileinfo_of(1);
        $_->_alter_fileinfo(undef, $new_fi) for @homeless_subs;
    }

    # Where a given eval() has been invoked more than once
    # rollup the corresponding fids if they're "uninteresting".
    if (not $args->{skip_collapse_evals}) {
        for my $fi ($profile->noneval_fileinfos) {
            $profile->collapse_evals_in($fi);
        }
    }

    $profile->_clear_caches;

    # a hack for testing/debugging
    # $ENV{NYTPROF_ONLOAD} must be a colon-delimited string of
    # equal-sign-delimited substrings, e.g.,
    # 'alpha=beta:gamma=delta:dump=1:exit=1';
    if (my $env = $ENV{NYTPROF_ONLOAD}) {
        # Always yield exactly one key/value pair per item, so that an item
        # without '=' can't shift every later key into a value slot.
        my %onload = map  { my ($k, $v) = split /=/, $_, 2; ($k => $v) }
                     grep { length }
                     split /:/, $env, -1;
        warn _dumper($profile) if $onload{dump};
        exit $onload{exit}     if defined $onload{exit};
    }

    return $profile;
}


# Collapse "sibling" evals of $parent_fi: the eval fileinfos returned by
# $parent_fi->has_evals are grouped by the line they were invoked on
# (eval_line), nested evals being processed first.  Where several evals share
# a line they are grouped by src_digest and each group of more than one is
# handed to $parent_fi->collapse_sibling_evals.
# Always returns 1.
sub collapse_evals_in {
    my ($profile, $parent_fi) = @_;
    my $parent_fid = $parent_fi->fid;

    my %evals_on_line;
    for my $fi ($parent_fi->has_evals) {
        $profile->collapse_evals_in($fi); # recurse first
        push @{ $evals_on_line{$fi->eval_line} }, $fi;
    }

    while ( my ($line, $siblings) = each %evals_on_line) {

        next if @$siblings == 1;

        # compare src code of evals and collapse identical ones
        my %src_keyed;
        for my $fi (@$siblings) {
            my $key = $fi->src_digest;
            if (!$key) { # include extra info to segregate when there's no src
                $key = '' if not defined $key;  # avoid an undef hash key below
                $key .= ',evals' if $fi->has_evals;
                $key .= ',subs'  if $fi->subs_defined;
            }
            push @{$src_keyed{$key}}, $fi;
        }

        if (trace_level() >= 2) {
            my @subs  = map { $_->subs_defined } @$siblings;
            my @evals = map { $_->has_evals(0) } @$siblings;
            warn sprintf "%d:%d: has %d sibling evals (subs %d, evals %d, keys %d) in %s; fids: %s\n",
                    $parent_fid, $line, scalar @$siblings, scalar @subs, scalar @evals,
                    scalar keys %src_keyed,
                    $parent_fi->filename,
                    join(" ", map { $_->fid } @$siblings);
            for my $si (@subs) {
                warn sprintf "%d:%d evals: define sub %s in fid %s\n",
                        $parent_fid, $line, $si->subname, $si->fid;
            }
            for my $fi (@evals) {
                warn sprintf "%d:%d evals: execute eval %s\n",
                        $parent_fid, $line, $fi->filename;
            }
        }

        # if 'too many' distinct eval source keys then simply collapse all
        my $max_evals_siblings = $ENV{NYTPROF_MAX_EVAL_SIBLINGS} || 200;
        if (keys(%src_keyed) > $max_evals_siblings) {
            $parent_fi->collapse_sibling_evals(@$siblings);
        }
        else {
            # finesse: consider each distinct src in turn

            while ( my ($key, $src_same_fis) = each %src_keyed ) {
                next if @$src_same_fis == 1; # unique src key
                my @fids = map { $_->fid } @$src_same_fis;

                if (grep { $_->has_evals(0) } @$src_same_fis) {
                    warn "evals($key): collapsing skipped due to evals in @fids\n" if trace_level() >= 3;
                }
                else {
                    warn "evals($key): collapsing identical: @fids\n" if trace_level() >= 3;
                    my $fi = $parent_fi->collapse_sibling_evals(@$src_same_fis);
                    @$src_same_fis = ( $fi ); # update list in-place
                }
            }
        }
    }

    return 1;
}

# Per-object scratch hash for memoized results; created on first use.
sub _caches       { return shift->{caches} ||= {} }
# Discards all memoized results (returns whatever was deleted).
sub _clear_caches { return delete shift->{caches} }

# Returns the profile's attribute hash ref (a fresh empty hash if there is none).
sub attributes {
    return shift->{attribute} || {};
}

# Returns the profile's option hash ref (a fresh empty hash if there is none).
sub options {
    return shift->{option} || {};
}

# Returns a new hash ref mapping subroutine name => subinfo object.
# The hash is a copy; the subinfo objects themselves are shared.
sub subname_subinfo_map {
    return { %{ shift->{sub_subinfo} } }; # shallow copy
}

# Removes the entry for $si (keyed by its subname) from the profile and
# carps if the removed entry is missing or is not the very same object.
sub _disconnect_subinfo {
    my ($self, $si) = @_;
    my $subname = $si->subname;
    my $si2 = delete $self->{sub_subinfo}{$subname};
    # sanity check
    # (test !$si2 first so a missing entry isn't compared numerically,
    # which would add an 'uninitialized value' warning to the carp)
    carp sprintf "disconnect_subinfo: deleted entry %s %s doesn't match argument %s %s",
            ($si2) ? ($si2, $si2->subname) : ('undef', 'undef'),
            $si, $subname
        if !$si2 or $si2 != $si or $si2->subname ne $subname;
    # do more?
}


# package_subinfo_map($merge_subs, $nested_pkgs)
#
# Groups the subinfo objects by the package part of the subroutine name and
# returns a hash ref.
#
# Flat form ($nested_pkgs false): the key is the sub name with everything
# after the last '::' removed, so the trailing '::' is kept, and the value is
# an array ref of subinfos:
#   { 'Foo::Bar::' => [ ... ] }
# A name containing no '::' is used as the key unchanged.
#
# Nested form ($nested_pkgs true): one hash level per package name component,
# with the subinfos of a package stored under the empty-string key:
#   { Foo => { Bar => { '' => [ ... ] } } }
#
# If $merge_subs is true then each array contains a single 'merged' subinfo:
# a clone of the first with the others merged in.
sub package_subinfo_map {
    my $self = shift;
    my ($merge_subs, $nested_pkgs) = @_;

    my %pkg;
    my %to_merge;

    my $all_subs = $self->subname_subinfo_map;
    while ( my ($name, $subinfo) = each %$all_subs ) {
        $name =~ s/^(.*::).*/$1/; # XXX $subinfo->package
        my $subinfos;
        if ($nested_pkgs) {
            my @parts = split /::/, $name;
            my $node = $pkg{ shift @parts } ||= {};
            # TODO: Need to figure out how to provide a multi-part name, e.g., 'alpha::beta'
            # Otherwise @parts is now empty and so next line is not exercised
            # during testing.
            $node = $node->{ shift @parts } ||= {} while @parts;
            $subinfos = $node->{''} ||= [];
        }
        else {
            $subinfos = $pkg{$name} ||= [];
        }
        push @$subinfos, $subinfo;
        # keyed by the array's address so each array is merged only once
        $to_merge{$subinfos} = $subinfos if $merge_subs;
    }

    for my $subinfos (values %to_merge) {
        my $subinfo = shift(@$subinfos)->clone;
        $subinfo->merge_in($_, src_keep => 1)
            for @$subinfos;
        # replace the many with the one
        @$subinfos = ($subinfo);
    }

    return \%pkg;
}

# Returns the list of fileinfo objects, excluding slot 0, any empty slots,
# and any fileinfo whose fid is false (nullified).
sub all_fileinfos {
    my @all = @{shift->{fid_fileinfo}};
    shift @all;    # drop fid 0
    # return all non-nullified fileinfos
    # (slots may be empty - new() guards against that too - so test the
    # element itself before calling a method on it)
    return grep { $_ and $_->fid } @all;
}

# Fileinfos that have a true eval_line.
sub eval_fileinfos {
    return grep {  $_->eval_line } shift->all_fileinfos;
}

# Fileinfos that have no (or a false) eval_line.
sub noneval_fileinfos {
    return grep { !$_->eval_line } shift->all_fileinfos;
}


# fileinfo_of($arg, $silent_if_undef)
#
# Returns the fileinfo object for $arg, which may be a fileinfo object
# (returned as-is) or anything resolve_fid() accepts.
# Returns undef (a one-element list in list context, kept for backwards
# compatibility) if $arg is undef, can't be resolved, refers to a fid with no
# fileinfo, or refers to a nullified fileinfo.  A carp is issued for the
# first two cases, unless $arg is undef and $silent_if_undef is true.
sub fileinfo_of {
    my ($self, $arg, $silent_if_undef) = @_;

    if (not defined $arg) {
        carp "Can't resolve fid of undef value" unless $silent_if_undef;
        return undef;
    }

    # check if already a file info object
    return $arg if ref $arg and UNIVERSAL::can($arg,'fid') and $arg->isa('Devel::NYTProf::FileInfo');

    my $fid = $self->resolve_fid($arg);
    if (not $fid) {
        carp "Can't resolve fid of '$arg'";
        return undef;
    }

    my $fi = $self->{fid_fileinfo}[$fid];
    # resolve_fid() passes any all-digit string through unchecked, so the
    # slot may simply not exist: don't call a method on undef
    return undef unless $fi;
    return undef unless defined $fi->fid; # nullified?
    return $fi;
}


# Returns the subinfo object for the named sub.  Emits a stack trace (cluck)
# and returns undef if $subname is undef or unknown.
sub subinfo_of {
    my ($self, $subname) = @_;

    if (not defined $subname) {
        cluck "Can't resolve subinfo of undef value";
        return undef;
    }

    my $si = $self->{sub_subinfo}{$subname}
        or cluck "Can't resolve subinfo of '$subname'";

    return $si;
}


# Returns the current process's @INC.
sub inc {

    # XXX should return inc from profile data, when it's there
    return @INC;
}

=head2 dump_profile_data

  $profile->dump_profile_data;
  $profile->dump_profile_data( {
      filehandle => \*STDOUT,
      separator  => "",
  } );

Writes the profile data in a reasonably human friendly format to the specified
C<filehandle> (default STDOUT).

For non-trivial profiles the output can be very large. As a guide, there'll be
at least one line of output for each line of code executed, plus one for each
place a subroutine was called from, plus one per subroutine.

The default format is a Data::Dumper style whitespace-indented tree.
The types of data present can depend on the options used when profiling.

If C<separator> is true then instead of whitespace, each item of data is
indented with the I<path> through the structure with C<separator> used to
separate the elements of the path.
This format is especially useful for grep'ing and diff'ing.

=cut


# See the POD above.  Besides filehandle and separator, $args may contain
# skip_fileinfo_hook: a code ref called as $hook->($fileinfo, $path, $value).
# A true return makes the dump omit the sub_subinfo and fid_*_time entries
# belonging to that file, and is passed to the fileinfo's own dump method as
# the skip_internal_details option.
sub dump_profile_data {
    my $self       = shift;
    my $args       = shift || {};
    my $separator  = $args->{separator} || '';
    my $filehandle = $args->{filehandle} || \*STDOUT;

    # the profile object itself is the root of the dump (no clone is made)
    my $startnode = $self;

    $self->_clear_caches;

    # Called by _dump_elements for every element.  Returns
    # ($dump_opts, $value) to dump $value, or undef to skip the element.
    my $callback = sub {
        my ($path, $value) = @_;

        # not needed currently
        #if ($path->[0] eq 'attribute' && @$path == 1) { my %v = %$value; return ({}, \%v); }

        if (my $hook = $args->{skip_fileinfo_hook}) {

            # for fid_fileinfo elements...
            if ($path->[0] eq 'fid_fileinfo' && @$path==2) {
                my $fi = $value;

                # skip nullified fileinfo
                return undef unless $fi->fid;

                # don't dump internal details of lib modules
                return ({ skip_internal_details => scalar $hook->($fi, $path, $value) }, $value);
            }

            # skip sub_subinfo data for 'library modules'
            if ($path->[0] eq 'sub_subinfo' && @$path==2 && $value->[0]) {
                my $fi = $self->fileinfo_of($value->[0]);
                return undef if !$fi or $hook->($fi, $path, $value);
            }

            # skip fid_*_time data for 'library modules'
            if ($path->[0] =~ /^fid_\w+_time$/ && @$path==2) {
                my $fi = $self->fileinfo_of($path->[1]);
                return undef if !$fi or $hook->($fi, $path, $value);
            }
        }
        return ({}, $value);
    };

    _dump_elements($startnode, $separator, $filehandle, [], $callback);
}


# _dump_elements($r, $separator, $fh, $path, $callback)
#
# Recursively prints the hash or array ref $r to $fh.
#   $separator - if true, each line is prefixed with the path to the element
#                joined by $separator; otherwise lines are indented and the
#                container is wrapped in {} or []
#   $path      - array ref of the keys/indices leading to $r
#   $callback  - optional; called as $callback->([@$path, $key], $value) and
#                must return ($dump_opts, $value), or a false $dump_opts to
#                skip the element
# Values that can('dump') are asked to dump themselves.  Returns 1.
sub _dump_elements {
    my ($r, $separator, $fh, $path, $callback) = @_;
    # NOTE(review): indent unit exactly as it appears in this copy of the
    # source - confirm it has not been whitespace-collapsed
    my $pad = " ";
    my $padN;

    my $is_hash = (UNIVERSAL::isa($r, 'HASH'));
    my ($start, $end, $colon, $keys) =
          ($is_hash)
        ? ('{', '}', ' => ', [sort keys %$r])
        : ('[', ']', ': ', [0 .. @$r - 1]);

    if ($separator) {
        ($start, $end, $colon) = (undef, undef, $separator);
        $padN = join $separator, @$path, '';
    }
    else {
        $padN = $pad x (@$path + 1);
    }

    my $format = {sub_subinfo => {compact => 1},};

    print $fh "$start\n" if $start;
    # top-level key of the subtree being dumped; selects the $format entry
    my $key1 = $path->[0] || $keys->[0];
    for my $key (@$keys) {

        next if $key eq 'fid_srclines';

        my $value = ($is_hash) ? $r->{$key} : $r->[$key];

        # skip undef elements in array
        next if !$is_hash && !defined($value);
        # skip refs to empty arrays in array
        next if !$is_hash && ref $value eq 'ARRAY' && !@$value;

        my $dump_opts = {};
        if ($callback) {
            ($dump_opts, $value) = $callback->([ @$path, $key ], $value);
            next if not $dump_opts;
        }

        my $prefix = "$padN$key$colon";

        if (UNIVERSAL::can($value,'dump')) {
            $value->dump($separator, $fh, [ @$path, $key ], $prefix, $dump_opts);
        }
        else {
            # special case some common cases to be more compact:
            #   fid_*_time   [fid][line] = [N,N]
            #   sub_subinfo {subname} = [fid,startline,endline,calls,incl_time]
            my $as_compact = $format->{$key1}{compact};
            if (not defined $as_compact) {    # so guess...
                $as_compact =
                    (UNIVERSAL::isa($value, 'ARRAY') && @$value <= 9 && !grep { ref or !defined }
                        @$value);
            }
            # only plain (unblessed) arrays are ever printed compactly
            $as_compact = 0 if not ref $value eq 'ARRAY';

            if ($as_compact) {
                no warnings qw(uninitialized);
                printf $fh "%s[ %s ]\n", $prefix, join(" ", map { defined($_) ? $_ : 'undef' } @$value);
            }
            elsif (ref $value) {
                _dump_elements($value, $separator, $fh, [ @$path, $key ], $callback);
            }
            else {
                # An undef hash value is printed as an empty string, exactly
                # as before, but without an 'uninitialized value' warning.
                print $fh $prefix . (defined $value ? $value : '') . "\n";
            }
        }
    }
    no warnings 'numeric'; # @$path can be non-positive
    printf $fh "%s$end\n", ($pad x (@$path - 1)) if $end;

    return 1;
}


# Returns the profile's profile_modes entry.
sub get_profile_levels {
    return shift->{profile_modes};
}

# Returns the fid_${level}_time entry of the profile; $level defaults to 'line'.
sub get_fid_line_data {
    my ($self, $level) = @_;
    $level ||= 'line';
    my $fid_line_data = $self->{"fid_${level}_time"};
    return $fid_line_data;
}


=head2 normalize_variables

  $profile->normalize_variables;

Traverses the profile data structure and normalizes highly variable data, such
as the time, in order that the data can more easily be compared. This is mainly of
use to the test suite.

The data normalized is:

=over

=item *

profile timing data: set to 0

=item *

subroutines: timings are set to 0

=item *

attributes, like basetime, xs_version, etc., are set to 0

=item *

filenames: path prefixes matching absolute paths in @INC are changed to "/.../"

=item *

filenames: eval sequence numbers, like "(re_eval 2)" are changed to 0

=back

=cut

# If $normalize_options is true the profile's options are emptied as well.
# Returns 1.
sub normalize_variables {
    my ($self, $normalize_options) = @_;

    if ($normalize_options) {
        %{ $self->options } = ();
    }

    my $attributes = $self->attributes;

    # zero the attributes that vary between runs/platforms (if present)
    for my $attr (qw(
        basetime xs_version perl_version clock_id ticks_per_sec nv_size
        profiler_duration profiler_end_time profiler_start_time
        cumulative_overhead_ticks profiler_active
        total_stmts_duration total_stmts_measured total_stmts_discounted
        total_sub_calls sawampersand_line
    )) {
        $attributes->{$attr} = 0 if exists $attributes->{$attr};
    }

    # ...and remove these entirely
    for my $attr (qw(PL_perldb cumulative_overhead_ticks)) {
        delete $attributes->{$attr};
    }

    # normalize line data
    for my $level (qw(line block sub)) {
        my $fid_line_data = $self->get_fid_line_data($level) || [];

        # zero the statement timing data
        for my $of_fid (@$fid_line_data) {
            _zero_array_elem($of_fid, 0) if $of_fid;
        }
    }

    my $sub_subinfo = $self->{sub_subinfo};
    # keys() is evaluated once up front, so re-keying entries inside the
    # loop doesn't disturb the iteration
    for my $subname (keys %$sub_subinfo) {
        my $si = $self->{sub_subinfo}{$subname};
        # zero sub info and sub caller times etc.
        my $newname = $si->normalize_for_test;
        if ($newname ne $subname) {
            warn "Normalizing $subname to $newname overwrote other data\n"
                if $sub_subinfo->{$newname};
            $sub_subinfo->{$newname} = delete $sub_subinfo->{$subname};
        }
    }

    $_->normalize_for_test for $self->all_fileinfos;

    return 1;
}


# Sets element $index to 0 in every true entry of @$ary_of_line_data and
# recurses into the nested per-line data held in element 2, if any.
sub _zero_array_elem {
    my ($ary_of_line_data, $index) = @_;
    for my $line_data (@$ary_of_line_data) {
        next unless $line_data;
        $line_data->[$index] = 0;

        # if line was a string eval
        # then recurse to zero the times within the eval lines
        if (my $eval_lines = $line_data->[2]) {
            _zero_array_elem($eval_lines, $index);    # recurse
        }
    }
}


# Returns a hash ref mapping filename => fid for all fileinfos.
# The map is built once and kept in the caches until they are cleared.
sub _filename_to_fid {
    my $self = shift;
    my $caches = $self->_caches;
    return $caches->{_filename_to_fid_cache} ||= do {
        my $filename_to_fid = {};
        $filename_to_fid->{$_->filename} = $_->fid for $self->all_fileinfos;
        $filename_to_fid;
    };
}


=head2 subs_defined_in_file

  $subs_defined_hash = $profile->subs_defined_in_file( $file );

Returns a reference to a hash containing information about subroutines defined
in a source file.  The $file argument can be an integer file id (fid) or a file
path.

Returns undef if $file can't be resolved to a file in the profile.

The keys of the returned hash are fully qualified subroutine names and the
corresponding values are the subroutine information objects for those
subroutines (see L<Devel::NYTProf::SubInfo>).

The former C<$include_lines> argument is deprecated: passing a true value is
a fatal error.  Use C<subs_defined_in_file_by_line> instead.  It takes the
same $file argument and returns a reference to a hash whose keys are the
first line numbers of the subroutines (0 if not known).  The corresponding
value is a reference to an array containing the subroutine information object
of each subroutine defined on that line, typically just one.

=cut

# Returns a hash ref of subname => subinfo for the subs defined in the file
# identified by $fid (anything fileinfo_of() accepts).  Returns nothing if
# the file can't be resolved.  The result is memoized per fid in the
# profile's caches.  Croaks if the deprecated $incl_lines flag is true.
sub subs_defined_in_file {
    my ($self, $fid, $incl_lines) = @_;

    if ($incl_lines) {
        croak "incl_lines is deprecated in subs_defined_in_file, use subs_defined_in_file_by_line instead";
    }

    my $fileinfo = $self->fileinfo_of($fid);
    return unless $fileinfo;

    # key the cache on the resolved fid, not on whatever was passed in
    my $resolved_fid = $fileinfo->fid;
    my $cache        = $self->_caches;
    my $slot         = "subs_defined_in_file:$resolved_fid";

    my $cached = $cache->{$slot};
    return $cached if $cached;

    my @subinfos = $fileinfo->subs_defined;
    my %by_name  = map { $_->subname => $_ } @subinfos;

    return $cache->{$slot} = \%by_name;
}


# Like subs_defined_in_file() (same arguments) but returns a hash ref keyed
# by the first line of each sub, 0 when first_line is false.  Each value is
# an array ref of the subinfos that start on that line.
sub subs_defined_in_file_by_line {
    my $self = shift;
    my $subs_by_name = $self->subs_defined_in_file(@_);

    my %subs_on_line;
    for my $subinfo (values %$subs_by_name) {
        my $line = $subinfo->first_line || 0;    # 0 = xsub?
        push @{ $subs_on_line{$line} }, $subinfo;
    }

    return \%subs_on_line;
}


=head2 file_line_range_of_sub

  ($file, $fid, $first, $last, $fi) = $profile->file_line_range_of_sub("main::foo");

Returns the filename, fid, and first and last line numbers, and fileinfo object
for the specified subroutine (which must be fully qualified with a package name).

Returns an empty list if the subroutine name is not in the profile data.

The $fid return is the 'original' fid associated with the file the subroutine was created in.

The $file returned is the source file that defined the subroutine.

Subroutines that are implemented in XS have a line range of 0,0 and a possibly
unknown file (if NYTProf couldn't find a good match based on the package name).

Subroutines that were called but only returned via an exception may have a line
range of undef,undef if they're xsubs or were defined before NYTProf was enabled.

=cut

sub file_line_range_of_sub {
    my ($self, $sub) = @_;

    # an unknown sub has already been reported by subinfo_of()
    my $subinfo = $self->subinfo_of($sub);
    return unless $subinfo;

    # the subinfo is array based: fid, first line and last line come first
    my ($fid, $first, $last) = @{$subinfo}[0 .. 2];

    # sub has no known file
    return unless $fid;

    my $fileinfo = $self->fileinfo_of($fid);
    croak "No fid_fileinfo for sub $sub fid '$fid'"
        unless $fileinfo;

    return ($fileinfo->filename, $fid, $first, $last, $fileinfo);
}


=head2 resolve_fid

  $fid = $profile->resolve_fid( $file );

Returns the integer I<fid> that corresponds to $file.

If $file can't be found and $file consists only of digits then it's presumed
to already be a fid and is returned. This is used to enable other methods to
work with fid or file arguments.

If $file can't be found and ends in C<.pmc> then the lookup is retried with
C<.pm> instead.

If $file still can't be found, and isn't an absolute path, but it uniquely
matches the suffix of one of the files then that corresponding fid is
returned. A warning is issued if it matches more than one file.

Returns undef if no fid can be found.

=cut


sub resolve_fid {
    my ($self, $file) = @_;

    defined $file
        or Carp::confess("No file specified");

    my $fid_of = $self->_filename_to_fid;

    # exact match
    if (exists $fid_of->{$file}) {
        return $fid_of->{$file};
    }

    # looks like a fid already
    if ($file =~ m/^\d+$/) {
        return $file;
    }

    # XXX hack needed because of how _map_new_to_old deals
    # with .pmc files because of how ::Reporter works
    if ($file =~ s/\.pmc$/.pm/) {
        return $self->resolve_fid($file);
    }

    # unfound absolute path, so we're sure we won't find it
    if ($file =~ m/^\//) {
        return undef;    # XXX carp?
    }

    # prepend '/' and grep for trailing matches - if just one then use that
    my $suffix_re  = qr{/\Q$file\E$};
    my @candidates = grep { m/$suffix_re/ } keys %$fid_of;

    # XXX: Not clear how to exercise either of the following conditions
    if (@candidates == 1) {
        return $self->resolve_fid($candidates[0]);
    }
    if (@candidates >= 2) {
        carp "Can't resolve '$file' to a unique file id (matches @candidates)";
    }

    return undef;
}

1;

__END__

=head1 PROFILE DATA STRUCTURE

XXX

=head1 LIMITATION

There's currently no way to merge profile data from multiple files.

=head1 SEE ALSO L =head1 AUTHOR B, C<< >> B, L B, C<< >> =head1 COPYRIGHT AND LICENSE Copyright (C) 2008 by Adam Kaplan and The New York Times Company. Copyright (C) 2008,2009 by Tim Bunce, Ireland. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut