package Perl::Critic::Document; use 5.010001; use strict; use warnings; use List::Util qw< reduce >; use Scalar::Util qw< blessed refaddr weaken >; use version; use PPI::Document; use PPI::Document::File; use PPIx::Utils::Traversal qw< split_ppi_node_by_namespace >; use Perl::Critic::Annotation; use Perl::Critic::Exception::Parse qw< throw_parse >; use Perl::Critic::Utils qw< :booleans :characters hashify shebang_line >; use PPIx::Regexp 0.010 qw< >; #----------------------------------------------------------------------------- our $VERSION = '1.156'; #----------------------------------------------------------------------------- our $AUTOLOAD; sub AUTOLOAD { ## no critic (ProhibitAutoloading,ArgUnpacking) my ( $function_name ) = $AUTOLOAD =~ m/ ([^:\']+) \z /xms; return if $function_name eq 'DESTROY'; my $self = shift; return $self->{_doc}->$function_name(@_); } #----------------------------------------------------------------------------- sub new { my ($class, @args) = @_; my $self = bless {}, $class; $self->_init_common(); $self->_init_from_external_source(@args); return $self; } #----------------------------------------------------------------------------- sub _new_for_parent_document { my ($class, $ppi_document, $parent_document) = @_; my $self = bless {}, $class; $self->_init_common(); $self->{_doc} = $ppi_document; $self->{_is_module} = $parent_document->is_module(); return $self; } #----------------------------------------------------------------------------- sub _init_common { my ($self) = @_; $self->{_annotations} = []; $self->{_suppressed_violations} = []; $self->{_disabled_line_map} = {}; return; } #----------------------------------------------------------------------------- sub _init_from_external_source { ## no critic (Subroutines::RequireArgUnpacking) my $self = shift; my %args; if (@_ == 1) { warnings::warnif( 'deprecated', 'Perl::Critic::Document->new($source) deprecated, use Perl::Critic::Document->new(-source => $source) instead.' ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars) ); %args = ('-source' => shift); } else { %args = @_; } my $source_code = $args{'-source'}; # $source_code can be a file name, or a reference to a # PPI::Document, or a reference to a scalar containing source # code. In the last case, PPI handles the translation for us. my $ppi_document = _is_ppi_doc($source_code) ? $source_code : ref $source_code ? PPI::Document->new($source_code) : PPI::Document::File->new($source_code); # Bail on error if (not defined $ppi_document) { my $errstr = PPI::Document::errstr(); my $file = ref $source_code ? undef : $source_code; throw_parse message => qq, file_name => $file; } $self->{_doc} = $ppi_document; $self->index_locations(); $self->_disable_shebang_fix(); $self->{_filename_override} = $args{'-filename-override'}; $self->{_is_module} = $self->_determine_is_module(\%args); return; } #----------------------------------------------------------------------------- sub _is_ppi_doc { my ($ref) = @_; return blessed($ref) && $ref->isa('PPI::Document'); } #----------------------------------------------------------------------------- sub ppi_document { my ($self) = @_; return $self->{_doc}; } #----------------------------------------------------------------------------- sub isa { ## no critic ( Subroutines::ProhibitBuiltinHomonyms ) my ($self, @args) = @_; return $self->SUPER::isa(@args) || ( (ref $self) && $self->{_doc} && $self->{_doc}->isa(@args) ); } #----------------------------------------------------------------------------- sub find { my ($self, $wanted, @more_args) = @_; # This method can only find elements by their class names. For # other types of searches, delegate to the PPI::Document if ( ( ref $wanted ) || !$wanted || $wanted !~ m/ \A PPI:: /xms ) { return $self->{_doc}->find($wanted, @more_args); } # Build the class cache if it doesn't exist. This happens at most # once per Perl::Critic::Document instance. %elements of will be # populated as a side-effect of calling the $finder_sub coderef # that is produced by the caching_finder() closure. if ( !$self->{_elements_of} ) { my %cache = ( 'PPI::Document' => [ $self ] ); # The cache refers to $self, and $self refers to the cache. This # creates a circular reference that leaks memory (i.e. $self is not # destroyed until execution is complete). By weakening the reference, # we allow perl to collect the garbage properly. weaken( $cache{'PPI::Document'}->[0] ); my $finder_coderef = _caching_finder( \%cache ); $self->{_doc}->find( $finder_coderef ); $self->{_elements_of} = \%cache; } # find() must return false-but-defined on fail return $self->{_elements_of}->{$wanted} || q{}; } #----------------------------------------------------------------------------- sub find_first { my ($self, $wanted, @more_args) = @_; # This method can only find elements by their class names. For # other types of searches, delegate to the PPI::Document if ( ( ref $wanted ) || !$wanted || $wanted !~ m/ \A PPI:: /xms ) { return $self->{_doc}->find_first($wanted, @more_args); } my $result = $self->find($wanted); return $result ? $result->[0] : $result; } #----------------------------------------------------------------------------- sub find_any { my ($self, $wanted, @more_args) = @_; # This method can only find elements by their class names. For # other types of searches, delegate to the PPI::Document if ( ( ref $wanted ) || !$wanted || $wanted !~ m/ \A PPI:: /xms ) { return $self->{_doc}->find_any($wanted, @more_args); } my $result = $self->find($wanted); return $result ? 1 : $result; } #----------------------------------------------------------------------------- sub namespaces { my ($self) = @_; return keys %{ $self->_nodes_by_namespace() }; } #----------------------------------------------------------------------------- sub subdocuments_for_namespace { my ($self, $namespace) = @_; my $subdocuments = $self->_nodes_by_namespace()->{$namespace}; return $subdocuments ? @{$subdocuments} : (); } #----------------------------------------------------------------------------- sub ppix_regexp_from_element { my ( $self, $element ) = @_; if ( blessed( $element ) && $element->isa( 'PPI::Element' ) ) { my $addr = refaddr( $element ); return $self->{_ppix_regexp_from_element}{$addr} if exists $self->{_ppix_regexp_from_element}{$addr}; return ( $self->{_ppix_regexp_from_element}{$addr} = PPIx::Regexp->new( $element, default_modifiers => $self->_find_use_re_modifiers_in_scope_from_element( $element ), ) ); } else { return PPIx::Regexp->new( $element ); } } sub _find_use_re_modifiers_in_scope_from_element { my ( $self, $elem ) = @_; my @found; foreach my $use_re ( @{ $self->find( 'PPI::Statement::Include' ) || [] } ) { 're' eq $use_re->module() or next; $self->element_is_in_lexical_scope_after_statement_containing( $elem, $use_re ) or next; my $prefix = 'no' eq $use_re->type() ? q{-} : $EMPTY; push @found, map { "$prefix$_" } grep { m{ \A / }smx } map { $_->isa( 'PPI::Token::Quote' ) ? $_->string() : $_->isa( 'PPI::Token::QuoteLike::Words' ) ? $_->literal() : $_->content() } $use_re->schildren(); } return \@found; } #----------------------------------------------------------------------------- # This got hung on the Perl::Critic::Document, rather than living in # Perl::Critic::Utils::PPI, because of the possibility that caching of scope # objects would turn out to be desirable. sub element_is_in_lexical_scope_after_statement_containing { my ( $self, $inner_elem, $outer_elem ) = @_; # If the outer element defines a scope, we're true if and only if # the outer element contains the inner element, and the inner # element is not somewhere that is hidden from the scope. if ( $outer_elem->scope() ) { return _inner_element_is_in_outer_scope_really( $inner_elem, $outer_elem ); } # In the more general case: # The last element of the statement containing the outer element # must be before the inner element. If not, we know we're false, # without walking the parse tree. my $stmt = $outer_elem->statement() or return; my $last_elem = $stmt; while ( $last_elem->isa( 'PPI::Node' ) ) { $last_elem = $last_elem->last_element() or return; } my $stmt_loc = $last_elem->location() or return; my $inner_loc = $inner_elem->location() or return; $stmt_loc->[0] > $inner_loc->[0] and return; $stmt_loc->[0] == $inner_loc->[0] and $stmt_loc->[1] >= $inner_loc->[1] and return; # Since we know the inner element is after the outer element, find # the element that defines the scope of the statement that contains # the outer element. my $parent = $stmt; while ( ! $parent->scope() ) { # Things appearing in the right-hand side of a # PPI::Statement::Variable are not in-scope to its left-hand # side. RESTRICTION -- this code does not handle truly # pathological stuff like # my ( $c, $d ) = qw{ e f }; # my ( $a, $b ) = my ( $c, $d ) = ( $c, $d ); _inner_is_defined_by_outer( $inner_elem, $parent ) and _location_is_in_right_hand_side_of_assignment( $parent, $inner_elem ) and return; $parent = $parent->parent() or return; } # We're true if and only if the scope of the outer element contains # the inner element. return $inner_elem->descendant_of( $parent ); } # Helper for element_is_in_lexical_scope_after_statement_containing(). # Return true if and only if $outer_elem is a statement that defines # variables and $inner_elem is actually a variable defined in that # statement. sub _inner_is_defined_by_outer { my ( $inner_elem, $outer_elem ) = @_; $outer_elem->isa( 'PPI::Statement::Variable' ) and $inner_elem->isa( 'PPI::Token::Symbol' ) or return; my %defines = hashify( $outer_elem->variables() ); return $defines{$inner_elem->symbol()}; } # Helper for element_is_in_lexical_scope_after_statement_containing(). # Given that the outer element defines a scope, there are still things # that are lexically inside it but outside the scope. We return true if # and only if the inner element is inside the outer element, but not # inside one of the excluded elements. The cases handled so far: # for ----- the list is not part of the scope # foreach - the list is not part of the scope sub _inner_element_is_in_outer_scope_really { my ( $inner_elem, $outer_elem ) = @_; $outer_elem->scope() or return; $inner_elem->descendant_of( $outer_elem ) or return; if ( $outer_elem->isa( 'PPI::Statement::Compound' ) ) { my $first = $outer_elem->schild( 0 ) or return; if ( { for => 1, foreach => 1 }->{ $first->content() } ) { my $next = $first; while ( $next = $next->snext_sibling() ) { $next->isa( 'PPI::Structure::List' ) or next; return ! $inner_elem->descendant_of( $next ); } } } return $TRUE; } # Helper for element_is_in_lexical_scope_after_statement_containing(). # Given and element that represents an assignment or assignment-ish # statement, and a location, return true if the location is to the right # of the equals sign, and false otherwise (including the case where # there is no equals sign). Only the leftmost equals is considered. This # is a restriction. sub _location_is_in_right_hand_side_of_assignment { my ( $elem, $inner_elem ) = @_; my $inner_loc = $inner_elem->location(); my $kid = $elem->schild( 0 ); while ( $kid ) { $kid->isa( 'PPI::Token::Operator' ) and q{=} eq $kid->content() or next; my $l = $kid->location(); $l->[0] > $inner_loc->[0] and return; $l->[0] == $inner_loc->[0] and $l->[1] >= $inner_loc->[1] and return; return $inner_elem->descendant_of( $elem ); } continue { $kid = $kid->snext_sibling(); } return; } #----------------------------------------------------------------------------- sub filename { my ($self) = @_; if (defined $self->{_filename_override}) { return $self->{_filename_override}; } else { my $doc = $self->{_doc}; return $doc->can('filename') ? $doc->filename() : undef; } } #----------------------------------------------------------------------------- sub highest_explicit_perl_version { my ($self) = @_; my $highest_explicit_perl_version = $self->{_highest_explicit_perl_version}; if ( not exists $self->{_highest_explicit_perl_version} ) { my $includes = $self->find( \&_is_a_version_statement ); if ($includes) { # Note: this doesn't use List::Util::max() because that function # doesn't use the overloaded ">=" etc of a version object. The # reduce() style lets version.pm take care of all comparing. # # For reference, max() ends up looking at the string converted to # an NV, or something like that. An underscore like "5.005_04" # provokes a warning and is chopped off at "5.005" thus losing the # minor part from the comparison. # # An underscore "5.005_04" is supposed to mean an alpha release # and shouldn't be used in a perl version. But it's shown in # perlfunc under "use" (as a number separator), and appears in # several modules supplied with perl 5.10.0 (like version.pm # itself!). At any rate if version.pm can understand it then # that's enough for here. $highest_explicit_perl_version = reduce { $a >= $b ? $a : $b } map { version->new( $_->version() ) } @{$includes}; } else { $highest_explicit_perl_version = undef; } $self->{_highest_explicit_perl_version} = $highest_explicit_perl_version; } return $highest_explicit_perl_version if $highest_explicit_perl_version; return; } #----------------------------------------------------------------------------- sub uses_module { my ($self, $module_name) = @_; return exists $self->_modules_used()->{$module_name}; } #----------------------------------------------------------------------------- sub process_annotations { my ($self) = @_; my @annotations = Perl::Critic::Annotation->create_annotations($self); $self->add_annotation(@annotations); return $self; } #----------------------------------------------------------------------------- sub line_is_disabled_for_policy { my ($self, $line, $policy) = @_; my $policy_name = ref $policy || $policy; # HACK: These two policies are special. If they are active, they cannot be # disabled by a "## no critic" annotation. Rather than create a general # hook in Policy.pm for enabling this behavior, we chose to hack # it here, since this isn't the kind of thing that most policies do. return 0 if $policy_name eq 'Perl::Critic::Policy::Miscellanea::ProhibitUnrestrictedNoCritic'; return 0 if $policy_name eq 'Perl::Critic::Policy::Miscellanea::ProhibitUselessNoCritic'; return 1 if $self->{_disabled_line_map}->{$line}->{$policy_name}; return 1 if $self->{_disabled_line_map}->{$line}->{ALL}; return 0; } #----------------------------------------------------------------------------- sub add_annotation { my ($self, @annotations) = @_; # Add annotation to our private map for quick lookup for my $annotation (@annotations) { my ($start, $end) = $annotation->effective_range(); my @affected_policies = $annotation->disables_all_policies ? qw(ALL) : $annotation->disabled_policies(); # TODO: Find clever way to do this with hash slices for my $line ($start .. $end) { for my $policy (@affected_policies) { $self->{_disabled_line_map}->{$line}->{$policy} = 1; } } } push @{ $self->{_annotations} }, @annotations; return $self; } #----------------------------------------------------------------------------- sub annotations { my ($self) = @_; return @{ $self->{_annotations} }; } #----------------------------------------------------------------------------- sub add_suppressed_violation { my ($self, $violation) = @_; push @{$self->{_suppressed_violations}}, $violation; return $self; } #----------------------------------------------------------------------------- sub suppressed_violations { my ($self) = @_; return @{ $self->{_suppressed_violations} }; } #----------------------------------------------------------------------------- sub is_program { my ($self) = @_; return not $self->is_module(); } #----------------------------------------------------------------------------- sub is_module { my ($self) = @_; return $self->{_is_module}; } #----------------------------------------------------------------------------- # PRIVATE functions & methods sub _is_a_version_statement { my (undef, $element) = @_; return 0 if not $element->isa('PPI::Statement::Include'); return 1 if $element->version(); return 0; } #----------------------------------------------------------------------------- sub _caching_finder { my $cache_ref = shift; # These vars will persist for the life of the code ref that this sub returns. my %isa_cache; # Gather up all the PPI elements and sort by @ISA. Note: if any # instances used multiple inheritance, this implementation would # lead to multiple copies of $element in the $elements_of lists. # However, PPI::* doesn't do multiple inheritance, so we are safe return sub { my (undef, $element) = @_; my $classes = $isa_cache{ref $element}; if ( !$classes ) { $classes = [ ref $element ]; # Use a C-style loop because we append to the classes array inside for ( my $i = 0; $i < @{$classes}; $i++ ) { ## no critic(ProhibitCStyleForLoops) no strict 'refs'; ## no critic(ProhibitNoStrict) push @{$classes}, @{"$classes->[$i]::ISA"}; $cache_ref->{$classes->[$i]} ||= []; } $isa_cache{$classes->[0]} = $classes; } for my $class ( @{$classes} ) { push @{$cache_ref->{$class}}, $element; } return 0; # 0 tells find() to keep traversing, but not to store this $element }; } #----------------------------------------------------------------------------- sub _disable_shebang_fix { my ($self) = @_; # When you install a program using ExtUtils::MakeMaker or Module::Build, it # inserts some magical code into the top of the file (just after the # shebang). This code allows people to call your program using a shell, # like `sh my_script`. Unfortunately, this code causes several Policy # violations, so we disable them as if they had "## no critic" annotations. my $first_stmnt = $self->schild(0) || return; # Different versions of MakeMaker and Build use slightly different shebang # fixing strings. This matches most of the ones I've found in my own Perl # distribution, but it may not be bullet-proof. my $fixin_rx = qr<^eval 'exec .* \$0 \$[{]1[+]"\$@"}'\s*[\r\n]\s*if.+;>ms; ## no critic (ExtendedFormatting) if ( $first_stmnt =~ $fixin_rx ) { my $line = $first_stmnt->location->[0]; $self->{_disabled_line_map}->{$line}->{ALL} = 1; $self->{_disabled_line_map}->{$line + 1}->{ALL} = 1; } return $self; } #----------------------------------------------------------------------------- sub _determine_is_module { my ($self, $args) = @_; my $file_name = $self->filename(); if ( defined $file_name and ref $args->{'-program-extensions'} eq 'ARRAY' ) { foreach my $ext ( @{ $args->{'-program-extensions'} } ) { my $regex = ref $ext eq 'Regexp' ? $ext : qr< @{ [ quotemeta $ext ] } \z >xms; return $FALSE if $file_name =~ m/$regex/smx; } } return $FALSE if shebang_line($self); return $FALSE if defined $file_name && $file_name =~ m/ [.] PL \z /smx; return $TRUE; } #----------------------------------------------------------------------------- sub _nodes_by_namespace { my ($self) = @_; my $nodes = $self->{_nodes_by_namespace}; return $nodes if $nodes; my $ppi_document = $self->ppi_document(); if (not $ppi_document) { return $self->{_nodes_by_namespace} = {}; } my $raw_nodes_map = split_ppi_node_by_namespace($ppi_document); my %wrapped_nodes; while ( my ($namespace, $raw_nodes) = each %{$raw_nodes_map} ) { $wrapped_nodes{$namespace} = [ map { __PACKAGE__->_new_for_parent_document($_, $self) } @{$raw_nodes} ]; } return $self->{_nodes_by_namespace} = \%wrapped_nodes; } #----------------------------------------------------------------------------- # Note: must use exists on return value to determine membership because all # the values are false, unlike the result of hashify(). sub _modules_used { my ($self) = @_; my $mapping = $self->{_modules_used}; return $mapping if $mapping; my $includes = $self->find('PPI::Statement::Include'); if (not $includes) { return $self->{_modules_used} = {}; } my %mapping; for my $module ( grep { $_ } map { $_->module() || $_->pragma() } @{$includes} ) { # Significantly less memory than $h{$k} => 1. Thanks Mr. Lembark. $mapping{$module} = (); } return $self->{_modules_used} = \%mapping; } #----------------------------------------------------------------------------- 1; __END__ =pod =for stopwords pre-caches =head1 NAME Perl::Critic::Document - Caching wrapper around a PPI::Document. =head1 SYNOPSIS use PPI::Document; use Perl::Critic::Document; my $doc = PPI::Document->new('Foo.pm'); $doc = Perl::Critic::Document->new(-source => $doc); ## Then use the instance just like a PPI::Document =head1 DESCRIPTION Perl::Critic does a lot of iterations over the PPI document tree via the C method. To save some time, this class pre-caches a lot of the common C calls in a single traversal. Then, on subsequent requests we return the cached data. This is implemented as a facade, where method calls are handed to the stored C instance. =head1 CAVEATS This facade does not implement the overloaded operators from L (that is, the C work). Therefore, users of this facade must not rely on that syntactic sugar. So, for example, instead of C you should write C<< my $source = $doc->content(); >> Perhaps there is a CPAN module out there which implements a facade better than we do here? =head1 INTERFACE SUPPORT This is considered to be a public class. Any changes to its interface will go through a deprecation cycle. =head1 CONSTRUCTOR =over =item C<< new(-source => $source_code, '-filename-override' => $filename, '-program-extensions' => [program_extensions]) >> Create a new instance referencing a PPI::Document instance. The C<$source_code> can be the name of a file, a reference to a scalar containing actual source code, or a L or L. In the event that C<$source_code> is a reference to a scalar containing actual source code or a L, the resulting L will not have a filename. This may cause L to incorrectly classify the source code as a module or script. To avoid this problem, you can optionally set the C<-filename-override> to force the L to have a particular C<$filename>. Do not use this option if C<$source_code> is already the name of a file, or is a reference to a L. The '-program-extensions' argument is optional, and is a reference to a list of strings and/or regular expressions. The strings will be made into regular expressions matching the end of a file name, and any document whose file name matches one of the regular expressions will be considered a program. If -program-extensions is not specified, or if it does not determine the document type, the document will be considered to be a program if the source has a shebang line or its file name (if any) matches C<< m/ [.] PL \z /smx >>. =back =head1 METHODS =over =item C<< ppi_document() >> Accessor for the wrapped PPI::Document instance. Note that altering this instance in any way can cause unpredictable failures in Perl::Critic's subsequent analysis because some caches may fall out of date. =item C<< find($wanted) >> =item C<< find_first($wanted) >> =item C<< find_any($wanted) >> Caching wrappers around the PPI methods. If C<$wanted> is a simple PPI class name, then the cache is employed. Otherwise we forward the call to the corresponding method of the C instance. =item C<< namespaces() >> Returns a list of the namespaces (package names) in the document. =item C<< subdocuments_for_namespace($namespace) >> Returns a list of sub-documents containing the elements in the given namespace. For example, given that the current document is for the source foo(); package Foo; package Bar; package Foo; this method will return two Ls for a parameter of C<"Foo">. For more, see L. =item C<< ppix_regexp_from_element($element) >> Caching wrapper around C<< PPIx::Regexp->new($element) >>. If C<$element> is a C the cache is employed, otherwise it just returns the results of C<< PPIx::Regexp->new() >>. In either case, it returns C unless the argument is something that L actually understands. =item C<< element_is_in_lexical_scope_after_statement_containing( $inner, $outer ) >> Is the C<$inner> element in lexical scope after the statement containing the C<$outer> element? In the case where C<$outer> is itself a scope-defining element, returns true if C<$outer> contains C<$inner>. In any other case, C<$inner> must be after the last element of the statement containing C<$outer>, and the innermost scope for C<$outer> also contains C<$inner>. This is not the same as asking whether C<$inner> is visible from C<$outer>. =item C<< filename() >> Returns the filename for the source code if applicable (PPI::Document::File) or C otherwise (PPI::Document). =item C<< isa( $classname ) >> To be compatible with other modules that expect to get a PPI::Document, the Perl::Critic::Document class masquerades as the PPI::Document class. =item C<< highest_explicit_perl_version() >> Returns a L object for the highest Perl version requirement declared in the document via a C or C statement. Returns nothing if there is no version statement. =item C<< uses_module($module_or_pragma_name) >> Answers whether there is a C, C, or C of the given name in this document. Note that there is no differentiation of modules vs. pragmata here. =item C<< process_annotations() >> Causes this Document to scan itself and mark which lines & policies are disabled by the C<"## no critic"> annotations. =item C<< line_is_disabled_for_policy($line, $policy_object) >> Returns true if the given C<$policy_object> or C<$policy_name> has been disabled for at C<$line> in this Document. Otherwise, returns false. =item C<< add_annotation( $annotation ) >> Adds an C<$annotation> object to this Document. =item C<< annotations() >> Returns a list containing all the Ls that were found in this Document. =item C<< add_suppressed_violation($violation) >> Informs this Document that a C<$violation> was found but not reported because it fell on a line that had been suppressed by a C<"## no critic"> annotation. Returns C<$self>. =item C<< suppressed_violations() >> Returns a list of references to all the Ls that were found in this Document but were suppressed. =item C<< is_program() >> Returns whether this document is considered to be a program. =item C<< is_module() >> Returns whether this document is considered to be a Perl module. =back =head1 AUTHOR Chris Dolan =head1 COPYRIGHT Copyright (c) 2006-2023 Chris Dolan. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of this license can be found in the LICENSE file included with this module. =cut ############################################################################## # Local Variables: # mode: cperl # cperl-indent-level: 4 # fill-column: 78 # indent-tabs-mode: nil # c-indentation-style: bsd # End: # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :