From 33734b49a46374cce5ae05e7f33c32496334f600 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Thu, 29 Dec 2011 11:36:20 -0500 Subject: [PATCH 1/1] abstract query representations from QueryParser When you perform a search, you now also get back an abstract representation of your search query. fts-replacement.pl is a test script that shows you things that QueryParser does, including the new abstract query feature, and it also reconstitutes a string from the abstract query to demonstrate the correctness of that query. [Also] Make last-wins true for facets, filters and modifiers -- arg!; Parameterized values win in the staged-search QP wrapper Signed-off-by: Lebbeous Fogle-Weekley Signed-off-by: Dan Scott --- Open-ILS/src/extras/fts-replacement.pl | 57 +++ .../Application/Storage/Publisher/metabib.pm | 196 +++++--- .../Application/Storage/QueryParser.pm | 448 +++++++++++++++++- 3 files changed, 620 insertions(+), 81 deletions(-) create mode 100755 Open-ILS/src/extras/fts-replacement.pl diff --git a/Open-ILS/src/extras/fts-replacement.pl b/Open-ILS/src/extras/fts-replacement.pl new file mode 100755 index 0000000000..63148af6d1 --- /dev/null +++ b/Open-ILS/src/extras/fts-replacement.pl @@ -0,0 +1,57 @@ +#!/usr/bin/perl +use warnings; +use strict; +use OpenILS::Application::Storage::Driver::Pg::QueryParser; +use JSON::XS; +use Getopt::Long; +use Data::Dumper; +$Data::Dumper::Indent = 1; +use Time::HiRes qw/time/; + +OpenILS::Application::Storage::Driver::Pg::QueryParser->TEST_SETUP; + +my $query = '#available title: foo bar* || (-baz || (subject:"1900'. + '-1910 junk" "and another thing" se:stuff #available '. + 'statuses(0,7,12))) && && && au:malarky || au|'. + 'corporate|personal:gonzo && dc.identifier:+123456789X'. + ' dc.contributor=rowling #metarecord estimation_'. + 'strategy(exclusion) item_type(a, t) item_form(d) '. + 'bib.subjectTitle=potter bib.subjectName=harry '. 
+ 'keyword|mapscale:1:250000'; +my $superpage = 1; +my $superpage_size = 1000; +my $core_limit = 25000; +my $debug; +my $quiet; +my $runs = 100; + +GetOptions( + 'superpage=i' => \$superpage, + 'superpage-size=i' => \$superpage_size, + 'core-limit=i' => \$core_limit, + 'query=s' => \$query, + 'debug' => \$debug, + 'quiet' => \$quiet, + 'runs=i' => \$runs +); + +print "Original query: $query\n"; + +my $start = time(); +OpenILS::Application::Storage::Driver::Pg::QueryParser->new( superpage_size => $superpage_size, superpage => $superpage, core_limit => $core_limit, debug => $debug, query => $query )->parse->parse_tree for (1 .. $runs); +my $end = time(); + +my $plan = OpenILS::Application::Storage::Driver::Pg::QueryParser->new( superpage_size => $superpage_size, superpage => $superpage, core_limit => $core_limit, query => $query, debug => $debug ); +$plan->parse; +print "Parsed query tree:\n" . Dumper( $plan->parse_tree) if (!$quiet); +#print "Parsed query tree:\n" . Dumper( QueryParser->new( superpage_size => $superpage_size, superpage => $superpage, core_limit => $core_limit, query => $query, debug => $debug )->parse->parse_tree); +my $sql = $plan->toSQL; +$sql =~ s/^\s*$//gm; +print "SQL:\n$sql\n\n" if (!$quiet); + +my $abstract_query = $plan->parse_tree->to_abstract_query(with_config => 1); +print "abstract_query: " . Dumper($abstract_query) . "\n"; +print "Simple plan: " . ($plan->simple_plan ? 'yes' : 'no') . "\n"; +print "Total parse time, $runs runs: " . ($end - $start) . "s\n"; +print "Average parse time, $runs runs: " . sprintf('%0.3f',(($end - $start) / $runs) * 1000) . 
"ms\n"; + diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm index 1da0115738..722c6c58f9 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm @@ -10,11 +10,55 @@ use OpenSRF::Utils::JSON; use Data::Dumper; use Digest::MD5 qw/md5_hex/; +use OpenILS::Application::Storage::QueryParser; my $log = 'OpenSRF::Utils::Logger'; $VERSION = 1; +sub _initialize_parser { + my ($parser) = @_; + + my $cstore = OpenSRF::AppSession->create( 'open-ils.cstore' ); + $parser->initialize( + config_record_attr_index_norm_map => + $cstore->request( + 'open-ils.cstore.direct.config.record_attr_index_norm_map.search.atomic', + { id => { "!=" => undef } }, + { flesh => 1, flesh_fields => { crainm => [qw/norm/] }, order_by => [{ class => "crainm", field => "pos" }] } + )->gather(1), + search_relevance_adjustment => + $cstore->request( + 'open-ils.cstore.direct.search.relevance_adjustment.search.atomic', + { id => { "!=" => undef } } + )->gather(1), + config_metabib_field => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_field.search.atomic', + { id => { "!=" => undef } } + )->gather(1), + config_metabib_search_alias => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_search_alias.search.atomic', + { alias => { "!=" => undef } } + )->gather(1), + config_metabib_field_index_norm_map => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic', + { id => { "!=" => undef } }, + { flesh => 1, flesh_fields => { cmfinm => [qw/norm/] }, order_by => [{ class => "cmfinm", field => "pos" }] } + )->gather(1), + config_record_attr_definition => + $cstore->request( + 'open-ils.cstore.direct.config.record_attr_definition.search.atomic', + { name => { "!=" => undef } } + )->gather(1), + ); + + $cstore->disconnect; + 
die("Cannot initialize $parser!") unless ($parser->initialization_complete); +} + sub ordered_records_from_metarecord { my $self = shift; my $client = shift; @@ -2775,6 +2819,71 @@ __PACKAGE__->register_method( api_level => 1, ); +# Takes an abstract query object and recursively turns it back into a string +# for QueryParser. +sub abstract_query2str { + my ($self, $conn, $query) = @_; + + return QueryParser::Canonicalize::abstract_query2str_impl($query, 0); +} + +__PACKAGE__->register_method( + api_name => "open-ils.storage.query_parser.abstract_query.canonicalize", + method => "abstract_query2str", + api_level => 1, + signature => { + params => [ + {desc => q/ +Abstract query parser object, with complete config data. For example input, +see the 'abstract_query' part of the output of an API call like +open-ils.search.biblio.multiclass.query, when called with the return_abstract +flag set to true./, + type => "object"} + ], + return => { type => "string", desc => "String representation of abstract query object" } + } +); + +sub str2abstract_query { + my ($self, $conn, $query, $qp_opts, $with_config) = @_; + + my %use_opts = ( # reasonable defaults? should these even be hardcoded here? + superpage => 1, + superpage_size => 1000, + core_limit => 25000, + query => $query, + (ref $qp_opts eq 'HASH' ? 
%$qp_opts : ()) # caller-supplied QueryParser options come last, so they override the defaults above + ); + + $with_config ||= 0; + + # grab the query parser and initialize it + my $parser = $OpenILS::Application::Storage::QParser; + $parser->use; + + _initialize_parser($parser) unless $parser->initialization_complete; + + my $plan = $parser->new(%use_opts)->parse; # named $plan so the incoming $query string parameter is not masked + + return $plan->parse_tree->to_abstract_query(with_config => $with_config); +} + +__PACKAGE__->register_method( + api_name => "open-ils.storage.query_parser.abstract_query.from_string", + method => "str2abstract_query", + api_level => 1, + signature => { + params => [ + {desc => "Query", type => "string"}, + {desc => q/Arguments for initializing QueryParser (optional)/, + type => "object"}, + {desc => q/Flag enabling inclusion of QP config in returned object (optional, default false)/, + type => "bool"} + ], + return => { type => "object", desc => "abstract representation of query parser query" } + } +); + sub query_parser_fts { my $self = shift; my $client = shift; @@ -2785,47 +2894,7 @@ sub query_parser_fts { my $parser = $OpenILS::Application::Storage::QParser; $parser->use; - if (!$parser->initialization_complete) { - my $cstore = OpenSRF::AppSession->create( 'open-ils.cstore' ); - $parser->initialize( - config_record_attr_index_norm_map => - $cstore->request( - 'open-ils.cstore.direct.config.record_attr_index_norm_map.search.atomic', - { id => { "!=" => undef } }, - { flesh => 1, flesh_fields => { crainm => [qw/norm/] }, order_by => [{ class => "crainm", field => "pos" }] } - )->gather(1), - search_relevance_adjustment => - $cstore->request( - 'open-ils.cstore.direct.search.relevance_adjustment.search.atomic', - { id => { "!=" => undef } } - )->gather(1), - config_metabib_field => - $cstore->request( - 'open-ils.cstore.direct.config.metabib_field.search.atomic', - { id => { "!=" => undef } } - )->gather(1), - config_metabib_search_alias => - $cstore->request( - 'open-ils.cstore.direct.config.metabib_search_alias.search.atomic', - { alias => { "!=" => undef } } - )->gather(1), - 
config_metabib_field_index_norm_map => - $cstore->request( - 'open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic', - { id => { "!=" => undef } }, - { flesh => 1, flesh_fields => { cmfinm => [qw/norm/] }, order_by => [{ class => "cmfinm", field => "pos" }] } - )->gather(1), - config_record_attr_definition => - $cstore->request( - 'open-ils.cstore.direct.config.record_attr_definition.search.atomic', - { name => { "!=" => undef } } - )->gather(1), - ); - - $cstore->disconnect; - die("Cannot initialize $parser!") unless ($parser->initialization_complete); - } - + _initialize_parser($parser) unless $parser->initialization_complete; # populate the locale/language map if (!$locale_map{COMPLETE}) { @@ -3185,29 +3254,33 @@ sub query_parser_fts_wrapper { my $base_plan = $parser->new( query => $base_query )->parse; - $query = "preferred_language($args{preferred_language}) $query" + $query = "$query preferred_language($args{preferred_language})" if ($args{preferred_language} and !$base_plan->parse_tree->find_filter('preferred_language')); - $query = "preferred_language_weight($args{preferred_language_weight}) $query" + $query = "$query preferred_language_weight($args{preferred_language_weight})" if ($args{preferred_language_weight} and !$base_plan->parse_tree->find_filter('preferred_language_weight') and !$base_plan->parse_tree->find_filter('preferred_language_multiplier')); - $query = "estimation_strategy($args{estimation_strategy}) $query" if ($args{estimation_strategy}); - $query = "site($args{org_unit}) $query" if ($args{org_unit}); - $query = "pref_ou($args{pref_ou}) $query" if ($args{pref_ou}); - $query = "depth($args{depth}) $query" if (defined($args{depth})); - $query = "sort($args{sort}) $query" if ($args{sort}); - $query = "limit($args{limit}) $query" if ($args{limit}); - $query = "core_limit($args{core_limit}) $query" if ($args{core_limit}); - $query = "skip_check($args{skip_check}) $query" if ($args{skip_check}); - $query = 
"superpage($args{superpage}) $query" if ($args{superpage}); - $query = "offset($args{offset}) $query" if ($args{offset}); - $query = "#metarecord $query" if ($self->api_name =~ /metabib/); - $query = "#available $query" if ($args{available}); - $query = "#descending $query" if ($args{sort_dir} && $args{sort_dir} =~ /^d/i); - $query = "#staff $query" if ($self->api_name =~ /staff/); - $query = "before($args{before}) $query" if (defined($args{before}) and $args{before} =~ /^\d+$/); - $query = "after($args{after}) $query" if (defined($args{after}) and $args{after} =~ /^\d+$/); - $query = "during($args{during}) $query" if (defined($args{during}) and $args{during} =~ /^\d+$/); - $query = "between($args{between}[0],$args{between}[1]) $query" + + # we add these to the end of the query (last-wins) because in wrapper mode we want to retain the behaviour + # of separately specified options taking precedence -- IOW, the user should not be able to cause a change in, + # say, superpage size by adjusting the query string. 
+ $query = "$query estimation_strategy($args{estimation_strategy})" if ($args{estimation_strategy}); + $query = "$query site($args{org_unit})" if ($args{org_unit}); + $query = "$query pref_ou($args{pref_ou})" if ($args{pref_ou}); + $query = "$query depth($args{depth})" if (defined($args{depth})); + $query = "$query sort($args{sort})" if ($args{sort}); + $query = "$query limit($args{limit})" if ($args{limit}); + $query = "$query core_limit($args{core_limit})" if ($args{core_limit}); + $query = "$query skip_check($args{skip_check})" if ($args{skip_check}); + $query = "$query superpage($args{superpage})" if ($args{superpage}); + $query = "$query offset($args{offset})" if ($args{offset}); + $query = "$query #metarecord" if ($self->api_name =~ /metabib/); + $query = "$query #available" if ($args{available}); + $query = "$query #descending" if ($args{sort_dir} && $args{sort_dir} =~ /^d/i); + $query = "$query #staff" if ($self->api_name =~ /staff/); + $query = "$query before($args{before})" if (defined($args{before}) and $args{before} =~ /^\d+$/); + $query = "$query after($args{after})" if (defined($args{after}) and $args{after} =~ /^\d+$/); + $query = "$query during($args{during})" if (defined($args{during}) and $args{during} =~ /^\d+$/); + $query = "$query between($args{between}[0],$args{between}[1])" if ( ref($args{between}) and @{$args{between}} == 2 and $args{between}[0] =~ /^\d+$/ and $args{between}[1] =~ /^\d+$/ ); @@ -3230,7 +3303,7 @@ sub query_parser_fts_wrapper { next if (@filter_list == 0); my $filter_string = join ',', @filter_list; - $query = "$filter($filter_string) $query"; + $query = "$query $filter($filter_string)"; } } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm index dc4cfef896..07d19c07e5 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm @@ -475,40 +475,46 @@ sub decompose { my $first_class = 1; my 
%seen_classes; - for my $class ( keys %{$pkg->search_fields} ) { + for my $class ( keys %{$pkg->search_field_aliases} ) { + warn " *** ... Looking for search fields in $class\n" if $self->debug; - for my $field ( @{$pkg->search_fields->{$class}} ) { + for my $field ( keys %{$pkg->search_field_aliases->{$class}} ) { + warn " *** ... Looking for aliases of $field\n" if $self->debug; for my $alias ( @{$pkg->search_field_aliases->{$class}{$field}} ) { - $alias = qr/$alias/; - s/(^|\s+)$alias[:=]/$1$class\|$field:/g; + my $aliasr = qr/$alias/; + s/(^|\s+)$aliasr\|/$1$class\|$field#$alias\|/g; + s/(^|\s+)$aliasr[:=]/$1$class\|$field#$alias:/g; + warn " *** Rewriting: $alias ($aliasr) as $class\|$field\n" if $self->debug; } } $search_class_re .= '|' unless ($first_class); $first_class = 0; - $search_class_re .= $class . '(?:\|\w+)*'; + $search_class_re .= $class . '(?:[|#][^:|]+)*'; $seen_classes{$class} = 1; } for my $class ( keys %{$pkg->search_class_aliases} ) { for my $alias ( @{$pkg->search_class_aliases->{$class}} ) { - $alias = qr/$alias/; - s/(^|[^|])\b$alias\|/$1$class\|/g; - s/(^|[^|])\b$alias[:=]/$1$class:/g; + my $aliasr = qr/$alias/; + s/(^|[^|])\b$aliasr\|/$1$class#$alias\|/g; + s/(^|[^|])\b$aliasr[:=]/$1$class#$alias:/g; + warn " *** Rewriting: $alias ($aliasr) as $class\n" if $self->debug; } if (!$seen_classes{$class}) { $search_class_re .= '|' unless ($first_class); $first_class = 0; - $search_class_re .= $class . '(?:\|\w+)*'; + $search_class_re .= $class . '(?:[|#][^:|]+)*'; $seen_classes{$class} = 1; } } $search_class_re .= '):'; + warn " ** Rewritten query: $_\n" if $self->debug; warn " ** Search class RE: $search_class_re\n" if $self->debug; my $required_re = $pkg->operator('required'); @@ -535,7 +541,7 @@ sub decompose { # Build the filter and modifier uber-regexps my $facet_re = '^\s*(-?)((?:' . join( '|', @{$pkg->facet_classes}) . 
')(?:\|\w+)*)\[(.+?)\]'; - warn " Facet RE: $facet_re\n" if $self->debug; + warn " ** Facet RE: $facet_re\n" if $self->debug; my $filter_re = '^\s*(-?)(' . join( '|', @{$pkg->filters}) . ')\(([^()]+)\)'; my $filter_as_class_re = '^\s*(-?)(' . join( '|', @{$pkg->filters}) . '):\s*(\S+)'; @@ -642,8 +648,7 @@ sub decompose { warn "Encountered class change: $1\n" if $self->debug; - $current_class = $1; - $struct->classed_node( $current_class ); + $current_class = $struct->classed_node( $1 )->requested_class(); $_ = $'; $last_type = 'CLASS'; @@ -762,6 +767,156 @@ sub superpage_size { } +#------------------------------- +package QueryParser::_util; + +# At this level, joiners are always & or |. This is not +# the external, configurable representation of joiners that +# defaults to && and ||. +sub is_joiner { + my $str = shift; + + return (not ref $str and ($str eq '&' or $str eq '|')); +} + +sub default_joiner { '&' } + +# 0 for different, 1 for the same. +sub compare_abstract_atoms { + my ($left, $right) = @_; + + foreach (qw/prefix suffix content/) { + no warnings; # undef can stand in for '' here + return 0 unless $left->{$_} eq $right->{$_}; + } + + return 1; +} + +sub fake_abstract_atom_from_phrase { + my ($phrase, $neg) = @_; + + my $prefix = '"'; + if ($neg) { + $prefix = + $QueryParser::parser_config{QueryParser}{operators}{disallowed} . + $prefix; + } + + return { + "type" => "atom", "prefix" => $prefix, "suffix" => '"', + "content" => $phrase + } +} + +sub find_arrays_in_abstract { + my ($hash) = @_; + + my @arrays; + foreach my $key (keys %$hash) { + if (ref $hash->{$key} eq "ARRAY") { + push @arrays, $hash->{$key}; + foreach (@{$hash->{$key}}) { + push @arrays, find_arrays_in_abstract($_); + } + } + } + + return @arrays; +} + +#------------------------------- +package QueryParser::Canonicalize; # not OO + +sub _abstract_query2str_filter { + my $f = shift; + my $qpconfig = $parser_config{QueryParser}; + + return sprintf( + "%s%s(%s)", + $f->{negate} ? 
$qpconfig->{operators}{disallowed} : "", + $f->{name}, + join(",", @{$f->{args}}) + ); +} + +sub _abstract_query2str_modifier { + my $f = shift; + my $qpconfig = $parser_config{QueryParser}; + + return $qpconfig->{operators}{modifier} . $f; +} + +# This should produce an equivalent query to the original, given an +# abstract_query. +sub abstract_query2str_impl { + my ($abstract_query, $depth) = @_; + + my $qpconfig = $parser_config{QueryParser}; + + my $gs = $qpconfig->{operators}{group_start}; + my $ge = $qpconfig->{operators}{group_end}; + my $and = $qpconfig->{operators}{and}; + my $or = $qpconfig->{operators}{or}; + + my $q = ""; + $q .= $gs if $abstract_query->{type} and $abstract_query->{type} eq "query_plan" and $depth; + + if (exists $abstract_query->{type}) { + if ($abstract_query->{type} eq 'query_plan') { + $q .= join(" ", map { _abstract_query2str_filter($_) } @{$abstract_query->{filters}}) if + exists $abstract_query->{filters}; + $q .= " "; + + $q .= join(" ", map { _abstract_query2str_modifier($_) } @{$abstract_query->{modifiers}}) if + exists $abstract_query->{modifiers}; + } elsif ($abstract_query->{type} eq 'node') { + if ($abstract_query->{alias}) { + $q .= " " . $abstract_query->{alias}; + $q .= "|$_" foreach @{$abstract_query->{alias_fields}}; + } else { + $q .= " " . $abstract_query->{class}; + $q .= "|$_" foreach @{$abstract_query->{fields}}; + } + $q .= ":"; + } elsif ($abstract_query->{type} eq 'atom') { + my $prefix = $abstract_query->{prefix} || ''; + $prefix = $qpconfig->{operators}{disallowed} if $prefix eq '!'; + $q .= $prefix . + ($abstract_query->{content} || '') . + ($abstract_query->{suffix} || ''); + } elsif ($abstract_query->{type} eq 'facet') { + # facet syntax [ # ] is hardcoded I guess? + my $prefix = $abstract_query->{negate} ? $qpconfig->{operators}{disallowed} : ''; + $q .= $prefix . $abstract_query->{name} . "[" . + join(" # ", @{$abstract_query->{values}}) . 
"]"; + } + } + + if (exists $abstract_query->{children}) { + my $op = (keys(%{$abstract_query->{children}}))[0]; + $q .= join( + " " . ($op eq '&' ? $and : $or) . " ", + map { + abstract_query2str_impl($_, $depth + 1) + } @{$abstract_query->{children}{$op}} + ); + } elsif ($abstract_query->{'&'} or $abstract_query->{'|'}) { + my $op = (keys(%{$abstract_query}))[0]; + $q .= join( + " " . ($op eq '&' ? $and : $or) . " ", + map { + abstract_query2str_impl($_, $depth + 1) + } @{$abstract_query->{$op}} + ); + } + $q .= " "; + + $q .= $ge if $abstract_query->{type} and $abstract_query->{type} eq "query_plan" and $depth; + + return $q; +} + #------------------------------- package QueryParser::query_plan; @@ -915,7 +1070,7 @@ sub add_modifier { my $modifier = shift; $self->{modifiers} ||= []; - return $self if (grep {$$_ eq $$modifier} @{$self->{modifiers}}); + $self->{modifiers} = [ grep {$_->name ne $modifier->name} @{$self->{modifiers}} ]; push(@{$self->{modifiers}}, $modifier); @@ -933,7 +1088,7 @@ sub add_facet { my $facet = shift; $self->{facets} ||= []; - return $self if (grep {$_->name eq $facet->name} @{$self->{facets}}); + $self->{facets} = [ grep {$_->name ne $facet->name} @{$self->{facets}} ]; push(@{$self->{facets}}, $facet); @@ -951,16 +1106,67 @@ sub add_filter { my $filter = shift; $self->{filters} ||= []; - return $self if (grep {$_->name eq $filter->name} @{$self->{filters}}); + $self->{filters} = [ grep {$_->name ne $filter->name} @{$self->{filters}} ]; push(@{$self->{filters}}, $filter); return $self; } +# %opts supports two options at this time: +# no_phrases : +# If true, do not do anything to the phrases and unphrases +# fields on any discovered nodes. +# with_config : +# If true, also return the query parser config as part of the blob. +# This will get set back to 0 before recursion to avoid repetition. 
+sub to_abstract_query { + my $self = shift; + my %opts = @_; + + my $pkg = ref $self->QueryParser || $self->QueryParser; + + my $abstract_query = { + type => "query_plan", + filters => [map { $_->to_abstract_query } @{$self->filters}], + modifiers => [map { $_->to_abstract_query } @{$self->modifiers}] + }; + + if ($opts{with_config}) { + $opts{with_config} = 0; + $abstract_query->{config} = $QueryParser::parser_config{$pkg}; + } + + my $kids = []; + + for my $qnode (@{$self->query_nodes}) { + # Remember: qnode can be a joiner string, a node, or another query_plan + + if (QueryParser::_util::is_joiner($qnode)) { + if ($abstract_query->{children}) { + my $open_joiner = (keys(%{$abstract_query->{children}}))[0]; + next if $open_joiner eq $qnode; + + my $oldroot = $abstract_query->{children}; + $kids = [$oldroot]; + $abstract_query->{children} = {$qnode => $kids}; + } else { + $abstract_query->{children} = {$qnode => $kids}; + } + } else { + push @$kids, $qnode->to_abstract_query(%opts); + } + } + + $abstract_query->{children} ||= { QueryParser::_util::default_joiner() => $kids }; + return $abstract_query; +} + #------------------------------- package QueryParser::query_plan::node; +use Data::Dumper; +$Data::Dumper::Indent = 0; sub new { my $pkg = shift; @@ -976,17 +1182,31 @@ sub new_atom { return do{$pkg.'::atom'}->new( @_ ); } -sub requested_class { # also split into classname and fields +sub requested_class { # also split into classname, fields and alias my $self = shift; my $class = shift; if ($class) { + my @afields; + my (undef, $alias) = split '#', $class; + if ($alias) { + $class =~ s/#[^|]+//; + ($alias, @afields) = split '\|', $alias; + } + + my @fields = @afields; my ($class_part, @field_parts) = split '\|', $class; + for my $f (@field_parts) { + push(@fields, $f) unless (grep { $f eq $_ } @fields); + } + $class_part ||= $class; $self->{requested_class} = $class; + $self->{alias} = $alias if $alias; + $self->{alias_fields} = \@afields if $alias; 
$self->{classname} = $class_part; - $self->{fields} = \@field_parts; + $self->{fields} = \@fields; } return $self->{requested_class}; @@ -1000,6 +1220,22 @@ sub plan { return $self->{plan}; } +sub alias { + my $self = shift; + my $alias = shift; + + $self->{alias} = $alias if ($alias); + return $self->{alias}; +} + +sub alias_fields { + my $self = shift; + my $alias = shift; + + $self->{alias_fields} = $alias if ($alias); + return $self->{alias_fields}; +} + sub classname { my $self = shift; my $class = shift; @@ -1091,6 +1327,144 @@ sub add_dummy_atom { return $self; } +# This will find up to one occurrence of @$short_list within @$long_list, and +# replace it with the single atom $replacement. +sub replace_phrase_in_abstract_query { + my ($self, $short_list, $long_list, $replacement) = @_; + + my $success = 0; + my @already = (); + my $goal = scalar @$short_list; + + for (my $i = 0; $i < scalar (@$long_list); $i++) { + my $right = $long_list->[$i]; + + if (QueryParser::_util::compare_abstract_atoms( + $short_list->[scalar @already], $right + )) { + push @already, $i; + } elsif (scalar @already) { + @already = (); + next; + } + + if (scalar @already == $goal) { + splice @$long_list, $already[0], scalar(@already), $replacement; + $success = 1; + last; + } + } + + return $success; +} + +sub to_abstract_query { + my $self = shift; + my %opts = @_; + + my $pkg = ref $self->plan->QueryParser || $self->plan->QueryParser; + + my $abstract_query = { + "type" => "node", + "alias" => $self->alias, + "alias_fields" => $self->alias_fields, + "class" => $self->classname, + "fields" => $self->fields + }; + + my $kids = []; + + for my $qatom (@{$self->query_atoms}) { + if (QueryParser::_util::is_joiner($qatom)) { + if ($abstract_query->{children}) { + my $open_joiner = (keys(%{$abstract_query->{children}}))[0]; + next if $open_joiner eq $qatom; + + my $oldroot = $abstract_query->{children}; + $kids = [$oldroot]; + $abstract_query->{children} = {$qatom => $kids}; + } else { + 
$abstract_query->{children} = {$qatom => $kids}; + } + } else { + push @$kids, $qatom->to_abstract_query; + } + } + + if ($self->{phrases} and not $opts{no_phrases}) { + for my $phrase (@{$self->{phrases}}) { + # Phrases appear duplicated in a real QP tree, and we don't want + # that duplication in our abstract query. So for all our phrases, + # break them into atoms as QP would, and remove any matching + # sequences of atoms from our abstract query. + + my $tmptree = $self->{plan}->{QueryParser}->new(query => '"'.$phrase.'"')->parse->parse_tree; + if ($tmptree) { + # For a well-behaved phrase, we should now have only one node + # in the $tmptree query plan, and that node should have an + # orderly list of atoms and joiners. + + if ($tmptree->{query} and scalar(@{$tmptree->{query}}) == 1) { + my $tmplist; + + eval { + $tmplist = $tmptree->{query}->[0]->to_abstract_query( + no_phrases => 1 + )->{children}->{'&'}->[0]->{children}->{'&'}; + }; + next if $@; + + foreach ( + QueryParser::_util::find_arrays_in_abstract($abstract_query->{children}) + ) { + last if $self->replace_phrase_in_abstract_query( + $tmplist, + $_, + QueryParser::_util::fake_abstract_atom_from_phrase($phrase) + ); + } + } + } + } + } + + # Do the same as the preceding block for unphrases (negated phrases). + if ($self->{unphrases} and not $opts{no_phrases}) { + for my $phrase (@{$self->{unphrases}}) { + my $tmptree = $self->{plan}->{QueryParser}->new( + query => $QueryParser::parser_config{$pkg}{operators}{disallowed}. + '"' . $phrase . 
'"' + )->parse->parse_tree; + + if ($tmptree) { + if ($tmptree->{query} and scalar(@{$tmptree->{query}}) == 1) { + my $tmplist; + + eval { + $tmplist = $tmptree->{query}->[0]->to_abstract_query( + no_phrases => 1 + )->{children}->{'&'}->[0]->{children}->{'&'}; + }; + next if $@; + + foreach ( + QueryParser::_util::find_arrays_in_abstract($abstract_query->{children}) + ) { + last if $self->replace_phrase_in_abstract_query( + $tmplist, + $_, + QueryParser::_util::fake_abstract_atom_from_phrase($phrase, 1) + ); + } + } + } + } + } + + $abstract_query->{children} ||= { QueryParser::_util::default_joiner() => $kids }; + return $abstract_query; +} + #------------------------------- package QueryParser::query_plan::node::atom; @@ -1126,6 +1500,14 @@ sub suffix { return $self->{suffix}; } +sub to_abstract_query { + my ($self) = @_; + + return { + (map { $_ => $self->$_ } qw/prefix suffix content/), + "type" => "atom" + }; +} #------------------------------- package QueryParser::query_plan::filter; @@ -1157,6 +1539,14 @@ sub args { return $self->{args}; } +sub to_abstract_query { + my ($self) = @_; + + return { + map { $_ => $self->$_ } qw/name negate args/ + }; +} + #------------------------------- package QueryParser::query_plan::facet; @@ -1188,6 +1578,15 @@ sub values { return $self->{'values'}; } +sub to_abstract_query { + my ($self) = @_; + + return { + (map { $_ => $self->$_ } qw/name negate values/), + "type" => "facet" + }; +} + #------------------------------- package QueryParser::query_plan::modifier; @@ -1195,14 +1594,25 @@ sub new { my $pkg = shift; $pkg = ref($pkg) || $pkg; my $modifier = shift; + my $negate = shift; - return bless \$modifier => $pkg; + return bless { name => $modifier, negate => $negate } => $pkg; } sub name { my $self = shift; - return $$self; + return $self->{name}; +} + +sub negate { + my $self = shift; + return $self->{negate}; } +sub to_abstract_query { + my ($self) = @_; + + return $self->name; +} 1; -- 2.43.2