1 package OpenILS::Application::Storage::Driver::Pg::QueryParser;
2 use OpenILS::Application::Storage::QueryParser;
3 use base 'QueryParser';
4 use OpenSRF::Utils::JSON;
# Get/set accessor for the default preferred language held in custom_data.
# When $lang is true it is stored under the default_preferred_language key;
# the stored value is always returned. A false value (undef, '', 0) never
# overwrites the current setting.
# NOTE(review): $self and $lang are presumably unpacked on lines not visible here.
11 sub default_preferred_language {
15 $self->custom_data->{default_preferred_language} = $lang if ($lang);
16 return $self->custom_data->{default_preferred_language};
# Get/set accessor for the default preferred-language multiplier held in
# custom_data. The incoming value is carried in a variable named $lang even
# though it is stored under the default_preferred_language_multiplier key.
# Because the assignment is guarded by truthiness, a multiplier of 0 cannot
# be stored through this accessor.
19 sub default_preferred_language_multiplier {
23 $self->custom_data->{default_preferred_language_multiplier} = $lang if ($lang);
24 return $self->custom_data->{default_preferred_language_multiplier};
# Series of early exits over the parse tree. Returns 0 when:
#   - there is no parse tree,
#   - the tree carries any filters or any modifiers,
#   - a plain (non-reference) '|' entry appears among the query nodes,
#   - any query node is itself a QueryParser::query_plan (a nested plan).
# NOTE(review): the enclosing sub header and whatever is returned when none
# of these checks fire are not visible in this view.
30 return 0 unless $self->parse_tree;
31 return 0 if @{$self->parse_tree->filters};
32 return 0 if @{$self->parse_tree->modifiers};
33 for my $node ( @{ $self->parse_tree->query_nodes } ) {
# Non-reference entries are plain strings; references are node/plan objects.
34 return 0 if (!ref($node) && $node eq '|');
35 next unless (ref($node));
36 return 0 if ($node->isa('QueryParser::query_plan'));
# Delegates SQL generation to the parse tree and returns its toSQL result.
# NOTE(review): the header of the sub this line belongs to is not visible here.
44 return $self->parse_tree->toSQL;
# Accessor body for the field id map stored in custom_data: makes sure the
# slot holds a hashref, replaces it wholesale when a true $map is supplied,
# and returns the current map.
# NOTE(review): the sub header and the unpacking of $map are not visible here.
51 $self->custom_data->{field_id_map} ||= {};
52 $self->custom_data->{field_id_map} = $map if ($map);
53 return $self->custom_data->{field_id_map};
# Registers one search field and indexes it in the field id map two ways:
#   by_id    : id            -> { classname, field, weight }
#   by_class : class, field  -> id
# The field is also announced through add_search_field( class => field ).
# NOTE(review): the last two lines below look like the interior of a hash
# literal (likely the return value) whose opening and closing lines are not
# visible in this view; argument unpacking is likewise not shown.
56 sub add_field_id_map {
63 $self->add_search_field( $class => $field );
64 $self->field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight };
65 $self->field_id_map->{by_class}{$class}{$field} = $id;
68 by_id => { $id => { classname => $class, field => $field, weight => $weight } },
69 by_class => { $class => { $field => $id } }
# Looks up a field by numeric id in the by_id half of the field id map and
# returns the stored entry (a hashref with classname, field and weight as
# written by add_field_id_map), or undef when the id is unknown.
73 sub field_class_by_id {
77 return $self->field_id_map->{by_id}{$id};
# Returns field ids from the by_class half of the field id map as an array
# reference: a single-element list for one class/field pair, or every id
# registered under the class. Returns undef when no class is given.
# NOTE(review): the condition choosing between the two array-returning
# branches is on lines not visible here (presumably whether $field was given).
80 sub field_ids_by_class {
85 return undef unless ($class);
88 return [$self->field_id_map->{by_class}{$class}{$field}];
91 return [values( %{ $self->field_id_map->{by_class}{$class} } )];
# Accessor body for the relevance bump table stored in custom_data under
# rel_bumps: ensures a hashref exists, replaces it when a true $bumps is
# supplied, and returns the current table.
# NOTE(review): the sub header and the unpacking of $bumps are not visible here.
98 $self->custom_data->{rel_bumps} ||= {};
99 $self->custom_data->{rel_bumps} = $bumps if ($bumps);
100 return $self->custom_data->{rel_bumps};
# Returns the bump definitions registered for one class/field pair: the
# value stored at relevance_bumps->{class}{field} (a hashref keyed by bump
# type when bumps were added via add_relevance_bump), or undef if none.
103 sub find_relevance_bumps {
108 return $self->relevance_bumps->{$class}{$field};
# Registers a relevance bump of a given type for a class/field pair, storing
# { multiplier, active } under relevance_bumps->{class}{field}{type}.
# $active defaults to 1 when it is not defined. Returns a nested hashref
# describing just the entry that was added.
# NOTE(review): unpacking of the other arguments is on lines not visible here.
111 sub add_relevance_bump {
116 my $multiplier = shift;
119 $active = 1 if (!defined($active));
121 $self->relevance_bumps->{$class}{$field}{$type} = { multiplier => $multiplier, active => $active };
123 return { $class => { $field => { $type => { multiplier => $multiplier, active => $active } } } };
# Populates the field id map from a list of field objects. Each element must
# answer field_class, field, id and weight; those four values are passed to
# add_field_id_map in that order. Returns the resulting field id map.
127 sub initialize_field_id_map {
129 my $cmf_list = shift;
131 for my $cmf (@$cmf_list) {
132 $self->add_field_id_map( $cmf->field_class, $cmf->field, $cmf->id, $cmf->weight );
135 return $self->field_id_map;
# Populates the relevance bump table from a list of adjustment objects. Each
# element's field id is resolved to its class/field via field_class_by_id,
# then registered with its bump_type and multiplier. Returns the bump table.
# No active flag is passed along, so add_relevance_bump applies its default.
# NOTE(review): an id missing from the field id map makes $c undef here;
# there is no visible guard for that case — confirm the inputs always match.
138 sub initialize_relevance_bumps {
140 my $sra_list = shift;
142 for my $sra (@$sra_list) {
143 my $c = $self->field_class_by_id( $sra->field );
144 $self->add_relevance_bump( $c->{classname}, $c->{field}, $sra->bump_type, $sra->multiplier );
147 return $self->relevance_bumps;
# Registers query normalizers from a list of field/normalizer mapping
# objects. For each entry the field id is resolved to class/field via
# field_class_by_id, and add_query_normalizer is called with the normalizer's
# function name (norm->func) and the entry's params decoded from JSON.
# The trailing comment on the $tree line records the cstore call that is
# expected to produce this list (fleshed with norm, ordered by pos).
150 sub initialize_normalizers {
152 my $tree = shift; # open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic { "id" : { "!=" : null } }, { "flesh" : 1, "flesh_fields" : { "cmfinm" : ["norm"] }, "order_by" : [{ "class" : "cmfinm", "field" : "pos" }] }
154 for my $cmfinm ( @$tree ) {
155 my $field_info = $self->field_class_by_id( $cmfinm->field );
156 $self->add_query_normalizer( $field_info->{classname}, $field_info->{field}, $cmfinm->norm->func, OpenSRF::Utils::JSON->JSON2perl($cmfinm->params) );
# Initialization bookkeeping.
# NOTE(review): only the header of initialization_complete is visible; the
# statements that follow may belong to a separate initializer sub whose
# header is not shown in this view.
161 sub initialization_complete {
# Already initialized: hand back the recorded completion flag and do nothing.
169 return $_complete if ($_complete);
# Each data set is loaded only when the caller supplied it in %args.
171 $self->initialize_field_id_map( $args{config_metabib_field} )
172 if ($args{config_metabib_field});
174 $self->initialize_relevance_bumps( $args{search_relevance_adjustment} )
175 if ($args{search_relevance_adjustment});
177 $self->initialize_normalizers( $args{config_metabib_field_index_norm_map} )
178 if ($args{config_metabib_field_index_norm_map});
# Conjunction of all three inputs: true only when every data set was supplied.
181 $args{config_metabib_field_index_norm_map} &&
182 $args{search_relevance_adjustment} &&
183 $args{config_metabib_field}
# Hard-coded field registrations. Each group maps a class/field pair to its
# numeric field id and weight (add_field_id_map: class, field, id, weight)
# and then attaches relevance bumps (add_relevance_bump: class, field,
# bump type, multiplier). Some groups also attach query normalizers.

# series|seriestitle (id 1)
191 __PACKAGE__->add_field_id_map( series => seriestitle => 1 => 1 );
192 __PACKAGE__->add_relevance_bump( series => seriestitle => first_word => 1.5 );
193 __PACKAGE__->add_relevance_bump( series => seriestitle => full_match => 20 );
# title|abbreviated (id 2)
195 __PACKAGE__->add_field_id_map( title => abbreviated => 2 => 1 );
196 __PACKAGE__->add_relevance_bump( title => abbreviated => first_word => 1.5 );
197 __PACKAGE__->add_relevance_bump( title => abbreviated => full_match => 20 );
# title|translated (id 3)
199 __PACKAGE__->add_field_id_map( title => translated => 3 => 1 );
200 __PACKAGE__->add_relevance_bump( title => translated => first_word => 1.5 );
201 __PACKAGE__->add_relevance_bump( title => translated => full_match => 20 );
# title|proper (id 6) — the only field here with a word_order bump
203 __PACKAGE__->add_field_id_map( title => proper => 6 => 1 );
204 __PACKAGE__->add_query_normalizer( title => proper => 'naco_normalize' );
205 __PACKAGE__->add_relevance_bump( title => proper => first_word => 1.5 );
206 __PACKAGE__->add_relevance_bump( title => proper => full_match => 20 );
207 __PACKAGE__->add_relevance_bump( title => proper => word_order => 10 );
# author|corporate (id 7). The field name was previously misspelled
# "coporate", which did not match the author => corporate normalizer
# registered elsewhere in this file, so the bumps and the normalizer were
# attached to two different field names.
209 __PACKAGE__->add_field_id_map( author => corporate => 7 => 1 );
210 __PACKAGE__->add_relevance_bump( author => corporate => first_word => 1.5 );
211 __PACKAGE__->add_relevance_bump( author => corporate => full_match => 20 );
# author|personal (id 8)
213 __PACKAGE__->add_field_id_map( author => personal => 8 => 1 );
214 __PACKAGE__->add_relevance_bump( author => personal => first_word => 1.5 );
215 __PACKAGE__->add_relevance_bump( author => personal => full_match => 20 );
216 __PACKAGE__->add_query_normalizer( author => personal => 'naco_normalize' );
217 __PACKAGE__->add_query_normalizer( author => personal => 'split_date_range' );
# subject|topic (id 14) — multiplier 1 leaves the rank unchanged
219 __PACKAGE__->add_field_id_map( subject => topic => 14 => 1 );
220 __PACKAGE__->add_relevance_bump( subject => topic => first_word => 1 );
221 __PACKAGE__->add_relevance_bump( subject => topic => full_match => 1 );
# subject|complete (id 16)
223 __PACKAGE__->add_field_id_map( subject => complete => 16 => 1 );
224 __PACKAGE__->add_relevance_bump( subject => complete => first_word => 1 );
225 __PACKAGE__->add_relevance_bump( subject => complete => full_match => 1 );
# keyword|keyword (id 15)
227 __PACKAGE__->add_field_id_map( keyword => keyword => 15 => 1 );
228 __PACKAGE__->add_relevance_bump( keyword => keyword => first_word => 1 );
229 __PACKAGE__->add_relevance_bump( keyword => keyword => full_match => 1 );
# Search class aliases: add_search_class_alias( class => alias ). Several
# aliases may point at the same class; one subject alias is written as a
# regular-expression fragment covering three bib.subject* names.
232 __PACKAGE__->add_search_class_alias( keyword => 'kw' );
233 __PACKAGE__->add_search_class_alias( title => 'ti' );
234 __PACKAGE__->add_search_class_alias( author => 'au' );
235 __PACKAGE__->add_search_class_alias( author => 'name' );
236 __PACKAGE__->add_search_class_alias( author => 'dc.contributor' );
237 __PACKAGE__->add_search_class_alias( subject => 'su' );
238 __PACKAGE__->add_search_class_alias( subject => 'bib.subject(?:Title|Place|Occupation)' );
239 __PACKAGE__->add_search_class_alias( series => 'se' );
240 __PACKAGE__->add_search_class_alias( keyword => 'dc.identifier' );
# Additional query normalizers for class/field pairs.
242 __PACKAGE__->add_query_normalizer( author => corporate => 'naco_normalize' );
243 __PACKAGE__->add_query_normalizer( keyword => keyword => 'naco_normalize' );
# Field-level alias: 'bib.subjectName' refers to the name field of subject.
245 __PACKAGE__->add_search_field_alias( subject => name => 'bib.subjectName' );
# Class used when a query does not name one.
249 __PACKAGE__->default_search_class( 'keyword' );
# Names accepted as search filters. The SQL generator in the query_plan
# package looks several of these up by name via find_filter.

# Record-descriptor, scoping and sorting filters.
251 __PACKAGE__->add_search_filter( 'audience' );
252 __PACKAGE__->add_search_filter( 'vr_format' );
253 __PACKAGE__->add_search_filter( 'format' );
254 __PACKAGE__->add_search_filter( 'item_type' );
255 __PACKAGE__->add_search_filter( 'item_form' );
256 __PACKAGE__->add_search_filter( 'lit_form' );
257 __PACKAGE__->add_search_filter( 'location' );
258 __PACKAGE__->add_search_filter( 'site' );
259 __PACKAGE__->add_search_filter( 'lasso' );
260 __PACKAGE__->add_search_filter( 'my_lasso' );
261 __PACKAGE__->add_search_filter( 'depth' );
262 __PACKAGE__->add_search_filter( 'sort' );
263 __PACKAGE__->add_search_filter( 'language' );
# Preferred-language tuning; the SQL generator treats
# preferred_language_weight as a synonym for preferred_language_multiplier.
264 __PACKAGE__->add_search_filter( 'preferred_language' );
265 __PACKAGE__->add_search_filter( 'preferred_language_weight' );
266 __PACKAGE__->add_search_filter( 'preferred_language_multiplier' );
267 __PACKAGE__->add_search_filter( 'statuses' );
268 __PACKAGE__->add_search_filter( 'bib_level' );
# Date filters.
269 __PACKAGE__->add_search_filter( 'before' );
270 __PACKAGE__->add_search_filter( 'after' );
271 __PACKAGE__->add_search_filter( 'between' );
272 __PACKAGE__->add_search_filter( 'during' );
# Paging and limit controls.
273 __PACKAGE__->add_search_filter( 'offset' );
274 __PACKAGE__->add_search_filter( 'limit' );
275 __PACKAGE__->add_search_filter( 'core_limit' );
276 __PACKAGE__->add_search_filter( 'check_limit' );
277 __PACKAGE__->add_search_filter( 'skip_check' );
278 __PACKAGE__->add_search_filter( 'superpage' );
279 __PACKAGE__->add_search_filter( 'superpage_size' );
280 __PACKAGE__->add_search_filter( 'estimation_strategy' );
# Names accepted as search modifiers. The SQL generator in the query_plan
# package checks for 'descending' and 'metarecord' among a plan's modifiers.
282 __PACKAGE__->add_search_modifier( 'available' );
283 __PACKAGE__->add_search_modifier( 'descending' );
284 __PACKAGE__->add_search_modifier( 'ascending' );
285 __PACKAGE__->add_search_modifier( 'metarecord' );
286 __PACKAGE__->add_search_modifier( 'metabib' );
287 __PACKAGE__->add_search_modifier( 'staff' );
290 #-------------------------------
291 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan;
292 use base 'QueryParser::query_plan';
# Body of the query plan's SQL generator (the sub header and several short
# lines are not visible in this view). It gathers filters and modifiers from
# the plan, flattens the query nodes into rank/FROM/WHERE pieces, picks a
# sort expression and direction, and assembles a SELECT over
# metabib.metarecord_source_map joined to metabib.rec_descriptor.
#
# Filter arguments are embedded in the SQL as PostgreSQL dollar-quoted
# strings; the quote tag is built from "$_", the Perl process id ($$) and "$".
# NOTE(review): safety of these literals relies on arguments never containing
# that tag — confirm that inputs are trusted or sanitized upstream.

# A 'format' filter is shorthand: its first argument is split on '-' into an
# item-type part and an item-form part, and each part is split into single
# characters that become the arguments of a new item_type / item_form filter.
298 my ($format) = $self->find_filter('format');
300 my ($t,$f) = split('-', $format->args->[0]);
301 $self->new_filter( item_type => [ split '', $t ] ) if ($t);
302 $self->new_filter( item_form => [ split '', $f ] ) if ($f);
# Collect the first argument of each tuning filter into %filters.
# preferred_language_weight is stored under the preferred_language_multiplier key.
305 for my $f ( qw/preferred_language preferred_language_multiplier preferred_language_weight core_limit check_limit skip_check superpage superpage_size/ ) {
307 $col = 'preferred_language_multiplier' if ($f eq 'preferred_language_weight');
308 my ($filter) = $self->find_filter($f);
309 if ($filter and @{$filter->args}) {
310 $filters{$col} = $filter->args->[0];
# Push paging and limit settings to the parser object, where the plan
# flattening step and the paging code below read them back.
314 $self->QueryParser->superpage($filters{superpage}) if ($filters{superpage});
315 $self->QueryParser->superpage_size($filters{superpage_size}) if ($filters{superpage_size});
316 $self->QueryParser->core_limit($filters{core_limit}) if ($filters{core_limit});
# Flatten the plan into rank_list / from / where pieces.
318 my $flat_plan = $self->flatten;
320 # generate the relevance ranking
321 my $rel = "AVG(\n\t\t(" . join(")+\n\t\t(", @{$$flat_plan{rank_list}}) . ")\n\t)";
323 # find any supplied sort option
324 my ($sort_filter) = $self->find_filter('sort');
326 $sort_filter = $sort_filter->args->[0];
328 $sort_filter = 'rel';
# Preferred-language weighting: applied only when both a language and a
# multiplier are available, each taken from the filter or else from the
# parser-level default. Matching records get their relevance multiplied.
331 if (($filters{preferred_language} || $self->QueryParser->default_preferred_language) && ($filters{preferred_language_multiplier} || $self->QueryParser->default_preferred_language_multiplier)) {
332 $rel = "($rel) * CASE WHEN mrd.lang = \$_$$\$";
333 $rel .= $filters{preferred_language} ? $filters{preferred_language} : $self->QueryParser->default_preferred_language;
334 $rel .= "\$_$$\$ THEN ";
335 $rel .= $filters{preferred_language_multiplier} ? $filters{preferred_language_multiplier} : $self->QueryParser->default_preferred_language_multiplier;
336 $rel .= " ELSE 1 END";
# Record-descriptor filters become "AND mrd.<col> in (...)" fragments; the
# 'language' filter is checked against the item_lang column.
339 for my $f ( qw/audience vr_format item_type item_form lit_form language bib_level/ ) {
341 $col = 'item_lang' if ($f eq 'language');
343 my ($filter) = $self->find_filter($f);
345 $filters{$f} = "AND mrd.$col in (\$_$$\$" . join("\$_$$\$,\$_$$\$",@{$filter->args}) . "\$_$$\$)";
# Local copies of the descriptor fragments (each may be undef when the
# corresponding filter was not supplied).
349 my $audience = $filters{audience};
350 my $vr_format = $filters{vr_format};
351 my $item_type = $filters{item_type};
352 my $item_form = $filters{item_form};
353 my $lit_form = $filters{lit_form};
354 my $language = $filters{language};
355 my $bib_level = $filters{bib_level};
# Sort direction: the 'descending' modifier selects DESC.
360 $desc = 'DESC' if ($self->find_modifier('descending'));
362 if ($sort_filter eq 'rel') { # relevance ranking flips sort dir
363 if ($desc eq 'ASC') {
# Choose the ranking expression for the requested sort key. For title and
# author sorts, $default is ' ' when descending and 'zzzzzz' otherwise.
# NOTE(review): how $default is used is on lines not visible here.
369 if ($sort_filter eq 'title') {
370 my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz';
373 SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\\\d+'),'0')::INT + 1 ))
374 FROM metabib.full_rec frt
375 WHERE frt.record = m.source
377 AND frt.subfield = 'a'
381 } elsif ($sort_filter eq 'pubdate') {
382 $rank = "COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT";
383 } elsif ($sort_filter eq 'create_date') {
384 $rank = "( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )";
385 } elsif ($sort_filter eq 'edit_date') {
386 $rank = "( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )";
387 } elsif ($sort_filter eq 'author') {
388 my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz';
391 SELECT LTRIM(fra.value)
392 FROM metabib.full_rec fra
393 WHERE fra.record = m.source
394 AND fra.tag LIKE '1%'
395 AND fra.subfield = 'a'
396 ORDER BY fra.tag::text::int
401 # default to rel ranking
# Key column: m.source by default, m.metarecord when the plan carries the
# 'metarecord' modifier.
406 my $key = 'm.source';
407 $key = 'm.metarecord' if (grep {$_->name eq 'metarecord'} @{$self->modifiers});
# Superpage paging: page size defaults to 1000 and page number to 1; the
# OFFSET is (page - 1) * size.
409 my $sp_size = $self->QueryParser->superpage_size || 1000;
410 my $sp = $self->QueryParser->superpage || 1;
414 $offset = 'OFFSET ' . ($sp - 1) * $sp_size;
# Date filters. Each filter object is replaced by an SQL fragment when it has
# the expected number of arguments (one, or two for 'between').
# NOTE(review): what happens otherwise is on lines not visible here.
417 my ($before) = $self->find_filter('before');
418 my ($after) = $self->find_filter('after');
419 my ($during) = $self->find_filter('during');
420 my ($between) = $self->find_filter('between');
422 if ($before and @{$before->args} == 1) {
423 $before = "AND mrd.date1 <= \$_$$\$" . $before->args->[0] . "\$_$$\$";
428 if ($after and @{$after->args} == 1) {
429 $after = "AND mrd.date1 >= \$_$$\$" . $after->args->[0] . "\$_$$\$";
434 if ($during and @{$during->args} == 1) {
435 $during = "AND \$_$$\$" . $during->args->[0] . "\$_$$\$ BETWEEN mrd.date1 AND mrd.date2";
440 if ($between and @{$between->args} == 2) {
441 $between = "AND mrd.date1 BETWEEN \$_$$\$" . $between->args->[0] . "\$_$$\$ AND \$_$$\$" . $between->args->[1] . "\$_$$\$";
448 ARRAY_ACCUM(DISTINCT m.source) AS records,
451 COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT AS tie_break
452 FROM metabib.metarecord_source_map m
453 JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)
467 AND $$flat_plan{where}
469 ORDER BY 4 $desc, 5 DESC
# Body of the relevance-bump SQL builder (sub header and the unpacking of
# $node and $bump are not visible in this view). For a node and a bump type
# it returns a "CASE WHEN ... THEN <multiplier> ELSE 1 END" expression that
# compares the naco_normalize()d value of the node's table alias against the
# node's atoms:
#   first_word : value starts with the first atom
#   full_match : value matches the atoms joined with '%' wildcards
#   word_order : value contains the atoms, in order, joined with '%'
# Returns '' when the node has no atoms. Atom content is embedded as a
# dollar-quoted literal whose tag includes the Perl process id.
481 my $multiplier = shift;
483 my $only_atoms = $node->only_atoms;
484 return '' if (!@$only_atoms);
486 if ($bump eq 'first_word') {
487 return "/* first_word */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
488 "LIKE naco_normalize(\$_$$\$".$only_atoms->[0]->content."\$_$$\$) \|\| '\%' ".
489 "THEN $multiplier ELSE 1 END";
490 } elsif ($bump eq 'full_match') {
491 return "/* full_match */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
492 "LIKE". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) .
493 "THEN $multiplier ELSE 1 END";
494 } elsif ($bump eq 'word_order') {
495 return "/* word_order */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
496 "LIKE '\%'||". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) . '||\'%\' '.
497 "THEN $multiplier ELSE 1 END";
# Body of the plan-flattening routine (the sub header and several short
# lines are not visible in this view). Walks the plan's query nodes and
# accumulates three things:
#   rank_list : one rank expression per search node
#   from      : join text for the FROM clause
#   where     : boolean text for the WHERE clause
# $from and $where may be seeded by the caller; each defaults to ''.
# Returns a hashref with the keys rank_list, from and where.
506 my $from = shift || '';
507 my $where = shift || '';
510 for my $node ( @{$self->query_nodes} ) {
# Leaf search node: join a limited subselect over the node's entry table.
512 if ($node->isa( 'QueryParser::query_plan::node' )) {
514 my $table = $node->table;
515 my $talias = $node->table_alias;
# Base rank is the node's rank scaled by the weight of the matched field.
517 my $node_rank = $node->rank . " * ${talias}_weight.weight";
519 $from .= "\n\tLEFT JOIN (\n\t\tSELECT *\n\t\t FROM $table\n\t\t WHERE index_vector @@ (" .$node->tsquery . ')';
# Explicit field list: restrict the subselect to those fields of the class;
# otherwise every search field known for the class is considered for bumps.
522 if (@{$node->fields} > 0) {
523 @bump_fields = @{$node->fields};
524 $from .= "\n\t\t\tAND field IN (SELECT id FROM config.metabib_field WHERE field_class = \$_$$\$". $node->classname ."\$_$$\$ AND name IN (";
525 $from .= "\$_$$\$" . join("\$_$$\$,\$_$$\$", @{$node->fields}) . "\$_$$\$))";
528 @bump_fields = @{$self->QueryParser->search_fields->{$node->classname}};
# Multiply in each active relevance bump; a bump type already recorded in
# %used_bumps is skipped.
532 for my $field ( @bump_fields ) {
533 my $bumps = $self->QueryParser->find_relevance_bumps( $node->classname => $field );
534 for my $b (keys %$bumps) {
535 next if (!$$bumps{$b}{active});
536 next if ($used_bumps{$b});
539 my $bump_case = $self->rel_bump( $node, $b, $$bumps{$b}{multiplier} );
540 $node_rank .= "\n\t\t\t\t * " . $bump_case if ($bump_case);
# Cap the subselect. The fallback of 25000 is what goes into the SQL, so the
# LIMIT clause always has a value even when no core_limit is configured.
544 my $core_limit = $self->QueryParser->core_limit || 25000;
545 $from .= "\n\t\tLIMIT " . $core_limit . "\n\t) AS " . $node->table_alias . ' ON (m.source = ' . $node->table_alias . ".source)";
546 $from .= "\n\tJOIN config.metabib_field AS ${talias}_weight ON (${talias}_weight.id = $talias.field)\n";
# A row counts as a hit when the LEFT JOIN produced a match for this node.
548 $where .= $node->table_alias . ".id IS NOT NULL ";
550 push @rank_list, $node_rank;
# Nested plan: flatten it recursively and splice its pieces in, wrapping its
# WHERE text in parentheses.
553 my $subnode = $node->flatten;
555 push(@rank_list, @{$$subnode{rank_list}});
556 $from .= $$subnode{from};
557 $where .= "($$subnode{where})";
# Plain '&' and '|' entries become the boolean joiners.
560 $where .= ' AND ' if ($node eq '&');
561 $where .= ' OR ' if ($node eq '|');
562 # ... stitching the WHERE together ...
566 return { rank_list => \@rank_list, from => $from, where => $where };
571 #-------------------------------
572 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::filter;
573 use base 'QueryParser::query_plan::filter';
575 #-------------------------------
576 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::modifier;
577 use base 'QueryParser::query_plan::modifier';
579 #-------------------------------
580 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node::atom;
581 use base 'QueryParser::query_plan::node::atom';
# Accessor body for an atom's cached SQL: stores $sql when a true value is
# given, returns the cached value if present, and otherwise falls back to
# buildSQL to generate it.
# NOTE(review): the sub header and the unpacking of $sql are not visible here.
587 $self->{sql} = $sql if ($sql);
589 return $self->{sql} if ($self->{sql});
590 return $self->buildSQL;
# Body of the atom's SQL builder (the sub header and a few short lines are
# not visible in this view). Steps:
#   1. Gather the query normalizers registered for the node's fields (or for
#      every search field of the class when the node names none), skipping a
#      normalizer that is already in the list.
#   2. Wrap the atom content in a dollar-quoted literal and nest it inside one
#      function call per normalizer, passing the normalizer's params as extra
#      dollar-quoted arguments.
#   3. Build a to_tsquery() expression for the class: whitespace runs and ':'
#      in the normalized text are replaced with '<suffix_op>&', and the atom's
#      prefix and suffix are attached around the parenthesized result.
# The result is stored and returned through $self->sql.
596 my $classname = $self->node->classname;
598 my $normalizers = $self->node->plan->QueryParser->query_normalizers( $classname );
599 my $fields = $self->node->fields;
601 $fields = $self->node->plan->QueryParser->search_fields->{$classname} if (!@$fields);
604 for my $field (@$fields) {
605 for my $nfield (keys %$normalizers) {
606 for my $nizer ( @{$$normalizers{$nfield}} ) {
607 push(@norm_list, $nizer) if ($field eq $nfield && !(grep {$_ eq $nizer} @norm_list));
612 my $sql = "\$_$$\$" . $self->content . "\$_$$\$";
614 for my $n ( @norm_list ) {
615 $sql = join(', ', $sql, map { "\$_$$\$" . $_ . "\$_$$\$" } @{ $n->{params} });
616 $sql = $n->{function}."($sql)";
619 my $prefix = $self->prefix || '';
620 my $suffix = $self->suffix || '';
622 $prefix = "'$prefix' ||" if $prefix;
# Both suffix pieces are always initialized. Declaring a lexical with "my"
# under a statement-modifier "if" has undefined behaviour in Perl and can
# leave a stale or undef value behind when the condition is false.
623 my $suffix_op = $suffix ? ":$suffix" : '';
624 my $suffix_after = $suffix ? "|| '$suffix_op'" : '';
626 $sql = "to_tsquery('$classname', $prefix '(' || regexp_replace($sql,E'(?:\\\\s+|:)','$suffix_op&','g') $suffix_after || ')')";
628 return $self->sql($sql);
631 #-------------------------------
632 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node;
633 use base 'QueryParser::query_plan::node';
# Body of the atom filter (sub header and return are not visible in this
# view): walks the node's query_atoms and collects into @only_atoms the
# entries that are QueryParser::query_plan::node::atom objects, leaving out
# non-reference entries.
638 my $atoms = $self->query_atoms;
640 for my $a (@$atoms) {
641 push(@only_atoms, $a) if (ref($a) && $a->isa('QueryParser::query_plan::node::atom'));
# Accessor body for the node's table name: stores $table when a true value
# is given and returns the cached name. With nothing cached, the name
# defaults to 'metabib.<classname>_field_entry', which is stored by calling
# this accessor again with that value.
# NOTE(review): the sub header and the unpacking of $table are not visible here.
650 $self->{table} = $table if ($table);
651 return $self->{table} if $self->{table};
652 return $self->table( 'metabib.' . $self->classname . '_field_entry' );
# Accessor body for the node's SQL table alias (sub header not visible in
# this view). An explicit alias is stored and returned. Otherwise an alias is
# derived from the object's stringified form: the substitution keeps the
# "x<hex>" part of the trailing "(0x<hex>)" address, '_' plus the requested
# class is appended, and any '|' characters are replaced with '_'. The
# derived alias is cached through a recursive call to this accessor.
657 my $table_alias = shift;
658 $self->{table_alias} = $table_alias if ($table_alias);
659 return $self->{table_alias} if ($self->{table_alias});
661 $table_alias = "$self";
662 $table_alias =~ s/^.*\(0(x[0-9a-fA-F]+)\)$/$1/go;
663 $table_alias .= '_' . $self->requested_class;
664 $table_alias =~ s/\|/_/go;
666 return $self->table_alias( $table_alias );
# Body of the node's tsquery builder (sub header not visible in this view).
# Returns the cached text when present; otherwise concatenates a piece for
# each entry of query_atoms and caches the result. One branch appends the
# entry's generated SQL; the other appends the entry repeated twice.
# NOTE(review): the branch condition is on lines not visible here. Presumably
# atom objects take the first branch and plain '&' / '|' operator strings the
# second, yielding '&&' / '||' — confirm.
671 return $self->{tsquery} if ($self->{tsquery});
673 for my $atom (@{$self->query_atoms}) {
675 $self->{tsquery} .= "\n\t\t\t" .$atom->sql;
677 $self->{tsquery} .= $atom x 2;
681 return $self->{tsquery};
# Body of the node's rank-expression builder (sub header not visible in this
# view). Returns the cached expression when present; otherwise builds
# "rank(<alias>.index_vector, <tsquery>)" from the node's table alias and
# tsquery text, caches it in $self->{rank}, and returns it.
686 return $self->{rank} if ($self->{rank});
687 return $self->{rank} = 'rank(' . $self->table_alias . '.index_vector, ' . $self->tsquery . ')';