1 package OpenILS::Application::Storage::Driver::Pg::QueryParser;
2 use OpenILS::Application::Storage::QueryParser;
3 use base 'QueryParser';
4 use OpenSRF::Utils::JSON;
# Accessor for the default preferred language kept in custom_data.
# When $lang is true it replaces the stored value; the stored value is
# returned either way.  A false $lang ('' or 0) never overwrites the setting.
# NOTE(review): the lines that unpack $self and $lang are not visible here;
# presumably $lang is the optional first argument -- confirm.
11 sub default_preferred_language {
15 $self->custom_data->{default_preferred_language} = $lang if ($lang);
16 return $self->custom_data->{default_preferred_language};
# Accessor for the default preferred-language multiplier kept in custom_data.
# The new value travels in a variable named $lang even though it is stored as
# the multiplier.  Because the store is guarded by truthiness, a multiplier
# of 0 is never stored.
# NOTE(review): the lines that unpack $self and $lang are not visible here;
# presumably $lang is the optional first argument -- confirm.
19 sub default_preferred_language_multiplier {
23 $self->custom_data->{default_preferred_language_multiplier} = $lang if ($lang);
24 return $self->custom_data->{default_preferred_language_multiplier};
30 return 0 unless $self->parse_tree;
31 return 0 if @{$self->parse_tree->filters};
32 return 0 if @{$self->parse_tree->modifiers};
33 for my $node ( @{ $self->parse_tree->query_nodes } ) {
34 return 0 if (!ref($node) && $node eq '|');
35 next unless (ref($node));
36 return 0 if ($node->isa('QueryParser::query_plan'));
44 return $self->parse_tree->toSQL;
51 $self->custom_data->{field_id_map} ||= {};
52 $self->custom_data->{field_id_map} = $map if ($map);
53 return $self->custom_data->{field_id_map};
# Register one search field in the field id map.
# Visible effects: the class/field pair is added as a search field, an entry
# is recorded under by_id (id -> classname, field, weight) and another under
# by_class (class -> field -> id).
# NOTE(review): argument unpacking is not visible here; judging by the call in
# initialize_field_id_map the order is (class, field, id, weight) -- confirm.
56 sub add_field_id_map {
63 $self->add_search_field( $class => $field );
64 $self->field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight };
65 $self->field_id_map->{by_class}{$class}{$field} = $id;
# Tail of a hash literal shaped like a fragment of the map for just this
# field; the opening of the statement is not visible here -- presumably it is
# the value returned to the caller.
68 by_id => { $id => { classname => $class, field => $field, weight => $weight } },
69 by_class => { $class => { $field => $id } }
# Look up a field definition by its numeric id.
# Returns the by_id entry of the field id map (a hash ref with classname,
# field and weight keys, as stored by add_field_id_map), or undef when the id
# has not been registered.
73 sub field_class_by_id {
77 return $self->field_id_map->{by_id}{$id};
# Return the field ids registered under a search class, as an array ref.
# Without a class the result is undef.
# NOTE(review): the condition choosing between the two array-ref returns is
# not visible here; presumably the first applies when a field name was
# supplied and the second otherwise -- confirm.
80 sub field_ids_by_class {
85 return undef unless ($class);
# Single-element list: the id mapped to this class/field pair.
88 return [$self->field_id_map->{by_class}{$class}{$field}];
# Every id registered under the class, in hash (unspecified) order.
91 return [values( %{ $self->field_id_map->{by_class}{$class} } )];
98 $self->custom_data->{rel_bumps} ||= {};
99 $self->custom_data->{rel_bumps} = $bumps if ($bumps);
100 return $self->custom_data->{rel_bumps};
# Return the relevance bumps registered for one class/field pair.
# The value is whatever add_relevance_bump stored there: a hash ref keyed by
# bump type, each entry holding multiplier and active -- or undef when
# nothing has been registered for the pair.
103 sub find_relevance_bumps {
108 return $self->relevance_bumps->{$class}{$field};
# Register (or overwrite) one relevance bump for a class/field/type triple.
# NOTE(review): most of the argument unpacking is not visible here; judging by
# the callers in this file the order is (class, field, type, multiplier) with
# an optional trailing active flag -- confirm.
111 sub add_relevance_bump {
116 my $multiplier = shift;
# A bump is active unless the caller explicitly passed a defined value.
119 $active = 1 if (!defined($active));
121 $self->relevance_bumps->{$class}{$field}{$type} = { multiplier => $multiplier, active => $active };
# Hand back just the entry that was stored, nested the same way as the map.
123 return { $class => { $field => { $type => { multiplier => $multiplier, active => $active } } } };
# Populate the field id map from a list of field definition objects.
# Each element must answer field_class, name, id and weight; presumably these
# are config.metabib_field rows (see initialization_complete) -- confirm.
# Returns the field id map.
127 sub initialize_field_id_map {
129 my $cmf_list = shift;
131 for my $cmf (@$cmf_list) {
# Registration is done on the class (__PACKAGE__), not on $self.
132 __PACKAGE__->add_field_id_map( $cmf->field_class, $cmf->name, $cmf->id, $cmf->weight );
135 return $self->field_id_map;
# Populate the relevance bump table from a list of adjustment objects.
# Each element must answer field, bump_type and multiplier.  The field id is
# translated to class/field names through the field id map, so that map has
# to be populated first.  Returns the relevance bump table.
138 sub initialize_relevance_bumps {
140 my $sra_list = shift;
142 for my $sra (@$sra_list) {
# NOTE(review): an id missing from the field map gives an undef lookup here,
# so the bump would be stored under undef class/field names -- confirm that
# callers never pass unknown ids.
143 my $c = $self->field_class_by_id( $sra->field );
# No "active" argument is passed, so add_relevance_bump defaults it to 1.
144 __PACKAGE__->add_relevance_bump( $c->{classname}, $c->{field}, $sra->bump_type, $sra->multiplier );
147 return $self->relevance_bumps;
# Register query normalizers from a list of field/normalizer mapping objects.
# Each element must answer field, norm (an object answering func) and params
# (a JSON string, decoded before registration).  The field id is translated
# to class/field names through the field id map.
150 sub initialize_normalizers {
152 my $tree = shift; # open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic { "id" : { "!=" : null } }, { "flesh" : 1, "flesh_fields" : { "cmfinm" : ["norm"] }, "order_by" : [{ "class" : "cmfinm", "field" : "pos" }] }
154 for my $cmfinm ( @$tree ) {
155 my $field_info = $self->field_class_by_id( $cmfinm->field );
# Registration is done on the class (__PACKAGE__), not on $self.
156 __PACKAGE__->add_query_normalizer( $field_info->{classname}, $field_info->{field}, $cmfinm->norm->func, OpenSRF::Utils::JSON->JSON2perl($cmfinm->params) );
# One-shot loader for the parser's configuration.
# Returns early once $_complete is true.  Otherwise each supplied data set is
# loaded, in dependency order: field map first, then relevance bumps and
# normalizers, both of which resolve field ids through that map.
# NOTE(review): the unpacking of %args and the declaration of $_complete are
# not visible here -- confirm how they are scoped.
161 sub initialization_complete {
169 return $_complete if ($_complete);
171 $self->initialize_field_id_map( $args{config_metabib_field} )
172 if ($args{config_metabib_field});
174 $self->initialize_relevance_bumps( $args{search_relevance_adjustment} )
175 if ($args{search_relevance_adjustment});
177 $self->initialize_normalizers( $args{config_metabib_field_index_norm_map} )
178 if ($args{config_metabib_field_index_norm_map});
# Tail of a statement whose beginning is not visible here: true only when all
# three data sets were supplied; presumably this is what sets $_complete --
# confirm.
181 $args{config_metabib_field_index_norm_map} &&
182 $args{search_relevance_adjustment} &&
183 $args{config_metabib_field}
# Built-in field definitions and relevance bumps, registered on the class
# when this file is loaded.
#
#   add_field_id_map(   class => field => id        => weight     )
#   add_relevance_bump( class => field => bump_type => multiplier )
#
# No "active" flag is passed to add_relevance_bump(), so every bump below is
# registered as active.

__PACKAGE__->add_field_id_map( series => seriestitle => 1 => 1 );
__PACKAGE__->add_relevance_bump( series => seriestitle => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( series => seriestitle => full_match => 20 );

__PACKAGE__->add_field_id_map( title => abbreviated => 2 => 1 );
__PACKAGE__->add_relevance_bump( title => abbreviated => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( title => abbreviated => full_match => 20 );

__PACKAGE__->add_field_id_map( title => translated => 3 => 1 );
__PACKAGE__->add_relevance_bump( title => translated => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( title => translated => full_match => 20 );

__PACKAGE__->add_field_id_map( title => proper => 6 => 1 );
__PACKAGE__->add_query_normalizer( title => proper => 'naco_normalize' );
__PACKAGE__->add_relevance_bump( title => proper => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( title => proper => full_match => 20 );
__PACKAGE__->add_relevance_bump( title => proper => word_order => 10 );

# The field name must be spelled "corporate": that is the name the
# author/corporate naco_normalize query normalizer in this file is registered
# under, and a differently spelled field would never be paired with it.
__PACKAGE__->add_field_id_map( author => corporate => 7 => 1 );
__PACKAGE__->add_relevance_bump( author => corporate => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( author => corporate => full_match => 20 );

__PACKAGE__->add_field_id_map( author => personal => 8 => 1 );
__PACKAGE__->add_relevance_bump( author => personal => first_word => 1.5 );
__PACKAGE__->add_relevance_bump( author => personal => full_match => 20 );
__PACKAGE__->add_query_normalizer( author => personal => 'naco_normalize' );
__PACKAGE__->add_query_normalizer( author => personal => 'split_date_range' );

# Subject and keyword bumps use a multiplier of 1, i.e. they leave the rank
# unchanged.
__PACKAGE__->add_field_id_map( subject => topic => 14 => 1 );
__PACKAGE__->add_relevance_bump( subject => topic => first_word => 1 );
__PACKAGE__->add_relevance_bump( subject => topic => full_match => 1 );

__PACKAGE__->add_field_id_map( subject => complete => 16 => 1 );
__PACKAGE__->add_relevance_bump( subject => complete => first_word => 1 );
__PACKAGE__->add_relevance_bump( subject => complete => full_match => 1 );

__PACKAGE__->add_field_id_map( keyword => keyword => 15 => 1 );
__PACKAGE__->add_relevance_bump( keyword => keyword => first_word => 1 );
__PACKAGE__->add_relevance_bump( keyword => keyword => full_match => 1 );
# Search-class aliases, registered in the order listed: [ class => alias ].
for my $class_alias (
    [ keyword => 'kw' ],
    [ title   => 'ti' ],
    [ author  => 'au' ],
    [ author  => 'name' ],
    [ author  => 'dc.contributor' ],
    [ subject => 'su' ],
    [ subject => 'bib.subject(?:Title|Place|Occupation)' ],
    [ series  => 'se' ],
    [ keyword => 'dc.identifier' ],
) {
    __PACKAGE__->add_search_class_alias( @{$class_alias} );
}

# Fields whose query terms are passed through naco_normalize: [ class => field ].
for my $normalized_field (
    [ author  => 'corporate' ],
    [ keyword => 'keyword' ],
) {
    __PACKAGE__->add_query_normalizer( @{$normalized_field}, 'naco_normalize' );
}

# Field-level alias: class, field, alias.
__PACKAGE__->add_search_field_alias( 'subject', 'name', 'bib.subjectName' );

# Class used when a query term names no search class.
__PACKAGE__->default_search_class('keyword');
# Filter names this driver accepts, registered one by one in the order listed.
for my $filter_name (qw(
    audience vr_format format item_type item_form lit_form
    locations site lasso my_lasso depth sort
    language preferred_language preferred_language_weight preferred_language_multiplier
    statuses bib_level
    before after between during
    offset limit core_limit check_limit skip_check
    superpage superpage_size estimation_strategy
)) {
    __PACKAGE__->add_search_filter($filter_name);
}
# Modifier names this driver accepts, registered one by one in the order listed.
for my $modifier_name (qw(
    available descending ascending metarecord metabib staff
)) {
    __PACKAGE__->add_search_modifier($modifier_name);
}
290 #-------------------------------
291 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan;
292 use base 'QueryParser::query_plan';
298 my ($format) = $self->find_filter('format');
300 my ($t,$f) = split('-', $format->args->[0]);
301 $self->new_filter( item_type => [ split '', $t ] ) if ($t);
302 $self->new_filter( item_form => [ split '', $f ] ) if ($f);
305 for my $f ( qw/preferred_language preferred_language_multiplier preferred_language_weight core_limit check_limit skip_check superpage superpage_size/ ) {
307 $col = 'preferred_language_multiplier' if ($f eq 'preferred_language_weight');
308 my ($filter) = $self->find_filter($f);
309 if ($filter and @{$filter->args}) {
310 $filters{$col} = $filter->args->[0];
314 $self->QueryParser->superpage($filters{superpage}) if ($filters{superpage});
315 $self->QueryParser->superpage_size($filters{superpage_size}) if ($filters{superpage_size});
316 $self->QueryParser->core_limit($filters{core_limit}) if ($filters{core_limit});
318 my $flat_plan = $self->flatten;
320 # generate the relevance ranking
321 my $rel = "AVG(\n\t\t(" . join(")+\n\t\t(", @{$$flat_plan{rank_list}}) . ")\n\t)";
323 # find any supplied sort option
324 my ($sort_filter) = $self->find_filter('sort');
326 $sort_filter = $sort_filter->args->[0];
328 $sort_filter = 'rel';
331 if (($filters{preferred_language} || $self->QueryParser->default_preferred_language) && ($filters{preferred_language_multiplier} || $self->QueryParser->default_preferred_language_multiplier)) {
332 $rel = "($rel) * CASE WHEN mrd.lang = \$_$$\$";
333 $rel .= $filters{preferred_language} ? $filters{preferred_language} : $self->QueryParser->default_preferred_language;
334 $rel .= "\$_$$\$ THEN ";
335 $rel .= $filters{preferred_language_multiplier} ? $filters{preferred_language_multiplier} : $self->QueryParser->default_preferred_language_multiplier;
336 $rel .= " ELSE 1 END";
339 for my $f ( qw/audience vr_format item_type item_form lit_form language bib_level/ ) {
341 $col = 'item_lang' if ($f eq 'language');
343 my ($filter) = $self->find_filter($f);
345 $filters{$f} = "AND mrd.$col in (\$_$$\$" . join("\$_$$\$,\$_$$\$",@{$filter->args}) . "\$_$$\$)";
349 my $audience = $filters{audience};
350 my $vr_format = $filters{vr_format};
351 my $item_type = $filters{item_type};
352 my $item_form = $filters{item_form};
353 my $lit_form = $filters{lit_form};
354 my $language = $filters{language};
355 my $bib_level = $filters{bib_level};
360 $desc = 'DESC' if ($self->find_modifier('descending'));
362 if ($sort_filter eq 'rel') { # relevance ranking flips sort dir
363 if ($desc eq 'ASC') {
369 if ($sort_filter eq 'title') {
370 my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz';
373 SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\\\d+'),'0')::INT + 1 ))
374 FROM metabib.full_rec frt
375 WHERE frt.record = m.source
377 AND frt.subfield = 'a'
381 } elsif ($sort_filter eq 'pubdate') {
382 $rank = "COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT";
383 } elsif ($sort_filter eq 'create_date') {
384 $rank = "( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )";
385 } elsif ($sort_filter eq 'edit_date') {
386 $rank = "( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )";
387 } elsif ($sort_filter eq 'author') {
388 my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz';
391 SELECT LTRIM(fra.value)
392 FROM metabib.full_rec fra
393 WHERE fra.record = m.source
394 AND fra.tag LIKE '1%'
395 AND fra.subfield = 'a'
396 ORDER BY fra.tag::text::int
401 # default to rel ranking
406 my $key = 'm.source';
407 $key = 'm.metarecord' if (grep {$_->name eq 'metarecord' or $_->name eq 'metabib'} @{$self->modifiers});
409 my ($before) = $self->find_filter('before');
410 my ($after) = $self->find_filter('after');
411 my ($during) = $self->find_filter('during');
412 my ($between) = $self->find_filter('between');
414 if ($before and @{$before->args} == 1) {
415 $before = "AND mrd.date1 <= \$_$$\$" . $before->args->[0] . "\$_$$\$";
420 if ($after and @{$after->args} == 1) {
421 $after = "AND mrd.date1 >= \$_$$\$" . $after->args->[0] . "\$_$$\$";
426 if ($during and @{$during->args} == 1) {
427 $during = "AND \$_$$\$" . $during->args->[0] . "\$_$$\$ BETWEEN mrd.date1 AND mrd.date2";
432 if ($between and @{$between->args} == 2) {
433 $between = "AND mrd.date1 BETWEEN \$_$$\$" . $between->args->[0] . "\$_$$\$ AND \$_$$\$" . $between->args->[1] . "\$_$$\$";
440 ARRAY_ACCUM(DISTINCT m.source) AS records,
443 COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT AS tie_break
444 FROM metabib.metarecord_source_map m
445 JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)
459 AND $$flat_plan{where}
461 ORDER BY 4 $desc, 5 DESC, 3 DESC
471 my $multiplier = shift;
473 my $only_atoms = $node->only_atoms;
474 return '' if (!@$only_atoms);
476 if ($bump eq 'first_word') {
477 return "/* first_word */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
478 "LIKE naco_normalize(\$_$$\$".$only_atoms->[0]->content."\$_$$\$) \|\| '\%' ".
479 "THEN $multiplier ELSE 1 END";
480 } elsif ($bump eq 'full_match') {
481 return "/* full_match */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
482 "LIKE". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) .
483 "THEN $multiplier ELSE 1 END";
484 } elsif ($bump eq 'word_order') {
485 return "/* word_order */ CASE WHEN naco_normalize(".$node->table_alias.".value) ".
486 "LIKE '\%'||". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) . '||\'%\' '.
487 "THEN $multiplier ELSE 1 END";
496 my $from = shift || '';
497 my $where = shift || '';
500 for my $node ( @{$self->query_nodes} ) {
502 if ($node->isa( 'QueryParser::query_plan::node' )) {
504 my $table = $node->table;
505 my $talias = $node->table_alias;
507 my $node_rank = $node->rank . " * ${talias}_weight.weight";
509 $from .= "\n\tLEFT JOIN (\n\t\tSELECT *\n\t\t FROM $table\n\t\t WHERE index_vector @@ (" .$node->tsquery . ')';
512 if (@{$node->fields} > 0) {
513 @bump_fields = @{$node->fields};
514 $from .= "\n\t\t\tAND field IN (SELECT id FROM config.metabib_field WHERE field_class = \$_$$\$". $node->classname ."\$_$$\$ AND name IN (";
515 $from .= "\$_$$\$" . join("\$_$$\$,\$_$$\$", @{$node->fields}) . "\$_$$\$))";
518 @bump_fields = @{$self->QueryParser->search_fields->{$node->classname}};
522 for my $field ( @bump_fields ) {
523 my $bumps = $self->QueryParser->find_relevance_bumps( $node->classname => $field );
524 for my $b (keys %$bumps) {
525 next if (!$$bumps{$b}{active});
526 next if ($used_bumps{$b});
529 my $bump_case = $self->rel_bump( $node, $b, $$bumps{$b}{multiplier} );
530 $node_rank .= "\n\t\t\t\t * " . $bump_case if ($bump_case);
534 my $core_limit = $self->QueryParser->core_limit || 25000;
535 $from .= "\n\t\tLIMIT $core_limit\n\t) AS $talias ON (m.source = $talias.source)";
536 $from .= "\n\tJOIN config.metabib_field AS ${talias}_weight ON (${talias}_weight.id = $talias.field)\n";
538 $where .= '(' . $talias . ".id IS NOT NULL";
539 $where .= ' AND ' . join(' AND ', map {"$talias.value ~* \$_$$\$$_\$_$$\$"} @{$node->phrases}) if (@{$node->phrases});
542 push @rank_list, $node_rank;
545 my $subnode = $node->flatten;
547 push(@rank_list, @{$$subnode{rank_list}});
548 $from .= $$subnode{from};
549 $where .= "($$subnode{where})";
552 $where .= ' AND ' if ($node eq '&');
553 $where .= ' OR ' if ($node eq '|');
554 # ... stitching the WHERE together ...
558 return { rank_list => \@rank_list, from => $from, where => $where };
563 #-------------------------------
# Driver-level filter class.  It only inherits from the generic
# QueryParser::query_plan::filter; no overrides are visible in this listing.
564 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::filter;
565 use base 'QueryParser::query_plan::filter';
567 #-------------------------------
# Driver-level modifier class.  It only inherits from the generic
# QueryParser::query_plan::modifier; no overrides are visible in this listing.
568 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::modifier;
569 use base 'QueryParser::query_plan::modifier';
571 #-------------------------------
572 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node::atom;
573 use base 'QueryParser::query_plan::node::atom';
579 $self->{sql} = $sql if ($sql);
581 return $self->{sql} if ($self->{sql});
582 return $self->buildSQL;
588 my $classname = $self->node->classname;
590 my $normalizers = $self->node->plan->QueryParser->query_normalizers( $classname );
591 my $fields = $self->node->fields;
593 $fields = $self->node->plan->QueryParser->search_fields->{$classname} if (!@$fields);
596 for my $field (@$fields) {
597 for my $nfield (keys %$normalizers) {
598 for my $nizer ( @{$$normalizers{$nfield}} ) {
599 push(@norm_list, $nizer) if ($field eq $nfield && !(grep {$_ eq $nizer} @norm_list));
604 my $sql = "\$_$$\$" . $self->content . "\$_$$\$";;
606 for my $n ( @norm_list ) {
607 $sql = join(', ', $sql, map { "\$_$$\$" . $_ . "\$_$$\$" } @{ $n->{params} });
608 $sql = $n->{function}."($sql)";
# Optional decorations attached to this atom; an unset one becomes ''.
my $prefix = $self->prefix || '';
my $suffix = $self->suffix || '';

# Turn the prefix into an SQL string literal concatenated in front of the
# generated tsquery text.
$prefix = "'$prefix' ||" if $prefix;

# Both helpers are declared unconditionally.  "my $x = ... if COND" has
# undefined behaviour in Perl: when COND is false the variable may keep the
# value it had on a previous call, which would leak one atom's suffix into the
# SQL built for the next atom.
my $suffix_op    = $suffix ? ":$suffix"          : '';
my $suffix_after = $suffix ? "|| '$suffix_op'"   : '';

# Whitespace runs and colons in the normalized term become '&' (each preceded
# by the suffix operator, if any), the whole thing is wrapped in parentheses
# and handed to to_tsquery using the search class name as its first argument.
$sql = "to_tsquery('$classname', $prefix '(' || regexp_replace($sql,E'(?:\\\\s+|:)','$suffix_op&','g') $suffix_after || ')')";

# Cache the generated SQL on the atom and return it.
return $self->sql($sql);
623 #-------------------------------
624 package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node;
625 use base 'QueryParser::query_plan::node';
630 my $atoms = $self->query_atoms;
632 for my $a (@$atoms) {
633 push(@only_atoms, $a) if (ref($a) && $a->isa('QueryParser::query_plan::node::atom'));
642 $self->{table} = $table if ($table);
643 return $self->{table} if $self->{table};
644 return $self->table( 'metabib.' . $self->classname . '_field_entry' );
649 my $table_alias = shift;
650 $self->{table_alias} = $table_alias if ($table_alias);
651 return $self->{table_alias} if ($self->{table_alias});
653 $table_alias = "$self";
654 $table_alias =~ s/^.*\(0(x[0-9a-fA-F]+)\)$/$1/go;
655 $table_alias .= '_' . $self->requested_class;
656 $table_alias =~ s/\|/_/go;
658 return $self->table_alias( $table_alias );
663 return $self->{tsquery} if ($self->{tsquery});
665 for my $atom (@{$self->query_atoms}) {
667 $self->{tsquery} .= "\n\t\t\t" .$atom->sql;
669 $self->{tsquery} .= $atom x 2;
673 return $self->{tsquery};
678 return $self->{rank} if ($self->{rank});
679 return $self->{rank} = 'rank(' . $self->table_alias . '.index_vector, ' . $self->tsquery . ')';