# Load the logger class together with its :level export tag.
# presumably :level supplies the DEBUG / ERROR constants passed to $log->debug further down -- confirm.
1 use OpenSRF::Utils::Logger qw/:level/;
# $log holds the logger's class name as a plain string, so $log->debug(...) is a class-method call.
# It is declared with `my` before the package statement below, making it a file-scoped lexical.
2 my $log = 'OpenSRF::Utils::Logger';
4 #-------------------------------------------------------------------------------
5 package OpenILS::Application::Storage::FTS;
# Same logger import again, this time into the FTS package namespace.
6 use OpenSRF::Utils::Logger qw/:level/;
# No Unicode::Normalize function is called in the visible lines; presumably used by the
# normalisation code on lines outside this excerpt -- confirm.
8 use Unicode::Normalize;
# Default Parse::RecDescent parser for search strings, built once when the file is loaded.
# The term-decomposition code falls back to it when no parser is passed in
# (`shift || $_default_grammar_parser`).
# Rules visible in this excerpt: search_expression, or_expr, and_expr, expr, phrase, group,
# word, negative_token, numeric_range.  The lexpr, rexpr and token rules and the GRAMMAR
# terminator are on lines not shown here.
# Constructed with Class->new(...) rather than the indirect-object form `new Class (...)`,
# which perlobj discourages because of its parsing ambiguities; the call is otherwise identical.
# No comments are added below this line: the heredoc is single-quoted and every byte of it
# is grammar text handed to the parser generator.
10 my $_default_grammar_parser = Parse::RecDescent->new( <<'GRAMMAR' );
14 search_expression: or_expr(s) | and_expr(s) | expr(s)
15 or_expr: lexpr '||' rexpr
16 and_expr: lexpr '&&' rexpr
19 expr: phrase(s) | group(s) | word(s)
21 phrase: '"' token(s) '"'
22 group : '(' search_expression ')'
23 word: numeric_range | negative_token | token
24 negative_token: '-' .../\D+/ token
26 numeric_range: /\d+-\d*/
# Character-level normalisation of $txt (the enclosing sub header and the declarations of
# $txt and $sf are on lines not shown in this excerpt).
# Strip every run of characters in the Unicode Mark category (\pM).
# presumably $txt was decomposed beforehand so accents are separate marks -- confirm.
36 $txt =~ s/\pM+//go; # Remove diacritics
# Expand single-character ligatures/letters into ASCII letter pairs.
38 $txt =~ s/\xE6/AE/go; # Convert ae digraph
39 $txt =~ s/\x{153}/OE/go;# Convert oe digraph
40 $txt =~ s/\xFE/TH/go; # Convert Icelandic thorn
# Map U+2070..U+207B one-for-one onto "0123456789+-".
# NOTE(review): per the Unicode chart U+2071 is SUPERSCRIPT LATIN SMALL LETTER I and
# U+2072/U+2073 are unassigned; SUPERSCRIPT ONE/TWO/THREE live at U+00B9/U+00B2/U+00B3,
# which this list does not include -- confirm whether they are handled elsewhere.
42 $txt =~ tr/\x{2070}\x{2071}\x{2072}\x{2073}\x{2074}\x{2075}\x{2076}\x{2077}\x{2078}\x{2079}\x{207A}\x{207B}/0123456789+-/;# Convert superscript numbers
# Map U+2080..U+208B (subscript digits, plus, minus) onto "0123456789+-".
# Fixed: the replacement list used to read "0123456889", which turned subscript seven
# (U+2087) into "8".
43 $txt =~ tr/\x{2080}\x{2081}\x{2082}\x{2083}\x{2084}\x{2085}\x{2086}\x{2087}\x{2088}\x{2089}\x{208A}\x{208B}/0123456789+-/;# Convert subscript numbers
# U+0251 and U+03B1 -> A, U+03B2 -> B, U+0262 and U+03B3 -> G.
45 $txt =~ tr/\x{0251}\x{03B1}\x{03B2}\x{0262}\x{03B3}/AABGG/; # Convert Latin and Greek
# U+2113 -> L, \xF0 -> D; every remaining listed character becomes a space, because tr pads
# a short replacement list with its final character.
46 $txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/LD /; # Convert Misc
# Delete (tr ...//d) apostrophes, square brackets and vertical bars outright.
47 $txt =~ tr/\'\[\]\|//d; # Remove Misc
# Extra comma handling applies only when $sf is set and begins with "a".
49 if ($sf && $sf =~ /^a/o) {
# Position of the first comma; index() yields -1 when there is none.
50 my $commapos = index($txt,',');
# Proceed unless that first comma is the final character of $txt.
52 if ($commapos != length($txt) - 1) {
# Keep the first comma only: text before it, a comma, then the remaining pieces joined by spaces.
53 my @list = split /,/, $txt;
54 my $first = shift @list;
55 $txt = $first . ',' . join(' ', @list);
# Whitespace clean-up: collapse runs to one space, then trim both ends.
64 $txt =~ s/\s+/ /go; # Compress multiple spaces
65 $txt =~ s/^\s+//o; # Remove leading space
66 $txt =~ s/\s+$//o; # Remove trailing space
71 #' stupid vim syntax highlighting ...
# Logs a fixed warning through $log->debug, passing ERROR as the second argument.
# The message text says this code is a default that is expected to be overridden.
# The second `;` is an empty statement and has no effect.
75 $log->debug("You must override me somewhere, or I will make searching really slow!!!!",ERROR);;
# Constructor fragment (the sub header and the unpacking of $self / $term are not shown).
# Accept either an existing object or a class name as invocant and reduce it to a class name.
81 $self = ref($self) || $self;
# Bless a fresh, empty hash into that class; from here on $self is the new object.
82 $self = bless {} => $self;
# Break the search term apart; the words / phrases / nots accessors used below read the result.
84 $self->decompose($term);
# Build one '%...%' pattern per positive word and phrase, collected in $self->{fts_query}.
86 for my $part ( $self->words, $self->phrases ) {
87 $part = OpenILS::Application::Storage::CDBI->quote($part);
# NOTE(review): quote() is defined elsewhere; if it already returns a complete quoted SQL
# literal, wrapping its result in '%...%' here would double the quoting -- confirm.
88 push @{ $self->{ fts_query } }, "'\%$part\%'";
# Same treatment for negated words, collected separately in $self->{fts_query_not}.
91 for my $part ( $self->nots ) {
92 $part = OpenILS::Application::Storage::CDBI->quote($part);
93 push @{ $self->{ fts_query_not } }, "'\%$part\%'";
# Term-decomposition fragment (sub header and the declarations of $self, $term, @parts and
# $part are not shown).  Cleans the raw term, parses it, and records words, negated words
# and quoted phrases on $self.
# Use the caller's parser if one was passed, otherwise the module-level default.
100 my $parser = shift || $_default_grammar_parser;
# A "--" surrounded by whitespace becomes a single space.
103 $term =~ s/\s+--\s+/ /go;
# Drop anything shaped like "&...;" (entity-style sequences).
104 $term =~ s/(?:&[^;]+;)//go;
# A "-" at the start of the term or after whitespace, directly before a word, becomes "!".
106 $term =~ s/(^|\s+)-(\w+)/$1!$2/go;
# Remove a "+" that sits directly before a word.
# NOTE(review): the leading \b only matches when a word character precedes the "+", so a
# "+word" at the start of the term or after a space is left untouched -- confirm intended.
107 $term =~ s/\b(\+)(\w+)/$2/go;
# Trim leading/trailing whitespace around the span between the first and last word boundary.
108 $term =~ s/^\s*\b(.+)\b\s*$/$1/o;
# A four-digit range such as 1990-1995 is split into two separate numbers.
109 $term =~ s/(\d{4})-(\d{4})/$1 $2/go;
110 #$term =~ s/^(?:an?|the)\b(.*)/$1/o;
112 $log->debug("Stripped search term string is [$term]",DEBUG);
# Run the cleaned term through the grammar, starting at the search_expression rule.
114 my $parsetree = $parser->search_expression( $term );
# Words not immediately preceded by "!" are positive terms; words preceded by "!" are negations.
# Both lists are taken before quoted phrases are removed below, so words inside phrases
# are included as well.
115 my @words = $term =~ /\b((?<!!)\w+)\b/go;
116 my @nots = $term =~ /\b(?<=!)(\w+)\b/go;
118 $log->debug("Stripped words are[".join(', ',@words)."]",DEBUG);
119 $log->debug("Stripped nots are[".join(', ',@nots)."]",DEBUG);
# Repeatedly cut the first span delimited by unescaped double quotes out of $term
# (under /x the spaces in the pattern are insignificant).
122 while ($term =~ s/ ((?<!\\)"{1}) (.*?) ((?<!\\)"){1} //x) {
# Collect the phrase lower-cased; $part is set on lines not shown, presumably from the
# text captured between the quotes -- confirm.
127 push @parts, lc($part);
# Fixed operator and column names used by the SQL-building methods.
130 $self->{ fts_op } = 'ILIKE';
131 $self->{ fts_col } = $self->{ text_col } = 'value';
# At this point $term has already had its quoted phrases removed by the loop above.
132 $self->{ raw } = $term;
133 $self->{ parsetree } = $parsetree;
134 $self->{ words } = \@words;
135 $self->{ nots } = \@nots;
136 $self->{ phrases } = \@parts;
# Return statements of the read accessors (each sub header is on a line not shown).
# List-valued slots return the array's elements in list context and the stored array
# reference in scalar context; single-valued slots return the stored value directly.
# NOTE(review): the list-context branches dereference the slot unconditionally.  fts_query
# and fts_query_not are only created by push in the constructor loops, and fts_rank is not
# assigned anywhere in the visible lines, so a slot may be undef here; under `use strict`
# that dereference would die -- confirm whether strict is in effect and whether callers can
# hit this.
# Negated '%...%' patterns.
143 return wantarray ? @{ $self->{fts_query_not} } : $self->{fts_query_not};
# Rank expressions.
148 return wantarray ? @{ $self->{fts_rank} } : $self->{fts_rank};
# Positive '%...%' patterns.
153 return wantarray ? @{ $self->{fts_query} } : $self->{fts_query};
# Result of the grammar parse.
163 return $self->{parsetree};
# Column name used in the generated WHERE fragments.
168 return $self->{fts_col};
# Column name used for phrase matching and word bumps.
173 return $self->{text_col};
# Lower-cased quoted phrases.
178 return wantarray ? @{ $self->{phrases} } : $self->{phrases};
# Positive words.
183 return wantarray ? @{ $self->{words} } : $self->{words};
# Negated words.
188 return wantarray ? @{ $self->{nots} } : $self->{nots};
# Builds a SQL fragment with one " AND <column> ILIKE '%phrase%'" term per quoted phrase.
# The declarations of $self and $output and the end of the sub are on lines not shown.
191 sub sql_exact_phrase_match {
193 my $column = $self->text_col;
195 for my $phrase ( $self->phrases ) {
# Backslash-escape the LIKE wildcards so they match literally, then the single quote.
# NOTE(review): escaping "'" with a backslash relies on the database accepting backslash
# escapes inside string literals -- confirm against the server configuration.
196 $phrase =~ s/%/\\%/go;
197 $phrase =~ s/_/\\_/go;
198 $phrase =~ s/'/\\'/go;
199 $log->debug("Adding phrase [$phrase] to the match list", DEBUG);
200 $output .= " AND $column ILIKE '\%$phrase\%'";
202 $log->debug("Phrase list is [$output]", DEBUG);
# Builds a SQL fragment that adds a fixed bonus for every search word found in the text
# column: one " + CASE WHEN <column> ILIKE '%word%' THEN <bump> ELSE 0 END" term per word.
# The declarations of $self and $output and the end of the sub are on lines not shown.
206 sub sql_exact_word_bump {
# Bonus per matching word; falls back to '0.1' when the argument is missing or false.
208 my $bump = shift || '0.1';
210 my $column = $self->text_col;
212 for my $word ( $self->words ) {
# NOTE(review): $word and $bump are interpolated straight into the SQL text here; any
# escaping would have to happen on lines not shown in this excerpt -- confirm.
216 $log->debug("Adding word [$word] to the relevancy bump list", DEBUG);
217 $output .= " + CASE WHEN $column ILIKE '\%$word\%' THEN $bump ELSE 0 END";
219 $log->debug("Word bump list is [$output]", DEBUG);
# Assembles the WHERE fragment: one "<col> <op> <pattern>" test per positive pattern and one
# "NOT (<col> <op> <pattern>)" test per negated pattern, all joined with " AND ".
# The declarations of $self and @output are on lines not shown.
223 sub sql_where_clause {
227 for my $fts ( $self->fts_query ) {
228 push @output, join(' ', $self->fts_col, $self->{fts_op}, $fts);
231 for my $fts ( $self->fts_query_not ) {
232 push @output, 'NOT (' . join(' ', $self->fts_col, $self->{fts_op}, $fts) . ')';
# NOTE(review): $phrase_match is computed but not used by the return on the next line; the
# call's only visible effect is the debug logging done inside sql_exact_phrase_match.
235 my $phrase_match = $self->sql_exact_phrase_match();
236 return join(' AND ', @output);
239 #-------------------------------------------------------------------------------
# Generic column search (the sub header, the declarations of @frags / @vals, and the loop
# and branch lines around the IN-list handling are not shown in this excerpt).
# Arguments: invocant (class or object), the SQL comparison operator to use, then either
# column/value pairs or a single hash reference of them, optionally followed by a hash
# reference of search options.
248 my ($proto, $search_type, @args) = @_;
249 my $class = ref $proto || $proto;
# A trailing hash reference is taken as the options hash only when other arguments precede it.
252 my $search_opts = (@args > 1 and ref($args[-1]) eq 'HASH') ? pop @args : {};
# A leading hash reference is flattened into column/value pairs.
254 @args = %{ $args[0] } if ref $args[0] eq "HASH";
# Translate page / page_size into a row offset.  A missing, zero, negative or non-numeric
# page is treated as page 1 (offset 0); previously such values produced a negative offset
# and therefore an invalid "OFFSET -N" clause.  Pages >= 1 give the same offset as before.
256 $search_opts->{offset} = (int($search_opts->{page} || 1) > 1 ? int($search_opts->{page}) - 1 : 0) * int($search_opts->{page_size}) if ($search_opts->{page_size});
257 $search_opts->{_placeholder} ||= '?';
# Consume the arguments two at a time as (column, value).
260 while (my ($col, $val) = splice @args, 0, 2) {
# Resolve the column by name, then by accessor name; croak if neither matches.
261 my $column = $class->find_column($col)
262 || (List::Util::first { $_->accessor eq $col } $class->columns)
263 || $class->_croak("$col is not a column of $class");
# undef -> IS NULL; array reference -> IN (...) with one bind value per element; anything
# else -> "<col> <operator> <placeholder>" with a single bind value.
265 if (!defined($val)) {
266 push @frags, "$col IS NULL";
267 } elsif (ref($val) and ref($val) eq 'ARRAY') {
# NOTE(review): the IN list always uses a literal '?' rather than the configured
# _placeholder used in the scalar branch below -- confirm that is intended.
268 push @frags, "$col IN (".join(',',map{'?'}@$val).")";
270 push @vals, ''.$class->_deflated_column($column, $v);
273 push @frags, "$col $search_type $$search_opts{_placeholder}";
274 push @vals, $class->_deflated_column($column, $val);
278 my $frag = join " AND ", @frags;
# NOTE(review): order_by, limit and offset are interpolated into the SQL text as given;
# they must come from trusted code, not from user input -- confirm at the call sites.
280 $frag .= " ORDER BY $search_opts->{order_by}"
281 if $search_opts->{order_by};
282 $frag .= " LIMIT $search_opts->{limit}"
283 if $search_opts->{limit};
# OFFSET is only emitted together with LIMIT.
284 $frag .= " OFFSET $search_opts->{offset}"
285 if ($search_opts->{limit} && defined($search_opts->{offset}));
287 return $class->sth_to_objects($class->sql_Retrieve($frag), \@vals);