2 * Copyright (C) 2007-2008 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
-- Start from a clean slate. IF EXISTS keeps a first-time install (where the
-- search schema does not exist yet) from raising an error on this statement.
18 DROP SCHEMA IF EXISTS search CASCADE;
-- Per-field relevance bumps consumed by search.staged_fts: for each active row
-- whose field is among the fields being searched, the rank expression is
-- multiplied by `multiplier` when the indexed value passes the bump_type test.
24 CREATE TABLE search.relevance_adjustment (
25 id SERIAL PRIMARY KEY,
26 active BOOL NOT NULL DEFAULT TRUE, -- staged_fts only reads rows WHERE active
27 field INT NOT NULL REFERENCES config.metabib_field (id) DEFERRABLE INITIALLY DEFERRED,
-- bump_type tests, as built by staged_fts:
--   first_word : value ILIKE '<word>%'            (value starts with a search word)
--   word_order : value ILIKE '%<w1>%<w2>%...%'    (search words occur in that order)
--   full_match : value ~ '^<w1>\s+<w2>...\W*$'    (value is exactly the search words; case-sensitive)
28 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
29 multiplier NUMERIC NOT NULL DEFAULT 1.0 -- factor applied to the rank when the test passes; otherwise the factor is 1.0
-- At most one adjustment row per (field, bump_type) pair.
31 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
-- search.pick_table(search_class): map a search class name to the schema-qualified
-- metabib.*_field_entry table that holds its index entries. search.staged_fts
-- calls this with a parsed argument's field_class to choose the table it joins.
33 CREATE OR REPLACE FUNCTION search.pick_table (TEXT) RETURNS TEXT AS $$
35 WHEN $1 = 'author' THEN 'metabib.author_field_entry'
36 WHEN $1 = 'title' THEN 'metabib.title_field_entry'
37 WHEN $1 = 'subject' THEN 'metabib.subject_field_entry'
38 WHEN $1 = 'keyword' THEN 'metabib.keyword_field_entry'
39 WHEN $1 = 'series' THEN 'metabib.series_field_entry'
-- Row type returned by search.staged_fts. Hit rows are filled with id, rel and
-- record; the summary row has id, rel and record set to NULL and carries the
-- total / checked / visible / deleted / excluded counters.
43 CREATE TYPE search.search_result AS ( id BIGINT, rel NUMERIC, record INT, total INT, checked INT, visible INT, deleted INT, excluded INT );
-- One parsed search argument, as returned by search.parse_search_args: the search
-- class and (optional) field name, the alias used for the joined subquery, and a
-- term tagged with its term_type. staged_fts reads the term_type values
-- 'fts_query', 'fts_rank', 'word' and 'phrase'.
44 CREATE TYPE search.search_args AS ( id INT, field_class TEXT, field_name TEXT, table_alias TEXT, term TEXT, term_type TEXT );
-- search.staged_fts: staged full-text search over the metabib index tables.
--
-- Builds a dynamic "core" query from the parsed search arguments (one joined
-- subquery per 'fts_query' argument, plus relevance and sort expressions),
-- opens a cursor on it, and tests each candidate for deleted records and for
-- copy status / location / visibility. Visible hits are emitted as
-- search.search_result rows; a row with NULL id/rel/record carries the
-- total / checked / deleted / visible / excluded counters.
46 CREATE OR REPLACE FUNCTION search.staged_fts (
50 param_searches TEXT, -- JSON hash, to be turned into a resultset via search.parse_search_args
52 param_locations INT[], -- copy locations (asset.copy.location) that a record's copies are tested against
53 param_audience TEXT[], -- allowed mrd.audience values
54 param_language TEXT[], -- allowed mrd.item_lang values
55 param_lit_form TEXT[], -- allowed mrd.lit_form values
58 param_vformats TEXT[], -- allowed mrd.vr_format values
59 param_bib_level TEXT[], -- allowed mrd.bib_level values
65 param_pref_lang_multiplier REAL, -- relevance factor for records whose mrd.item_lang equals param_pref_lang
74 ) RETURNS SETOF search.search_result AS $func$
77 current_res search.search_result%ROWTYPE;
78 query_part search.search_args%ROWTYPE;
79 phrase_query_part search.search_args%ROWTYPE;
84 rank_adjust search.relevance_adjustment%ROWTYPE;
90 query_table_alias TEXT;
91 from_alias_array TEXT[] := '{}';
92 used_ranks TEXT[] := '{}';
95 search_org_list INT[];
96 select_clause TEXT := 'SELECT';
97 from_clause TEXT := ' FROM metabib.metarecord_source_map m JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record) ';
98 where_clause TEXT := ' WHERE 1=1 ';
99 mrd_used BOOL := FALSE;
100 sort_desc BOOL := FALSE;
103 core_cursor REFCURSOR;
105 vis_limit_query TEXT;
106 inner_where_clause TEXT;
108 total_count INT := 0;
109 check_count INT := 0;
110 deleted_count INT := 0;
111 visible_count INT := 0;
112 excluded_count INT := 0;
116 core_rel_limit := COALESCE( param_rel_limit, 25000 );
117 core_chk_limit := COALESCE( param_chk_limit, 1000 );
118 core_skip_chk := COALESCE( param_skip_chk, 1 );
121 select_clause := select_clause || ' m.metarecord as id, array_accum(distinct m.source) as records,';
123 select_clause := select_clause || ' m.source as id, array_accum(distinct m.source) as records,';
126 -- first we need to construct the base query
127 FOR query_part IN SELECT * FROM search.parse_search_args(param_searches) WHERE term_type = 'fts_query' LOOP
129 inner_where_clause := 'index_vector @@ ' || query_part.term;
131 IF query_part.field_name IS NOT NULL THEN
133 SELECT id INTO mb_field
134 FROM config.metabib_field
135 WHERE field_class = query_part.field_class
136 AND name = query_part.field_name;
139 inner_where_clause := inner_where_clause ||
140 ' AND ' || 'field = ' || mb_field;
145 -- moving on to the rank ...
146 SELECT * INTO query_part
147 FROM search.parse_search_args(param_searches)
148 WHERE term_type = 'fts_rank'
149 AND table_alias = query_part.table_alias;
151 current_rank := query_part.term || ' * ' || query_part.table_alias || '_weight.weight';
153 IF query_part.field_name IS NOT NULL THEN
155 SELECT array_accum(distinct id) INTO mb_field_list
156 FROM config.metabib_field
157 WHERE field_class = query_part.field_class
158 AND name = query_part.field_name;
162 SELECT array_accum(distinct id) INTO mb_field_list
163 FROM config.metabib_field
164 WHERE field_class = query_part.field_class;
168 FOR rank_adjust IN SELECT * FROM search.relevance_adjustment WHERE active AND field IN ( SELECT * FROM search.explode_array( mb_field_list ) ) LOOP
170 IF NOT rank_adjust.bump_type = ANY (used_ranks) THEN
172 IF rank_adjust.bump_type = 'first_word' THEN
173 SELECT term INTO tmp_text
174 FROM search.parse_search_args(param_searches)
175 WHERE table_alias = query_part.table_alias AND term_type = 'word'
179 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( tmp_text || '%' );
181 ELSIF rank_adjust.bump_type = 'word_order' THEN
182 SELECT array_to_string( array_accum( term ), '%' ) INTO tmp_text
183 FROM search.parse_search_args(param_searches)
184 WHERE table_alias = query_part.table_alias AND term_type = 'word';
186 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( '%' || tmp_text || '%' );
188 ELSIF rank_adjust.bump_type = 'full_match' THEN
189 SELECT array_to_string( array_accum( term ), E'\\s+' ) INTO tmp_text
190 FROM search.parse_search_args(param_searches)
191 WHERE table_alias = query_part.table_alias AND term_type = 'word';
193 tmp_text := query_part.table_alias || '.value ~ ' || quote_literal( '^' || tmp_text || E'\\W*$' );
198 IF tmp_text IS NOT NULL THEN
199 current_rank := current_rank || ' * ( CASE WHEN ' || tmp_text ||
200 ' THEN ' || rank_adjust.multiplier || '::REAL ELSE 1.0 END )';
203 used_ranks := array_append( used_ranks, rank_adjust.bump_type );
209 ranks := array_append( ranks, current_rank );
212 FOR phrase_query_part IN
214 FROM search.parse_search_args(param_searches)
215 WHERE term_type = 'phrase'
216 AND table_alias = query_part.table_alias LOOP
218 tmp_text := replace( phrase_query_part.term, '*', E'\\*' );
219 tmp_text := replace( tmp_text, '?', E'\\?' );
220 tmp_text := replace( tmp_text, '+', E'\\+' );
221 tmp_text := replace( tmp_text, '|', E'\\|' );
222 tmp_text := replace( tmp_text, '(', E'\\(' );
223 tmp_text := replace( tmp_text, ')', E'\\)' );
224 tmp_text := replace( tmp_text, '[', E'\\[' );
225 tmp_text := replace( tmp_text, ']', E'\\]' );
227 inner_where_clause := inner_where_clause || ' AND ' || 'value ~* ' || quote_literal( E'(^|\\W+)' || regexp_replace(tmp_text, E'\\s+',E'\\\\s+','g') || E'(\\W+|\$)' );
231 query_table := search.pick_table(query_part.field_class);
233 from_clause := from_clause ||
234 ' JOIN ( SELECT * FROM ' || query_table || ' WHERE ' || inner_where_clause ||
235 CASE WHEN core_rel_limit > 0 THEN ' LIMIT ' || core_rel_limit::TEXT ELSE '' END || ' ) AS ' || query_part.table_alias ||
236 ' ON ( m.source = ' || query_part.table_alias || '.source )' ||
237 ' JOIN config.metabib_field AS ' || query_part.table_alias || '_weight' ||
238 ' ON ( ' || query_part.table_alias || '.field = ' || query_part.table_alias || '_weight.id AND ' || query_part.table_alias || '_weight.search_field)';
240 from_alias_array := array_append(from_alias_array, query_part.table_alias);
244 IF param_pref_lang IS NOT NULL AND param_pref_lang_multiplier IS NOT NULL THEN
245 current_rank := ' CASE WHEN mrd.item_lang = ' || quote_literal( param_pref_lang ) ||
246 ' THEN ' || param_pref_lang_multiplier || '::REAL ELSE 1.0 END ';
248 -- ranks := array_append( ranks, current_rank );
251 current_rank := ' AVG( ( (' || array_to_string( ranks, ') + (' ) || ') ) * ' || current_rank || ' ) ';
252 select_clause := select_clause || current_rank || ' AS rel,';
254 sort_desc = param_sort_desc;
256 IF param_sort = 'pubdate' THEN
258 tmp_text := '999999';
259 IF param_sort_desc THEN tmp_text := '0'; END IF;
261 current_rank := $$ COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\D+', '9', 'g'),'')), $$ || quote_literal(tmp_text) || $$ )::INT $$;
263 ELSIF param_sort = 'title' THEN
265 tmp_text := 'zzzzzz';
266 IF param_sort_desc THEN tmp_text := ' '; END IF;
270 SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\d+'),'0')::INT + 1 ))
271 FROM metabib.full_rec frt
272 WHERE frt.record = m.source
274 AND frt.subfield = 'a'
276 )),$$ || quote_literal(tmp_text) || $$))
279 ELSIF param_sort = 'author' THEN
281 tmp_text := 'zzzzzz';
282 IF param_sort_desc THEN tmp_text := ' '; END IF;
286 SELECT LTRIM(fra.value)
287 FROM metabib.full_rec fra
288 WHERE fra.record = m.source
289 AND fra.tag LIKE '1%'
290 AND fra.subfield = 'a'
291 ORDER BY fra.tag::text::int
293 )),$$ || quote_literal(tmp_text) || $$))
296 ELSIF param_sort = 'create_date' THEN
297 current_rank := $$( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
298 ELSIF param_sort = 'edit_date' THEN
299 current_rank := $$( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
301 sort_desc := NOT COALESCE(param_sort_desc, FALSE);
304 select_clause := select_clause || current_rank || ' AS rank';
306 -- now add the other qualifiers
-- Record-descriptor filters. Every value is passed through quote_literal()
-- before it is spliced into the dynamic WHERE clause, so a value containing a
-- quote can neither break the generated SQL nor inject into it. An array that
-- holds only NULLs degrades to IN (NULL), which matches nothing.
307 IF param_audience IS NOT NULL AND array_upper(param_audience, 1) > 0 THEN
308 where_clause = where_clause || ' AND mrd.audience IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_audience ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
311 IF param_language IS NOT NULL AND array_upper(param_language, 1) > 0 THEN
312 where_clause = where_clause || ' AND mrd.item_lang IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_language ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
315 IF param_lit_form IS NOT NULL AND array_upper(param_lit_form, 1) > 0 THEN
316 where_clause = where_clause || ' AND mrd.lit_form IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_lit_form ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
319 IF param_types IS NOT NULL AND array_upper(param_types, 1) > 0 THEN
320 where_clause = where_clause || ' AND mrd.item_type IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_types ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
323 IF param_forms IS NOT NULL AND array_upper(param_forms, 1) > 0 THEN
324 where_clause = where_clause || ' AND mrd.item_form IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_forms ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
327 IF param_vformats IS NOT NULL AND array_upper(param_vformats, 1) > 0 THEN
328 where_clause = where_clause || ' AND mrd.vr_format IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_vformats ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
331 IF param_bib_level IS NOT NULL AND array_upper(param_bib_level, 1) > 0 THEN
332 where_clause = where_clause || ' AND mrd.bib_level IN (' || COALESCE( NULLIF( array_to_string( ARRAY( SELECT quote_literal( filter_val::TEXT ) FROM search.explode_array( param_bib_level ) AS filter_val ), ',' ), '' ), 'NULL' ) || ') ';
335 IF param_before IS NOT NULL AND param_before <> '' THEN
336 where_clause = where_clause || $$ AND mrd.date1 <= $$ || quote_literal(param_before) || ' ';
339 IF param_after IS NOT NULL AND param_after <> '' THEN
340 where_clause = where_clause || $$ AND mrd.date1 >= $$ || quote_literal(param_after) || ' ';
343 IF param_during IS NOT NULL AND param_during <> '' THEN
344 where_clause = where_clause || $$ AND $$ || quote_literal(param_during) || $$ BETWEEN mrd.date1 AND mrd.date2 $$;
-- Restrict to records whose date1 lies between the first two elements of
-- param_between (inclusive). The bounds come from param_between itself (the
-- clause used to be built from param_bib_level) and each bound is emitted as a
-- properly quoted literal. The filter is skipped if either bound is NULL, so a
-- NULL can never blank out the whole WHERE clause through concatenation.
347 IF param_between IS NOT NULL AND array_upper(param_between, 1) > 1 AND param_between[1] IS NOT NULL AND param_between[2] IS NOT NULL THEN
348 where_clause = where_clause || ' AND mrd.date1 BETWEEN ' || quote_literal( param_between[1]::TEXT ) || ' AND ' || quote_literal( param_between[2]::TEXT ) || ' ';
351 core_rel_query := select_clause || from_clause || where_clause ||
352 ' GROUP BY 1 ORDER BY 4' || CASE WHEN sort_desc THEN ' DESC' ELSE ' ASC' END || ';';
353 --RAISE NOTICE 'Base Query: %', core_rel_query;
355 IF param_search_ou > 0 THEN
356 IF param_depth IS NOT NULL THEN
357 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou, param_depth );
359 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou );
361 ELSIF param_search_ou < 0 THEN
362 SELECT array_accum(distinct org_unit) INTO search_org_list FROM actor.org_lasso_map WHERE lasso = -param_search_ou;
363 ELSIF param_search_ou = 0 THEN
364 -- reserved for user lassos (ou_buckets/type='lasso') with ID passed in depth ... hack? sure.
367 OPEN core_cursor FOR EXECUTE core_rel_query;
371 FETCH core_cursor INTO core_result;
375 IF total_count % 1000 = 0 THEN
376 -- RAISE NOTICE ' % total, % checked so far ... ', total_count, check_count;
379 IF core_chk_limit > 0 AND total_count - core_skip_chk + 1 >= core_chk_limit THEN
380 total_count := total_count + 1;
384 total_count := total_count + 1;
386 CONTINUE WHEN param_skip_chk IS NOT NULL and total_count < param_skip_chk;
388 check_count := check_count + 1;
390 PERFORM 1 FROM biblio.record_entry b WHERE NOT b.deleted AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
392 -- RAISE NOTICE ' % were all deleted ... ', core_result.records;
393 deleted_count := deleted_count + 1;
398 FROM biblio.record_entry b
399 JOIN config.bib_source s ON (b.source = s.id)
401 AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
404 -- RAISE NOTICE ' % were all transcendant ... ', core_result.records;
405 visible_count := visible_count + 1;
407 current_res.id = core_result.id;
408 current_res.rel = core_result.rel;
412 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
416 current_res.record = core_result.records[1];
418 current_res.record = NULL;
421 RETURN NEXT current_res;
426 IF param_statuses IS NOT NULL AND array_upper(param_statuses, 1) > 0 THEN
429 FROM asset.call_number cn
430 JOIN asset.copy cp ON (cp.call_number = cn.id)
433 AND cp.status IN ( SELECT * FROM search.explode_array( param_statuses ) )
434 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
435 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
439 -- RAISE NOTICE ' % were all status-excluded ... ', core_result.records;
440 excluded_count := excluded_count + 1;
446 IF param_locations IS NOT NULL AND array_upper(param_locations, 1) > 0 THEN
449 FROM asset.call_number cn
450 JOIN asset.copy cp ON (cp.call_number = cn.id)
453 AND cp.location IN ( SELECT * FROM search.explode_array( param_locations ) )
454 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
455 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
459 -- RAISE NOTICE ' % were all copy_location-excluded ... ', core_result.records;
460 excluded_count := excluded_count + 1;
466 IF staff IS NULL OR NOT staff THEN
469 FROM asset.call_number cn
470 JOIN asset.copy cp ON (cp.call_number = cn.id)
471 JOIN actor.org_unit a ON (cp.circ_lib = a.id)
472 JOIN asset.copy_location cl ON (cp.location = cl.id)
473 JOIN config.copy_status cs ON (cp.status = cs.id)
480 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
481 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
485 -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
486 excluded_count := excluded_count + 1;
493 FROM asset.call_number cn
494 JOIN asset.copy cp ON (cp.call_number = cn.id)
495 JOIN actor.org_unit a ON (cp.circ_lib = a.id)
496 JOIN asset.copy_location cl ON (cp.location = cl.id)
499 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
500 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
506 FROM asset.call_number cn
507 WHERE cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
511 -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
512 excluded_count := excluded_count + 1;
520 visible_count := visible_count + 1;
522 current_res.id = core_result.id;
523 current_res.rel = core_result.rel;
527 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
531 current_res.record = core_result.records[1];
533 current_res.record = NULL;
536 RETURN NEXT current_res;
538 IF visible_count % 1000 = 0 THEN
539 -- RAISE NOTICE ' % visible so far ... ', visible_count;
544 current_res.id = NULL;
545 current_res.rel = NULL;
546 current_res.record = NULL;
547 current_res.total = total_count;
548 current_res.checked = check_count;
549 current_res.deleted = deleted_count;
550 current_res.visible = visible_count;
551 current_res.excluded = excluded_count;
555 RETURN NEXT current_res;
558 $func$ LANGUAGE PLPGSQL;
-- search.explode_array(arr): return the elements of a one-dimensional array as
-- a set, in subscript order. The series runs from array_lower to array_upper,
-- so arrays whose subscripts do not start at 1 (e.g. '[0:2]={a,b,c}') are
-- expanded without dropping elements or emitting spurious NULLs. An empty or
-- NULL array yields no rows, because both bounds are then NULL.
561 CREATE OR REPLACE FUNCTION search.explode_array(anyarray) RETURNS SETOF anyelement AS $BODY$
562 SELECT ($1)[s] FROM generate_series(array_lower($1, 1), array_upper($1, 1)) AS s;
564 LANGUAGE 'sql' IMMUTABLE;
-- search.parse_search_args(json): decode the JSON search hash and return its
-- contents as search.search_args rows (PL/Perl, untrusted).
566 CREATE OR REPLACE FUNCTION search.parse_search_args (TEXT) RETURNS SETOF search.search_args AS $perlcode$
570 my $args = decode_json( $json );
# Each top-level key is split on "|" into a search class and an optional field name.
574 for my $k ( keys %$args ) {
# The table alias is the key with every "|" replaced by "_".
575 (my $alias = $k) =~ s/\|/_/gso;
576 my ($class, $field) = split /\|/, $k;
577 my $part = $args->{$k};
# NOTE(review): $p is presumably the term_type of the values under it; the
# lines that consume it are not visible here, so confirm.
578 for my $p ( keys %$part ) {
579 my $data = $part->{$p};
# A scalar value is treated as a one-element list.
580 $data = [$data] if (!ref($data));
581 for my $datum ( @$data ) {
583 { field_class => $class,
584 field_name => $field,
586 table_alias => $alias,
598 $perlcode$ LANGUAGE PLPERLU;