-- Drops the "search" schema and, because of CASCADE, every object that depends
-- on it. There is no IF EXISTS clause, so the statement raises an error when
-- the schema does not exist yet.
2 DROP SCHEMA search CASCADE;
-- search.relevance_adjustment: relevance multipliers keyed by indexed field
-- and bump type. search.staged_fts reads these rows and multiplies a term's
-- rank by "multiplier" when the bump condition matches.
8 CREATE TABLE search.relevance_adjustment (
-- NOT NULL flag defaulting to TRUE; presumably an on/off switch for the row
-- (confirm how consumers use it)
10 active BOOL NOT NULL DEFAULT TRUE,
-- the config.metabib_field row this adjustment belongs to (foreign key)
11 field INT NOT NULL REFERENCES config.metabib_field (id),
-- kind of match that is rewarded; restricted to the three listed values
12 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
-- factor applied for this bump; 1.0 leaves the rank unchanged
13 multiplier NUMERIC NOT NULL DEFAULT 1.0
-- at most one adjustment row per (field, bump_type) pair
15 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
-- Seed rows for search.relevance_adjustment. The first value of each row is a
-- config.metabib_field id (enforced by the foreign key); which index
-- definition each id denotes is not visible in this file excerpt.
-- Multipliers used: first_word = 1.5, word_order = 10, full_match = 20.
-- Note that field 1 gets no word_order row and field 14 gets only word_order.
17 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(1, 'first_word', 1.5);
18 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(1, 'full_match', 20);
19 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'first_word', 1.5);
20 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'word_order', 10);
21 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'full_match', 20);
22 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'first_word', 1.5);
23 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'word_order', 10);
24 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'full_match', 20);
25 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'first_word', 1.5);
26 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'word_order', 10);
27 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'full_match', 20);
28 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'first_word', 1.5);
29 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'word_order', 10);
30 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'full_match', 20);
-- fields 6 through 9 only receive the first_word bump
31 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(6, 'first_word', 1.5);
32 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(7, 'first_word', 1.5);
33 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(8, 'first_word', 1.5);
34 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(9, 'first_word', 1.5);
35 INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(14, 'word_order', 10);
-- search.pick_table(class TEXT) RETURNS TEXT
-- Maps a search class name ($1) to the schema-qualified name of the
-- metabib.*_field_entry table that search.staged_fts queries for that class.
-- The visible branches cover author, title, subject, keyword and series; what
-- is returned for any other input is not visible in this excerpt.
37 CREATE OR REPLACE FUNCTION search.pick_table (TEXT) RETURNS TEXT AS $$
39 WHEN $1 = 'author' THEN 'metabib.author_field_entry'
40 WHEN $1 = 'title' THEN 'metabib.title_field_entry'
41 WHEN $1 = 'subject' THEN 'metabib.subject_field_entry'
42 WHEN $1 = 'keyword' THEN 'metabib.keyword_field_entry'
43 WHEN $1 = 'series' THEN 'metabib.series_field_entry'
-- Row type returned by search.staged_fts. Result rows carry id, rel and record;
-- the final summary row carries the total/checked/visible/deleted/excluded
-- counters with id, rel and record set to NULL.
47 CREATE TYPE search.search_result AS ( id BIGINT, rel NUMERIC, record INT, total INT, checked INT, visible INT, deleted INT, excluded INT );
-- Row type returned by search.parse_search_args: one row per parsed search
-- term, tagged with its class, optional field name, table alias and term type.
48 CREATE TYPE search.search_args AS ( id INT, field_class TEXT, field_name TEXT, table_alias TEXT, term TEXT, term_type TEXT );
-- search.staged_fts(...) RETURNS SETOF search.search_result
-- Builds a dynamic full-text query from the parsed search arguments, opens a
-- cursor over it, checks each candidate row (deleted records, copy status,
-- copy visibility) and returns the surviving rows followed by one summary row
-- holding the counters.
-- Only part of the parameter list is visible in this excerpt. The body also
-- reads param_search_ou, param_depth, param_statuses, param_types,
-- param_forms, param_sort, param_sort_desc, param_rel_limit, param_chk_limit,
-- param_skip_chk and staff.
50 CREATE OR REPLACE FUNCTION search.staged_fts (
54 param_searches TEXT, -- JSON hash, to be turned into a resultset via search.parse_search_args
-- optional filters on metabib.rec_descriptor columns; NULL or empty means no filter
56 param_audience TEXT[],
57 param_language TEXT[],
58 param_lit_form TEXT[],
61 param_vformats TEXT[],
70 ) RETURNS SETOF search.search_result AS $func$
-- row buffers for the output row and for rows read from search.parse_search_args
73 current_res search.search_result%ROWTYPE;
74 query_part search.search_args%ROWTYPE;
75 phrase_query_part search.search_args%ROWTYPE;
80 rank_adjust search.relevance_adjustment%ROWTYPE;
86 query_table_alias TEXT;
87 from_alias_array TEXT[] := '{}';
-- bump types that have already been folded into a rank expression
88 used_ranks TEXT[] := '{}';
91 search_org_list INT[];
-- pieces of the dynamic core query, concatenated into core_rel_query later on
92 select_clause TEXT := 'SELECT';
93 from_clause TEXT := ' FROM metabib.metarecord_source_map m ';
94 where_clause TEXT := ' WHERE 1=1 ';
95 mrd_used BOOL := FALSE;
96 sort_desc BOOL := FALSE;
99 core_cursor REFCURSOR;
101 vis_limit_query TEXT;
102 inner_where_clause TEXT;
-- counters reported in the final summary row
104 total_count INT := 0;
105 check_count INT := 0;
106 deleted_count INT := 0;
107 visible_count INT := 0;
108 excluded_count INT := 0;
-- fall back to built-in limits when the caller passes NULL
112 core_rel_limit := COALESCE( param_rel_limit, 25000 );
113 core_chk_limit := COALESCE( param_chk_limit, 1000 );
114 core_skip_chk := COALESCE( param_skip_chk, 1 );
-- Two alternative heads for the SELECT list: rows are identified either by
-- m.metarecord or by m.source. The condition that chooses between them is not
-- visible in this excerpt. Both collect the distinct m.source values as "records".
117 select_clause := select_clause || ' m.metarecord as id, array_accum(distinct m.source) as records,';
119 select_clause := select_clause || ' m.source as id, array_accum(distinct m.source) as records,';
122 -- first we need to construct the base query
123 FOR query_part IN SELECT * FROM search.parse_search_args(param_searches) WHERE term_type = 'fts_query' LOOP
-- the term is concatenated verbatim as the right-hand side of the @@ match
125 inner_where_clause := 'index_vector @@ ' || query_part.term;
-- a named field narrows the match to that single config.metabib_field id
127 IF query_part.field_name IS NOT NULL THEN
129 SELECT id INTO mb_field
130 FROM config.metabib_field
131 WHERE field_class = query_part.field_class
132 AND name = query_part.field_name;
135 inner_where_clause := inner_where_clause ||
136 ' AND ' || 'field = ' || mb_field;
141 -- moving on to the rank ...
-- query_part (the loop variable) is overwritten here with the 'fts_rank' row
-- that shares the same table alias
142 SELECT * INTO query_part
143 FROM search.parse_search_args(param_searches)
144 WHERE term_type = 'fts_rank'
145 AND table_alias = query_part.table_alias;
-- base rank: the rank term scaled by the weight column of the joined
-- "<alias>_weight" row
147 current_rank := query_part.term || ' * ' || query_part.table_alias || '_weight.weight';
-- collect the field ids whose relevance adjustments apply: the single named
-- field, or every field of the class
149 IF query_part.field_name IS NOT NULL THEN
151 SELECT array_accum(distinct id) INTO mb_field_list
152 FROM config.metabib_field
153 WHERE field_class = query_part.field_class
154 AND name = query_part.field_name;
158 SELECT array_accum(distinct id) INTO mb_field_list
159 FROM config.metabib_field
160 WHERE field_class = query_part.field_class;
-- Fold the configured relevance bumps for the matched fields into current_rank.
-- Only rows flagged active are considered, so an adjustment can be switched
-- off in search.relevance_adjustment without deleting it.
164 FOR rank_adjust IN SELECT * FROM search.relevance_adjustment WHERE active AND field IN ( SELECT * FROM search.explode_array( mb_field_list ) ) LOOP
-- a bump type already recorded in used_ranks is not applied again
166 IF NOT rank_adjust.bump_type = ANY (used_ranks) THEN
-- first_word: the indexed value must start with the selected search word
168 IF rank_adjust.bump_type = 'first_word' THEN
169 SELECT term INTO tmp_text
170 FROM search.parse_search_args(param_searches)
171 WHERE table_alias = query_part.table_alias AND term_type = 'word'
175 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( tmp_text || '%' );
-- word_order: all search words must appear in the value, joined by '%' wildcards
177 ELSIF rank_adjust.bump_type = 'word_order' THEN
178 SELECT array_to_string( array_accum( term ), '%' ) INTO tmp_text
179 FROM search.parse_search_args(param_searches)
180 WHERE table_alias = query_part.table_alias AND term_type = 'word';
182 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( '%' || tmp_text || '%' );
-- full_match: the value must consist of the search words separated by
-- whitespace, with only non-word characters allowed after the last word
184 ELSIF rank_adjust.bump_type = 'full_match' THEN
185 SELECT array_to_string( array_accum( term ), E'\\s+' ) INTO tmp_text
186 FROM search.parse_search_args(param_searches)
187 WHERE table_alias = query_part.table_alias AND term_type = 'word';
189 tmp_text := query_part.table_alias || '.value ~ ' || quote_literal( '^' || tmp_text || E'\\W*$' );
-- multiply the rank by the configured factor when the condition holds, else by 1.0
194 current_rank := current_rank || ' * ( CASE WHEN ' || tmp_text ||
195 ' THEN ' || rank_adjust.multiplier || '::REAL ELSE 1.0 END )';
197 used_ranks := array_append( used_ranks, rank_adjust.bump_type );
-- remember this term's rank expression; all of them are averaged into "rel" below
203 ranks := array_append( ranks, current_rank );
-- every phrase term for the same alias adds a case-insensitive regex condition
-- on the indexed value
206 FOR phrase_query_part IN
208 FROM search.parse_search_args(param_searches)
209 WHERE term_type = 'phrase'
210 AND table_alias = query_part.table_alias LOOP
-- the phrase must be delimited by start/end of value or non-word characters;
-- whitespace runs inside the phrase are turned into \s+
212 inner_where_clause := inner_where_clause || ' AND ' || query_part.table_alias || '.value ~* ' || quote_literal( E'(^|\\W+)' || regexp_replace(phrase_query_part.term, E'\\s+',E'\\\\s+','g') || E'(\\W+|\$)' );
-- table to search, chosen by search class
216 query_table := search.pick_table(query_part.field_class);
-- join the (optionally LIMITed) matching entries under the term's alias, plus
-- the config.metabib_field row that supplies "<alias>_weight.weight"
218 from_clause := from_clause ||
219 ' JOIN ( SELECT * FROM ' || query_table || ' WHERE ' || inner_where_clause ||
220 CASE WHEN core_rel_limit > 0 THEN ' LIMIT ' || core_rel_limit::TEXT ELSE '' END || ' ) AS ' || query_part.table_alias ||
221 ' ON ( m.source = ' || query_part.table_alias || '.source )' ||
222 ' JOIN config.metabib_field AS ' || query_part.table_alias || '_weight' ||
223 ' ON ( ' || query_part.table_alias || '.field = ' || query_part.table_alias || '_weight.id )';
225 from_alias_array := array_append(from_alias_array, query_part.table_alias);
-- relevance column: AVG over the sum of all per-term rank expressions
229 current_rank := ' AVG( (' || array_to_string( ranks, ') + (' ) || ') )';
230 select_clause := select_clause || current_rank || ' AS rel,';
-- Choose the expression emitted as the "rank" column, which is the column the
-- core query is ordered by. Several statements below are only partly visible
-- in this excerpt (their opening lines are missing).
232 sort_desc = param_sort_desc;
234 IF param_sort = 'pubdate' THEN
-- literal spliced into the sort expression via quote_literal below; the
-- descending variant uses '0' instead of '999999'
236 tmp_text := '999999';
237 IF param_sort_desc THEN tmp_text := '0'; END IF;
241 SELECT SUBSTRING(frp.value FROM E'\\d{4}')
242 FROM metabib.full_rec frp
243 WHERE frp.record = m.source
245 AND frp.subfield = 'c'
247 )), $$ || quote_literal(tmp_text) || $$ )::INT )
250 ELSIF param_sort = 'title' THEN
-- same scheme with string literals: 'zzzzzz' ascending, a single space descending
252 tmp_text := 'zzzzzz';
253 IF param_sort_desc THEN tmp_text := ' '; END IF;
257 SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\d+'),'0')::INT + 1 ))
258 FROM metabib.full_rec frt
259 WHERE frt.record = m.source
261 AND frt.subfield = 'a'
263 )),$$ || quote_literal(tmp_text) || $$))
266 ELSIF param_sort = 'author' THEN
268 tmp_text := 'zzzzzz';
269 IF param_sort_desc THEN tmp_text := ' '; END IF;
273 SELECT LTRIM(fra.value)
274 FROM metabib.full_rec fra
275 WHERE fra.record = m.source
276 AND fra.tag LIKE '1%'
277 AND fra.subfield = 'a'
278 ORDER BY fra.tag::text::int
280 )),$$ || quote_literal(tmp_text) || $$))
283 ELSIF param_sort = 'create_date' THEN
-- the date sorts read the timestamp straight from biblio.record_entry
284 current_rank := $$( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
286 ELSIF param_sort = 'edit_date' THEN
287 current_rank := $$( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
-- here the direction is the negation of param_sort_desc (NULL counts as
-- FALSE); the branch header for this statement is not visible in this excerpt
290 sort_desc := NOT COALESCE(param_sort_desc, FALSE);
-- "rank" is the last column of the SELECT list
294 select_clause := select_clause || current_rank || ' AS rank';
296 -- now add the other qualifiers
-- Each non-empty filter array adds an IN (...) condition on the matching
-- metabib.rec_descriptor column.
-- NOTE(review): the array elements are spliced between single quotes without
-- quote_literal(), so an element containing a quote breaks, or alters, the
-- generated query. Confirm that callers sanitise these values.
-- NOTE(review): every branch appends the same JOIN on alias "mrd"; presumably
-- lines not visible here guard it with mrd_used so it is added once. Confirm.
297 IF param_audience IS NOT NULL AND array_upper(param_audience, 1) > 0 THEN
299 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
303 where_clause = where_clause || $$ AND mrd.audience IN ('$$ || array_to_string(param_audience, $$','$$) || $$') $$;
306 IF param_language IS NOT NULL AND array_upper(param_language, 1) > 0 THEN
308 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
312 where_clause = where_clause || $$ AND mrd.item_lang IN ('$$ || array_to_string(param_language, $$','$$) || $$') $$;
315 IF param_lit_form IS NOT NULL AND array_upper(param_lit_form, 1) > 0 THEN
317 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
321 where_clause = where_clause || $$ AND mrd.lit_form IN ('$$ || array_to_string(param_lit_form, $$','$$) || $$') $$;
324 IF param_types IS NOT NULL AND array_upper(param_types, 1) > 0 THEN
326 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
330 where_clause = where_clause || $$ AND mrd.item_type IN ('$$ || array_to_string(param_types, $$','$$) || $$') $$;
333 IF param_forms IS NOT NULL AND array_upper(param_forms, 1) > 0 THEN
335 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
339 where_clause = where_clause || $$ AND mrd.item_form IN ('$$ || array_to_string(param_forms, $$','$$) || $$') $$;
-- Video-format filter: restrict to records whose rec_descriptor.vr_format is
-- one of param_vformats. The IN list must be built from param_vformats itself;
-- expanding param_types here filtered vr_format by item types and produced a
-- NULL query whenever param_types was NULL.
342 IF param_vformats IS NOT NULL AND array_upper(param_vformats, 1) > 0 THEN
344 from_clause := from_clause || ' JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record)';
348 where_clause = where_clause || $$ AND mrd.vr_format IN ('$$ || array_to_string(param_vformats, $$','$$) || $$') $$;
-- Assemble the core query. Positional references: column 1 is the id column
-- and column 4 is the "rank" sort expression (the select list is id, records,
-- rel, rank).
351 core_rel_query := select_clause || from_clause || where_clause ||
352 ' GROUP BY 1 ORDER BY 4' || CASE WHEN sort_desc THEN ' DESC' ELSE ' ASC' END || ';';
353 --RAISE NOTICE 'Base Query: %', core_rel_query;
-- org units in scope for the copy checks: descendants of param_search_ou, with
-- param_depth passed through when it is supplied
355 IF param_depth IS NOT NULL THEN
356 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou, param_depth );
358 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou );
-- run the dynamic query through a cursor so rows can be checked one at a time
361 OPEN core_cursor FOR EXECUTE core_rel_query;
-- Per-row processing of the core query. The loop header, several guards
-- (IF ... THEN / END IF) and the PERFORM heads of the copy checks are not
-- visible in this excerpt, so the comments below describe only the visible
-- statements.
365 FETCH core_cursor INTO core_result;
369 IF total_count % 1000 = 0 THEN
370 -- RAISE NOTICE ' % total, % checked so far ... ', total_count, check_count;
-- once the number of rows past the skip offset reaches the check limit, the
-- row is still counted in total_count
-- NOTE(review): the statement following this increment is not visible;
-- presumably the row is then skipped without being checked. Confirm.
373 IF core_chk_limit > 0 AND total_count - core_skip_chk + 1 >= core_chk_limit THEN
374 total_count := total_count + 1;
378 total_count := total_count + 1;
-- rows before the caller's skip offset are counted but not checked
380 CONTINUE WHEN param_skip_chk IS NOT NULL and total_count < param_skip_chk;
382 check_count := check_count + 1;
-- probe for at least one non-deleted bib record among the row's records
384 PERFORM 1 FROM biblio.record_entry b WHERE NOT b.deleted AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
386 -- RAISE NOTICE ' % were all deleted ... ', core_result.records;
387 deleted_count := deleted_count + 1;
-- probe for records that have a config.bib_source row
-- NOTE(review): this query filters on no bib_source attribute although the
-- trace message below speaks of "transcendant" records. Confirm the intent.
391 PERFORM 1 FROM biblio.record_entry b JOIN config.bib_source s ON (b.source = s.id) WHERE b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
393 -- RAISE NOTICE ' % were all transcendant ... ', core_result.records;
394 visible_count := visible_count + 1;
396 current_res.id = core_result.id;
397 current_res.rel = core_result.rel;
-- "record" is filled only when the row maps to exactly one bib record
399 IF array_upper(core_result.records, 1) = 1 THEN
400 current_res.record = core_result.records[1];
402 current_res.record = NULL;
405 RETURN NEXT current_res;
-- copy-status filter: look for copies of the row's records with one of the
-- requested statuses at an in-scope org unit
410 IF param_statuses IS NOT NULL AND array_upper(param_statuses, 1) > 0 THEN
413 FROM asset.call_number cn
414 JOIN asset.copy cp ON (cp.call_number = cn.id)
417 AND cp.status IN ( SELECT * FROM search.explode_array( param_statuses ) )
418 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
419 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
423 -- RAISE NOTICE ' % were all status-excluded ... ', core_result.records;
424 excluded_count := excluded_count + 1;
-- copy visibility check, applied when staff is NULL or FALSE; the filter
-- conditions on the joined tables are not visible in this excerpt
430 IF staff IS NULL OR NOT staff THEN
433 FROM asset.call_number cn
434 JOIN asset.copy cp ON (cp.call_number = cn.id)
435 JOIN actor.org_unit a ON (cp.circ_lib = a.id)
436 JOIN asset.copy_location cl ON (cp.location = cl.id)
437 JOIN config.copy_status cs ON (cp.status = cs.id)
444 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
445 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
449 -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
450 excluded_count := excluded_count + 1;
-- the row passed the checks above: count it and hand it to the caller
456 visible_count := visible_count + 1;
458 current_res.id = core_result.id;
459 current_res.rel = core_result.rel;
461 IF array_upper(core_result.records, 1) = 1 THEN
462 current_res.record = core_result.records[1];
464 current_res.record = NULL;
467 RETURN NEXT current_res;
469 IF visible_count % 1000 = 0 THEN
470 -- RAISE NOTICE ' % visible so far ... ', visible_count;
-- Final summary row: id, rel and record are NULL and the counter columns carry
-- the totals gathered while walking the cursor.
475 current_res.id = NULL;
476 current_res.rel = NULL;
477 current_res.record = NULL;
478 current_res.total = total_count;
479 current_res.checked = check_count;
480 current_res.deleted = deleted_count;
481 current_res.visible = visible_count;
482 current_res.excluded = excluded_count;
486 RETURN NEXT current_res;
489 $func$ LANGUAGE PLPGSQL;
492 param_statuses INT[],
493 param_audience TEXT[], x
494 param_language TEXT[], x
495 param_lit_form TEXT[], x
496 param_types TEXT[], x
497 param_forms TEXT[], x
498 param_vformats TEXT[], x
-- search.explode_array(anyarray) RETURNS SETOF anyelement
-- Expands the first dimension of an array into one row per element, walking
-- the subscripts from the array's lower bound to its upper bound. Using
-- array_lower() instead of a hard-coded 1 keeps arrays with a non-default
-- lower bound from yielding spurious NULLs or dropping elements. For an empty
-- or NULL array both bounds are NULL and no rows are returned.
501 CREATE OR REPLACE FUNCTION search.explode_array(anyarray) RETURNS SETOF anyelement AS $BODY$
502 SELECT ($1)[s] FROM generate_series(array_lower($1, 1), array_upper($1, 1)) AS s;
504 LANGUAGE 'sql' IMMUTABLE;
-- search.parse_search_args(json TEXT) RETURNS SETOF search.search_args
-- PL/PerlU function that decodes a JSON hash and emits one search_args row per
-- datum. Each top-level key is split on '|' into a class and an optional field
-- name; the same key with every '|' replaced by '_' becomes the table alias.
-- Each value is itself a hash. A scalar entry in it is wrapped in a
-- one-element list, and every element of the resulting list yields a row
-- carrying field_class, field_name and table_alias.
-- NOTE(review): how the term, term_type and id columns are filled is not
-- visible in this excerpt; presumably they come from the inner key and the
-- datum. Confirm.
506 CREATE OR REPLACE FUNCTION search.parse_search_args (TEXT) RETURNS SETOF search.search_args AS $perlcode$
510 my $args = decode_json( $json );
514 for my $k ( keys %$args ) {
515 (my $alias = $k) =~ s/\|/_/gso;
516 my ($class, $field) = split /\|/, $k;
517 my $part = $args->{$k};
518 for my $p ( keys %$part ) {
519 my $data = $part->{$p};
520 $data = [$data] if (!ref($data));
521 for my $datum ( @$data ) {
523 { field_class => $class,
524 field_name => $field,
526 table_alias => $alias,
538 $perlcode$ LANGUAGE PLPERLU;