-- Start from a clean slate: remove any previous search schema together with
-- every object inside it. IF EXISTS keeps a first-time install, where the
-- schema has never been created, from raising an error.
DROP SCHEMA IF EXISTS search CASCADE;
-- search.relevance_adjustment: relevance multipliers keyed by indexed field
-- and bump type. search.staged_fts reads these rows and multiplies the rank
-- of a match by the multiplier when the condition for the bump type holds.
-- NOTE(review): the column list and the end of this statement are only
-- partly visible in this excerpt.
8 CREATE TABLE search.relevance_adjustment (
10 active BOOL NOT NULL DEFAULT TRUE,
-- field points at the config.metabib_field row the adjustment applies to.
11 field INT NOT NULL REFERENCES config.metabib_field (id),
-- bump_type is restricted to the three kinds listed in the CHECK constraint.
12 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
13 multiplier NUMERIC NOT NULL DEFAULT 1.0
-- Allow at most one adjustment row for each (field, bump_type) pair.
CREATE UNIQUE INDEX bump_once_per_field_idx
    ON search.relevance_adjustment (field, bump_type);
-- Seed relevance multipliers, one row per (field, bump_type) pair.
-- The field values are ids in config.metabib_field. Rows are listed in the
-- same order in which they were previously inserted one statement at a time.
INSERT INTO search.relevance_adjustment (field, bump_type, multiplier)
VALUES
    (1,  'first_word', 1.5),
    (1,  'full_match', 20),
    (2,  'first_word', 1.5),
    (2,  'word_order', 10),
    (2,  'full_match', 20),
    (3,  'first_word', 1.5),
    (3,  'word_order', 10),
    (3,  'full_match', 20),
    (4,  'first_word', 1.5),
    (4,  'word_order', 10),
    (4,  'full_match', 20),
    (5,  'first_word', 1.5),
    (5,  'word_order', 10),
    (5,  'full_match', 20),
    (6,  'first_word', 1.5),
    (7,  'first_word', 1.5),
    (8,  'first_word', 1.5),
    (9,  'first_word', 1.5),
    (14, 'word_order', 10);
-- search.pick_table: maps a search class name, passed as the first argument,
-- to the schema-qualified metabib field entry table for that class.
-- NOTE(review): the opening and closing of the CASE expression are not
-- visible in this excerpt, so the result for any other class name cannot be
-- confirmed from here.
37 CREATE OR REPLACE FUNCTION search.pick_table (TEXT) RETURNS TEXT AS $$
39 WHEN $1 = 'author' THEN 'metabib.author_field_entry'
40 WHEN $1 = 'title' THEN 'metabib.title_field_entry'
41 WHEN $1 = 'subject' THEN 'metabib.subject_field_entry'
42 WHEN $1 = 'keyword' THEN 'metabib.keyword_field_entry'
43 WHEN $1 = 'series' THEN 'metabib.series_field_entry'
-- Row type returned by search.staged_fts. id, rel and record describe one
-- hit; total, checked, visible, deleted and excluded carry the counters that
-- the function fills in on its summary row.
CREATE TYPE search.search_result AS (
    id       BIGINT,
    rel      NUMERIC,
    record   INT,
    total    INT,
    checked  INT,
    visible  INT,
    deleted  INT,
    excluded INT
);
-- Row type returned by search.parse_search_args: one parsed search argument,
-- tagged with the field class and name it applies to, the table alias used
-- for it in generated SQL, and the term together with its type.
CREATE TYPE search.search_args AS (
    id          INT,
    field_class TEXT,
    field_name  TEXT,
    table_alias TEXT,
    term        TEXT,
    term_type   TEXT
);
-- search.staged_fts: builds a full-text search query as dynamic SQL from the
-- JSON search description in param_searches, runs it through a cursor and
-- returns search.search_result rows. Rows that get past the checks shown
-- below are returned with RETURN NEXT, and a final summary row with NULL id,
-- rel and record carries the total, checked, deleted, visible and excluded
-- counters.
-- NOTE(review): only part of the parameter list, the declarations and the
-- control flow is visible in this excerpt, so the comments below describe
-- just the statements that are shown.
50 CREATE OR REPLACE FUNCTION search.staged_fts (
54 param_searches TEXT, -- JSON hash, to be turned into a resultset via search.parse_search_args
56 param_audience TEXT[],
57 param_language TEXT[],
58 param_lit_form TEXT[],
61 param_vformats TEXT[],
63 param_pref_lang_multiplier REAL,
72 ) RETURNS SETOF search.search_result AS $func$
-- Local state: row buffers, the pieces of the dynamic query and the running
-- counters reported on the summary row.
75 current_res search.search_result%ROWTYPE;
76 query_part search.search_args%ROWTYPE;
77 phrase_query_part search.search_args%ROWTYPE;
82 rank_adjust search.relevance_adjustment%ROWTYPE;
88 query_table_alias TEXT;
89 from_alias_array TEXT[] := '{}';
90 used_ranks TEXT[] := '{}';
93 search_org_list INT[];
94 select_clause TEXT := 'SELECT';
95 from_clause TEXT := ' FROM metabib.metarecord_source_map m JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record) ';
96 where_clause TEXT := ' WHERE 1=1 ';
97 mrd_used BOOL := FALSE;
98 sort_desc BOOL := FALSE;
101 core_cursor REFCURSOR;
103 vis_limit_query TEXT;
104 inner_where_clause TEXT;
106 total_count INT := 0;
107 check_count INT := 0;
108 deleted_count INT := 0;
109 visible_count INT := 0;
110 excluded_count INT := 0;
-- Fall back to built-in defaults when the limit and skip parameters are NULL.
114 core_rel_limit := COALESCE( param_rel_limit, 25000 );
115 core_chk_limit := COALESCE( param_chk_limit, 1000 );
116 core_skip_chk := COALESCE( param_skip_chk, 1 );
-- The result id is either the metarecord or the source record. The condition
-- that chooses between the next two assignments is not visible here.
119 select_clause := select_clause || ' m.metarecord as id, array_accum(distinct m.source) as records,';
121 select_clause := select_clause || ' m.source as id, array_accum(distinct m.source) as records,';
124 -- first we need to construct the base query
125 FOR query_part IN SELECT * FROM search.parse_search_args(param_searches) WHERE term_type = 'fts_query' LOOP
-- Every fts_query entry starts with a full-text match against index_vector.
127 inner_where_clause := 'index_vector @@ ' || query_part.term;
-- A named field narrows the match to the id of that config.metabib_field row.
129 IF query_part.field_name IS NOT NULL THEN
131 SELECT id INTO mb_field
132 FROM config.metabib_field
133 WHERE field_class = query_part.field_class
134 AND name = query_part.field_name;
137 inner_where_clause := inner_where_clause ||
138 ' AND ' || 'field = ' || mb_field;
143 -- moving on to the rank ...
-- NOTE(review): this replaces the loop row in query_part with the fts_rank
-- entry that has the same table alias.
144 SELECT * INTO query_part
145 FROM search.parse_search_args(param_searches)
146 WHERE term_type = 'fts_rank'
147 AND table_alias = query_part.table_alias;
-- Base rank is the rank term scaled by the weight of the matched field.
149 current_rank := query_part.term || ' * ' || query_part.table_alias || '_weight.weight';
-- Collect the field ids whose relevance adjustments apply: the named field
-- when one is given, otherwise the ids selected by class alone.
151 IF query_part.field_name IS NOT NULL THEN
153 SELECT array_accum(distinct id) INTO mb_field_list
154 FROM config.metabib_field
155 WHERE field_class = query_part.field_class
156 AND name = query_part.field_name;
160 SELECT array_accum(distinct id) INTO mb_field_list
161 FROM config.metabib_field
162 WHERE field_class = query_part.field_class;
-- Apply the configured adjustments for those fields. A bump type that is
-- already listed in used_ranks is not applied again.
166 FOR rank_adjust IN SELECT * FROM search.relevance_adjustment WHERE field IN ( SELECT * FROM search.explode_array( mb_field_list ) ) LOOP
168 IF NOT rank_adjust.bump_type = ANY (used_ranks) THEN
170 IF rank_adjust.bump_type = 'first_word' THEN
171 SELECT term INTO tmp_text
172 FROM search.parse_search_args(param_searches)
173 WHERE table_alias = query_part.table_alias AND term_type = 'word'
177 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( tmp_text || '%' );
179 ELSIF rank_adjust.bump_type = 'word_order' THEN
180 SELECT array_to_string( array_accum( term ), '%' ) INTO tmp_text
181 FROM search.parse_search_args(param_searches)
182 WHERE table_alias = query_part.table_alias AND term_type = 'word';
184 tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( '%' || tmp_text || '%' );
186 ELSIF rank_adjust.bump_type = 'full_match' THEN
187 SELECT array_to_string( array_accum( term ), E'\\s+' ) INTO tmp_text
188 FROM search.parse_search_args(param_searches)
189 WHERE table_alias = query_part.table_alias AND term_type = 'word';
191 tmp_text := query_part.table_alias || '.value ~ ' || quote_literal( '^' || tmp_text || E'\\W*$' );
-- The condition built in tmp_text switches the multiplier on for matching rows.
196 current_rank := current_rank || ' * ( CASE WHEN ' || tmp_text ||
197 ' THEN ' || rank_adjust.multiplier || '::REAL ELSE 1.0 END )';
199 used_ranks := array_append( used_ranks, rank_adjust.bump_type );
205 ranks := array_append( ranks, current_rank );
-- Each phrase term for this alias adds a case-insensitive regular expression
-- filter on value, with runs of whitespace in the phrase matched loosely.
208 FOR phrase_query_part IN
210 FROM search.parse_search_args(param_searches)
211 WHERE term_type = 'phrase'
212 AND table_alias = query_part.table_alias LOOP
214 inner_where_clause := inner_where_clause || ' AND ' || 'value ~* ' || quote_literal( E'(^|\\W+)' || regexp_replace(phrase_query_part.term, E'\\s+',E'\\\\s+','g') || E'(\\W+|\$)' );
-- Resolve the field entry table for this class, then join the filtered and
-- optionally limited subquery plus its metabib_field weight row into the
-- outer query under the table alias.
218 query_table := search.pick_table(query_part.field_class);
220 from_clause := from_clause ||
221 ' JOIN ( SELECT * FROM ' || query_table || ' WHERE ' || inner_where_clause ||
222 CASE WHEN core_rel_limit > 0 THEN ' LIMIT ' || core_rel_limit::TEXT ELSE '' END || ' ) AS ' || query_part.table_alias ||
223 ' ON ( m.source = ' || query_part.table_alias || '.source )' ||
224 ' JOIN config.metabib_field AS ' || query_part.table_alias || '_weight' ||
225 ' ON ( ' || query_part.table_alias || '.field = ' || query_part.table_alias || '_weight.id )';
227 from_alias_array := array_append(from_alias_array, query_part.table_alias);
-- Optional multiplier for records in the preferred language.
231 IF param_pref_lang IS NOT NULL AND param_pref_lang_multiplier IS NOT NULL THEN
232 current_rank := ' CASE WHEN mrd.item_lang = ' || quote_literal( param_pref_lang ) ||
233 ' THEN ' || param_pref_lang_multiplier || '::REAL ELSE 1.0 END ';
235 --ranks := array_append( ranks, current_rank );
-- Relevance is the average of the summed per-alias ranks times current_rank.
238 current_rank := ' AVG( ( (' || array_to_string( ranks, ') + (' ) || ') ) * ' || current_rank || ' ) ';
239 select_clause := select_clause || current_rank || ' AS rel,';
241 sort_desc = param_sort_desc;
-- Choose the sort expression. For the pubdate, title and author sorts,
-- tmp_text is spliced into the expression through quote_literal and its value
-- depends on the sort direction. It appears to be the fallback sort value,
-- but the enclosing expression is only partly visible here.
243 IF param_sort = 'pubdate' THEN
245 tmp_text := '999999';
246 IF param_sort_desc THEN tmp_text := '0'; END IF;
250 SELECT SUBSTRING(frp.value FROM E'\\d{4}')
251 FROM metabib.full_rec frp
252 WHERE frp.record = m.source
254 AND frp.subfield = 'c'
256 )), $$ || quote_literal(tmp_text) || $$ )::INT )
259 ELSIF param_sort = 'title' THEN
261 tmp_text := 'zzzzzz';
262 IF param_sort_desc THEN tmp_text := ' '; END IF;
266 SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\d+'),'0')::INT + 1 ))
267 FROM metabib.full_rec frt
268 WHERE frt.record = m.source
270 AND frt.subfield = 'a'
272 )),$$ || quote_literal(tmp_text) || $$))
275 ELSIF param_sort = 'author' THEN
277 tmp_text := 'zzzzzz';
278 IF param_sort_desc THEN tmp_text := ' '; END IF;
282 SELECT LTRIM(fra.value)
283 FROM metabib.full_rec fra
284 WHERE fra.record = m.source
285 AND fra.tag LIKE '1%'
286 AND fra.subfield = 'a'
287 ORDER BY fra.tag::text::int
289 )),$$ || quote_literal(tmp_text) || $$))
292 ELSIF param_sort = 'create_date' THEN
293 current_rank := $$( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
294 ELSIF param_sort = 'edit_date' THEN
295 current_rank := $$( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
297 sort_desc := NOT COALESCE(param_sort_desc, FALSE);
300 select_clause := select_clause || current_rank || ' AS rank';
302 -- now add the other qualifiers
-- NOTE(review): the array values below are joined into the dynamic SQL
-- between literal quote characters without going through quote_literal.
-- Confirm that callers sanitize these parameters, or quote each element.
303 IF param_audience IS NOT NULL AND array_upper(param_audience, 1) > 0 THEN
304 where_clause = where_clause || $$ AND mrd.audience IN ('$$ || array_to_string(param_audience, $$','$$) || $$') $$;
307 IF param_language IS NOT NULL AND array_upper(param_language, 1) > 0 THEN
308 where_clause = where_clause || $$ AND mrd.item_lang IN ('$$ || array_to_string(param_language, $$','$$) || $$') $$;
311 IF param_lit_form IS NOT NULL AND array_upper(param_lit_form, 1) > 0 THEN
312 where_clause = where_clause || $$ AND mrd.lit_form IN ('$$ || array_to_string(param_lit_form, $$','$$) || $$') $$;
315 IF param_types IS NOT NULL AND array_upper(param_types, 1) > 0 THEN
316 where_clause = where_clause || $$ AND mrd.item_type IN ('$$ || array_to_string(param_types, $$','$$) || $$') $$;
319 IF param_forms IS NOT NULL AND array_upper(param_forms, 1) > 0 THEN
320 where_clause = where_clause || $$ AND mrd.item_form IN ('$$ || array_to_string(param_forms, $$','$$) || $$') $$;
323 IF param_vformats IS NOT NULL AND array_upper(param_vformats, 1) > 0 THEN
324 where_clause = where_clause || $$ AND mrd.vr_format IN ('$$ || array_to_string(param_vformats, $$','$$) || $$') $$;
-- Assemble the core query, grouped by the first output column and ordered by
-- the fourth one in the requested direction.
327 core_rel_query := select_clause || from_clause || where_clause ||
328 ' GROUP BY 1 ORDER BY 4' || CASE WHEN sort_desc THEN ' DESC' ELSE ' ASC' END || ';';
329 --RAISE NOTICE 'Base Query: %', core_rel_query;
-- Collect the org units in scope for the copy checks, with or without a
-- depth argument.
331 IF param_depth IS NOT NULL THEN
332 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou, param_depth );
334 SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou );
-- Run the core query through a cursor and examine the fetched rows.
337 OPEN core_cursor FOR EXECUTE core_rel_query;
341 FETCH core_cursor INTO core_result;
345 IF total_count % 1000 = 0 THEN
346 -- RAISE NOTICE ' % total, % checked so far ... ', total_count, check_count;
-- Past the check limit the row is still added to total_count. What follows
-- the increment in that branch is not visible here.
349 IF core_chk_limit > 0 AND total_count - core_skip_chk + 1 >= core_chk_limit THEN
350 total_count := total_count + 1;
354 total_count := total_count + 1;
-- Rows before the requested skip offset are counted but not checked.
356 CONTINUE WHEN param_skip_chk IS NOT NULL and total_count < param_skip_chk;
358 check_count := check_count + 1;
-- Look for at least one record in the group that is not flagged deleted.
360 PERFORM 1 FROM biblio.record_entry b WHERE NOT b.deleted AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
362 -- RAISE NOTICE ' % were all deleted ... ', core_result.records;
363 deleted_count := deleted_count + 1;
-- Look for a record that has a config.bib_source row.
-- NOTE(review): the visible query tests no flag on the bib source, although
-- the comment below speaks of transcendant records. Confirm the intent.
367 PERFORM 1 FROM biblio.record_entry b JOIN config.bib_source s ON (b.source = s.id) WHERE b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
369 -- RAISE NOTICE ' % were all transcendant ... ', core_result.records;
370 visible_count := visible_count + 1;
372 current_res.id = core_result.id;
373 current_res.rel = core_result.rel;
377 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
381 current_res.record = core_result.records[1];
383 current_res.record = NULL;
386 RETURN NEXT current_res;
-- Optional copy status filter, restricted to the records of this row and to
-- the org units in scope.
391 IF param_statuses IS NOT NULL AND array_upper(param_statuses, 1) > 0 THEN
394 FROM asset.call_number cn
395 JOIN asset.copy cp ON (cp.call_number = cn.id)
398 AND cp.status IN ( SELECT * FROM search.explode_array( param_statuses ) )
399 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
400 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
404 -- RAISE NOTICE ' % were all status-excluded ... ', core_result.records;
405 excluded_count := excluded_count + 1;
-- When staff is NULL or false the copies are also checked through the joins
-- below. Not all of the filter conditions are visible here.
411 IF staff IS NULL OR NOT staff THEN
414 FROM asset.call_number cn
415 JOIN asset.copy cp ON (cp.call_number = cn.id)
416 JOIN actor.org_unit a ON (cp.circ_lib = a.id)
417 JOIN asset.copy_location cl ON (cp.location = cl.id)
418 JOIN config.copy_status cs ON (cp.status = cs.id)
425 AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
426 AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
430 -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
431 excluded_count := excluded_count + 1;
-- The row counts as visible and is returned to the caller.
437 visible_count := visible_count + 1;
439 current_res.id = core_result.id;
440 current_res.rel = core_result.rel;
444 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
448 current_res.record = core_result.records[1];
450 current_res.record = NULL;
453 RETURN NEXT current_res;
455 IF visible_count % 1000 = 0 THEN
456 -- RAISE NOTICE ' % visible so far ... ', visible_count;
-- Final summary row: the identifiers are NULL and the counters are filled in.
461 current_res.id = NULL;
462 current_res.rel = NULL;
463 current_res.record = NULL;
464 current_res.total = total_count;
465 current_res.checked = check_count;
466 current_res.deleted = deleted_count;
467 current_res.visible = visible_count;
468 current_res.excluded = excluded_count;
472 RETURN NEXT current_res;
475 $func$ LANGUAGE PLPGSQL;
478 param_statuses INT[],
479 param_audience TEXT[], x
480 param_language TEXT[], x
481 param_lit_form TEXT[], x
482 param_types TEXT[], x
483 param_forms TEXT[], x
484 param_vformats TEXT[], x
-- search.explode_array: returns the elements of the first dimension of an
-- array as a set of rows, in subscript order.
--
-- The series runs from the actual lower bound of the array to its upper
-- bound instead of assuming that subscripts start at 1, so arrays declared
-- with custom subscripts such as [0:2] are expanded completely and without
-- spurious NULL rows. For ordinary arrays, whose subscripts start at 1, the
-- output is unchanged. An empty or NULL array yields no rows because both
-- bounds are NULL.
CREATE OR REPLACE FUNCTION search.explode_array(anyarray) RETURNS SETOF anyelement AS $BODY$
    SELECT ($1)[s]
    FROM generate_series(array_lower($1, 1), array_upper($1, 1)) AS s;
$BODY$
LANGUAGE sql IMMUTABLE;
-- search.parse_search_args: PL/Perl function returning search.search_args
-- rows. It decodes JSON text (presumably the function argument; the line
-- that reads it is not visible) and walks the keys of the decoded hash. Each
-- key is split on the pipe character into a field class and a field name,
-- and the same key with pipes replaced by underscores becomes the table
-- alias. A scalar value under a key is wrapped into a one-element list
-- before its entries are visited.
-- NOTE(review): the statements that build and return the result rows are
-- only partly visible in this excerpt.
492 CREATE OR REPLACE FUNCTION search.parse_search_args (TEXT) RETURNS SETOF search.search_args AS $perlcode$
496 my $args = decode_json( $json );
500 for my $k ( keys %$args ) {
501 (my $alias = $k) =~ s/\|/_/gso;
502 my ($class, $field) = split /\|/, $k;
503 my $part = $args->{$k};
504 for my $p ( keys %$part ) {
505 my $data = $part->{$p};
506 $data = [$data] if (!ref($data));
507 for my $datum ( @$data ) {
509 { field_class => $class,
510 field_name => $field,
512 table_alias => $alias,
524 $perlcode$ LANGUAGE PLPERLU;