2 * Copyright (C) 2007-2010 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS search CASCADE;
24 CREATE OR REPLACE FUNCTION evergreen.pg_statistics (tab TEXT, col TEXT) RETURNS TABLE(element TEXT, frequency INT) AS $$
26 -- This query will die on PG < 9.2, but the function can be created. We just won't use it where we can't.
30 FROM (SELECT ROW_NUMBER() OVER (),
32 FROM (SELECT UNNEST(most_common_elem_freqs) AS f
38 JOIN (SELECT ROW_NUMBER() OVER (),
40 FROM (SELECT UNNEST(most_common_elems::text::text[]) AS e
45 ) AS elems USING (row_number);
49 CREATE OR REPLACE FUNCTION evergreen.query_int_wrapper (INT[],TEXT) RETURNS BOOL AS $$
51 RETURN $1 @@ $2::query_int;
53 $$ LANGUAGE PLPGSQL STABLE;
55 CREATE TABLE search.relevance_adjustment (
56 id SERIAL PRIMARY KEY,
57 active BOOL NOT NULL DEFAULT TRUE,
58 field INT NOT NULL REFERENCES config.metabib_field (id) DEFERRABLE INITIALLY DEFERRED,
59 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
60 multiplier NUMERIC NOT NULL DEFAULT 1.0
62 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
64 CREATE TYPE search.search_result AS ( id BIGINT, rel NUMERIC, record INT, total INT, checked INT, visible INT, deleted INT, excluded INT, badges TEXT, popularity NUMERIC );
65 CREATE TYPE search.search_args AS ( id INT, field_class TEXT, field_name TEXT, table_alias TEXT, term TEXT, term_type TEXT );
67 CREATE OR REPLACE FUNCTION search.query_parser_fts (
73 param_locations INT[],
80 param_pref_ou INT DEFAULT NULL
81 ) RETURNS SETOF search.search_result AS $func$
84 current_res search.search_result%ROWTYPE;
85 search_org_list INT[];
95 core_cursor REFCURSOR;
100 deleted_count INT := 0;
101 visible_count INT := 0;
102 excluded_count INT := 0;
107 check_limit := COALESCE( param_check, 1000 );
108 core_limit := COALESCE( param_limit, 25000 );
109 core_offset := COALESCE( param_offset, 0 );
111 SELECT COALESCE( enabled, FALSE ) INTO luri_as_copy FROM config.global_flag WHERE name = 'opac.located_uri.act_as_copy';
113 -- core_skip_chk := COALESCE( param_skip_chk, 1 );
115 IF param_search_ou > 0 THEN
116 IF param_depth IS NOT NULL THEN
117 SELECT ARRAY_AGG(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou, param_depth );
119 SELECT ARRAY_AGG(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou );
123 SELECT ARRAY_AGG(distinct id) INTO luri_org_list FROM actor.org_unit_full_path( param_search_ou );
125 SELECT ARRAY_AGG(distinct id) INTO luri_org_list FROM actor.org_unit_ancestors( param_search_ou );
128 ELSIF param_search_ou < 0 THEN
129 SELECT ARRAY_AGG(distinct org_unit) INTO search_org_list FROM actor.org_lasso_map WHERE lasso = -param_search_ou;
131 FOR tmp_int IN SELECT * FROM UNNEST(search_org_list) LOOP
134 SELECT ARRAY_AGG(distinct id) INTO tmp_int_list FROM actor.org_unit_full_path( tmp_int );
136 SELECT ARRAY_AGG(distinct id) INTO tmp_int_list FROM actor.org_unit_ancestors( tmp_int );
139 luri_org_list := luri_org_list || tmp_int_list;
142 SELECT ARRAY_AGG(DISTINCT x.id) INTO luri_org_list FROM UNNEST(luri_org_list) x(id);
144 ELSIF param_search_ou = 0 THEN
145 -- reserved for user lassos (ou_buckets/type='lasso') with ID passed in depth ... hack? sure.
148 IF param_pref_ou IS NOT NULL THEN
150 SELECT ARRAY_AGG(distinct id) INTO tmp_int_list FROM actor.org_unit_full_path( param_pref_ou );
152 SELECT ARRAY_AGG(distinct id) INTO tmp_int_list FROM actor.org_unit_ancestors( param_pref_ou );
155 luri_org_list := luri_org_list || tmp_int_list;
158 OPEN core_cursor FOR EXECUTE param_query;
162 FETCH core_cursor INTO core_result;
164 EXIT WHEN total_count >= core_limit;
166 total_count := total_count + 1;
168 CONTINUE WHEN total_count NOT BETWEEN core_offset + 1 AND check_limit + core_offset;
170 check_count := check_count + 1;
172 IF NOT deleted_search THEN
174 PERFORM 1 FROM biblio.record_entry b WHERE NOT b.deleted AND b.id IN ( SELECT * FROM unnest( core_result.records ) );
176 -- RAISE NOTICE ' % were all deleted ... ', core_result.records;
177 deleted_count := deleted_count + 1;
182 FROM biblio.record_entry b
183 JOIN config.bib_source s ON (b.source = s.id)
185 AND b.id IN ( SELECT * FROM unnest( core_result.records ) );
188 -- RAISE NOTICE ' % were all transcendant ... ', core_result.records;
189 visible_count := visible_count + 1;
191 current_res.id = core_result.id;
192 current_res.rel = core_result.rel;
193 current_res.badges = core_result.badges;
194 current_res.popularity = core_result.popularity;
198 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
202 current_res.record = core_result.records[1];
204 current_res.record = NULL;
207 RETURN NEXT current_res;
213 FROM asset.call_number cn
214 JOIN asset.uri_call_number_map map ON (map.call_number = cn.id)
215 JOIN asset.uri uri ON (map.uri = uri.id)
217 AND cn.label = '##URI##'
219 AND ( param_locations IS NULL OR array_upper(param_locations, 1) IS NULL )
220 AND cn.record IN ( SELECT * FROM unnest( core_result.records ) )
221 AND cn.owning_lib IN ( SELECT * FROM unnest( luri_org_list ) )
225 -- RAISE NOTICE ' % have at least one URI ... ', core_result.records;
226 visible_count := visible_count + 1;
228 current_res.id = core_result.id;
229 current_res.rel = core_result.rel;
230 current_res.badges = core_result.badges;
231 current_res.popularity = core_result.popularity;
235 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
239 current_res.record = core_result.records[1];
241 current_res.record = NULL;
244 RETURN NEXT current_res;
249 IF param_statuses IS NOT NULL AND array_upper(param_statuses, 1) > 0 THEN
252 FROM asset.call_number cn
253 JOIN asset.copy cp ON (cp.call_number = cn.id)
256 AND cp.status IN ( SELECT * FROM unnest( param_statuses ) )
257 AND cn.record IN ( SELECT * FROM unnest( core_result.records ) )
258 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
263 FROM biblio.peer_bib_copy_map pr
264 JOIN asset.copy cp ON (cp.id = pr.target_copy)
266 AND cp.status IN ( SELECT * FROM unnest( param_statuses ) )
267 AND pr.peer_record IN ( SELECT * FROM unnest( core_result.records ) )
268 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
272 -- RAISE NOTICE ' % and multi-home linked records were all status-excluded ... ', core_result.records;
273 excluded_count := excluded_count + 1;
280 IF param_locations IS NOT NULL AND array_upper(param_locations, 1) > 0 THEN
283 FROM asset.call_number cn
284 JOIN asset.copy cp ON (cp.call_number = cn.id)
287 AND cp.location IN ( SELECT * FROM unnest( param_locations ) )
288 AND cn.record IN ( SELECT * FROM unnest( core_result.records ) )
289 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
294 FROM biblio.peer_bib_copy_map pr
295 JOIN asset.copy cp ON (cp.id = pr.target_copy)
297 AND cp.location IN ( SELECT * FROM unnest( param_locations ) )
298 AND pr.peer_record IN ( SELECT * FROM unnest( core_result.records ) )
299 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
303 -- RAISE NOTICE ' % and multi-home linked records were all copy_location-excluded ... ', core_result.records;
304 excluded_count := excluded_count + 1;
311 IF staff IS NULL OR NOT staff THEN
314 FROM asset.opac_visible_copies
315 WHERE circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
316 AND record IN ( SELECT * FROM unnest( core_result.records ) )
321 FROM biblio.peer_bib_copy_map pr
322 JOIN asset.opac_visible_copies cp ON (cp.copy_id = pr.target_copy)
323 WHERE cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
324 AND pr.peer_record IN ( SELECT * FROM unnest( core_result.records ) )
329 -- RAISE NOTICE ' % and multi-home linked records were all visibility-excluded ... ', core_result.records;
330 excluded_count := excluded_count + 1;
338 FROM asset.call_number cn
339 JOIN asset.copy cp ON (cp.call_number = cn.id)
342 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
343 AND cn.record IN ( SELECT * FROM unnest( core_result.records ) )
349 FROM biblio.peer_bib_copy_map pr
350 JOIN asset.copy cp ON (cp.id = pr.target_copy)
352 AND cp.circ_lib IN ( SELECT * FROM unnest( search_org_list ) )
353 AND pr.peer_record IN ( SELECT * FROM unnest( core_result.records ) )
359 FROM asset.call_number cn
360 JOIN asset.copy cp ON (cp.call_number = cn.id)
361 WHERE cn.record IN ( SELECT * FROM unnest( core_result.records ) )
366 -- Recheck Located URI visibility in the case of no "foreign" copies
368 FROM asset.call_number cn
369 JOIN asset.uri_call_number_map map ON (map.call_number = cn.id)
370 JOIN asset.uri uri ON (map.uri = uri.id)
372 AND cn.label = '##URI##'
374 AND cn.record IN ( SELECT * FROM unnest( core_result.records ) )
375 AND cn.owning_lib NOT IN ( SELECT * FROM unnest( luri_org_list ) )
379 -- RAISE NOTICE ' % were excluded for foreign located URIs... ', core_result.records;
380 excluded_count := excluded_count + 1;
384 -- RAISE NOTICE ' % and multi-home linked records were all visibility-excluded ... ', core_result.records;
385 excluded_count := excluded_count + 1;
396 visible_count := visible_count + 1;
398 current_res.id = core_result.id;
399 current_res.rel = core_result.rel;
400 current_res.badges = core_result.badges;
401 current_res.popularity = core_result.popularity;
405 SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
409 current_res.record = core_result.records[1];
411 current_res.record = NULL;
414 RETURN NEXT current_res;
416 IF visible_count % 1000 = 0 THEN
417 -- RAISE NOTICE ' % visible so far ... ', visible_count;
422 current_res.id = NULL;
423 current_res.rel = NULL;
424 current_res.record = NULL;
425 current_res.badges = NULL;
426 current_res.popularity = NULL;
427 current_res.total = total_count;
428 current_res.checked = check_count;
429 current_res.deleted = deleted_count;
430 current_res.visible = visible_count;
431 current_res.excluded = excluded_count;
435 RETURN NEXT current_res;
438 $func$ LANGUAGE PLPGSQL;
440 CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes text[], hits bigint[]) RETURNS TABLE(id integer, value text, count bigint)
442 SELECT id, value, count
444 SELECT mfae.field AS id,
446 COUNT(DISTINCT mfae.source),
448 PARTITION BY mfae.field ORDER BY COUNT(DISTINCT mfae.source) DESC
450 FROM metabib.facet_entry mfae
451 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
452 WHERE mfae.source = ANY ($2)
454 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
459 (SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled),
465 CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
466 SELECT id, value, count FROM (
467 SELECT mfae.field AS id,
469 COUNT(DISTINCT mmrsm.metarecord),
471 PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
473 FROM metabib.facet_entry mfae
474 JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
475 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
476 WHERE mmrsm.metarecord IN (SELECT * FROM unnest($2))
478 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
481 WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
484 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute ( value INT, attr TEXT ) RETURNS INT AS $f$
487 WHEN 'luri_org' THEN 0 -- "b" attr
488 WHEN 'bib_source' THEN 1 -- "b" attr
490 WHEN 'copy_flags' THEN 0 -- "c" attr
491 WHEN 'owning_lib' THEN 1 -- "c" attr
492 WHEN 'circ_lib' THEN 2 -- "c" attr
493 WHEN 'status' THEN 3 -- "c" attr
494 WHEN 'location' THEN 4 -- "c" attr
495 WHEN 'location_group' THEN 5 -- "c" attr
499 /* copy_flags bit positions, LSB-first:
501 0: asset.copy.opac_visible
504 When adding flags, you must update asset.all_visible_flags()
506 Because bib and copy values are stored separately, we can reuse
507 shifts, saving us some space. We could probably take back a bit
508 too, but I'm not sure its worth squeezing that last one out. We'd
509 be left with just 2 slots for copy attrs, rather than 10.
512 $f$ LANGUAGE SQL IMMUTABLE;
514 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_list ( attr TEXT, value INT[] ) RETURNS INT[] AS $f$
515 SELECT ARRAY_AGG(search.calculate_visibility_attribute(x, $1)) FROM UNNEST($2) AS X;
516 $f$ LANGUAGE SQL IMMUTABLE;
518 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_test ( attr TEXT, value INT[], negate BOOL DEFAULT FALSE ) RETURNS TEXT AS $f$
519 SELECT CASE WHEN $3 THEN '!' ELSE '' END || '(' || ARRAY_TO_STRING(search.calculate_visibility_attribute_list($1,$2),'|') || ')';
520 $f$ LANGUAGE SQL IMMUTABLE;
522 CREATE OR REPLACE FUNCTION asset.calculate_copy_visibility_attribute_set ( copy_id BIGINT ) RETURNS INT[] AS $f$
524 copy_row asset.copy%ROWTYPE;
525 lgroup_map asset.copy_location_group_map%ROWTYPE;
526 attr_set INT[] := '{}'::INT[];
528 SELECT * INTO copy_row FROM asset.copy WHERE id = copy_id;
530 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.opac_visible::INT, 'copy_flags');
531 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.circ_lib, 'circ_lib');
532 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.status, 'status');
533 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.location, 'location');
537 search.calculate_visibility_attribute(owning_lib, 'owning_lib')
539 FROM asset.call_number
540 WHERE id = copy_row.call_number;
542 FOR lgroup_map IN SELECT * FROM asset.copy_location_group_map WHERE location = copy_row.location LOOP
543 attr_set := attr_set || search.calculate_visibility_attribute(lgroup_map.lgroup, 'location_group');
548 $f$ LANGUAGE PLPGSQL;
550 CREATE OR REPLACE FUNCTION biblio.calculate_bib_visibility_attribute_set ( bib_id BIGINT, new_source INT DEFAULT NULL, force_source BOOL DEFAULT FALSE ) RETURNS INT[] AS $f$
552 bib_row biblio.record_entry%ROWTYPE;
553 cn_row asset.call_number%ROWTYPE;
554 attr_set INT[] := '{}'::INT[];
556 SELECT * INTO bib_row FROM biblio.record_entry WHERE id = bib_id;
559 IF new_source IS NOT NULL THEN
560 attr_set := attr_set || search.calculate_visibility_attribute(new_source, 'bib_source');
562 ELSIF bib_row.source IS NOT NULL THEN
563 attr_set := attr_set || search.calculate_visibility_attribute(bib_row.source, 'bib_source');
568 FROM asset.call_number
569 WHERE record = bib_id
570 AND label = '##URI##'
573 attr_set := attr_set || search.calculate_visibility_attribute(cn_row.owning_lib, 'luri_org');
578 $f$ LANGUAGE PLPGSQL;
580 CREATE OR REPLACE FUNCTION asset.cache_copy_visibility () RETURNS TRIGGER as $func$
582 ocn asset.call_number%ROWTYPE;
583 ncn asset.call_number%ROWTYPE;
588 SELECT enabled = FALSE INTO dobib FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc';
590 IF TG_TABLE_NAME = 'peer_bib_copy_map' THEN -- Only needs ON INSERT OR DELETE, so handle separately
591 IF TG_OP = 'INSERT' THEN
592 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
595 asset.calculate_copy_visibility_attribute_set(NEW.target_copy)
599 ELSIF TG_OP = 'DELETE' THEN
600 DELETE FROM asset.copy_vis_attr_cache
601 WHERE record = OLD.peer_record AND target_copy = OLD.target_copy;
607 IF TG_OP = 'INSERT' THEN -- Handles ON INSERT. ON UPDATE is below.
608 IF TG_TABLE_NAME IN ('copy', 'unit') THEN
609 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
610 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
613 asset.calculate_copy_visibility_attribute_set(NEW.id)
615 ELSIF TG_TABLE_NAME = 'record_entry' THEN
616 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
617 ELSIF TG_TABLE_NAME = 'call_number' AND NEW.label = '##URI##' AND dobib THEN -- New located URI
618 UPDATE biblio.record_entry
619 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
620 WHERE id = NEW.record;
627 -- handle items first, since with circulation activity
628 -- their statuses change frequently
629 IF TG_TABLE_NAME IN ('copy', 'unit') THEN -- This handles ON UPDATE OR DELETE. ON INSERT above
631 IF TG_OP = 'DELETE' THEN -- Shouldn't get here, normally
632 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
636 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
638 IF OLD.deleted <> NEW.deleted THEN
640 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
642 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
645 asset.calculate_copy_visibility_attribute_set(NEW.id)
650 ELSIF OLD.location <> NEW.location OR
651 OLD.status <> NEW.status OR
652 OLD.opac_visible <> NEW.opac_visible OR
653 OLD.circ_lib <> NEW.circ_lib OR
654 OLD.call_number <> NEW.call_number
656 IF OLD.call_number <> NEW.call_number THEN -- Special check since it's more expensive than the next branch
657 SELECT * INTO ocn FROM asset.call_number cn WHERE id = OLD.call_number;
659 IF ncn.record <> ocn.record THEN
660 -- We have to use a record-specific WHERE clause
661 -- to avoid modifying the entries for peer-bib copies.
662 UPDATE asset.copy_vis_attr_cache
663 SET target_copy = NEW.id,
665 WHERE target_copy = OLD.id
666 AND record = ocn.record;
670 -- Any of these could change visibility, but
671 -- we'll save some queries and not try to calculate
672 -- the change directly. We want to update peer-bib
673 -- entries in this case, unlike above.
674 UPDATE asset.copy_vis_attr_cache
675 SET target_copy = NEW.id,
676 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(NEW.id)
677 WHERE target_copy = OLD.id;
681 ELSIF TG_TABLE_NAME = 'call_number' THEN
683 IF TG_OP = 'DELETE' AND OLD.label = '##URI##' AND dobib THEN -- really deleted located URI, if the delete protection rule is disabled...
684 UPDATE biblio.record_entry
685 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
686 WHERE id = OLD.record;
690 IF OLD.label = '##URI##' AND dobib THEN -- Located URI
691 IF OLD.deleted <> NEW.deleted OR OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
692 UPDATE biblio.record_entry
693 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
694 WHERE id = NEW.record;
696 IF OLD.record <> NEW.record THEN -- maybe on merge?
697 UPDATE biblio.record_entry
698 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
699 WHERE id = OLD.record;
703 ELSIF OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
704 UPDATE asset.copy_vis_attr_cache
705 SET record = NEW.record,
706 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(target_copy)
707 WHERE target_copy IN (SELECT id FROM asset.copy WHERE call_number = NEW.id)
708 AND record = OLD.record;
712 ELSIF TG_TABLE_NAME = 'record_entry' AND OLD.source IS DISTINCT FROM NEW.source THEN -- Only handles ON UPDATE, INSERT above
713 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
718 $func$ LANGUAGE PLPGSQL;
720 CREATE TRIGGER z_opac_vis_mat_view_tgr BEFORE INSERT OR UPDATE ON biblio.record_entry FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
721 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR DELETE ON biblio.peer_bib_copy_map FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
722 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE OR DELETE ON asset.call_number FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
723 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
724 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
725 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
726 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
728 CREATE OR REPLACE FUNCTION asset.all_visible_flags () RETURNS TEXT AS $f$
729 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(1 << x, 'copy_flags')),'&') || ')'
730 FROM GENERATE_SERIES(0,0) AS x; -- increment as new flags are added.
731 $f$ LANGUAGE SQL STABLE;
733 CREATE OR REPLACE FUNCTION asset.visible_orgs (otype TEXT) RETURNS TEXT AS $f$
734 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
737 $f$ LANGUAGE SQL STABLE;
739 CREATE OR REPLACE FUNCTION asset.invisible_orgs (otype TEXT) RETURNS TEXT AS $f$
740 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
742 WHERE NOT opac_visible;
743 $f$ LANGUAGE SQL STABLE;
745 -- Bib-oriented defaults for search
746 CREATE OR REPLACE FUNCTION asset.bib_source_default () RETURNS TEXT AS $f$
747 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'bib_source')),'|') || ')'
748 FROM config.bib_source
750 $f$ LANGUAGE SQL IMMUTABLE;
752 CREATE OR REPLACE FUNCTION asset.luri_org_default () RETURNS TEXT AS $f$
753 SELECT * FROM asset.invisible_orgs('luri_org');
754 $f$ LANGUAGE SQL STABLE;
756 -- Copy-oriented defaults for search
757 CREATE OR REPLACE FUNCTION asset.location_group_default () RETURNS TEXT AS $f$
758 SELECT '!()'::TEXT; -- For now, as there's no way to cause a location group to hide all copies.
760 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location_group')),'|') || ')'
761 FROM asset.copy_location_group
762 WHERE NOT opac_visible;
764 $f$ LANGUAGE SQL IMMUTABLE;
766 CREATE OR REPLACE FUNCTION asset.location_default () RETURNS TEXT AS $f$
767 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location')),'|') || ')'
768 FROM asset.copy_location
769 WHERE NOT opac_visible;
770 $f$ LANGUAGE SQL STABLE;
772 CREATE OR REPLACE FUNCTION asset.status_default () RETURNS TEXT AS $f$
773 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'status')),'|') || ')'
774 FROM config.copy_status
775 WHERE NOT opac_visible;
776 $f$ LANGUAGE SQL STABLE;
778 CREATE OR REPLACE FUNCTION asset.owning_lib_default () RETURNS TEXT AS $f$
779 SELECT * FROM asset.invisible_orgs('owning_lib');
780 $f$ LANGUAGE SQL STABLE;
782 CREATE OR REPLACE FUNCTION asset.circ_lib_default () RETURNS TEXT AS $f$
783 SELECT * FROM asset.invisible_orgs('circ_lib');
784 $f$ LANGUAGE SQL STABLE;
786 CREATE OR REPLACE FUNCTION asset.patron_default_visibility_mask () RETURNS TABLE (b_attrs TEXT, c_attrs TEXT) AS $f$
788 copy_flags TEXT; -- "c" attr
790 owning_lib TEXT; -- "c" attr
791 circ_lib TEXT; -- "c" attr
792 status TEXT; -- "c" attr
793 location TEXT; -- "c" attr
794 location_group TEXT; -- "c" attr
796 luri_org TEXT; -- "b" attr
797 bib_sources TEXT; -- "b" attr
799 bib_tests TEXT := '';
801 copy_flags := asset.all_visible_flags(); -- Will always have at least one
803 owning_lib := NULLIF(asset.owning_lib_default(),'!()');
805 circ_lib := NULLIF(asset.circ_lib_default(),'!()');
806 status := NULLIF(asset.status_default(),'!()');
807 location := NULLIF(asset.location_default(),'!()');
808 location_group := NULLIF(asset.location_group_default(),'!()');
810 -- LURIs will be handled at the perl layer directly
811 -- luri_org := NULLIF(asset.luri_org_default(),'!()');
812 bib_sources := NULLIF(asset.bib_source_default(),'()');
815 IF luri_org IS NOT NULL AND bib_sources IS NOT NULL THEN
816 bib_tests := '('||ARRAY_TO_STRING( ARRAY[luri_org,bib_sources], '|')||')&('||luri_org||')&';
817 ELSIF luri_org IS NOT NULL THEN
818 bib_tests := luri_org || '&';
819 ELSIF bib_sources IS NOT NULL THEN
820 bib_tests := bib_sources || '|';
823 RETURN QUERY SELECT bib_tests,
824 '('||ARRAY_TO_STRING(
825 ARRAY[copy_flags,owning_lib,circ_lib,status,location,location_group]::TEXT[],
829 $f$ LANGUAGE PLPGSQL STABLE ROWS 1;
831 CREATE OR REPLACE FUNCTION metabib.suggest_browse_entries(raw_query_text text, search_class text, headline_opts text, visibility_org integer, query_limit integer, normalization integer)
832 RETURNS TABLE(value text, field integer, buoyant_and_class_match boolean, field_match boolean, field_weight integer, rank real, buoyant boolean, match text)
835 prepared_query_texts TEXT[];
838 opac_visibility_join TEXT;
839 search_class_join TEXT;
843 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
845 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
846 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
848 visibility_org := NULLIF(visibility_org,-1);
849 IF visibility_org IS NOT NULL THEN
850 PERFORM FROM actor.org_unit WHERE id = visibility_org AND parent_ou IS NULL;
852 opac_visibility_join := '';
854 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
856 b_tests := search.calculate_visibility_attribute_test(
858 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(visibility_org))
861 b_tests := search.calculate_visibility_attribute_test(
863 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(visibility_org))
866 opac_visibility_join := '
867 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = x.source)
868 LEFT JOIN biblio.record_entry b ON (b.id = x.source)
869 JOIN vm ON (acvac.vis_attr_vector @@
870 (vm.c_attrs || $$&$$ ||
871 search.calculate_visibility_attribute_test(
873 (SELECT ARRAY_AGG(id) FROM actor.org_unit_descendants($4))
876 ) OR (b.vis_attr_vector @@ $$' || b_tests || '$$::query_int)
880 opac_visibility_join := '';
883 -- The following determines whether we only provide suggestsons matching
884 -- the user's selected search_class, or whether we show other suggestions
885 -- too. The reason for MIN() is that for search_classes like
886 -- 'title|proper|uniform' you would otherwise get multiple rows. The
887 -- implication is that if title as a class doesn't have restrict,
888 -- nor does the proper field, but the uniform field does, you're going
889 -- to get 'false' for your overall evaluation of 'should we restrict?'
890 -- To invert that, change from MIN() to MAX().
894 MIN(cmc.restrict::INT) AS restrict_class,
895 MIN(cmf.restrict::INT) AS restrict_field
896 FROM metabib.search_class_to_registered_components(search_class)
897 AS _registered (field_class TEXT, field INT)
899 config.metabib_class cmc ON (cmc.name = _registered.field_class)
901 config.metabib_field cmf ON (cmf.id = _registered.field);
903 -- evaluate 'should we restrict?'
904 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
905 search_class_join := '
907 metabib.search_class_to_registered_components($2)
908 AS _registered (field_class TEXT, field INT) ON (
909 (_registered.field IS NULL AND
910 _registered.field_class = cmf.field_class) OR
911 (_registered.field = cmf.id)
915 search_class_join := '
917 metabib.search_class_to_registered_components($2)
918 AS _registered (field_class TEXT, field INT) ON (
919 _registered.field_class = cmc.name
924 RETURN QUERY EXECUTE '
925 WITH vm AS ( SELECT * FROM asset.patron_default_visibility_mask() ),
926 mbe AS (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000)
935 TS_HEADLINE(value, $7, $3)
936 FROM (SELECT DISTINCT
939 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
940 _registered.field = cmf.id AS restrict,
942 TS_RANK_CD(mbe.index_vector, $1, $6),
945 FROM metabib.browse_entry_def_map mbedm
946 JOIN mbe ON (mbe.id = mbedm.entry)
947 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
948 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
949 ' || search_class_join || '
950 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
952 ' || opac_visibility_join || '
953 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
955 ' -- sic, repeat the order by clause in the outer select too
957 query, search_class, headline_opts,
958 visibility_org, query_limit, normalization, plain_query
962 -- buoyant AND chosen class = match class
963 -- chosen field = match field
970 $f$ LANGUAGE plpgsql ROWS 10;
972 CREATE OR REPLACE FUNCTION metabib.staged_browse(query text, fields integer[], context_org integer, context_locations integer[], staff boolean, browse_superpage_size integer, count_up_from_zero boolean, result_limit integer, next_pivot_pos integer)
973 RETURNS SETOF metabib.flat_browse_entry_appearance
982 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
983 results_skipped INT := 0;
984 row_counter INT := 0;
989 all_records BIGINT[];
990 all_brecords BIGINT[];
991 all_arecords BIGINT[];
992 superpage_of_records BIGINT[];
997 unauthorized_entry RECORD;
999 IF count_up_from_zero THEN
1006 SELECT x.c_attrs, x.b_attrs INTO c_tests, b_tests FROM asset.patron_default_visibility_mask() x;
1009 -- b_tests supplies its own query_int operator, c_tests does not
1010 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
1012 SELECT ARRAY_AGG(id) INTO c_orgs FROM actor.org_unit_descendants(context_org);
1014 c_tests := c_tests || search.calculate_visibility_attribute_test('circ_lib',c_orgs)
1015 || '&' || search.calculate_visibility_attribute_test('owning_lib',c_orgs);
1017 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
1019 b_tests := b_tests || search.calculate_visibility_attribute_test(
1021 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(context_org) x)
1024 b_tests := b_tests || search.calculate_visibility_attribute_test(
1026 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(context_org) x)
1030 IF context_locations THEN
1031 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
1032 c_tests := c_tests || search.calculate_visibility_attribute_test('location',context_locations);
1035 OPEN curs NO SCROLL FOR EXECUTE query;
1038 FETCH curs INTO rec;
1040 IF result_row.pivot_point IS NOT NULL THEN
1041 RETURN NEXT result_row;
1047 SELECT INTO unauthorized_entry *
1048 FROM metabib.browse_entry_simple_heading_map mbeshm
1049 INNER JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
1050 INNER JOIN authority.control_set_authority_field acsaf ON ( acsaf.id = ash.atag )
1051 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
1052 WHERE mbeshm.entry = rec.id
1053 AND ahf.heading_purpose = 'variant';
1055 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
1056 IF (unauthorized_entry.record IS NOT NULL) THEN
1057 --unauthorized term belongs to an auth linked to a bib?
1058 SELECT INTO all_arecords, result_row.sees, afields
1059 ARRAY_AGG(DISTINCT abl.bib),
1060 STRING_AGG(DISTINCT abl.authority::TEXT, $$,$$),
1061 ARRAY_AGG(DISTINCT map.metabib_field)
1062 FROM authority.bib_linking abl
1063 INNER JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
1064 map.authority_field = unauthorized_entry.atag
1065 AND map.metabib_field = ANY(fields)
1067 WHERE abl.authority = unauthorized_entry.record;
1069 --do usual procedure
1070 SELECT INTO all_arecords, result_row.sees, afields
1071 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
1072 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
1073 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
1075 FROM metabib.browse_entry_simple_heading_map mbeshm
1076 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
1077 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
1078 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
1079 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
1080 ash.atag = map.authority_field
1081 AND map.metabib_field = ANY(fields)
1083 JOIN authority.control_set_authority_field acsaf ON (
1084 map.authority_field = acsaf.id
1086 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
1087 WHERE mbeshm.entry = rec.id
1088 AND ahf.heading_purpose = 'variant';
1092 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
1093 SELECT INTO all_brecords, result_row.authorities, bfields
1094 ARRAY_AGG(DISTINCT source),
1095 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
1096 ARRAY_AGG(DISTINCT def)
1097 FROM metabib.browse_entry_def_map
1098 WHERE entry = rec.id
1099 AND def = ANY(fields);
1101 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
1103 result_row.sources := 0;
1104 result_row.asources := 0;
1106 -- Bib-linked vis checking
1107 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
1109 SELECT INTO result_row.sources COUNT(DISTINCT b.id)
1110 FROM biblio.record_entry b
1111 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
1112 WHERE b.id = ANY(all_brecords[1:browse_superpage_size])
1114 acvac.vis_attr_vector @@ c_tests::query_int
1115 OR b.vis_attr_vector @@ b_tests::query_int
1118 result_row.accurate := TRUE;
1122 -- Authority-linked vis checking
1123 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
1125 SELECT INTO result_row.asources COUNT(DISTINCT b.id)
1126 FROM biblio.record_entry b
1127 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
1128 WHERE b.id = ANY(all_arecords[1:browse_superpage_size])
1130 acvac.vis_attr_vector @@ c_tests::query_int
1131 OR b.vis_attr_vector @@ b_tests::query_int
1134 result_row.aaccurate := TRUE;
1138 IF result_row.sources > 0 OR result_row.asources > 0 THEN
1140 -- The function that calls this function needs row_number in order
1141 -- to correctly order results from two different runs of this
1143 result_row.row_number := row_number;
1145 -- Now, if row_counter is still less than limit, return a row. If
1146 -- not, but it is less than next_pivot_pos, continue on without
1147 -- returning actual result rows until we find
1148 -- that next pivot, and return it.
1150 IF row_counter < result_limit THEN
1151 result_row.browse_entry := rec.id;
1152 result_row.value := rec.value;
1154 RETURN NEXT result_row;
1156 result_row.browse_entry := NULL;
1157 result_row.authorities := NULL;
1158 result_row.fields := NULL;
1159 result_row.value := NULL;
1160 result_row.sources := NULL;
1161 result_row.sees := NULL;
1162 result_row.accurate := NULL;
1163 result_row.aaccurate := NULL;
1164 result_row.pivot_point := rec.id;
1166 IF row_counter >= next_pivot_pos THEN
1167 RETURN NEXT result_row;
1172 IF count_up_from_zero THEN
1173 row_number := row_number + 1;
1175 row_number := row_number - 1;
1178 -- row_counter is different from row_number.
1179 -- It simply counts up from zero so that we know when
1180 -- we've reached our limit.
1181 row_counter := row_counter + 1;
1185 $f$ LANGUAGE plpgsql ROWS 10;
1187 CREATE OR REPLACE FUNCTION metabib.browse(search_field integer[], browse_term text, context_org integer DEFAULT NULL::integer, context_loc_group integer DEFAULT NULL::integer, staff boolean DEFAULT false, pivot_id bigint DEFAULT NULL::bigint, result_limit integer DEFAULT 10)
1188 RETURNS SETOF metabib.flat_browse_entry_appearance
1194 pivot_sort_value TEXT;
1195 pivot_sort_fallback TEXT;
1196 context_locations INT[];
1197 browse_superpage_size INT;
1198 results_skipped INT := 0;
1202 forward_to_pivot INT;
1204 -- First, find the pivot if we were given a browse term but not a pivot.
1205 IF pivot_id IS NULL THEN
1206 pivot_id := metabib.browse_pivot(search_field, browse_term);
1209 SELECT INTO pivot_sort_value, pivot_sort_fallback
1210 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
1212 -- Bail if we couldn't find a pivot.
1213 IF pivot_sort_value IS NULL THEN
1217 -- Transform the context_loc_group argument (if any) (logc at the
1218 -- TPAC layer) into a form we'll be able to use.
1219 IF context_loc_group IS NOT NULL THEN
1220 SELECT INTO context_locations ARRAY_AGG(location)
1221 FROM asset.copy_location_group_map
1222 WHERE lgroup = context_loc_group;
1225 -- Get the configured size of browse superpages.
1226 SELECT INTO browse_superpage_size COALESCE(value::INT,100) -- NULL ok
1227 FROM config.global_flag
1228 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
1230 -- First we're going to search backward from the pivot, then we're going
1231 -- to search forward. In each direction, we need two limits. At the
1232 -- lesser of the two limits, we delineate the edge of the result set
1233 -- we're going to return. At the greater of the two limits, we find the
1234 -- pivot value that would represent an offset from the current pivot
1235 -- at a distance of one "page" in either direction, where a "page" is a
1236 -- result set of the size specified in the "result_limit" argument.
1238 -- The two limits in each direction make four derived values in total,
1239 -- and we calculate them now.
1240 back_limit := CEIL(result_limit::FLOAT / 2);
1241 back_to_pivot := result_limit;
1242 forward_limit := result_limit / 2;
1243 forward_to_pivot := result_limit - 1;
1245 -- This is the meat of the SQL query that finds browse entries. We'll
1246 -- pass this to a function which uses it with a cursor, so that individual
1247 -- rows may be fetched in a loop until some condition is satisfied, without
1248 -- waiting for a result set of fixed size to be collected all at once.
1253 FROM metabib.browse_entry mbe
1255 EXISTS ( -- are there any bibs using this mbe via the requested fields?
1257 FROM metabib.browse_entry_def_map mbedm
1258 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
1259 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
1261 FROM metabib.browse_entry_simple_heading_map mbeshm
1262 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
1263 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
1264 ash.atag = map.authority_field
1265 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
1267 JOIN authority.control_set_authority_field acsaf ON (
1268 map.authority_field = acsaf.id
1270 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
1271 WHERE mbeshm.entry = mbe.id
1272 AND ahf.heading_purpose IN (' || $$'variant'$$ || ')
1273 -- and authority that variant is coming from is linked to a bib
1276 FROM metabib.browse_entry_def_map mbedm2
1277 WHERE mbedm2.authority = ash.record AND mbedm2.def = ANY(' || quote_literal(search_field) || ')
1283 -- This is the variant of the query for browsing backward.
1284 back_query := core_query ||
1285 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
1286 ' ORDER BY mbe.sort_value DESC, mbe.value DESC LIMIT 1000';
1288 -- This variant browses forward.
1289 forward_query := core_query ||
1290 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
1291 ' ORDER BY mbe.sort_value, mbe.value LIMIT 1000';
1293 -- We now call the function which applies a cursor to the provided
1294 -- queries, stopping at the appropriate limits and also giving us
1295 -- the next page's pivot.
1297 SELECT * FROM metabib.staged_browse(
1298 back_query, search_field, context_org, context_locations,
1299 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
1301 SELECT * FROM metabib.staged_browse(
1302 forward_query, search_field, context_org, context_locations,
1303 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
1304 ) ORDER BY row_number DESC;
1307 $f$ LANGUAGE plpgsql ROWS 10;
1309 CREATE OR REPLACE FUNCTION metabib.browse(
1312 context_org INT DEFAULT NULL,
1313 context_loc_group INT DEFAULT NULL,
1314 staff BOOL DEFAULT FALSE,
1315 pivot_id BIGINT DEFAULT NULL,
1316 result_limit INT DEFAULT 10
1317 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
1319 RETURN QUERY SELECT * FROM metabib.browse(
1320 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
1321 FROM config.metabib_field WHERE field_class = search_class),
1330 $p$ LANGUAGE PLPGSQL ROWS 10;
1332 CREATE OR REPLACE VIEW search.best_tsconfig AS
1334 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
1335 FROM config.metabib_field m
1336 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
1337 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND f.index_weight = 'C');
1339 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
1341 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
1344 field_list INT[] DEFAULT '{}'::INT[],
1345 css_class TEXT DEFAULT 'oils_SH',
1346 hl_all BOOL DEFAULT TRUE,
1347 minwords INT DEFAULT 5,
1348 maxwords INT DEFAULT 25,
1349 shortwords INT DEFAULT 0,
1350 maxfrags INT DEFAULT 0,
1351 delimiter TEXT DEFAULT ' ... '
1352 ) RETURNS SETOF search.highlight_result AS $f$
1355 v_css_class TEXT := css_class;
1356 v_delimiter TEXT := delimiter;
1357 v_field_list INT[] := field_list;
1360 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
1361 v_delimiter := ' ... ';
1365 opts := opts || 'MinWords=' || minwords;
1366 opts := opts || ', MaxWords=' || maxwords;
1367 opts := opts || ', ShortWords=' || shortwords;
1368 opts := opts || ', MaxFragments=' || maxfrags;
1369 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
1371 opts := opts || 'HighlightAll=TRUE';
1374 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
1375 v_css_class := 'oils_SH';
1378 opts := opts || $$, StopSel=</b>, StartSel="<b class='$$ || v_css_class; -- "
1380 IF v_field_list = '{}'::INT[] THEN
1381 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
1388 evergreen.escape_for_html(de.value) AS value,
1390 ts_config::REGCONFIG,
1391 evergreen.escape_for_html(de.value),
1392 $$ || quote_literal(tsq) || $$,
1393 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
1395 FROM metabib.display_entry de
1396 JOIN config.metabib_field mf ON (mf.id = de.field)
1397 JOIN search.best_tsconfig t ON (t.id = de.field)
1398 WHERE de.source = $2
1399 AND field = ANY ($3)
1402 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
1404 $f$ LANGUAGE PLPGSQL;
1406 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
1407 SELECT regexp_replace(
1423 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
1425 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
1427 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
1428 css_class TEXT DEFAULT 'oils_SH',
1429 hl_all BOOL DEFAULT TRUE,
1430 minwords INT DEFAULT 5,
1431 maxwords INT DEFAULT 25,
1432 shortwords INT DEFAULT 0,
1433 maxfrags INT DEFAULT 0,
1434 delimiter TEXT DEFAULT ' ... '
1435 ) RETURNS SETOF search.highlight_result AS $f$
1443 IF (tsq_map ILIKE 'hstore%') THEN
1444 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
1446 tsq_hstore := tsq_map::HSTORE;
1449 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore::HSTORE) LOOP
1450 SELECT ARRAY_AGG(unnest::INT) INTO afields
1451 FROM unnest(regexp_split_to_array(fields,','));
1452 seen := seen || afields;
1455 SELECT * FROM search.highlight_display_fields_impl(
1456 rid, tsq, afields, css_class, hl_all,minwords,
1457 maxwords, shortwords, maxfrags, delimiter
1465 evergreen.escape_for_html(value) AS value,
1466 evergreen.escape_for_html(value) AS highlight
1467 FROM metabib.display_entry
1469 AND NOT (field = ANY (seen));
1471 $f$ LANGUAGE PLPGSQL ROWS 10;
1473 -- SymSpell implementation follows
1475 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
1476 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
1478 ) RETURNS TEXT[] AS $F$
1479 SELECT NULLIF(ARRAY(
1480 SELECT * FROM UNNEST($1) x
1482 SELECT * FROM UNNEST($2) y
1486 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1487 use String::KeyboardDistance qw(:all);
1488 return qwerty_keyboard_distance(@_);
1489 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1491 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1492 use String::KeyboardDistance qw(:all);
1493 return qwerty_keyboard_distance_match(@_);
1494 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1496 CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
1497 use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
1498 return xs_edistance(@_);
1499 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1501 CREATE TABLE search.symspell_dictionary (
1502 keyword_count INT NOT NULL DEFAULT 0,
1503 title_count INT NOT NULL DEFAULT 0,
1504 author_count INT NOT NULL DEFAULT 0,
1505 subject_count INT NOT NULL DEFAULT 0,
1506 series_count INT NOT NULL DEFAULT 0,
1507 identifier_count INT NOT NULL DEFAULT 0,
1509 prefix_key TEXT PRIMARY KEY,
1511 keyword_suggestions TEXT[],
1512 title_suggestions TEXT[],
1513 author_suggestions TEXT[],
1514 subject_suggestions TEXT[],
1515 series_suggestions TEXT[],
1516 identifier_suggestions TEXT[]
1517 ) WITH (fillfactor = 80);
1519 CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
1520 RETURNS SETOF TEXT AS $F$
1521 SELECT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1522 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1524 -- This version does not preserve input word order!
1525 CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
1526 RETURNS SETOF TEXT AS $F$
1527 SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1528 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1530 CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
1537 woChars := regexp_split_to_array(withoutCase,'');
1538 FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
1539 IF curr = evergreen.uppercase(curr) THEN
1540 woChars[ind] := evergreen.uppercase(woChars[ind]);
1544 RETURN ARRAY_TO_STRING(woChars,'');
1546 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1548 CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
1552 ) RETURNS TEXT[] AS $F$
1555 list TEXT[] := '{}';
1556 sublist TEXT[] := '{}';
1558 FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
1559 item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
1560 IF NOT list @> ARRAY[item] THEN
1561 list := item || list;
1562 IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
1563 sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
1569 RETURN evergreen.text_array_merge_unique(list, sublist);
1571 RETURN list || sublist;
1574 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1576 -- DROP TYPE search.symspell_lookup_output CASCADE;
1577 CREATE TYPE search.symspell_lookup_output AS (
1579 suggestion_count INT,
1581 pg_trgm_sim NUMERIC,
1582 qwerty_kb_match NUMERIC,
1583 soundex_sim NUMERIC,
1587 prefix_key_count INT,
1592 CREATE OR REPLACE FUNCTION search.symspell_lookup(
1595 verbosity integer DEFAULT 2,
1596 xfer_case boolean DEFAULT false,
1597 count_threshold integer DEFAULT 1,
1598 soundex_weight integer DEFAULT 0,
1599 pg_trgm_weight integer DEFAULT 0,
1600 kbdist_weight integer DEFAULT 0
1601 ) RETURNS SETOF search.symspell_lookup_output LANGUAGE plpgsql AS $function$
1607 edit_list TEXT[] := '{}';
1608 seen_list TEXT[] := '{}';
1609 output search.symspell_lookup_output;
1610 output_list search.symspell_lookup_output[];
1618 smallest_ed INT := -1;
1623 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1624 prefix_length := COALESCE(prefix_length, 6);
1626 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1627 maxED := COALESCE(maxED, 3);
1629 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
1631 -- Common case exact match test for preformance
1632 IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
1634 'SELECT '||search_class||'_suggestions AS suggestions,
1635 '||search_class||'_count AS count,
1637 FROM search.symspell_dictionary
1638 WHERE prefix_key = $1
1639 AND '||search_class||'_count >= $2
1640 AND '||search_class||'_suggestions @> ARRAY[$1]'
1641 INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
1642 IF entry.prefix_key IS NOT NULL THEN
1643 output.lev_distance := 0; -- definitionally
1644 output.prefix_key := entry.prefix_key;
1645 output.prefix_key_count := entry.count;
1646 output.suggestion_count := entry.count;
1647 output.input := word_list[1];
1649 output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
1651 output.suggestion := entry.prefix_key;
1653 output.norm_input := entry.prefix_key;
1654 output.qwerty_kb_match := 1;
1655 output.pg_trgm_sim := 1;
1656 output.soundex_sim := 1;
1663 FOREACH word IN ARRAY word_list LOOP
1665 input := evergreen.lowercase(word);
1666 i_len := CHARACTER_LENGTH(input);
1669 IF CHARACTER_LENGTH(input) > prefix_length THEN
1670 prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
1671 edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
1673 edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
1676 SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
1678 output_list := '{}';
1683 FOREACH entry_key IN ARRAY edit_list LOOP
1685 IF global_ed IS NOT NULL THEN
1686 smallest_ed := global_ed;
1689 FOR entry IN EXECUTE
1690 'SELECT '||search_class||'_suggestions AS suggestions,
1691 '||search_class||'_count AS count,
1693 FROM search.symspell_dictionary
1694 WHERE prefix_key = $1
1695 AND '||search_class||'_suggestions IS NOT NULL'
1701 ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
1702 ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
1706 FROM UNNEST(entry.suggestions) s
1707 WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
1708 AND NOT seen_list @> ARRAY[s];
1710 CONTINUE WHEN good_suggs IS NULL;
1712 FOR sugg, output.suggestion_count IN EXECUTE
1713 'SELECT prefix_key, '||search_class||'_count
1714 FROM search.symspell_dictionary
1715 WHERE prefix_key = ANY ($1)
1716 AND '||search_class||'_count >= $2'
1717 USING AKEYS(good_suggs), COALESCE(count_threshold,1)
1720 output.lev_distance := good_suggs->sugg;
1721 seen_list := seen_list || sugg;
1723 -- Track the smallest edit distance among suggestions from this prefix key.
1724 IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
1725 smallest_ed := output.lev_distance;
1728 -- Track the smallest edit distance for all prefix keys for this word.
1729 IF global_ed IS NULL OR smallest_ed < global_ed THEN
1730 global_ed = smallest_ed;
1731 -- And if low verbosity, ignore suggs with a larger distance from here on.
1732 IF verbosity <= 1 THEN
1733 l_maxED := global_ed;
1737 -- Lev distance is our main similarity measure. While
1738 -- trgm or soundex similarity could be the main filter,
1739 -- Lev is both language agnostic and faster.
1741 -- Here we will skip suggestions that have a longer edit distance
1742 -- than the shortest we've already found. This is simply an
1743 -- optimization that allows us to avoid further processing
1744 -- of this entry. It would be filtered out later.
1745 CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
1747 -- If we have an exact match on the suggestion key we can also avoid
1748 -- some function calls.
1749 IF output.lev_distance = 0 THEN
1750 output.qwerty_kb_match := 1;
1751 output.pg_trgm_sim := 1;
1752 output.soundex_sim := 1;
1754 IF kbdist_weight THEN
1755 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
1757 output.qwerty_kb_match := 0;
1759 IF pg_trgm_weight THEN
1760 output.pg_trgm_sim := similarity(input, sugg);
1762 output.pg_trgm_sim := 0;
1764 IF soundex_weight THEN
1765 output.soundex_sim := difference(input, sugg) / 4.0;
1767 output.soundex_sim := 0;
1771 -- Fill in some fields
1772 IF xfer_case AND input <> word THEN
1773 output.suggestion := search.symspell_transfer_casing(word, sugg);
1775 output.suggestion := sugg;
1777 output.prefix_key := entry.prefix_key;
1778 output.prefix_key_count := entry.count;
1779 output.input := word;
1780 output.norm_input := input;
1781 output.word_pos := w_pos;
1783 -- We can't "cache" a set of generated records directly, so
1784 -- here we build up an array of search.symspell_lookup_output
1785 -- records that we can revivicate later as a table using UNNEST().
1786 output_list := output_list || output;
1788 EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
1789 CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
1791 END LOOP; -- loop over suggestions
1792 END LOOP; -- loop over entries
1793 END LOOP; -- loop over entry_keys
1795 -- Now we're done examining this word
1796 IF verbosity = 0 THEN
1797 -- Return the "best" suggestion from the smallest edit
1798 -- distance group. We define best based on the weighting
1799 -- of the non-lev similarity measures and use the suggestion
1800 -- use count to break ties.
1802 SELECT * FROM UNNEST(output_list)
1803 ORDER BY lev_distance,
1804 (soundex_sim * COALESCE(soundex_weight,0))
1805 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1806 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1807 suggestion_count DESC
1809 ELSIF verbosity = 1 THEN
1810 -- Return all suggestions from the smallest
1811 -- edit distance group.
1813 SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
1814 ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
1815 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1816 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1817 suggestion_count DESC;
1818 ELSIF verbosity = 2 THEN
1819 -- Return everything we find, along with relevant stats
1821 SELECT * FROM UNNEST(output_list)
1822 ORDER BY lev_distance,
1823 (soundex_sim * COALESCE(soundex_weight,0))
1824 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1825 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1826 suggestion_count DESC;
1827 ELSIF verbosity = 3 THEN
1828 -- Return everything we find from the two smallest edit distance groups
1830 SELECT * FROM UNNEST(output_list)
1831 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
1832 ORDER BY lev_distance,
1833 (soundex_sim * COALESCE(soundex_weight,0))
1834 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1835 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1836 suggestion_count DESC;
1837 ELSIF verbosity = 4 THEN
1838 -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
1840 SELECT * FROM UNNEST(output_list)
1841 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
1842 ORDER BY lev_distance,
1843 (soundex_sim * COALESCE(soundex_weight,0))
1844 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1845 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1846 suggestion_count DESC;
1848 END LOOP; -- loop over words
1852 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
1855 no_limit BOOL DEFAULT FALSE,
1856 prefix_length INT DEFAULT 6,
1858 ) RETURNS SETOF search.symspell_dictionary AS $F$
1863 entry search.symspell_dictionary%ROWTYPE;
1867 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
1868 key := SUBSTRING(key FROM 1 FOR prefix_length);
1869 key_list := ARRAY[raw_input, key];
1871 key_list := ARRAY[key];
1874 FOREACH del_key IN ARRAY key_list LOOP
1876 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1878 entry.prefix_key := del_key;
1880 entry.keyword_count := 0;
1881 entry.title_count := 0;
1882 entry.author_count := 0;
1883 entry.subject_count := 0;
1884 entry.series_count := 0;
1885 entry.identifier_count := 0;
1887 entry.keyword_suggestions := '{}';
1888 entry.title_suggestions := '{}';
1889 entry.author_suggestions := '{}';
1890 entry.subject_suggestions := '{}';
1891 entry.series_suggestions := '{}';
1892 entry.identifier_suggestions := '{}';
1894 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1895 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1896 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1897 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1898 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1899 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1900 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1902 IF del_key = raw_input THEN
1903 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
1904 IF source_class = 'title' THEN entry.title_count := 1; END IF;
1905 IF source_class = 'author' THEN entry.author_count := 1; END IF;
1906 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
1907 IF source_class = 'series' THEN entry.series_count := 1; END IF;
1908 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
1914 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
1917 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1918 -- skip suggestions that are already too long for the prefix key
1919 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
1921 entry.keyword_suggestions := '{}';
1922 entry.title_suggestions := '{}';
1923 entry.author_suggestions := '{}';
1924 entry.subject_suggestions := '{}';
1925 entry.series_suggestions := '{}';
1926 entry.identifier_suggestions := '{}';
1928 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
1929 IF source_class = 'title' THEN entry.title_count := 0; END IF;
1930 IF source_class = 'author' THEN entry.author_count := 0; END IF;
1931 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
1932 IF source_class = 'series' THEN entry.series_count := 0; END IF;
1933 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
1935 entry.prefix_key := del_key;
1937 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1938 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1939 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1940 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1941 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1942 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1943 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1949 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1951 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
1954 old_input TEXT DEFAULT NULL,
1955 include_phrases BOOL DEFAULT FALSE
1956 ) RETURNS SETOF search.symspell_dictionary AS $F$
1963 entry search.symspell_dictionary;
1965 IF full_input IS NOT NULL THEN
1966 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1967 prefix_length := COALESCE(prefix_length, 6);
1969 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1970 maxED := COALESCE(maxED, 3);
1972 input := evergreen.lowercase(full_input);
1973 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
1974 IF word_list IS NULL THEN
1978 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
1979 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
1982 FOREACH word IN ARRAY word_list LOOP
1983 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
1984 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1985 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
1989 IF old_input IS NOT NULL THEN
1990 input := evergreen.lowercase(old_input);
1992 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
1993 -- similarly skip words that have 5 or more digits here to
1994 -- avoid adding erroneous prefix deletion entries to the dictionary
1995 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1996 entry.prefix_key := word;
1998 entry.keyword_count := 0;
1999 entry.title_count := 0;
2000 entry.author_count := 0;
2001 entry.subject_count := 0;
2002 entry.series_count := 0;
2003 entry.identifier_count := 0;
2005 entry.keyword_suggestions := '{}';
2006 entry.title_suggestions := '{}';
2007 entry.author_suggestions := '{}';
2008 entry.subject_suggestions := '{}';
2009 entry.series_suggestions := '{}';
2010 entry.identifier_suggestions := '{}';
2012 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
2013 IF source_class = 'title' THEN entry.title_count := -1; END IF;
2014 IF source_class = 'author' THEN entry.author_count := -1; END IF;
2015 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
2016 IF source_class = 'series' THEN entry.series_count := -1; END IF;
2017 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
2023 $F$ LANGUAGE PLPGSQL;
2025 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
2028 old_input TEXT DEFAULT NULL,
2029 include_phrases BOOL DEFAULT FALSE
2030 ) RETURNS SETOF search.symspell_dictionary AS $F$
2033 conflict_entry RECORD;
2036 IF full_input = old_input THEN -- neither NULL, and are the same
2040 FOR new_entry IN EXECUTE $q$
2044 FROM (SELECT prefix_key,
2045 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
2046 SUM($q$ || source_class || $q$_count) count
2047 FROM search.symspell_build_entries($1, $2, $3, $4)
2049 $q$ USING full_input, source_class, old_input, include_phrases
2053 $q$ || source_class || $q$_suggestions suggestions,
2054 $q$ || source_class || $q$_count count
2055 FROM search.symspell_dictionary
2056 WHERE prefix_key = $1 $q$
2058 USING new_entry.prefix_key;
2060 IF new_entry.count <> 0 THEN -- Real word, and count changed
2061 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
2062 IF conflict_entry.count > 0 THEN -- it's a real word
2063 RETURN QUERY EXECUTE $q$
2064 UPDATE search.symspell_dictionary
2065 SET $q$ || source_class || $q$_count = $2
2066 WHERE prefix_key = $1
2068 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
2069 ELSE -- it was a prefix key or delete-emptied word before
2070 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
2071 RETURN QUERY EXECUTE $q$
2072 UPDATE search.symspell_dictionary
2073 SET $q$ || source_class || $q$_count = $2
2074 WHERE prefix_key = $1
2076 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
2077 ELSE -- new suggestion!
2078 RETURN QUERY EXECUTE $q$
2079 UPDATE search.symspell_dictionary
2080 SET $q$ || source_class || $q$_count = $2,
2081 $q$ || source_class || $q$_suggestions = $3
2082 WHERE prefix_key = $1
2084 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
2088 -- We keep the on-conflict clause just in case...
2089 RETURN QUERY EXECUTE $q$
2090 INSERT INTO search.symspell_dictionary AS d (
2091 $q$ || source_class || $q$_count,
2093 $q$ || source_class || $q$_suggestions
2094 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
2095 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
2096 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
2098 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
2100 ELSE -- key only, or no change
2101 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
2102 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
2103 RETURN QUERY EXECUTE $q$
2104 UPDATE search.symspell_dictionary
2105 SET $q$ || source_class || $q$_suggestions = $2
2106 WHERE prefix_key = $1
2108 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
2111 RETURN QUERY EXECUTE $q$
2112 INSERT INTO search.symspell_dictionary AS d (
2113 $q$ || source_class || $q$_count,
2115 $q$ || source_class || $q$_suggestions
2116 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
2117 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
2119 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
2124 $F$ LANGUAGE PLPGSQL;
2126 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
2129 new_value TEXT := NULL;
2130 old_value TEXT := NULL;
2132 search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
2134 IF TG_OP IN ('INSERT', 'UPDATE') THEN
2135 new_value := NEW.value;
2138 IF TG_OP IN ('DELETE', 'UPDATE') THEN
2139 old_value := OLD.value;
2142 PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
2144 RETURN NULL; -- always fired AFTER
2146 $f$ LANGUAGE PLPGSQL;
2148 CREATE TRIGGER maintain_symspell_entries_tgr
2149 AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
2150 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2152 CREATE TRIGGER maintain_symspell_entries_tgr
2153 AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
2154 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2156 CREATE TRIGGER maintain_symspell_entries_tgr
2157 AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
2158 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2160 CREATE TRIGGER maintain_symspell_entries_tgr
2161 AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
2162 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2164 CREATE TRIGGER maintain_symspell_entries_tgr
2165 AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
2166 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2168 CREATE TRIGGER maintain_symspell_entries_tgr
2169 AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
2170 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();