2 * Copyright (C) 2007-2010 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS search CASCADE;
24 CREATE OR REPLACE FUNCTION evergreen.pg_statistics (tab TEXT, col TEXT) RETURNS TABLE(element TEXT, frequency INT) AS $$
26 -- This query will die on PG < 9.2, but the function can be created. We just won't use it where we can't.
30 FROM (SELECT ROW_NUMBER() OVER (),
32 FROM (SELECT UNNEST(most_common_elem_freqs) AS f
38 JOIN (SELECT ROW_NUMBER() OVER (),
40 FROM (SELECT UNNEST(most_common_elems::text::text[]) AS e
45 ) AS elems USING (row_number);
49 CREATE OR REPLACE FUNCTION evergreen.query_int_wrapper (INT[],TEXT) RETURNS BOOL AS $$
51 RETURN $1 @@ $2::query_int;
53 $$ LANGUAGE PLPGSQL STABLE;
55 CREATE TABLE search.relevance_adjustment (
56 id SERIAL PRIMARY KEY,
57 active BOOL NOT NULL DEFAULT TRUE,
58 field INT NOT NULL REFERENCES config.metabib_field (id) DEFERRABLE INITIALLY DEFERRED,
59 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
60 multiplier NUMERIC NOT NULL DEFAULT 1.0
62 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
64 CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes text[], hits bigint[]) RETURNS TABLE(id integer, value text, count bigint)
66 SELECT id, value, count
68 SELECT mfae.field AS id,
70 COUNT(DISTINCT mfae.source),
72 PARTITION BY mfae.field ORDER BY COUNT(DISTINCT mfae.source) DESC
74 FROM metabib.facet_entry mfae
75 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
76 WHERE mfae.source = ANY ($2)
78 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
83 (SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled),
89 CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
90 SELECT id, value, count FROM (
91 SELECT mfae.field AS id,
93 COUNT(DISTINCT mmrsm.metarecord),
95 PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
97 FROM metabib.facet_entry mfae
98 JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
99 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
100 WHERE mmrsm.metarecord IN (SELECT * FROM unnest($2))
102 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
105 WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
108 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute ( value INT, attr TEXT ) RETURNS INT AS $f$
111 WHEN 'luri_org' THEN 0 -- "b" attr
112 WHEN 'bib_source' THEN 1 -- "b" attr
114 WHEN 'copy_flags' THEN 0 -- "c" attr
115 WHEN 'owning_lib' THEN 1 -- "c" attr
116 WHEN 'circ_lib' THEN 2 -- "c" attr
117 WHEN 'status' THEN 3 -- "c" attr
118 WHEN 'location' THEN 4 -- "c" attr
119 WHEN 'location_group' THEN 5 -- "c" attr
123 /* copy_flags bit positions, LSB-first:
125 0: asset.copy.opac_visible
128 When adding flags, you must update asset.all_visible_flags()
130 Because bib and copy values are stored separately, we can reuse
131 shifts, saving us some space. We could probably take back a bit
132 too, but I'm not sure its worth squeezing that last one out. We'd
133 be left with just 2 slots for copy attrs, rather than 10.
136 $f$ LANGUAGE SQL IMMUTABLE;
138 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_list ( attr TEXT, value INT[] ) RETURNS INT[] AS $f$
139 SELECT ARRAY_AGG(search.calculate_visibility_attribute(x, $1)) FROM UNNEST($2) AS X;
140 $f$ LANGUAGE SQL IMMUTABLE;
142 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_test ( attr TEXT, value INT[], negate BOOL DEFAULT FALSE ) RETURNS TEXT AS $f$
143 SELECT CASE WHEN $3 THEN '!' ELSE '' END || '(' || ARRAY_TO_STRING(search.calculate_visibility_attribute_list($1,$2),'|') || ')';
144 $f$ LANGUAGE SQL IMMUTABLE;
146 CREATE OR REPLACE FUNCTION asset.calculate_copy_visibility_attribute_set ( copy_id BIGINT ) RETURNS INT[] AS $f$
148 copy_row asset.copy%ROWTYPE;
149 lgroup_map asset.copy_location_group_map%ROWTYPE;
150 attr_set INT[] := '{}'::INT[];
152 SELECT * INTO copy_row FROM asset.copy WHERE id = copy_id;
154 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.opac_visible::INT, 'copy_flags');
155 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.circ_lib, 'circ_lib');
156 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.status, 'status');
157 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.location, 'location');
161 search.calculate_visibility_attribute(owning_lib, 'owning_lib')
163 FROM asset.call_number
164 WHERE id = copy_row.call_number;
166 FOR lgroup_map IN SELECT * FROM asset.copy_location_group_map WHERE location = copy_row.location LOOP
167 attr_set := attr_set || search.calculate_visibility_attribute(lgroup_map.lgroup, 'location_group');
172 $f$ LANGUAGE PLPGSQL;
174 CREATE OR REPLACE FUNCTION biblio.calculate_bib_visibility_attribute_set ( bib_id BIGINT, new_source INT DEFAULT NULL, force_source BOOL DEFAULT FALSE ) RETURNS INT[] AS $f$
176 bib_row biblio.record_entry%ROWTYPE;
177 cn_row asset.call_number%ROWTYPE;
178 attr_set INT[] := '{}'::INT[];
180 SELECT * INTO bib_row FROM biblio.record_entry WHERE id = bib_id;
183 IF new_source IS NOT NULL THEN
184 attr_set := attr_set || search.calculate_visibility_attribute(new_source, 'bib_source');
186 ELSIF bib_row.source IS NOT NULL THEN
187 attr_set := attr_set || search.calculate_visibility_attribute(bib_row.source, 'bib_source');
192 FROM asset.call_number
193 WHERE record = bib_id
194 AND label = '##URI##'
197 attr_set := attr_set || search.calculate_visibility_attribute(cn_row.owning_lib, 'luri_org');
202 $f$ LANGUAGE PLPGSQL;
204 CREATE OR REPLACE FUNCTION asset.cache_copy_visibility () RETURNS TRIGGER as $func$
206 ocn asset.call_number%ROWTYPE;
207 ncn asset.call_number%ROWTYPE;
212 SELECT enabled = FALSE INTO dobib FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc';
214 IF TG_TABLE_NAME = 'peer_bib_copy_map' THEN -- Only needs ON INSERT OR DELETE, so handle separately
215 IF TG_OP = 'INSERT' THEN
216 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
219 asset.calculate_copy_visibility_attribute_set(NEW.target_copy)
223 ELSIF TG_OP = 'DELETE' THEN
224 DELETE FROM asset.copy_vis_attr_cache
225 WHERE record = OLD.peer_record AND target_copy = OLD.target_copy;
231 IF TG_OP = 'INSERT' THEN -- Handles ON INSERT. ON UPDATE is below.
232 IF TG_TABLE_NAME IN ('copy', 'unit') THEN
233 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
234 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
237 asset.calculate_copy_visibility_attribute_set(NEW.id)
239 ELSIF TG_TABLE_NAME = 'record_entry' THEN
240 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
241 ELSIF TG_TABLE_NAME = 'call_number' AND NEW.label = '##URI##' AND dobib THEN -- New located URI
242 UPDATE biblio.record_entry
243 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
244 WHERE id = NEW.record;
251 -- handle items first, since with circulation activity
252 -- their statuses change frequently
253 IF TG_TABLE_NAME IN ('copy', 'unit') THEN -- This handles ON UPDATE OR DELETE. ON INSERT above
255 IF TG_OP = 'DELETE' THEN -- Shouldn't get here, normally
256 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
260 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
262 IF OLD.deleted <> NEW.deleted THEN
264 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
266 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
269 asset.calculate_copy_visibility_attribute_set(NEW.id)
274 ELSIF OLD.location <> NEW.location OR
275 OLD.status <> NEW.status OR
276 OLD.opac_visible <> NEW.opac_visible OR
277 OLD.circ_lib <> NEW.circ_lib OR
278 OLD.call_number <> NEW.call_number
280 IF OLD.call_number <> NEW.call_number THEN -- Special check since it's more expensive than the next branch
281 SELECT * INTO ocn FROM asset.call_number cn WHERE id = OLD.call_number;
283 IF ncn.record <> ocn.record THEN
284 -- We have to use a record-specific WHERE clause
285 -- to avoid modifying the entries for peer-bib copies.
286 UPDATE asset.copy_vis_attr_cache
287 SET target_copy = NEW.id,
289 WHERE target_copy = OLD.id
290 AND record = ocn.record;
294 -- Any of these could change visibility, but
295 -- we'll save some queries and not try to calculate
296 -- the change directly. We want to update peer-bib
297 -- entries in this case, unlike above.
298 UPDATE asset.copy_vis_attr_cache
299 SET target_copy = NEW.id,
300 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(NEW.id)
301 WHERE target_copy = OLD.id;
305 ELSIF TG_TABLE_NAME = 'call_number' THEN
307 IF TG_OP = 'DELETE' AND OLD.label = '##URI##' AND dobib THEN -- really deleted located URI, if the delete protection rule is disabled...
308 UPDATE biblio.record_entry
309 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
310 WHERE id = OLD.record;
314 IF OLD.label = '##URI##' AND dobib THEN -- Located URI
315 IF OLD.deleted <> NEW.deleted OR OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
316 UPDATE biblio.record_entry
317 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
318 WHERE id = NEW.record;
320 IF OLD.record <> NEW.record THEN -- maybe on merge?
321 UPDATE biblio.record_entry
322 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
323 WHERE id = OLD.record;
327 ELSIF OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
328 UPDATE asset.copy_vis_attr_cache
329 SET record = NEW.record,
330 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(target_copy)
331 WHERE target_copy IN (SELECT id FROM asset.copy WHERE call_number = NEW.id)
332 AND record = OLD.record;
336 ELSIF TG_TABLE_NAME = 'record_entry' AND OLD.source IS DISTINCT FROM NEW.source THEN -- Only handles ON UPDATE, INSERT above
337 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
342 $func$ LANGUAGE PLPGSQL;
344 CREATE TRIGGER z_opac_vis_mat_view_tgr BEFORE INSERT OR UPDATE ON biblio.record_entry FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
345 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR DELETE ON biblio.peer_bib_copy_map FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
346 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE OR DELETE ON asset.call_number FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
347 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
348 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
349 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
350 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
352 CREATE OR REPLACE FUNCTION asset.all_visible_flags () RETURNS TEXT AS $f$
353 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(1 << x, 'copy_flags')),'&') || ')'
354 FROM GENERATE_SERIES(0,0) AS x; -- increment as new flags are added.
355 $f$ LANGUAGE SQL STABLE;
357 CREATE OR REPLACE FUNCTION asset.visible_orgs (otype TEXT) RETURNS TEXT AS $f$
358 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
361 $f$ LANGUAGE SQL STABLE;
363 CREATE OR REPLACE FUNCTION asset.invisible_orgs (otype TEXT) RETURNS TEXT AS $f$
364 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
366 WHERE NOT opac_visible;
367 $f$ LANGUAGE SQL STABLE;
369 -- Bib-oriented defaults for search
370 CREATE OR REPLACE FUNCTION asset.bib_source_default () RETURNS TEXT AS $f$
371 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'bib_source')),'|') || ')'
372 FROM config.bib_source
374 $f$ LANGUAGE SQL IMMUTABLE;
376 CREATE OR REPLACE FUNCTION asset.luri_org_default () RETURNS TEXT AS $f$
377 SELECT * FROM asset.invisible_orgs('luri_org');
378 $f$ LANGUAGE SQL STABLE;
380 -- Copy-oriented defaults for search
381 CREATE OR REPLACE FUNCTION asset.location_group_default () RETURNS TEXT AS $f$
382 SELECT '!()'::TEXT; -- For now, as there's no way to cause a location group to hide all copies.
384 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location_group')),'|') || ')'
385 FROM asset.copy_location_group
386 WHERE NOT opac_visible;
388 $f$ LANGUAGE SQL IMMUTABLE;
390 CREATE OR REPLACE FUNCTION asset.location_default () RETURNS TEXT AS $f$
391 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location')),'|') || ')'
392 FROM asset.copy_location
393 WHERE NOT opac_visible;
394 $f$ LANGUAGE SQL STABLE;
396 CREATE OR REPLACE FUNCTION asset.status_default () RETURNS TEXT AS $f$
397 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'status')),'|') || ')'
398 FROM config.copy_status
399 WHERE NOT opac_visible;
400 $f$ LANGUAGE SQL STABLE;
402 CREATE OR REPLACE FUNCTION asset.owning_lib_default () RETURNS TEXT AS $f$
403 SELECT * FROM asset.invisible_orgs('owning_lib');
404 $f$ LANGUAGE SQL STABLE;
406 CREATE OR REPLACE FUNCTION asset.circ_lib_default () RETURNS TEXT AS $f$
407 SELECT * FROM asset.invisible_orgs('circ_lib');
408 $f$ LANGUAGE SQL STABLE;
410 CREATE OR REPLACE FUNCTION asset.patron_default_visibility_mask () RETURNS TABLE (b_attrs TEXT, c_attrs TEXT) AS $f$
412 copy_flags TEXT; -- "c" attr
414 owning_lib TEXT; -- "c" attr
415 circ_lib TEXT; -- "c" attr
416 status TEXT; -- "c" attr
417 location TEXT; -- "c" attr
418 location_group TEXT; -- "c" attr
420 luri_org TEXT; -- "b" attr
421 bib_sources TEXT; -- "b" attr
423 bib_tests TEXT := '';
425 copy_flags := asset.all_visible_flags(); -- Will always have at least one
427 owning_lib := NULLIF(asset.owning_lib_default(),'!()');
429 circ_lib := NULLIF(asset.circ_lib_default(),'!()');
430 status := NULLIF(asset.status_default(),'!()');
431 location := NULLIF(asset.location_default(),'!()');
432 location_group := NULLIF(asset.location_group_default(),'!()');
434 -- LURIs will be handled at the perl layer directly
435 -- luri_org := NULLIF(asset.luri_org_default(),'!()');
436 bib_sources := NULLIF(asset.bib_source_default(),'()');
439 IF luri_org IS NOT NULL AND bib_sources IS NOT NULL THEN
440 bib_tests := '('||ARRAY_TO_STRING( ARRAY[luri_org,bib_sources], '|')||')&('||luri_org||')&';
441 ELSIF luri_org IS NOT NULL THEN
442 bib_tests := luri_org || '&';
443 ELSIF bib_sources IS NOT NULL THEN
444 bib_tests := bib_sources || '|';
447 RETURN QUERY SELECT bib_tests,
448 '('||ARRAY_TO_STRING(
449 ARRAY[copy_flags,owning_lib,circ_lib,status,location,location_group]::TEXT[],
453 $f$ LANGUAGE PLPGSQL STABLE ROWS 1;
455 CREATE OR REPLACE FUNCTION metabib.suggest_browse_entries(raw_query_text text, search_class text, headline_opts text, visibility_org integer, query_limit integer, normalization integer)
456 RETURNS TABLE(value text, field integer, buoyant_and_class_match boolean, field_match boolean, field_weight integer, rank real, buoyant boolean, match text)
459 prepared_query_texts TEXT[];
462 opac_visibility_join TEXT;
463 search_class_join TEXT;
467 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
469 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
470 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
472 visibility_org := NULLIF(visibility_org,-1);
473 IF visibility_org IS NOT NULL THEN
474 PERFORM FROM actor.org_unit WHERE id = visibility_org AND parent_ou IS NULL;
476 opac_visibility_join := '';
478 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
480 b_tests := search.calculate_visibility_attribute_test(
482 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(visibility_org))
485 b_tests := search.calculate_visibility_attribute_test(
487 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(visibility_org))
490 opac_visibility_join := '
491 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = x.source)
492 LEFT JOIN biblio.record_entry b ON (b.id = x.source)
493 JOIN vm ON (acvac.vis_attr_vector @@
494 (vm.c_attrs || $$&$$ ||
495 search.calculate_visibility_attribute_test(
497 (SELECT ARRAY_AGG(id) FROM actor.org_unit_descendants($4))
500 ) OR (b.vis_attr_vector @@ $$' || b_tests || '$$::query_int)
504 opac_visibility_join := '';
507 -- The following determines whether we only provide suggestsons matching
508 -- the user's selected search_class, or whether we show other suggestions
509 -- too. The reason for MIN() is that for search_classes like
510 -- 'title|proper|uniform' you would otherwise get multiple rows. The
511 -- implication is that if title as a class doesn't have restrict,
512 -- nor does the proper field, but the uniform field does, you're going
513 -- to get 'false' for your overall evaluation of 'should we restrict?'
514 -- To invert that, change from MIN() to MAX().
518 MIN(cmc.restrict::INT) AS restrict_class,
519 MIN(cmf.restrict::INT) AS restrict_field
520 FROM metabib.search_class_to_registered_components(search_class)
521 AS _registered (field_class TEXT, field INT)
523 config.metabib_class cmc ON (cmc.name = _registered.field_class)
525 config.metabib_field cmf ON (cmf.id = _registered.field);
527 -- evaluate 'should we restrict?'
528 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
529 search_class_join := '
531 metabib.search_class_to_registered_components($2)
532 AS _registered (field_class TEXT, field INT) ON (
533 (_registered.field IS NULL AND
534 _registered.field_class = cmf.field_class) OR
535 (_registered.field = cmf.id)
539 search_class_join := '
541 metabib.search_class_to_registered_components($2)
542 AS _registered (field_class TEXT, field INT) ON (
543 _registered.field_class = cmc.name
548 RETURN QUERY EXECUTE '
549 WITH vm AS ( SELECT * FROM asset.patron_default_visibility_mask() ),
550 mbe AS (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000)
559 TS_HEADLINE(value, $7, $3)
560 FROM (SELECT DISTINCT
563 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
564 _registered.field = cmf.id AS restrict,
566 TS_RANK_CD(mbe.index_vector, $1, $6),
569 FROM metabib.browse_entry_def_map mbedm
570 JOIN mbe ON (mbe.id = mbedm.entry)
571 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
572 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
573 ' || search_class_join || '
574 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
576 ' || opac_visibility_join || '
577 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
579 ' -- sic, repeat the order by clause in the outer select too
581 query, search_class, headline_opts,
582 visibility_org, query_limit, normalization, plain_query
586 -- buoyant AND chosen class = match class
587 -- chosen field = match field
594 $f$ LANGUAGE plpgsql ROWS 10;
596 CREATE OR REPLACE FUNCTION metabib.staged_browse(query text, fields integer[], context_org integer, context_locations integer[], staff boolean, browse_superpage_size integer, count_up_from_zero boolean, result_limit integer, next_pivot_pos integer)
597 RETURNS SETOF metabib.flat_browse_entry_appearance
606 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
607 results_skipped INT := 0;
608 row_counter INT := 0;
613 all_records BIGINT[];
614 all_brecords BIGINT[];
615 all_arecords BIGINT[];
616 superpage_of_records BIGINT[];
621 unauthorized_entry RECORD;
623 IF count_up_from_zero THEN
630 SELECT x.c_attrs, x.b_attrs INTO c_tests, b_tests FROM asset.patron_default_visibility_mask() x;
633 -- b_tests supplies its own query_int operator, c_tests does not
634 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
636 SELECT ARRAY_AGG(id) INTO c_orgs FROM actor.org_unit_descendants(context_org);
638 c_tests := c_tests || search.calculate_visibility_attribute_test('circ_lib',c_orgs)
639 || '&' || search.calculate_visibility_attribute_test('owning_lib',c_orgs);
641 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
643 b_tests := b_tests || search.calculate_visibility_attribute_test(
645 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(context_org) x)
648 b_tests := b_tests || search.calculate_visibility_attribute_test(
650 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(context_org) x)
654 IF context_locations THEN
655 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
656 c_tests := c_tests || search.calculate_visibility_attribute_test('location',context_locations);
659 OPEN curs NO SCROLL FOR EXECUTE query;
664 IF result_row.pivot_point IS NOT NULL THEN
665 RETURN NEXT result_row;
671 SELECT INTO unauthorized_entry *
672 FROM metabib.browse_entry_simple_heading_map mbeshm
673 INNER JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
674 INNER JOIN authority.control_set_authority_field acsaf ON ( acsaf.id = ash.atag )
675 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
676 WHERE mbeshm.entry = rec.id
677 AND ahf.heading_purpose = 'variant';
679 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
680 IF (unauthorized_entry.record IS NOT NULL) THEN
681 --unauthorized term belongs to an auth linked to a bib?
682 SELECT INTO all_arecords, result_row.sees, afields
683 ARRAY_AGG(DISTINCT abl.bib),
684 STRING_AGG(DISTINCT abl.authority::TEXT, $$,$$),
685 ARRAY_AGG(DISTINCT map.metabib_field)
686 FROM authority.bib_linking abl
687 INNER JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
688 map.authority_field = unauthorized_entry.atag
689 AND map.metabib_field = ANY(fields)
691 WHERE abl.authority = unauthorized_entry.record;
694 SELECT INTO all_arecords, result_row.sees, afields
695 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
696 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
697 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
699 FROM metabib.browse_entry_simple_heading_map mbeshm
700 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
701 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
702 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
703 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
704 ash.atag = map.authority_field
705 AND map.metabib_field = ANY(fields)
707 JOIN authority.control_set_authority_field acsaf ON (
708 map.authority_field = acsaf.id
710 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
711 WHERE mbeshm.entry = rec.id
712 AND ahf.heading_purpose = 'variant';
716 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
717 SELECT INTO all_brecords, result_row.authorities, bfields
718 ARRAY_AGG(DISTINCT source),
719 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
720 ARRAY_AGG(DISTINCT def)
721 FROM metabib.browse_entry_def_map
723 AND def = ANY(fields);
725 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
727 result_row.sources := 0;
728 result_row.asources := 0;
730 -- Bib-linked vis checking
731 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
733 SELECT INTO result_row.sources COUNT(DISTINCT b.id)
734 FROM biblio.record_entry b
735 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
736 WHERE b.id = ANY(all_brecords[1:browse_superpage_size])
738 acvac.vis_attr_vector @@ c_tests::query_int
739 OR b.vis_attr_vector @@ b_tests::query_int
742 result_row.accurate := TRUE;
746 -- Authority-linked vis checking
747 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
749 SELECT INTO result_row.asources COUNT(DISTINCT b.id)
750 FROM biblio.record_entry b
751 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
752 WHERE b.id = ANY(all_arecords[1:browse_superpage_size])
754 acvac.vis_attr_vector @@ c_tests::query_int
755 OR b.vis_attr_vector @@ b_tests::query_int
758 result_row.aaccurate := TRUE;
762 IF result_row.sources > 0 OR result_row.asources > 0 THEN
764 -- The function that calls this function needs row_number in order
765 -- to correctly order results from two different runs of this
767 result_row.row_number := row_number;
769 -- Now, if row_counter is still less than limit, return a row. If
770 -- not, but it is less than next_pivot_pos, continue on without
771 -- returning actual result rows until we find
772 -- that next pivot, and return it.
774 IF row_counter < result_limit THEN
775 result_row.browse_entry := rec.id;
776 result_row.value := rec.value;
778 RETURN NEXT result_row;
780 result_row.browse_entry := NULL;
781 result_row.authorities := NULL;
782 result_row.fields := NULL;
783 result_row.value := NULL;
784 result_row.sources := NULL;
785 result_row.sees := NULL;
786 result_row.accurate := NULL;
787 result_row.aaccurate := NULL;
788 result_row.pivot_point := rec.id;
790 IF row_counter >= next_pivot_pos THEN
791 RETURN NEXT result_row;
796 IF count_up_from_zero THEN
797 row_number := row_number + 1;
799 row_number := row_number - 1;
802 -- row_counter is different from row_number.
803 -- It simply counts up from zero so that we know when
804 -- we've reached our limit.
805 row_counter := row_counter + 1;
809 $f$ LANGUAGE plpgsql ROWS 10;
811 CREATE OR REPLACE FUNCTION metabib.browse(search_field integer[], browse_term text, context_org integer DEFAULT NULL::integer, context_loc_group integer DEFAULT NULL::integer, staff boolean DEFAULT false, pivot_id bigint DEFAULT NULL::bigint, result_limit integer DEFAULT 10)
812 RETURNS SETOF metabib.flat_browse_entry_appearance
818 pivot_sort_value TEXT;
819 pivot_sort_fallback TEXT;
820 context_locations INT[];
821 browse_superpage_size INT;
822 results_skipped INT := 0;
826 forward_to_pivot INT;
828 -- First, find the pivot if we were given a browse term but not a pivot.
829 IF pivot_id IS NULL THEN
830 pivot_id := metabib.browse_pivot(search_field, browse_term);
833 SELECT INTO pivot_sort_value, pivot_sort_fallback
834 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
836 -- Bail if we couldn't find a pivot.
837 IF pivot_sort_value IS NULL THEN
841 -- Transform the context_loc_group argument (if any) (logc at the
842 -- TPAC layer) into a form we'll be able to use.
843 IF context_loc_group IS NOT NULL THEN
844 SELECT INTO context_locations ARRAY_AGG(location)
845 FROM asset.copy_location_group_map
846 WHERE lgroup = context_loc_group;
849 -- Get the configured size of browse superpages.
850 SELECT INTO browse_superpage_size COALESCE(value::INT,100) -- NULL ok
851 FROM config.global_flag
852 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
854 -- First we're going to search backward from the pivot, then we're going
855 -- to search forward. In each direction, we need two limits. At the
856 -- lesser of the two limits, we delineate the edge of the result set
857 -- we're going to return. At the greater of the two limits, we find the
858 -- pivot value that would represent an offset from the current pivot
859 -- at a distance of one "page" in either direction, where a "page" is a
860 -- result set of the size specified in the "result_limit" argument.
862 -- The two limits in each direction make four derived values in total,
863 -- and we calculate them now.
864 back_limit := CEIL(result_limit::FLOAT / 2);
865 back_to_pivot := result_limit;
866 forward_limit := result_limit / 2;
867 forward_to_pivot := result_limit - 1;
869 -- This is the meat of the SQL query that finds browse entries. We'll
870 -- pass this to a function which uses it with a cursor, so that individual
871 -- rows may be fetched in a loop until some condition is satisfied, without
872 -- waiting for a result set of fixed size to be collected all at once.
877 FROM metabib.browse_entry mbe
879 EXISTS ( -- are there any bibs using this mbe via the requested fields?
881 FROM metabib.browse_entry_def_map mbedm
882 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
883 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
885 FROM metabib.browse_entry_simple_heading_map mbeshm
886 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
887 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
888 ash.atag = map.authority_field
889 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
891 JOIN authority.control_set_authority_field acsaf ON (
892 map.authority_field = acsaf.id
894 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
895 WHERE mbeshm.entry = mbe.id
896 AND ahf.heading_purpose IN (' || $$'variant'$$ || ')
897 -- and authority that variant is coming from is linked to a bib
900 FROM metabib.browse_entry_def_map mbedm2
901 WHERE mbedm2.authority = ash.record AND mbedm2.def = ANY(' || quote_literal(search_field) || ')
907 -- This is the variant of the query for browsing backward.
908 back_query := core_query ||
909 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
910 ' ORDER BY mbe.sort_value DESC, mbe.value DESC LIMIT 1000';
912 -- This variant browses forward.
913 forward_query := core_query ||
914 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
915 ' ORDER BY mbe.sort_value, mbe.value LIMIT 1000';
917 -- We now call the function which applies a cursor to the provided
918 -- queries, stopping at the appropriate limits and also giving us
919 -- the next page's pivot.
921 SELECT * FROM metabib.staged_browse(
922 back_query, search_field, context_org, context_locations,
923 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
925 SELECT * FROM metabib.staged_browse(
926 forward_query, search_field, context_org, context_locations,
927 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
928 ) ORDER BY row_number DESC;
931 $f$ LANGUAGE plpgsql ROWS 10;
933 CREATE OR REPLACE FUNCTION metabib.browse(
936 context_org INT DEFAULT NULL,
937 context_loc_group INT DEFAULT NULL,
938 staff BOOL DEFAULT FALSE,
939 pivot_id BIGINT DEFAULT NULL,
940 result_limit INT DEFAULT 10
941 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
943 RETURN QUERY SELECT * FROM metabib.browse(
944 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
945 FROM config.metabib_field WHERE field_class = search_class),
954 $p$ LANGUAGE PLPGSQL ROWS 10;
956 CREATE OR REPLACE VIEW search.best_tsconfig AS
958 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
959 FROM config.metabib_field m
960 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
961 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND f.index_weight = 'C');
963 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
965 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
968 field_list INT[] DEFAULT '{}'::INT[],
969 css_class TEXT DEFAULT 'oils_SH',
970 hl_all BOOL DEFAULT TRUE,
971 minwords INT DEFAULT 5,
972 maxwords INT DEFAULT 25,
973 shortwords INT DEFAULT 0,
974 maxfrags INT DEFAULT 0,
975 delimiter TEXT DEFAULT ' ... '
976 ) RETURNS SETOF search.highlight_result AS $f$
979 v_css_class TEXT := css_class;
980 v_delimiter TEXT := delimiter;
981 v_field_list INT[] := field_list;
984 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
985 v_delimiter := ' ... ';
989 opts := opts || 'MinWords=' || minwords;
990 opts := opts || ', MaxWords=' || maxwords;
991 opts := opts || ', ShortWords=' || shortwords;
992 opts := opts || ', MaxFragments=' || maxfrags;
993 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
995 opts := opts || 'HighlightAll=TRUE';
998 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
999 v_css_class := 'oils_SH';
1002 opts := opts || $$, StopSel=</b>, StartSel="<b class='$$ || v_css_class; -- "
1004 IF v_field_list = '{}'::INT[] THEN
1005 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
1012 evergreen.escape_for_html(de.value) AS value,
1014 ts_config::REGCONFIG,
1015 evergreen.escape_for_html(de.value),
1016 $$ || quote_literal(tsq) || $$,
1017 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
1019 FROM metabib.display_entry de
1020 JOIN config.metabib_field mf ON (mf.id = de.field)
1021 JOIN search.best_tsconfig t ON (t.id = de.field)
1022 WHERE de.source = $2
1023 AND field = ANY ($3)
1026 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
1028 $f$ LANGUAGE PLPGSQL;
1030 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
1031 SELECT regexp_replace(
1047 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
1049 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
1051 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
1052 css_class TEXT DEFAULT 'oils_SH',
1053 hl_all BOOL DEFAULT TRUE,
1054 minwords INT DEFAULT 5,
1055 maxwords INT DEFAULT 25,
1056 shortwords INT DEFAULT 0,
1057 maxfrags INT DEFAULT 0,
1058 delimiter TEXT DEFAULT ' ... '
1059 ) RETURNS SETOF search.highlight_result AS $f$
1067 IF (tsq_map ILIKE 'hstore%') THEN
1068 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
1070 tsq_hstore := tsq_map::HSTORE;
1073 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore::HSTORE) LOOP
1074 SELECT ARRAY_AGG(unnest::INT) INTO afields
1075 FROM unnest(regexp_split_to_array(fields,','));
1076 seen := seen || afields;
1079 SELECT * FROM search.highlight_display_fields_impl(
1080 rid, tsq, afields, css_class, hl_all,minwords,
1081 maxwords, shortwords, maxfrags, delimiter
1089 evergreen.escape_for_html(value) AS value,
1090 evergreen.escape_for_html(value) AS highlight
1091 FROM metabib.display_entry
1093 AND NOT (field = ANY (seen));
1095 $f$ LANGUAGE PLPGSQL ROWS 10;
1097 -- SymSpell implementation follows
1099 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
1100 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
1102 ) RETURNS TEXT[] AS $F$
1103 SELECT NULLIF(ARRAY(
1104 SELECT * FROM UNNEST($1) x
1106 SELECT * FROM UNNEST($2) y
1110 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1111 use String::KeyboardDistance qw(:all);
1112 return qwerty_keyboard_distance(@_);
1113 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1115 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1116 use String::KeyboardDistance qw(:all);
1117 return qwerty_keyboard_distance_match(@_);
1118 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1120 CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
1121 use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
1122 return xs_edistance(@_);
1123 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1125 CREATE TABLE search.symspell_dictionary (
1126 keyword_count INT NOT NULL DEFAULT 0,
1127 title_count INT NOT NULL DEFAULT 0,
1128 author_count INT NOT NULL DEFAULT 0,
1129 subject_count INT NOT NULL DEFAULT 0,
1130 series_count INT NOT NULL DEFAULT 0,
1131 identifier_count INT NOT NULL DEFAULT 0,
1133 prefix_key TEXT PRIMARY KEY,
1135 keyword_suggestions TEXT[],
1136 title_suggestions TEXT[],
1137 author_suggestions TEXT[],
1138 subject_suggestions TEXT[],
1139 series_suggestions TEXT[],
1140 identifier_suggestions TEXT[]
1141 ) WITH (fillfactor = 80);
1143 CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
1144 RETURNS SETOF TEXT AS $F$
1145 SELECT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1146 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1148 -- This version does not preserve input word order!
1149 CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
1150 RETURNS SETOF TEXT AS $F$
1151 SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1152 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1154 CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
1161 woChars := regexp_split_to_array(withoutCase,'');
1162 FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
1163 IF curr = evergreen.uppercase(curr) THEN
1164 woChars[ind] := evergreen.uppercase(woChars[ind]);
1168 RETURN ARRAY_TO_STRING(woChars,'');
1170 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1172 CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
1176 ) RETURNS TEXT[] AS $F$
1179 list TEXT[] := '{}';
1180 sublist TEXT[] := '{}';
1182 FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
1183 item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
1184 IF NOT list @> ARRAY[item] THEN
1185 list := item || list;
1186 IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
1187 sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
1193 RETURN evergreen.text_array_merge_unique(list, sublist);
1195 RETURN list || sublist;
1198 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1200 -- DROP TYPE search.symspell_lookup_output CASCADE;
1201 CREATE TYPE search.symspell_lookup_output AS (
1203 suggestion_count INT,
1205 pg_trgm_sim NUMERIC,
1206 qwerty_kb_match NUMERIC,
1207 soundex_sim NUMERIC,
1211 prefix_key_count INT,
1216 CREATE OR REPLACE FUNCTION search.symspell_lookup(
1219 verbosity integer DEFAULT 2,
1220 xfer_case boolean DEFAULT false,
1221 count_threshold integer DEFAULT 1,
1222 soundex_weight integer DEFAULT 0,
1223 pg_trgm_weight integer DEFAULT 0,
1224 kbdist_weight integer DEFAULT 0
1225 ) RETURNS SETOF search.symspell_lookup_output LANGUAGE plpgsql AS $function$
1231 edit_list TEXT[] := '{}';
1232 seen_list TEXT[] := '{}';
1233 output search.symspell_lookup_output;
1234 output_list search.symspell_lookup_output[];
1242 smallest_ed INT := -1;
1247 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1248 prefix_length := COALESCE(prefix_length, 6);
1250 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1251 maxED := COALESCE(maxED, 3);
1253 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
1255 -- Common case exact match test for preformance
1256 IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
1258 'SELECT '||search_class||'_suggestions AS suggestions,
1259 '||search_class||'_count AS count,
1261 FROM search.symspell_dictionary
1262 WHERE prefix_key = $1
1263 AND '||search_class||'_count >= $2
1264 AND '||search_class||'_suggestions @> ARRAY[$1]'
1265 INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
1266 IF entry.prefix_key IS NOT NULL THEN
1267 output.lev_distance := 0; -- definitionally
1268 output.prefix_key := entry.prefix_key;
1269 output.prefix_key_count := entry.count;
1270 output.suggestion_count := entry.count;
1271 output.input := word_list[1];
1273 output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
1275 output.suggestion := entry.prefix_key;
1277 output.norm_input := entry.prefix_key;
1278 output.qwerty_kb_match := 1;
1279 output.pg_trgm_sim := 1;
1280 output.soundex_sim := 1;
1287 FOREACH word IN ARRAY word_list LOOP
1289 input := evergreen.lowercase(word);
1290 i_len := CHARACTER_LENGTH(input);
1293 IF CHARACTER_LENGTH(input) > prefix_length THEN
1294 prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
1295 edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
1297 edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
1300 SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
1302 output_list := '{}';
1307 FOREACH entry_key IN ARRAY edit_list LOOP
1309 IF global_ed IS NOT NULL THEN
1310 smallest_ed := global_ed;
1313 FOR entry IN EXECUTE
1314 'SELECT '||search_class||'_suggestions AS suggestions,
1315 '||search_class||'_count AS count,
1317 FROM search.symspell_dictionary
1318 WHERE prefix_key = $1
1319 AND '||search_class||'_suggestions IS NOT NULL'
1325 ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
1326 ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
1330 FROM UNNEST(entry.suggestions) s
1331 WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
1332 AND NOT seen_list @> ARRAY[s];
1334 CONTINUE WHEN good_suggs IS NULL;
1336 FOR sugg, output.suggestion_count IN EXECUTE
1337 'SELECT prefix_key, '||search_class||'_count
1338 FROM search.symspell_dictionary
1339 WHERE prefix_key = ANY ($1)
1340 AND '||search_class||'_count >= $2'
1341 USING AKEYS(good_suggs), COALESCE(count_threshold,1)
1344 output.lev_distance := good_suggs->sugg;
1345 seen_list := seen_list || sugg;
1347 -- Track the smallest edit distance among suggestions from this prefix key.
1348 IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
1349 smallest_ed := output.lev_distance;
1352 -- Track the smallest edit distance for all prefix keys for this word.
1353 IF global_ed IS NULL OR smallest_ed < global_ed THEN
1354 global_ed = smallest_ed;
1355 -- And if low verbosity, ignore suggs with a larger distance from here on.
1356 IF verbosity <= 1 THEN
1357 l_maxED := global_ed;
1361 -- Lev distance is our main similarity measure. While
1362 -- trgm or soundex similarity could be the main filter,
1363 -- Lev is both language agnostic and faster.
1365 -- Here we will skip suggestions that have a longer edit distance
1366 -- than the shortest we've already found. This is simply an
1367 -- optimization that allows us to avoid further processing
1368 -- of this entry. It would be filtered out later.
1369 CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
1371 -- If we have an exact match on the suggestion key we can also avoid
1372 -- some function calls.
1373 IF output.lev_distance = 0 THEN
1374 output.qwerty_kb_match := 1;
1375 output.pg_trgm_sim := 1;
1376 output.soundex_sim := 1;
1378 IF kbdist_weight THEN
1379 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
1381 output.qwerty_kb_match := 0;
1383 IF pg_trgm_weight THEN
1384 output.pg_trgm_sim := similarity(input, sugg);
1386 output.pg_trgm_sim := 0;
1388 IF soundex_weight THEN
1389 output.soundex_sim := difference(input, sugg) / 4.0;
1391 output.soundex_sim := 0;
1395 -- Fill in some fields
1396 IF xfer_case AND input <> word THEN
1397 output.suggestion := search.symspell_transfer_casing(word, sugg);
1399 output.suggestion := sugg;
1401 output.prefix_key := entry.prefix_key;
1402 output.prefix_key_count := entry.count;
1403 output.input := word;
1404 output.norm_input := input;
1405 output.word_pos := w_pos;
1407 -- We can't "cache" a set of generated records directly, so
1408 -- here we build up an array of search.symspell_lookup_output
1409 -- records that we can revivicate later as a table using UNNEST().
1410 output_list := output_list || output;
1412 EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
1413 CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
1415 END LOOP; -- loop over suggestions
1416 END LOOP; -- loop over entries
1417 END LOOP; -- loop over entry_keys
1419 -- Now we're done examining this word
1420 IF verbosity = 0 THEN
1421 -- Return the "best" suggestion from the smallest edit
1422 -- distance group. We define best based on the weighting
1423 -- of the non-lev similarity measures and use the suggestion
1424 -- use count to break ties.
1426 SELECT * FROM UNNEST(output_list)
1427 ORDER BY lev_distance,
1428 (soundex_sim * COALESCE(soundex_weight,0))
1429 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1430 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1431 suggestion_count DESC
1433 ELSIF verbosity = 1 THEN
1434 -- Return all suggestions from the smallest
1435 -- edit distance group.
1437 SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
1438 ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
1439 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1440 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1441 suggestion_count DESC;
1442 ELSIF verbosity = 2 THEN
1443 -- Return everything we find, along with relevant stats
1445 SELECT * FROM UNNEST(output_list)
1446 ORDER BY lev_distance,
1447 (soundex_sim * COALESCE(soundex_weight,0))
1448 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1449 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1450 suggestion_count DESC;
1451 ELSIF verbosity = 3 THEN
1452 -- Return everything we find from the two smallest edit distance groups
1454 SELECT * FROM UNNEST(output_list)
1455 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
1456 ORDER BY lev_distance,
1457 (soundex_sim * COALESCE(soundex_weight,0))
1458 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1459 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1460 suggestion_count DESC;
1461 ELSIF verbosity = 4 THEN
1462 -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
1464 SELECT * FROM UNNEST(output_list)
1465 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
1466 ORDER BY lev_distance,
1467 (soundex_sim * COALESCE(soundex_weight,0))
1468 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1469 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1470 suggestion_count DESC;
1472 END LOOP; -- loop over words
1476 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
1479 no_limit BOOL DEFAULT FALSE,
1480 prefix_length INT DEFAULT 6,
1482 ) RETURNS SETOF search.symspell_dictionary AS $F$
1487 entry search.symspell_dictionary%ROWTYPE;
1491 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
1492 key := SUBSTRING(key FROM 1 FOR prefix_length);
1493 key_list := ARRAY[raw_input, key];
1495 key_list := ARRAY[key];
1498 FOREACH del_key IN ARRAY key_list LOOP
1500 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1502 entry.prefix_key := del_key;
1504 entry.keyword_count := 0;
1505 entry.title_count := 0;
1506 entry.author_count := 0;
1507 entry.subject_count := 0;
1508 entry.series_count := 0;
1509 entry.identifier_count := 0;
1511 entry.keyword_suggestions := '{}';
1512 entry.title_suggestions := '{}';
1513 entry.author_suggestions := '{}';
1514 entry.subject_suggestions := '{}';
1515 entry.series_suggestions := '{}';
1516 entry.identifier_suggestions := '{}';
1518 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1519 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1520 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1521 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1522 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1523 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1524 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1526 IF del_key = raw_input THEN
1527 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
1528 IF source_class = 'title' THEN entry.title_count := 1; END IF;
1529 IF source_class = 'author' THEN entry.author_count := 1; END IF;
1530 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
1531 IF source_class = 'series' THEN entry.series_count := 1; END IF;
1532 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
1538 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
1541 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1542 -- skip suggestions that are already too long for the prefix key
1543 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
1545 entry.keyword_suggestions := '{}';
1546 entry.title_suggestions := '{}';
1547 entry.author_suggestions := '{}';
1548 entry.subject_suggestions := '{}';
1549 entry.series_suggestions := '{}';
1550 entry.identifier_suggestions := '{}';
1552 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
1553 IF source_class = 'title' THEN entry.title_count := 0; END IF;
1554 IF source_class = 'author' THEN entry.author_count := 0; END IF;
1555 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
1556 IF source_class = 'series' THEN entry.series_count := 0; END IF;
1557 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
1559 entry.prefix_key := del_key;
1561 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1562 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1563 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1564 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1565 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1566 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1567 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1573 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1575 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
1578 old_input TEXT DEFAULT NULL,
1579 include_phrases BOOL DEFAULT FALSE
1580 ) RETURNS SETOF search.symspell_dictionary AS $F$
1587 entry search.symspell_dictionary;
1589 IF full_input IS NOT NULL THEN
1590 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1591 prefix_length := COALESCE(prefix_length, 6);
1593 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1594 maxED := COALESCE(maxED, 3);
1596 input := evergreen.lowercase(full_input);
1597 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
1598 IF word_list IS NULL THEN
1602 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
1603 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
1606 FOREACH word IN ARRAY word_list LOOP
1607 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
1608 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1609 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
1613 IF old_input IS NOT NULL THEN
1614 input := evergreen.lowercase(old_input);
1616 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
1617 -- similarly skip words that have 5 or more digits here to
1618 -- avoid adding erroneous prefix deletion entries to the dictionary
1619 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1620 entry.prefix_key := word;
1622 entry.keyword_count := 0;
1623 entry.title_count := 0;
1624 entry.author_count := 0;
1625 entry.subject_count := 0;
1626 entry.series_count := 0;
1627 entry.identifier_count := 0;
1629 entry.keyword_suggestions := '{}';
1630 entry.title_suggestions := '{}';
1631 entry.author_suggestions := '{}';
1632 entry.subject_suggestions := '{}';
1633 entry.series_suggestions := '{}';
1634 entry.identifier_suggestions := '{}';
1636 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
1637 IF source_class = 'title' THEN entry.title_count := -1; END IF;
1638 IF source_class = 'author' THEN entry.author_count := -1; END IF;
1639 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
1640 IF source_class = 'series' THEN entry.series_count := -1; END IF;
1641 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
1647 $F$ LANGUAGE PLPGSQL;
1649 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
1652 old_input TEXT DEFAULT NULL,
1653 include_phrases BOOL DEFAULT FALSE
1654 ) RETURNS SETOF search.symspell_dictionary AS $F$
1657 conflict_entry RECORD;
1660 IF full_input = old_input THEN -- neither NULL, and are the same
1664 FOR new_entry IN EXECUTE $q$
1668 FROM (SELECT prefix_key,
1669 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
1670 SUM($q$ || source_class || $q$_count) count
1671 FROM search.symspell_build_entries($1, $2, $3, $4)
1673 $q$ USING full_input, source_class, old_input, include_phrases
1677 $q$ || source_class || $q$_suggestions suggestions,
1678 $q$ || source_class || $q$_count count
1679 FROM search.symspell_dictionary
1680 WHERE prefix_key = $1 $q$
1682 USING new_entry.prefix_key;
1684 IF new_entry.count <> 0 THEN -- Real word, and count changed
1685 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
1686 IF conflict_entry.count > 0 THEN -- it's a real word
1687 RETURN QUERY EXECUTE $q$
1688 UPDATE search.symspell_dictionary
1689 SET $q$ || source_class || $q$_count = $2
1690 WHERE prefix_key = $1
1692 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
1693 ELSE -- it was a prefix key or delete-emptied word before
1694 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
1695 RETURN QUERY EXECUTE $q$
1696 UPDATE search.symspell_dictionary
1697 SET $q$ || source_class || $q$_count = $2
1698 WHERE prefix_key = $1
1700 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
1701 ELSE -- new suggestion!
1702 RETURN QUERY EXECUTE $q$
1703 UPDATE search.symspell_dictionary
1704 SET $q$ || source_class || $q$_count = $2,
1705 $q$ || source_class || $q$_suggestions = $3
1706 WHERE prefix_key = $1
1708 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
1712 -- We keep the on-conflict clause just in case...
1713 RETURN QUERY EXECUTE $q$
1714 INSERT INTO search.symspell_dictionary AS d (
1715 $q$ || source_class || $q$_count,
1717 $q$ || source_class || $q$_suggestions
1718 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
1719 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
1720 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
1722 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
1724 ELSE -- key only, or no change
1725 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
1726 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
1727 RETURN QUERY EXECUTE $q$
1728 UPDATE search.symspell_dictionary
1729 SET $q$ || source_class || $q$_suggestions = $2
1730 WHERE prefix_key = $1
1732 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
1735 RETURN QUERY EXECUTE $q$
1736 INSERT INTO search.symspell_dictionary AS d (
1737 $q$ || source_class || $q$_count,
1739 $q$ || source_class || $q$_suggestions
1740 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
1741 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
1743 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
1748 $F$ LANGUAGE PLPGSQL;
1750 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
1753 new_value TEXT := NULL;
1754 old_value TEXT := NULL;
1756 search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
1758 IF TG_OP IN ('INSERT', 'UPDATE') THEN
1759 new_value := NEW.value;
1762 IF TG_OP IN ('DELETE', 'UPDATE') THEN
1763 old_value := OLD.value;
1766 PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
1768 RETURN NULL; -- always fired AFTER
1770 $f$ LANGUAGE PLPGSQL;
1772 CREATE TRIGGER maintain_symspell_entries_tgr
1773 AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
1774 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1776 CREATE TRIGGER maintain_symspell_entries_tgr
1777 AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
1778 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1780 CREATE TRIGGER maintain_symspell_entries_tgr
1781 AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
1782 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1784 CREATE TRIGGER maintain_symspell_entries_tgr
1785 AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
1786 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1788 CREATE TRIGGER maintain_symspell_entries_tgr
1789 AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
1790 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1792 CREATE TRIGGER maintain_symspell_entries_tgr
1793 AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
1794 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();