2 * Copyright (C) 2007-2010 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS search CASCADE;
24 CREATE OR REPLACE FUNCTION evergreen.pg_statistics (tab TEXT, col TEXT) RETURNS TABLE(element TEXT, frequency INT) AS $$
26 -- This query will die on PG < 9.2, but the function can be created. We just won't use it where we can't.
30 FROM (SELECT ROW_NUMBER() OVER (),
32 FROM (SELECT UNNEST(most_common_elem_freqs) AS f
38 JOIN (SELECT ROW_NUMBER() OVER (),
40 FROM (SELECT UNNEST(most_common_elems::text::text[]) AS e
45 ) AS elems USING (row_number);
49 CREATE OR REPLACE FUNCTION evergreen.query_int_wrapper (INT[],TEXT) RETURNS BOOL AS $$
51 RETURN $1 @@ $2::query_int;
53 $$ LANGUAGE PLPGSQL STABLE;
55 CREATE TABLE search.relevance_adjustment (
56 id SERIAL PRIMARY KEY,
57 active BOOL NOT NULL DEFAULT TRUE,
58 field INT NOT NULL REFERENCES config.metabib_field (id) DEFERRABLE INITIALLY DEFERRED,
59 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
60 multiplier NUMERIC NOT NULL DEFAULT 1.0
62 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
64 -- XXX not required in 3.0+ ?
65 CREATE TYPE search.search_result AS ( id BIGINT, rel NUMERIC, record INT, total INT, checked INT, visible INT, deleted INT, excluded INT, badges TEXT, popularity NUMERIC );
66 CREATE TYPE search.search_args AS ( id INT, field_class TEXT, field_name TEXT, table_alias TEXT, term TEXT, term_type TEXT );
68 CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes text[], hits bigint[]) RETURNS TABLE(id integer, value text, count bigint)
70 SELECT id, value, count
72 SELECT mfae.field AS id,
74 COUNT(DISTINCT mfae.source),
76 PARTITION BY mfae.field ORDER BY COUNT(DISTINCT mfae.source) DESC
78 FROM metabib.facet_entry mfae
79 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
80 WHERE mfae.source = ANY ($2)
82 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
87 (SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled),
93 CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
94 SELECT id, value, count FROM (
95 SELECT mfae.field AS id,
97 COUNT(DISTINCT mmrsm.metarecord),
99 PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
101 FROM metabib.facet_entry mfae
102 JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
103 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
104 WHERE mmrsm.metarecord IN (SELECT * FROM unnest($2))
106 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
109 WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
112 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute ( value INT, attr TEXT ) RETURNS INT AS $f$
115 WHEN 'luri_org' THEN 0 -- "b" attr
116 WHEN 'bib_source' THEN 1 -- "b" attr
118 WHEN 'copy_flags' THEN 0 -- "c" attr
119 WHEN 'owning_lib' THEN 1 -- "c" attr
120 WHEN 'circ_lib' THEN 2 -- "c" attr
121 WHEN 'status' THEN 3 -- "c" attr
122 WHEN 'location' THEN 4 -- "c" attr
123 WHEN 'location_group' THEN 5 -- "c" attr
127 /* copy_flags bit positions, LSB-first:
129 0: asset.copy.opac_visible
132 When adding flags, you must update asset.all_visible_flags()
134 Because bib and copy values are stored separately, we can reuse
135 shifts, saving us some space. We could probably take back a bit
136 too, but I'm not sure its worth squeezing that last one out. We'd
137 be left with just 2 slots for copy attrs, rather than 10.
140 $f$ LANGUAGE SQL IMMUTABLE;
142 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_list ( attr TEXT, value INT[] ) RETURNS INT[] AS $f$
143 SELECT ARRAY_AGG(search.calculate_visibility_attribute(x, $1)) FROM UNNEST($2) AS X;
144 $f$ LANGUAGE SQL IMMUTABLE;
146 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_test ( attr TEXT, value INT[], negate BOOL DEFAULT FALSE ) RETURNS TEXT AS $f$
147 SELECT CASE WHEN $3 THEN '!' ELSE '' END || '(' || ARRAY_TO_STRING(search.calculate_visibility_attribute_list($1,$2),'|') || ')';
148 $f$ LANGUAGE SQL IMMUTABLE;
150 CREATE OR REPLACE FUNCTION asset.calculate_copy_visibility_attribute_set ( copy_id BIGINT ) RETURNS INT[] AS $f$
152 copy_row asset.copy%ROWTYPE;
153 lgroup_map asset.copy_location_group_map%ROWTYPE;
154 attr_set INT[] := '{}'::INT[];
156 SELECT * INTO copy_row FROM asset.copy WHERE id = copy_id;
158 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.opac_visible::INT, 'copy_flags');
159 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.circ_lib, 'circ_lib');
160 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.status, 'status');
161 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.location, 'location');
165 search.calculate_visibility_attribute(owning_lib, 'owning_lib')
167 FROM asset.call_number
168 WHERE id = copy_row.call_number;
170 FOR lgroup_map IN SELECT * FROM asset.copy_location_group_map WHERE location = copy_row.location LOOP
171 attr_set := attr_set || search.calculate_visibility_attribute(lgroup_map.lgroup, 'location_group');
176 $f$ LANGUAGE PLPGSQL;
178 CREATE OR REPLACE FUNCTION biblio.calculate_bib_visibility_attribute_set ( bib_id BIGINT, new_source INT DEFAULT NULL, force_source BOOL DEFAULT FALSE ) RETURNS INT[] AS $f$
180 bib_row biblio.record_entry%ROWTYPE;
181 cn_row asset.call_number%ROWTYPE;
182 attr_set INT[] := '{}'::INT[];
184 SELECT * INTO bib_row FROM biblio.record_entry WHERE id = bib_id;
187 IF new_source IS NOT NULL THEN
188 attr_set := attr_set || search.calculate_visibility_attribute(new_source, 'bib_source');
190 ELSIF bib_row.source IS NOT NULL THEN
191 attr_set := attr_set || search.calculate_visibility_attribute(bib_row.source, 'bib_source');
196 FROM asset.call_number
197 WHERE record = bib_id
198 AND label = '##URI##'
201 attr_set := attr_set || search.calculate_visibility_attribute(cn_row.owning_lib, 'luri_org');
206 $f$ LANGUAGE PLPGSQL;
208 CREATE OR REPLACE FUNCTION asset.cache_copy_visibility () RETURNS TRIGGER as $func$
210 ocn asset.call_number%ROWTYPE;
211 ncn asset.call_number%ROWTYPE;
216 SELECT enabled = FALSE INTO dobib FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc';
218 IF TG_TABLE_NAME = 'peer_bib_copy_map' THEN -- Only needs ON INSERT OR DELETE, so handle separately
219 IF TG_OP = 'INSERT' THEN
220 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
223 asset.calculate_copy_visibility_attribute_set(NEW.target_copy)
227 ELSIF TG_OP = 'DELETE' THEN
228 DELETE FROM asset.copy_vis_attr_cache
229 WHERE record = OLD.peer_record AND target_copy = OLD.target_copy;
235 IF TG_OP = 'INSERT' THEN -- Handles ON INSERT. ON UPDATE is below.
236 IF TG_TABLE_NAME IN ('copy', 'unit') THEN
237 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
238 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
241 asset.calculate_copy_visibility_attribute_set(NEW.id)
243 ELSIF TG_TABLE_NAME = 'record_entry' THEN
244 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
245 ELSIF TG_TABLE_NAME = 'call_number' AND NEW.label = '##URI##' AND dobib THEN -- New located URI
246 UPDATE biblio.record_entry
247 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
248 WHERE id = NEW.record;
255 -- handle items first, since with circulation activity
256 -- their statuses change frequently
257 IF TG_TABLE_NAME IN ('copy', 'unit') THEN -- This handles ON UPDATE OR DELETE. ON INSERT above
259 IF TG_OP = 'DELETE' THEN -- Shouldn't get here, normally
260 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
264 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
266 IF OLD.deleted <> NEW.deleted THEN
268 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
270 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
273 asset.calculate_copy_visibility_attribute_set(NEW.id)
278 ELSIF OLD.location <> NEW.location OR
279 OLD.status <> NEW.status OR
280 OLD.opac_visible <> NEW.opac_visible OR
281 OLD.circ_lib <> NEW.circ_lib OR
282 OLD.call_number <> NEW.call_number
284 IF OLD.call_number <> NEW.call_number THEN -- Special check since it's more expensive than the next branch
285 SELECT * INTO ocn FROM asset.call_number cn WHERE id = OLD.call_number;
287 IF ncn.record <> ocn.record THEN
288 -- We have to use a record-specific WHERE clause
289 -- to avoid modifying the entries for peer-bib copies.
290 UPDATE asset.copy_vis_attr_cache
291 SET target_copy = NEW.id,
293 WHERE target_copy = OLD.id
294 AND record = ocn.record;
298 -- Any of these could change visibility, but
299 -- we'll save some queries and not try to calculate
300 -- the change directly. We want to update peer-bib
301 -- entries in this case, unlike above.
302 UPDATE asset.copy_vis_attr_cache
303 SET target_copy = NEW.id,
304 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(NEW.id)
305 WHERE target_copy = OLD.id;
309 ELSIF TG_TABLE_NAME = 'call_number' THEN
311 IF TG_OP = 'DELETE' AND OLD.label = '##URI##' AND dobib THEN -- really deleted located URI, if the delete protection rule is disabled...
312 UPDATE biblio.record_entry
313 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
314 WHERE id = OLD.record;
318 IF OLD.label = '##URI##' AND dobib THEN -- Located URI
319 IF OLD.deleted <> NEW.deleted OR OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
320 UPDATE biblio.record_entry
321 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
322 WHERE id = NEW.record;
324 IF OLD.record <> NEW.record THEN -- maybe on merge?
325 UPDATE biblio.record_entry
326 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
327 WHERE id = OLD.record;
331 ELSIF OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
332 UPDATE asset.copy_vis_attr_cache
333 SET record = NEW.record,
334 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(target_copy)
335 WHERE target_copy IN (SELECT id FROM asset.copy WHERE call_number = NEW.id)
336 AND record = OLD.record;
340 ELSIF TG_TABLE_NAME = 'record_entry' AND OLD.source IS DISTINCT FROM NEW.source THEN -- Only handles ON UPDATE, INSERT above
341 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
346 $func$ LANGUAGE PLPGSQL;
348 CREATE TRIGGER z_opac_vis_mat_view_tgr BEFORE INSERT OR UPDATE ON biblio.record_entry FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
349 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR DELETE ON biblio.peer_bib_copy_map FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
350 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE OR DELETE ON asset.call_number FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
351 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
352 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
353 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
354 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
356 CREATE OR REPLACE FUNCTION asset.all_visible_flags () RETURNS TEXT AS $f$
357 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(1 << x, 'copy_flags')),'&') || ')'
358 FROM GENERATE_SERIES(0,0) AS x; -- increment as new flags are added.
359 $f$ LANGUAGE SQL STABLE;
361 CREATE OR REPLACE FUNCTION asset.visible_orgs (otype TEXT) RETURNS TEXT AS $f$
362 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
365 $f$ LANGUAGE SQL STABLE;
367 CREATE OR REPLACE FUNCTION asset.invisible_orgs (otype TEXT) RETURNS TEXT AS $f$
368 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, $1)),'|') || ')'
370 WHERE NOT opac_visible;
371 $f$ LANGUAGE SQL STABLE;
373 -- Bib-oriented defaults for search
374 CREATE OR REPLACE FUNCTION asset.bib_source_default () RETURNS TEXT AS $f$
375 SELECT '(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'bib_source')),'|') || ')'
376 FROM config.bib_source
378 $f$ LANGUAGE SQL IMMUTABLE;
380 CREATE OR REPLACE FUNCTION asset.luri_org_default () RETURNS TEXT AS $f$
381 SELECT * FROM asset.invisible_orgs('luri_org');
382 $f$ LANGUAGE SQL STABLE;
384 -- Copy-oriented defaults for search
385 CREATE OR REPLACE FUNCTION asset.location_group_default () RETURNS TEXT AS $f$
386 SELECT '!()'::TEXT; -- For now, as there's no way to cause a location group to hide all copies.
388 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location_group')),'|') || ')'
389 FROM asset.copy_location_group
390 WHERE NOT opac_visible;
392 $f$ LANGUAGE SQL IMMUTABLE;
394 CREATE OR REPLACE FUNCTION asset.location_default () RETURNS TEXT AS $f$
395 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'location')),'|') || ')'
396 FROM asset.copy_location
397 WHERE NOT opac_visible;
398 $f$ LANGUAGE SQL STABLE;
400 CREATE OR REPLACE FUNCTION asset.status_default () RETURNS TEXT AS $f$
401 SELECT '!(' || ARRAY_TO_STRING(ARRAY_AGG(search.calculate_visibility_attribute(id, 'status')),'|') || ')'
402 FROM config.copy_status
403 WHERE NOT opac_visible;
404 $f$ LANGUAGE SQL STABLE;
406 CREATE OR REPLACE FUNCTION asset.owning_lib_default () RETURNS TEXT AS $f$
407 SELECT * FROM asset.invisible_orgs('owning_lib');
408 $f$ LANGUAGE SQL STABLE;
410 CREATE OR REPLACE FUNCTION asset.circ_lib_default () RETURNS TEXT AS $f$
411 SELECT * FROM asset.invisible_orgs('circ_lib');
412 $f$ LANGUAGE SQL STABLE;
414 CREATE OR REPLACE FUNCTION asset.patron_default_visibility_mask () RETURNS TABLE (b_attrs TEXT, c_attrs TEXT) AS $f$
416 copy_flags TEXT; -- "c" attr
418 owning_lib TEXT; -- "c" attr
419 circ_lib TEXT; -- "c" attr
420 status TEXT; -- "c" attr
421 location TEXT; -- "c" attr
422 location_group TEXT; -- "c" attr
424 luri_org TEXT; -- "b" attr
425 bib_sources TEXT; -- "b" attr
427 bib_tests TEXT := '';
429 copy_flags := asset.all_visible_flags(); -- Will always have at least one
431 owning_lib := NULLIF(asset.owning_lib_default(),'!()');
433 circ_lib := NULLIF(asset.circ_lib_default(),'!()');
434 status := NULLIF(asset.status_default(),'!()');
435 location := NULLIF(asset.location_default(),'!()');
436 location_group := NULLIF(asset.location_group_default(),'!()');
438 -- LURIs will be handled at the perl layer directly
439 -- luri_org := NULLIF(asset.luri_org_default(),'!()');
440 bib_sources := NULLIF(asset.bib_source_default(),'()');
443 IF luri_org IS NOT NULL AND bib_sources IS NOT NULL THEN
444 bib_tests := '('||ARRAY_TO_STRING( ARRAY[luri_org,bib_sources], '|')||')&('||luri_org||')&';
445 ELSIF luri_org IS NOT NULL THEN
446 bib_tests := luri_org || '&';
447 ELSIF bib_sources IS NOT NULL THEN
448 bib_tests := bib_sources || '|';
451 RETURN QUERY SELECT bib_tests,
452 '('||ARRAY_TO_STRING(
453 ARRAY[copy_flags,owning_lib,circ_lib,status,location,location_group]::TEXT[],
457 $f$ LANGUAGE PLPGSQL STABLE ROWS 1;
459 CREATE OR REPLACE FUNCTION metabib.suggest_browse_entries(raw_query_text text, search_class text, headline_opts text, visibility_org integer, query_limit integer, normalization integer)
460 RETURNS TABLE(value text, field integer, buoyant_and_class_match boolean, field_match boolean, field_weight integer, rank real, buoyant boolean, match text)
463 prepared_query_texts TEXT[];
466 opac_visibility_join TEXT;
467 search_class_join TEXT;
471 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
473 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
474 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
476 visibility_org := NULLIF(visibility_org,-1);
477 IF visibility_org IS NOT NULL THEN
478 PERFORM FROM actor.org_unit WHERE id = visibility_org AND parent_ou IS NULL;
480 opac_visibility_join := '';
482 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
484 b_tests := search.calculate_visibility_attribute_test(
486 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(visibility_org))
489 b_tests := search.calculate_visibility_attribute_test(
491 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(visibility_org))
494 opac_visibility_join := '
495 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = x.source)
496 LEFT JOIN biblio.record_entry b ON (b.id = x.source)
497 JOIN vm ON (acvac.vis_attr_vector @@
498 (vm.c_attrs || $$&$$ ||
499 search.calculate_visibility_attribute_test(
501 (SELECT ARRAY_AGG(id) FROM actor.org_unit_descendants($4))
504 ) OR (b.vis_attr_vector @@ $$' || b_tests || '$$::query_int)
508 opac_visibility_join := '';
511 -- The following determines whether we only provide suggestsons matching
512 -- the user's selected search_class, or whether we show other suggestions
513 -- too. The reason for MIN() is that for search_classes like
514 -- 'title|proper|uniform' you would otherwise get multiple rows. The
515 -- implication is that if title as a class doesn't have restrict,
516 -- nor does the proper field, but the uniform field does, you're going
517 -- to get 'false' for your overall evaluation of 'should we restrict?'
518 -- To invert that, change from MIN() to MAX().
522 MIN(cmc.restrict::INT) AS restrict_class,
523 MIN(cmf.restrict::INT) AS restrict_field
524 FROM metabib.search_class_to_registered_components(search_class)
525 AS _registered (field_class TEXT, field INT)
527 config.metabib_class cmc ON (cmc.name = _registered.field_class)
529 config.metabib_field cmf ON (cmf.id = _registered.field);
531 -- evaluate 'should we restrict?'
532 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
533 search_class_join := '
535 metabib.search_class_to_registered_components($2)
536 AS _registered (field_class TEXT, field INT) ON (
537 (_registered.field IS NULL AND
538 _registered.field_class = cmf.field_class) OR
539 (_registered.field = cmf.id)
543 search_class_join := '
545 metabib.search_class_to_registered_components($2)
546 AS _registered (field_class TEXT, field INT) ON (
547 _registered.field_class = cmc.name
552 RETURN QUERY EXECUTE '
553 WITH vm AS ( SELECT * FROM asset.patron_default_visibility_mask() ),
554 mbe AS (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000)
563 TS_HEADLINE(value, $7, $3)
564 FROM (SELECT DISTINCT
567 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
568 _registered.field = cmf.id AS restrict,
570 TS_RANK_CD(mbe.index_vector, $1, $6),
573 FROM metabib.browse_entry_def_map mbedm
574 JOIN mbe ON (mbe.id = mbedm.entry)
575 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
576 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
577 ' || search_class_join || '
578 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
580 ' || opac_visibility_join || '
581 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
583 ' -- sic, repeat the order by clause in the outer select too
585 query, search_class, headline_opts,
586 visibility_org, query_limit, normalization, plain_query
590 -- buoyant AND chosen class = match class
591 -- chosen field = match field
598 $f$ LANGUAGE plpgsql ROWS 10;
600 CREATE OR REPLACE FUNCTION metabib.staged_browse(query text, fields integer[], context_org integer, context_locations integer[], staff boolean, browse_superpage_size integer, count_up_from_zero boolean, result_limit integer, next_pivot_pos integer)
601 RETURNS SETOF metabib.flat_browse_entry_appearance
610 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
611 results_skipped INT := 0;
612 row_counter INT := 0;
617 all_records BIGINT[];
618 all_brecords BIGINT[];
619 all_arecords BIGINT[];
620 superpage_of_records BIGINT[];
625 unauthorized_entry RECORD;
627 IF count_up_from_zero THEN
634 SELECT x.c_attrs, x.b_attrs INTO c_tests, b_tests FROM asset.patron_default_visibility_mask() x;
637 -- b_tests supplies its own query_int operator, c_tests does not
638 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
640 SELECT ARRAY_AGG(id) INTO c_orgs FROM actor.org_unit_descendants(context_org);
642 c_tests := c_tests || search.calculate_visibility_attribute_test('circ_lib',c_orgs)
643 || '&' || search.calculate_visibility_attribute_test('owning_lib',c_orgs);
645 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
647 b_tests := b_tests || search.calculate_visibility_attribute_test(
649 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(context_org) x)
652 b_tests := b_tests || search.calculate_visibility_attribute_test(
654 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(context_org) x)
658 IF context_locations THEN
659 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
660 c_tests := c_tests || search.calculate_visibility_attribute_test('location',context_locations);
663 OPEN curs NO SCROLL FOR EXECUTE query;
668 IF result_row.pivot_point IS NOT NULL THEN
669 RETURN NEXT result_row;
675 SELECT INTO unauthorized_entry *
676 FROM metabib.browse_entry_simple_heading_map mbeshm
677 INNER JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
678 INNER JOIN authority.control_set_authority_field acsaf ON ( acsaf.id = ash.atag )
679 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
680 WHERE mbeshm.entry = rec.id
681 AND ahf.heading_purpose = 'variant';
683 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
684 IF (unauthorized_entry.record IS NOT NULL) THEN
685 --unauthorized term belongs to an auth linked to a bib?
686 SELECT INTO all_arecords, result_row.sees, afields
687 ARRAY_AGG(DISTINCT abl.bib),
688 STRING_AGG(DISTINCT abl.authority::TEXT, $$,$$),
689 ARRAY_AGG(DISTINCT map.metabib_field)
690 FROM authority.bib_linking abl
691 INNER JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
692 map.authority_field = unauthorized_entry.atag
693 AND map.metabib_field = ANY(fields)
695 WHERE abl.authority = unauthorized_entry.record;
698 SELECT INTO all_arecords, result_row.sees, afields
699 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
700 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
701 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
703 FROM metabib.browse_entry_simple_heading_map mbeshm
704 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
705 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
706 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
707 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
708 ash.atag = map.authority_field
709 AND map.metabib_field = ANY(fields)
711 JOIN authority.control_set_authority_field acsaf ON (
712 map.authority_field = acsaf.id
714 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
715 WHERE mbeshm.entry = rec.id
716 AND ahf.heading_purpose = 'variant';
720 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
721 SELECT INTO all_brecords, result_row.authorities, bfields
722 ARRAY_AGG(DISTINCT source),
723 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
724 ARRAY_AGG(DISTINCT def)
725 FROM metabib.browse_entry_def_map
727 AND def = ANY(fields);
729 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
731 result_row.sources := 0;
732 result_row.asources := 0;
734 -- Bib-linked vis checking
735 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
737 SELECT INTO result_row.sources COUNT(DISTINCT b.id)
738 FROM biblio.record_entry b
739 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
740 WHERE b.id = ANY(all_brecords[1:browse_superpage_size])
742 acvac.vis_attr_vector @@ c_tests::query_int
743 OR b.vis_attr_vector @@ b_tests::query_int
746 result_row.accurate := TRUE;
750 -- Authority-linked vis checking
751 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
753 SELECT INTO result_row.asources COUNT(DISTINCT b.id)
754 FROM biblio.record_entry b
755 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
756 WHERE b.id = ANY(all_arecords[1:browse_superpage_size])
758 acvac.vis_attr_vector @@ c_tests::query_int
759 OR b.vis_attr_vector @@ b_tests::query_int
762 result_row.aaccurate := TRUE;
766 IF result_row.sources > 0 OR result_row.asources > 0 THEN
768 -- The function that calls this function needs row_number in order
769 -- to correctly order results from two different runs of this
771 result_row.row_number := row_number;
773 -- Now, if row_counter is still less than limit, return a row. If
774 -- not, but it is less than next_pivot_pos, continue on without
775 -- returning actual result rows until we find
776 -- that next pivot, and return it.
778 IF row_counter < result_limit THEN
779 result_row.browse_entry := rec.id;
780 result_row.value := rec.value;
782 RETURN NEXT result_row;
784 result_row.browse_entry := NULL;
785 result_row.authorities := NULL;
786 result_row.fields := NULL;
787 result_row.value := NULL;
788 result_row.sources := NULL;
789 result_row.sees := NULL;
790 result_row.accurate := NULL;
791 result_row.aaccurate := NULL;
792 result_row.pivot_point := rec.id;
794 IF row_counter >= next_pivot_pos THEN
795 RETURN NEXT result_row;
800 IF count_up_from_zero THEN
801 row_number := row_number + 1;
803 row_number := row_number - 1;
806 -- row_counter is different from row_number.
807 -- It simply counts up from zero so that we know when
808 -- we've reached our limit.
809 row_counter := row_counter + 1;
813 $f$ LANGUAGE plpgsql ROWS 10;
815 CREATE OR REPLACE FUNCTION metabib.browse(search_field integer[], browse_term text, context_org integer DEFAULT NULL::integer, context_loc_group integer DEFAULT NULL::integer, staff boolean DEFAULT false, pivot_id bigint DEFAULT NULL::bigint, result_limit integer DEFAULT 10)
816 RETURNS SETOF metabib.flat_browse_entry_appearance
822 pivot_sort_value TEXT;
823 pivot_sort_fallback TEXT;
824 context_locations INT[];
825 browse_superpage_size INT;
826 results_skipped INT := 0;
830 forward_to_pivot INT;
832 -- First, find the pivot if we were given a browse term but not a pivot.
833 IF pivot_id IS NULL THEN
834 pivot_id := metabib.browse_pivot(search_field, browse_term);
837 SELECT INTO pivot_sort_value, pivot_sort_fallback
838 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
840 -- Bail if we couldn't find a pivot.
841 IF pivot_sort_value IS NULL THEN
845 -- Transform the context_loc_group argument (if any) (logc at the
846 -- TPAC layer) into a form we'll be able to use.
847 IF context_loc_group IS NOT NULL THEN
848 SELECT INTO context_locations ARRAY_AGG(location)
849 FROM asset.copy_location_group_map
850 WHERE lgroup = context_loc_group;
853 -- Get the configured size of browse superpages.
854 SELECT INTO browse_superpage_size COALESCE(value::INT,100) -- NULL ok
855 FROM config.global_flag
856 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
858 -- First we're going to search backward from the pivot, then we're going
859 -- to search forward. In each direction, we need two limits. At the
860 -- lesser of the two limits, we delineate the edge of the result set
861 -- we're going to return. At the greater of the two limits, we find the
862 -- pivot value that would represent an offset from the current pivot
863 -- at a distance of one "page" in either direction, where a "page" is a
864 -- result set of the size specified in the "result_limit" argument.
866 -- The two limits in each direction make four derived values in total,
867 -- and we calculate them now.
868 back_limit := CEIL(result_limit::FLOAT / 2);
869 back_to_pivot := result_limit;
870 forward_limit := result_limit / 2;
871 forward_to_pivot := result_limit - 1;
873 -- This is the meat of the SQL query that finds browse entries. We'll
874 -- pass this to a function which uses it with a cursor, so that individual
875 -- rows may be fetched in a loop until some condition is satisfied, without
876 -- waiting for a result set of fixed size to be collected all at once.
881 FROM metabib.browse_entry mbe
883 EXISTS ( -- are there any bibs using this mbe via the requested fields?
885 FROM metabib.browse_entry_def_map mbedm
886 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
887 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
889 FROM metabib.browse_entry_simple_heading_map mbeshm
890 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
891 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
892 ash.atag = map.authority_field
893 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
895 JOIN authority.control_set_authority_field acsaf ON (
896 map.authority_field = acsaf.id
898 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
899 WHERE mbeshm.entry = mbe.id
900 AND ahf.heading_purpose IN (' || $$'variant'$$ || ')
901 -- and authority that variant is coming from is linked to a bib
904 FROM metabib.browse_entry_def_map mbedm2
905 WHERE mbedm2.authority = ash.record AND mbedm2.def = ANY(' || quote_literal(search_field) || ')
911 -- This is the variant of the query for browsing backward.
912 back_query := core_query ||
913 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
914 ' ORDER BY mbe.sort_value DESC, mbe.value DESC LIMIT 1000';
916 -- This variant browses forward.
917 forward_query := core_query ||
918 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
919 ' ORDER BY mbe.sort_value, mbe.value LIMIT 1000';
921 -- We now call the function which applies a cursor to the provided
922 -- queries, stopping at the appropriate limits and also giving us
923 -- the next page's pivot.
925 SELECT * FROM metabib.staged_browse(
926 back_query, search_field, context_org, context_locations,
927 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
929 SELECT * FROM metabib.staged_browse(
930 forward_query, search_field, context_org, context_locations,
931 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
932 ) ORDER BY row_number DESC;
935 $f$ LANGUAGE plpgsql ROWS 10;
937 CREATE OR REPLACE FUNCTION metabib.browse(
940 context_org INT DEFAULT NULL,
941 context_loc_group INT DEFAULT NULL,
942 staff BOOL DEFAULT FALSE,
943 pivot_id BIGINT DEFAULT NULL,
944 result_limit INT DEFAULT 10
945 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
947 RETURN QUERY SELECT * FROM metabib.browse(
948 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
949 FROM config.metabib_field WHERE field_class = search_class),
958 $p$ LANGUAGE PLPGSQL ROWS 10;
960 CREATE OR REPLACE VIEW search.best_tsconfig AS
962 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
963 FROM config.metabib_field m
964 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
965 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND f.index_weight = 'C');
967 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
969 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
972 field_list INT[] DEFAULT '{}'::INT[],
973 css_class TEXT DEFAULT 'oils_SH',
974 hl_all BOOL DEFAULT TRUE,
975 minwords INT DEFAULT 5,
976 maxwords INT DEFAULT 25,
977 shortwords INT DEFAULT 0,
978 maxfrags INT DEFAULT 0,
979 delimiter TEXT DEFAULT ' ... '
980 ) RETURNS SETOF search.highlight_result AS $f$
983 v_css_class TEXT := css_class;
984 v_delimiter TEXT := delimiter;
985 v_field_list INT[] := field_list;
988 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
989 v_delimiter := ' ... ';
993 opts := opts || 'MinWords=' || minwords;
994 opts := opts || ', MaxWords=' || maxwords;
995 opts := opts || ', ShortWords=' || shortwords;
996 opts := opts || ', MaxFragments=' || maxfrags;
997 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
999 opts := opts || 'HighlightAll=TRUE';
1002 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
1003 v_css_class := 'oils_SH';
1006 opts := opts || $$, StopSel=</b>, StartSel="<b class='$$ || v_css_class; -- "
1008 IF v_field_list = '{}'::INT[] THEN
1009 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
1016 evergreen.escape_for_html(de.value) AS value,
1018 ts_config::REGCONFIG,
1019 evergreen.escape_for_html(de.value),
1020 $$ || quote_literal(tsq) || $$,
1021 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
1023 FROM metabib.display_entry de
1024 JOIN config.metabib_field mf ON (mf.id = de.field)
1025 JOIN search.best_tsconfig t ON (t.id = de.field)
1026 WHERE de.source = $2
1027 AND field = ANY ($3)
1030 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
1032 $f$ LANGUAGE PLPGSQL;
1034 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
1035 SELECT regexp_replace(
1051 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
1053 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
1055 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
1056 css_class TEXT DEFAULT 'oils_SH',
1057 hl_all BOOL DEFAULT TRUE,
1058 minwords INT DEFAULT 5,
1059 maxwords INT DEFAULT 25,
1060 shortwords INT DEFAULT 0,
1061 maxfrags INT DEFAULT 0,
1062 delimiter TEXT DEFAULT ' ... '
1063 ) RETURNS SETOF search.highlight_result AS $f$
1071 IF (tsq_map ILIKE 'hstore%') THEN
1072 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
1074 tsq_hstore := tsq_map::HSTORE;
1077 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore::HSTORE) LOOP
1078 SELECT ARRAY_AGG(unnest::INT) INTO afields
1079 FROM unnest(regexp_split_to_array(fields,','));
1080 seen := seen || afields;
1083 SELECT * FROM search.highlight_display_fields_impl(
1084 rid, tsq, afields, css_class, hl_all,minwords,
1085 maxwords, shortwords, maxfrags, delimiter
1093 evergreen.escape_for_html(value) AS value,
1094 evergreen.escape_for_html(value) AS highlight
1095 FROM metabib.display_entry
1097 AND NOT (field = ANY (seen));
1099 $f$ LANGUAGE PLPGSQL ROWS 10;
1101 -- SymSpell implementation follows
1103 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
1104 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
1106 ) RETURNS TEXT[] AS $F$
1107 SELECT NULLIF(ARRAY(
1108 SELECT * FROM UNNEST($1) x
1110 SELECT * FROM UNNEST($2) y
1114 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1115 use String::KeyboardDistance qw(:all);
1116 return qwerty_keyboard_distance(@_);
1117 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1119 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1120 use String::KeyboardDistance qw(:all);
1121 return qwerty_keyboard_distance_match(@_);
1122 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1124 CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
1125 use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
1126 return xs_edistance(@_);
1127 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1129 CREATE TABLE search.symspell_dictionary (
1130 keyword_count INT NOT NULL DEFAULT 0,
1131 title_count INT NOT NULL DEFAULT 0,
1132 author_count INT NOT NULL DEFAULT 0,
1133 subject_count INT NOT NULL DEFAULT 0,
1134 series_count INT NOT NULL DEFAULT 0,
1135 identifier_count INT NOT NULL DEFAULT 0,
1137 prefix_key TEXT PRIMARY KEY,
1139 keyword_suggestions TEXT[],
1140 title_suggestions TEXT[],
1141 author_suggestions TEXT[],
1142 subject_suggestions TEXT[],
1143 series_suggestions TEXT[],
1144 identifier_suggestions TEXT[]
1145 ) WITH (fillfactor = 80);
1147 CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
1148 RETURNS SETOF TEXT AS $F$
1149 SELECT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1150 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1152 -- This version does not preserve input word order!
1153 CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
1154 RETURNS SETOF TEXT AS $F$
1155 SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1156 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1158 CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
1165 woChars := regexp_split_to_array(withoutCase,'');
1166 FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
1167 IF curr = evergreen.uppercase(curr) THEN
1168 woChars[ind] := evergreen.uppercase(woChars[ind]);
1172 RETURN ARRAY_TO_STRING(woChars,'');
1174 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1176 CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
1180 ) RETURNS TEXT[] AS $F$
1183 list TEXT[] := '{}';
1184 sublist TEXT[] := '{}';
1186 FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
1187 item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
1188 IF NOT list @> ARRAY[item] THEN
1189 list := item || list;
1190 IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
1191 sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
1197 RETURN evergreen.text_array_merge_unique(list, sublist);
1199 RETURN list || sublist;
1202 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1204 -- DROP TYPE search.symspell_lookup_output CASCADE;
1205 CREATE TYPE search.symspell_lookup_output AS (
1207 suggestion_count INT,
1209 pg_trgm_sim NUMERIC,
1210 qwerty_kb_match NUMERIC,
1211 soundex_sim NUMERIC,
1215 prefix_key_count INT,
1220 CREATE OR REPLACE FUNCTION search.symspell_lookup(
1223 verbosity integer DEFAULT 2,
1224 xfer_case boolean DEFAULT false,
1225 count_threshold integer DEFAULT 1,
1226 soundex_weight integer DEFAULT 0,
1227 pg_trgm_weight integer DEFAULT 0,
1228 kbdist_weight integer DEFAULT 0
1229 ) RETURNS SETOF search.symspell_lookup_output LANGUAGE plpgsql AS $function$
1235 edit_list TEXT[] := '{}';
1236 seen_list TEXT[] := '{}';
1237 output search.symspell_lookup_output;
1238 output_list search.symspell_lookup_output[];
1246 smallest_ed INT := -1;
1251 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1252 prefix_length := COALESCE(prefix_length, 6);
1254 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1255 maxED := COALESCE(maxED, 3);
1257 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
1259 -- Common case exact match test for preformance
1260 IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
1262 'SELECT '||search_class||'_suggestions AS suggestions,
1263 '||search_class||'_count AS count,
1265 FROM search.symspell_dictionary
1266 WHERE prefix_key = $1
1267 AND '||search_class||'_count >= $2
1268 AND '||search_class||'_suggestions @> ARRAY[$1]'
1269 INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
1270 IF entry.prefix_key IS NOT NULL THEN
1271 output.lev_distance := 0; -- definitionally
1272 output.prefix_key := entry.prefix_key;
1273 output.prefix_key_count := entry.count;
1274 output.suggestion_count := entry.count;
1275 output.input := word_list[1];
1277 output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
1279 output.suggestion := entry.prefix_key;
1281 output.norm_input := entry.prefix_key;
1282 output.qwerty_kb_match := 1;
1283 output.pg_trgm_sim := 1;
1284 output.soundex_sim := 1;
1291 FOREACH word IN ARRAY word_list LOOP
1293 input := evergreen.lowercase(word);
1294 i_len := CHARACTER_LENGTH(input);
1297 IF CHARACTER_LENGTH(input) > prefix_length THEN
1298 prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
1299 edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
1301 edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
1304 SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
1306 output_list := '{}';
1311 FOREACH entry_key IN ARRAY edit_list LOOP
1313 IF global_ed IS NOT NULL THEN
1314 smallest_ed := global_ed;
1317 FOR entry IN EXECUTE
1318 'SELECT '||search_class||'_suggestions AS suggestions,
1319 '||search_class||'_count AS count,
1321 FROM search.symspell_dictionary
1322 WHERE prefix_key = $1
1323 AND '||search_class||'_suggestions IS NOT NULL'
1329 ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
1330 ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
1334 FROM UNNEST(entry.suggestions) s
1335 WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
1336 AND NOT seen_list @> ARRAY[s];
1338 CONTINUE WHEN good_suggs IS NULL;
1340 FOR sugg, output.suggestion_count IN EXECUTE
1341 'SELECT prefix_key, '||search_class||'_count
1342 FROM search.symspell_dictionary
1343 WHERE prefix_key = ANY ($1)
1344 AND '||search_class||'_count >= $2'
1345 USING AKEYS(good_suggs), COALESCE(count_threshold,1)
1348 output.lev_distance := good_suggs->sugg;
1349 seen_list := seen_list || sugg;
1351 -- Track the smallest edit distance among suggestions from this prefix key.
1352 IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
1353 smallest_ed := output.lev_distance;
1356 -- Track the smallest edit distance for all prefix keys for this word.
1357 IF global_ed IS NULL OR smallest_ed < global_ed THEN
1358 global_ed = smallest_ed;
1359 -- And if low verbosity, ignore suggs with a larger distance from here on.
1360 IF verbosity <= 1 THEN
1361 l_maxED := global_ed;
1365 -- Lev distance is our main similarity measure. While
1366 -- trgm or soundex similarity could be the main filter,
1367 -- Lev is both language agnostic and faster.
1369 -- Here we will skip suggestions that have a longer edit distance
1370 -- than the shortest we've already found. This is simply an
1371 -- optimization that allows us to avoid further processing
1372 -- of this entry. It would be filtered out later.
1373 CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
1375 -- If we have an exact match on the suggestion key we can also avoid
1376 -- some function calls.
1377 IF output.lev_distance = 0 THEN
1378 output.qwerty_kb_match := 1;
1379 output.pg_trgm_sim := 1;
1380 output.soundex_sim := 1;
1382 IF kbdist_weight THEN
1383 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
1385 output.qwerty_kb_match := 0;
1387 IF pg_trgm_weight THEN
1388 output.pg_trgm_sim := similarity(input, sugg);
1390 output.pg_trgm_sim := 0;
1392 IF soundex_weight THEN
1393 output.soundex_sim := difference(input, sugg) / 4.0;
1395 output.soundex_sim := 0;
1399 -- Fill in some fields
1400 IF xfer_case AND input <> word THEN
1401 output.suggestion := search.symspell_transfer_casing(word, sugg);
1403 output.suggestion := sugg;
1405 output.prefix_key := entry.prefix_key;
1406 output.prefix_key_count := entry.count;
1407 output.input := word;
1408 output.norm_input := input;
1409 output.word_pos := w_pos;
1411 -- We can't "cache" a set of generated records directly, so
1412 -- here we build up an array of search.symspell_lookup_output
1413 -- records that we can revivicate later as a table using UNNEST().
1414 output_list := output_list || output;
1416 EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
1417 CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
1419 END LOOP; -- loop over suggestions
1420 END LOOP; -- loop over entries
1421 END LOOP; -- loop over entry_keys
1423 -- Now we're done examining this word
1424 IF verbosity = 0 THEN
1425 -- Return the "best" suggestion from the smallest edit
1426 -- distance group. We define best based on the weighting
1427 -- of the non-lev similarity measures and use the suggestion
1428 -- use count to break ties.
1430 SELECT * FROM UNNEST(output_list)
1431 ORDER BY lev_distance,
1432 (soundex_sim * COALESCE(soundex_weight,0))
1433 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1434 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1435 suggestion_count DESC
1437 ELSIF verbosity = 1 THEN
1438 -- Return all suggestions from the smallest
1439 -- edit distance group.
1441 SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
1442 ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
1443 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1444 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1445 suggestion_count DESC;
1446 ELSIF verbosity = 2 THEN
1447 -- Return everything we find, along with relevant stats
1449 SELECT * FROM UNNEST(output_list)
1450 ORDER BY lev_distance,
1451 (soundex_sim * COALESCE(soundex_weight,0))
1452 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1453 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1454 suggestion_count DESC;
1455 ELSIF verbosity = 3 THEN
1456 -- Return everything we find from the two smallest edit distance groups
1458 SELECT * FROM UNNEST(output_list)
1459 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
1460 ORDER BY lev_distance,
1461 (soundex_sim * COALESCE(soundex_weight,0))
1462 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1463 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1464 suggestion_count DESC;
1465 ELSIF verbosity = 4 THEN
1466 -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
1468 SELECT * FROM UNNEST(output_list)
1469 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
1470 ORDER BY lev_distance,
1471 (soundex_sim * COALESCE(soundex_weight,0))
1472 + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
1473 + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
1474 suggestion_count DESC;
1476 END LOOP; -- loop over words
1480 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
1483 no_limit BOOL DEFAULT FALSE,
1484 prefix_length INT DEFAULT 6,
1486 ) RETURNS SETOF search.symspell_dictionary AS $F$
1491 entry search.symspell_dictionary%ROWTYPE;
1495 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
1496 key := SUBSTRING(key FROM 1 FOR prefix_length);
1497 key_list := ARRAY[raw_input, key];
1499 key_list := ARRAY[key];
1502 FOREACH del_key IN ARRAY key_list LOOP
1504 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1506 entry.prefix_key := del_key;
1508 entry.keyword_count := 0;
1509 entry.title_count := 0;
1510 entry.author_count := 0;
1511 entry.subject_count := 0;
1512 entry.series_count := 0;
1513 entry.identifier_count := 0;
1515 entry.keyword_suggestions := '{}';
1516 entry.title_suggestions := '{}';
1517 entry.author_suggestions := '{}';
1518 entry.subject_suggestions := '{}';
1519 entry.series_suggestions := '{}';
1520 entry.identifier_suggestions := '{}';
1522 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1523 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1524 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1525 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1526 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1527 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1528 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1530 IF del_key = raw_input THEN
1531 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
1532 IF source_class = 'title' THEN entry.title_count := 1; END IF;
1533 IF source_class = 'author' THEN entry.author_count := 1; END IF;
1534 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
1535 IF source_class = 'series' THEN entry.series_count := 1; END IF;
1536 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
1542 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
1545 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
1546 -- skip suggestions that are already too long for the prefix key
1547 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
1549 entry.keyword_suggestions := '{}';
1550 entry.title_suggestions := '{}';
1551 entry.author_suggestions := '{}';
1552 entry.subject_suggestions := '{}';
1553 entry.series_suggestions := '{}';
1554 entry.identifier_suggestions := '{}';
1556 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
1557 IF source_class = 'title' THEN entry.title_count := 0; END IF;
1558 IF source_class = 'author' THEN entry.author_count := 0; END IF;
1559 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
1560 IF source_class = 'series' THEN entry.series_count := 0; END IF;
1561 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
1563 entry.prefix_key := del_key;
1565 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1566 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
1567 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
1568 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
1569 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
1570 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
1571 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
1577 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1579 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
1582 old_input TEXT DEFAULT NULL,
1583 include_phrases BOOL DEFAULT FALSE
1584 ) RETURNS SETOF search.symspell_dictionary AS $F$
1591 entry search.symspell_dictionary;
1593 IF full_input IS NOT NULL THEN
1594 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1595 prefix_length := COALESCE(prefix_length, 6);
1597 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1598 maxED := COALESCE(maxED, 3);
1600 input := evergreen.lowercase(full_input);
1601 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
1602 IF word_list IS NULL THEN
1606 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
1607 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
1610 FOREACH word IN ARRAY word_list LOOP
1611 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
1612 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1613 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
1617 IF old_input IS NOT NULL THEN
1618 input := evergreen.lowercase(old_input);
1620 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
1621 -- similarly skip words that have 5 or more digits here to
1622 -- avoid adding erroneous prefix deletion entries to the dictionary
1623 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
1624 entry.prefix_key := word;
1626 entry.keyword_count := 0;
1627 entry.title_count := 0;
1628 entry.author_count := 0;
1629 entry.subject_count := 0;
1630 entry.series_count := 0;
1631 entry.identifier_count := 0;
1633 entry.keyword_suggestions := '{}';
1634 entry.title_suggestions := '{}';
1635 entry.author_suggestions := '{}';
1636 entry.subject_suggestions := '{}';
1637 entry.series_suggestions := '{}';
1638 entry.identifier_suggestions := '{}';
1640 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
1641 IF source_class = 'title' THEN entry.title_count := -1; END IF;
1642 IF source_class = 'author' THEN entry.author_count := -1; END IF;
1643 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
1644 IF source_class = 'series' THEN entry.series_count := -1; END IF;
1645 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
1651 $F$ LANGUAGE PLPGSQL;
1653 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
1656 old_input TEXT DEFAULT NULL,
1657 include_phrases BOOL DEFAULT FALSE
1658 ) RETURNS SETOF search.symspell_dictionary AS $F$
1661 conflict_entry RECORD;
1664 IF full_input = old_input THEN -- neither NULL, and are the same
1668 FOR new_entry IN EXECUTE $q$
1672 FROM (SELECT prefix_key,
1673 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
1674 SUM($q$ || source_class || $q$_count) count
1675 FROM search.symspell_build_entries($1, $2, $3, $4)
1677 $q$ USING full_input, source_class, old_input, include_phrases
1681 $q$ || source_class || $q$_suggestions suggestions,
1682 $q$ || source_class || $q$_count count
1683 FROM search.symspell_dictionary
1684 WHERE prefix_key = $1 $q$
1686 USING new_entry.prefix_key;
1688 IF new_entry.count <> 0 THEN -- Real word, and count changed
1689 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
1690 IF conflict_entry.count > 0 THEN -- it's a real word
1691 RETURN QUERY EXECUTE $q$
1692 UPDATE search.symspell_dictionary
1693 SET $q$ || source_class || $q$_count = $2
1694 WHERE prefix_key = $1
1696 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
1697 ELSE -- it was a prefix key or delete-emptied word before
1698 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
1699 RETURN QUERY EXECUTE $q$
1700 UPDATE search.symspell_dictionary
1701 SET $q$ || source_class || $q$_count = $2
1702 WHERE prefix_key = $1
1704 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
1705 ELSE -- new suggestion!
1706 RETURN QUERY EXECUTE $q$
1707 UPDATE search.symspell_dictionary
1708 SET $q$ || source_class || $q$_count = $2,
1709 $q$ || source_class || $q$_suggestions = $3
1710 WHERE prefix_key = $1
1712 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
1716 -- We keep the on-conflict clause just in case...
1717 RETURN QUERY EXECUTE $q$
1718 INSERT INTO search.symspell_dictionary AS d (
1719 $q$ || source_class || $q$_count,
1721 $q$ || source_class || $q$_suggestions
1722 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
1723 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
1724 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
1726 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
1728 ELSE -- key only, or no change
1729 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
1730 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
1731 RETURN QUERY EXECUTE $q$
1732 UPDATE search.symspell_dictionary
1733 SET $q$ || source_class || $q$_suggestions = $2
1734 WHERE prefix_key = $1
1736 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
1739 RETURN QUERY EXECUTE $q$
1740 INSERT INTO search.symspell_dictionary AS d (
1741 $q$ || source_class || $q$_count,
1743 $q$ || source_class || $q$_suggestions
1744 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
1745 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
1747 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
1752 $F$ LANGUAGE PLPGSQL;
1754 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
1757 new_value TEXT := NULL;
1758 old_value TEXT := NULL;
1760 search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
1762 IF TG_OP IN ('INSERT', 'UPDATE') THEN
1763 new_value := NEW.value;
1766 IF TG_OP IN ('DELETE', 'UPDATE') THEN
1767 old_value := OLD.value;
1770 PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
1772 RETURN NULL; -- always fired AFTER
1774 $f$ LANGUAGE PLPGSQL;
1776 CREATE TRIGGER maintain_symspell_entries_tgr
1777 AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
1778 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1780 CREATE TRIGGER maintain_symspell_entries_tgr
1781 AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
1782 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1784 CREATE TRIGGER maintain_symspell_entries_tgr
1785 AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
1786 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1788 CREATE TRIGGER maintain_symspell_entries_tgr
1789 AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
1790 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1792 CREATE TRIGGER maintain_symspell_entries_tgr
1793 AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
1794 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
1796 CREATE TRIGGER maintain_symspell_entries_tgr
1797 AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
1798 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();