3 SELECT evergreen.upgrade_deps_block_check('0676', :eg_version);
5 INSERT INTO config.global_flag (name, label, enabled, value) VALUES (
6 'opac.use_autosuggest',
7 'OPAC: Show auto-completing suggestions dialog under basic search box (put ''opac_visible'' into the value field to limit suggestions to OPAC-visible items, or blank the field for a possible performance improvement)',
12 CREATE TABLE metabib.browse_entry (
13 id BIGSERIAL PRIMARY KEY,
17 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIST (index_vector);
18 CREATE TRIGGER metabib_browse_entry_fti_trigger
19 BEFORE INSERT OR UPDATE ON metabib.browse_entry
20 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
23 CREATE TABLE metabib.browse_entry_def_map (
24 id BIGSERIAL PRIMARY KEY,
25 entry BIGINT REFERENCES metabib.browse_entry (id),
26 def INT REFERENCES config.metabib_field (id),
27 source BIGINT REFERENCES biblio.record_entry (id)
30 ALTER TABLE config.metabib_field ADD COLUMN browse_field BOOLEAN DEFAULT TRUE NOT NULL;
31 ALTER TABLE config.metabib_field ADD COLUMN browse_xpath TEXT;
33 ALTER TABLE config.metabib_class ADD COLUMN bouyant BOOLEAN DEFAULT FALSE NOT NULL;
34 ALTER TABLE config.metabib_class ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
35 ALTER TABLE config.metabib_field ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
37 -- one good exception to default true:
38 UPDATE config.metabib_field
39 SET browse_field = FALSE
40 WHERE (field_class = 'keyword' AND name = 'keyword') OR
41 (field_class = 'subject' AND name = 'complete');
43 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
44 -- We're only touching it here to add a DELETE statement to the IF NEW.deleted
47 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
52 xfrm config.xml_transform%ROWTYPE;
54 new_attrs HSTORE := ''::HSTORE;
55 attr_def config.record_attr_definition%ROWTYPE;
58 IF NEW.deleted IS TRUE THEN -- If this bib is deleted
59 DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id; -- Rid ourselves of the search-estimate-killing linkage
60 DELETE FROM metabib.record_attr WHERE id = NEW.id; -- Kill the attrs hash, useless on deleted records
61 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
62 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
63 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
64 RETURN NEW; -- and we're done
67 IF TG_OP = 'UPDATE' THEN -- re-ingest?
68 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
70 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
75 -- Record authority linking
76 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
78 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
81 -- Flatten and insert the mfr data
82 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
84 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
86 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
87 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
89 FOR attr_def IN SELECT * FROM config.record_attr_definition ORDER BY format LOOP
91 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
92 SELECT ARRAY_TO_STRING(ARRAY_ACCUM(value), COALESCE(attr_def.joiner,' ')) INTO attr_value
93 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
95 AND tag LIKE attr_def.tag
97 WHEN attr_def.sf_list IS NOT NULL
98 THEN POSITION(subfield IN attr_def.sf_list) > 0
105 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
106 attr_value := biblio.marc21_extract_fixed_field(NEW.id, attr_def.fixed_field);
108 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
110 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
112 -- See if we can skip the XSLT ... it's expensive
113 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
114 -- Can't skip the transform
115 IF xfrm.xslt <> '---' THEN
116 transformed_xml := oils_xslt_process(NEW.marc,xfrm.xslt);
118 transformed_xml := NEW.marc;
121 prev_xfrm := xfrm.name;
124 IF xfrm.name IS NULL THEN
125 -- just grab the marcxml (empty) transform
126 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
127 prev_xfrm := xfrm.name;
130 attr_value := oils_xpath_string(attr_def.xpath, transformed_xml, COALESCE(attr_def.joiner,' '), ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]);
132 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
133 SELECT m.value INTO attr_value
134 FROM biblio.marc21_physical_characteristics(NEW.id) v
135 JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
136 WHERE v.subfield = attr_def.phys_char_sf
137 LIMIT 1; -- Just in case ...
141 -- apply index normalizers to attr_value
143 SELECT n.func AS func,
144 n.param_count AS param_count,
146 FROM config.index_normalizer n
147 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
148 WHERE attr = attr_def.name
150 EXECUTE 'SELECT ' || normalizer.func || '(' ||
151 COALESCE( quote_literal( attr_value ), 'NULL' ) ||
153 WHEN normalizer.param_count > 0
154 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
161 -- Add the new value to the hstore
162 new_attrs := new_attrs || hstore( attr_def.name, attr_value );
166 IF TG_OP = 'INSERT' OR OLD.deleted THEN -- initial insert OR revivication
167 INSERT INTO metabib.record_attr (id, attrs) VALUES (NEW.id, new_attrs);
169 UPDATE metabib.record_attr SET attrs = new_attrs WHERE id = NEW.id;
175 -- Gather and insert the field entry data
176 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
179 IF TG_OP = 'INSERT' THEN
180 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
182 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
185 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
187 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
191 -- (re)map metarecord-bib linking
192 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
193 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
195 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
197 ELSE -- we're doing an update, and we're not deleted, remap
198 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
200 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
206 $func$ LANGUAGE PLPGSQL;
208 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
214 SELECT n.func AS func,
215 n.param_count AS param_count,
217 FROM config.index_normalizer n
218 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
219 WHERE m.field = mapped_field AND m.pos < 0
222 EXECUTE 'SELECT ' || normalizer.func || '(' ||
223 quote_literal( facet_text ) ||
225 WHEN normalizer.param_count > 0
226 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
238 DROP FUNCTION biblio.extract_metabib_field_entry(bigint, text);
239 DROP FUNCTION biblio.extract_metabib_field_entry(bigint);
241 DROP TYPE metabib.field_entry_template;
242 CREATE TYPE metabib.field_entry_template AS (
253 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
255 bib biblio.record_entry%ROWTYPE;
256 idx config.metabib_field%ROWTYPE;
257 xfrm config.xml_transform%ROWTYPE;
259 transformed_xml TEXT;
261 xml_node_list TEXT[];
266 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
267 output_row metabib.field_entry_template%ROWTYPE;
271 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
273 -- Loop over the indexing entries
274 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
276 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
278 -- See if we can skip the XSLT ... it's expensive
279 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
280 -- Can't skip the transform
281 IF xfrm.xslt <> '---' THEN
282 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
284 transformed_xml := bib.marc;
287 prev_xfrm := xfrm.name;
290 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
293 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
294 CONTINUE WHEN xml_node !~ E'^\\s*<';
296 curr_text := ARRAY_TO_STRING(
297 oils_xpath( '//text()',
298 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
299 REGEXP_REPLACE( -- This escapes embeded <s
301 $re$(>[^<]+)(<)([^>]+<)$re$,
313 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
315 IF raw_text IS NOT NULL THEN
316 raw_text := raw_text || joiner;
319 raw_text := COALESCE(raw_text,'') || curr_text;
321 -- autosuggest/metabib.browse_entry
322 IF idx.browse_field THEN
324 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
325 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
327 browse_text := curr_text;
330 output_row.field_class = idx.field_class;
331 output_row.field = idx.id;
332 output_row.source = rid;
333 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
335 output_row.browse_field = TRUE;
336 RETURN NEXT output_row;
337 output_row.browse_field = FALSE;
340 -- insert raw node text for faceting
341 IF idx.facet_field THEN
343 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
344 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
346 facet_text := curr_text;
349 output_row.field_class = idx.field_class;
350 output_row.field = -1 * idx.id;
351 output_row.source = rid;
352 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
354 output_row.facet_field = TRUE;
355 RETURN NEXT output_row;
356 output_row.facet_field = FALSE;
361 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
363 -- insert combined node text for searching
364 IF idx.search_field THEN
365 output_row.field_class = idx.field_class;
366 output_row.field = idx.id;
367 output_row.source = rid;
368 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
370 output_row.search_field = TRUE;
371 RETURN NEXT output_row;
377 $func$ LANGUAGE PLPGSQL;
379 -- default to a space joiner
380 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
381 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
385 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$
388 ind_data metabib.field_entry_template%ROWTYPE;
389 mbe_row metabib.browse_entry%ROWTYPE;
392 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
394 FOR fclass IN SELECT * FROM config.metabib_class LOOP
395 -- RAISE NOTICE 'Emptying out %', fclass.name;
396 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
398 DELETE FROM metabib.facet_entry WHERE source = bib_id;
399 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
402 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
403 IF ind_data.field < 0 THEN
404 ind_data.field = -1 * ind_data.field;
407 IF ind_data.facet_field THEN
408 INSERT INTO metabib.facet_entry (field, source, value)
409 VALUES (ind_data.field, ind_data.source, ind_data.value);
412 IF ind_data.browse_field THEN
413 SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
415 mbe_id := mbe_row.id;
417 INSERT INTO metabib.browse_entry (value) VALUES
418 (metabib.browse_normalize(ind_data.value, ind_data.field));
419 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
422 INSERT INTO metabib.browse_entry_def_map (entry, def, source)
423 VALUES (mbe_id, ind_data.field, ind_data.source);
426 IF ind_data.search_field THEN
428 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
430 quote_literal(ind_data.field) || $$, $$ ||
431 quote_literal(ind_data.source) || $$, $$ ||
432 quote_literal(ind_data.value) ||
440 $func$ LANGUAGE PLPGSQL;
442 -- This mimics a specific part of QueryParser, turning the first part of a
443 -- classed search (search_class) into a set of classes and possibly fields.
444 -- search_class might look like "author" or "title|proper" or "ti|uniform"
445 -- or "au" or "au|corporate|personal" or anything like that, where the first
446 -- element of the list you get by separating on the "|" character is either
447 -- a registered class (config.metabib_class) or an alias
448 -- (config.metabib_search_alias), and the rest of any such elements are
449 -- fields (config.metabib_field).
451 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
452 RETURNS SETOF RECORD AS $func$
456 search_part_count INTEGER;
458 registered_class config.metabib_class%ROWTYPE;
459 registered_alias config.metabib_search_alias%ROWTYPE;
460 registered_field config.metabib_field%ROWTYPE;
462 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
464 search_part_count := ARRAY_LENGTH(search_parts, 1);
465 IF search_part_count = 0 THEN
468 SELECT INTO registered_class
469 * FROM config.metabib_class WHERE name = search_parts[1];
471 IF search_part_count < 2 THEN -- all fields
472 rec := (registered_class.name, NULL::INTEGER);
476 FOR field_name IN SELECT *
477 FROM UNNEST(search_parts[2:search_part_count]) LOOP
478 SELECT INTO registered_field
479 * FROM config.metabib_field
480 WHERE name = field_name AND
481 field_class = registered_class.name;
483 rec := (registered_class.name, registered_field.id);
488 -- maybe we have an alias?
489 SELECT INTO registered_alias
490 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
494 IF search_part_count < 2 THEN -- return w/e the alias says
496 registered_alias.field_class, registered_alias.field
501 FOR field_name IN SELECT *
502 FROM UNNEST(search_parts[2:search_part_count]) LOOP
503 SELECT INTO registered_field
504 * FROM config.metabib_field
505 WHERE name = field_name AND
506 field_class = registered_alias.field_class;
509 registered_alias.field_class,
520 $func$ LANGUAGE PLPGSQL;
524 FUNCTION metabib.suggest_browse_entries(
525 query_text TEXT, -- 'foo' or 'foo & ba:*',ready for to_tsquery()
526 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
527 headline_opts TEXT, -- markup options for ts_headline()
528 visibility_org INTEGER,-- null if you don't want opac visibility test
529 query_limit INTEGER,-- use in LIMIT clause of interal query
530 normalization INTEGER -- argument to TS_RANK_CD()
534 bouyant_and_class_match BOOL,
536 field_weight INTEGER,
539 match TEXT -- marked up
543 opac_visibility_join TEXT;
544 search_class_join TEXT;
547 query := TO_TSQUERY('keyword', query_text);
549 IF visibility_org IS NOT NULL THEN
550 opac_visibility_join := '
551 JOIN asset.opac_visible_copies aovc ON (
552 aovc.record = mbedm.source AND
553 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
556 opac_visibility_join := '';
559 -- The following determines whether we only provide suggestsons matching
560 -- the user's selected search_class, or whether we show other suggestions
561 -- too. The reason for MIN() is that for search_classes like
562 -- 'title|proper|uniform' you would otherwise get multiple rows. The
563 -- implication is that if title as a class doesn't have restrict,
564 -- nor does the proper field, but the uniform field does, you're going
565 -- to get 'false' for your overall evaluation of 'should we restrict?'
566 -- To invert that, change from MIN() to MAX().
570 MIN(cmc.restrict::INT) AS restrict_class,
571 MIN(cmf.restrict::INT) AS restrict_field
572 FROM metabib.search_class_to_registered_components(search_class)
573 AS _registered (field_class TEXT, field INT)
575 config.metabib_class cmc ON (cmc.name = _registered.field_class)
577 config.metabib_field cmf ON (cmf.id = _registered.field);
579 -- evaluate 'should we restrict?'
580 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
581 search_class_join := '
583 metabib.search_class_to_registered_components($2)
584 AS _registered (field_class TEXT, field INT) ON (
585 (_registered.field IS NULL AND
586 _registered.field_class = cmf.field_class) OR
587 (_registered.field = cmf.id)
591 search_class_join := '
593 metabib.search_class_to_registered_components($2)
594 AS _registered (field_class TEXT, field INT) ON (
595 _registered.field_class = cmc.name
600 RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT
603 cmc.bouyant AND _registered.field_class IS NOT NULL,
604 _registered.field = cmf.id,
606 TS_RANK_CD(mbe.index_vector, $1, $6),
608 FROM metabib.browse_entry_def_map mbedm
609 JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
610 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
611 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
612 ' || search_class_join || opac_visibility_join ||
613 ' WHERE $1 @@ mbe.index_vector
614 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
616 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
617 ' -- sic, repeat the order by clause in the outer select too
619 query, search_class, headline_opts,
620 visibility_org, query_limit, normalization
624 -- bouyant AND chosen class = match class
625 -- chosen field = match field
632 $func$ LANGUAGE PLPGSQL;
634 -- The advantage of this over the stock regexp_split_to_array() is that it
635 -- won't degrade unicode strings.
636 CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT)
638 return encode_array_literal([split $_[1], $_[0]]);
639 $$ LANGUAGE PLPERLU STRICT IMMUTABLE;
642 -- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize
643 CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
651 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
653 SELECT n.func AS func,
654 n.param_count AS param_count,
656 FROM config.index_normalizer n
657 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
658 WHERE field = NEW.field AND m.pos < 0
660 EXECUTE 'SELECT ' || normalizer.func || '(' ||
661 quote_literal( value ) ||
663 WHEN normalizer.param_count > 0
664 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
674 IF NEW.index_vector = ''::tsvector THEN
678 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
680 SELECT n.func AS func,
681 n.param_count AS param_count,
683 FROM config.index_normalizer n
684 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
685 WHERE field = NEW.field AND m.pos >= 0
687 EXECUTE 'SELECT ' || normalizer.func || '(' ||
688 quote_literal( value ) ||
690 WHEN normalizer.param_count > 0
691 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
699 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
700 value := ARRAY_TO_STRING(
701 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
705 NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);