2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.browse_entry (
187 id BIGSERIAL PRIMARY KEY,
189 index_vector tsvector,
190 sort_value TEXT NOT NULL,
191 UNIQUE(sort_value, value)
195 CREATE INDEX browse_entry_sort_value_idx
196 ON metabib.browse_entry USING BTREE (sort_value);
198 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
199 CREATE TRIGGER metabib_browse_entry_fti_trigger
200 BEFORE INSERT OR UPDATE ON metabib.browse_entry
201 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
204 CREATE TABLE metabib.browse_entry_def_map (
205 id BIGSERIAL PRIMARY KEY,
206 entry BIGINT REFERENCES metabib.browse_entry (id),
207 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
208 source BIGINT REFERENCES biblio.record_entry (id),
209 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
211 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
212 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
213 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
215 CREATE TABLE metabib.browse_entry_simple_heading_map (
216 id BIGSERIAL PRIMARY KEY,
217 entry BIGINT REFERENCES metabib.browse_entry (id),
218 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
220 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
221 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
223 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
228 facet_text := NEW.value;
231 SELECT n.func AS func,
232 n.param_count AS param_count,
234 FROM config.index_normalizer n
235 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
236 WHERE m.field = NEW.field AND m.pos < 0
239 EXECUTE 'SELECT ' || normalizer.func || '(' ||
240 quote_literal( facet_text ) ||
242 WHEN normalizer.param_count > 0
243 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
250 NEW.value = facet_text;
256 CREATE TRIGGER facet_normalize_tgr
257 BEFORE UPDATE OR INSERT ON metabib.facet_entry
258 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
260 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
262 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
267 CREATE TRIGGER facet_force_nfc_tgr
268 BEFORE UPDATE OR INSERT ON metabib.facet_entry
269 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
271 -- DECREMENTING serial starts at -1
272 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
274 CREATE TABLE metabib.uncontrolled_record_attr_value (
275 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
276 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
279 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
281 CREATE VIEW metabib.record_attr_id_map AS
282 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
284 SELECT c.id, c.ctype AS attr, c.code AS value
285 FROM config.coded_value_map c
286 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
288 CREATE VIEW metabib.composite_attr_id_map AS
289 SELECT c.id, c.ctype AS attr, c.code AS value
290 FROM config.coded_value_map c
291 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
293 CREATE VIEW metabib.full_attr_id_map AS
294 SELECT id, attr, value FROM metabib.record_attr_id_map
296 SELECT id, attr, value FROM metabib.composite_attr_id_map;
299 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
300 $_SHARED{metabib_compile_composite_attr_cache} = {}
301 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
302 return exists $_SHARED{metabib_compile_composite_attr_cache};
303 $f$ LANGUAGE PLPERLU;
305 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
306 delete $_SHARED{metabib_compile_composite_attr_cache};
307 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
308 $f$ LANGUAGE PLPERLU;
310 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
311 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
314 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
316 PERFORM metabib.compile_composite_attr_cache_invalidate();
319 $f$ LANGUAGE PLPGSQL;
321 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
323 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
328 my $def = decode_json($json);
330 die("Composite attribute definition not supplied") unless $def;
332 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
334 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
335 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
342 if (ref $d eq 'HASH') { # node or AND
343 if (exists $d->{_attr}) { # it is a node
344 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
345 my $id = spi_exec_prepared(
346 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
350 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
351 return '!' . recurse($$d{_not});
352 } else { # an AND list
353 @list = map { recurse($$d{$_}) } sort keys %$d;
355 } elsif (ref $d eq 'ARRAY') {
357 @list = map { recurse($_) } @$d;
360 @list = grep { defined && $_ ne '' } @list;
362 return '(' . join($j,@list) . ')' if @list;
366 my $val = recurse($def) || undef;
367 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
370 $func$ IMMUTABLE LANGUAGE plperlu;
372 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
373 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
374 $func$ STRICT IMMUTABLE LANGUAGE SQL;
376 CREATE TABLE metabib.record_attr_vector_list (
377 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
378 vlist INT[] NOT NULL -- stores id from ccvm AND murav
380 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
382 /* This becomes a view, and we do sorters differently ...
383 CREATE TABLE metabib.record_attr (
384 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
385 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
387 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
388 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
389 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
393 CREATE TABLE metabib.record_sorter (
394 id BIGSERIAL PRIMARY KEY,
395 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
396 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
399 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
400 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
401 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
404 CREATE TYPE metabib.record_attr_type AS (
409 -- Back-compat view ... we're moving to an INTARRAY world
410 CREATE VIEW metabib.record_attr_flat AS
411 SELECT v.source AS id,
414 FROM metabib.record_attr_vector_list v
415 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
417 SELECT v.source AS id,
420 FROM metabib.record_attr_vector_list v
421 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
423 CREATE VIEW metabib.record_attr AS
424 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
425 FROM metabib.record_attr_flat
426 WHERE attr IS NOT NULL
429 -- Back-back-compat view ... we use to live in an HSTORE world
430 CREATE TYPE metabib.rec_desc_type AS (
448 CREATE VIEW metabib.rec_descriptor AS
451 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
452 FROM metabib.record_attr;
454 -- Use a sequence that matches previous version, for easier upgrading.
455 CREATE SEQUENCE metabib.full_rec_id_seq;
457 CREATE TABLE metabib.real_full_rec (
458 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
459 record BIGINT NOT NULL,
460 tag CHAR(3) NOT NULL,
465 index_vector tsvector NOT NULL
467 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
469 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
470 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
471 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
472 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
473 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
474 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
475 CREATE INDEX metabib_full_rec_isxn_caseless_idx
476 ON metabib.real_full_rec (LOWER(value))
477 WHERE tag IN ('020', '022', '024');
478 -- This next index might fully supplant the one above, but leaving both for now.
479 -- (they are not too large)
480 -- The reason we need this index is to ensure that the query parser always
481 -- prefers this index over the simpler tag/subfield index, as this greatly
482 -- increases Vandelay overlay speed for these identifiers, especially when
483 -- a record has many of these fields (around > 4-6 seems like the cutoff
484 -- on at least one PG9.1 system)
485 -- A similar index could be added for other fields (e.g. 010), but one should
486 -- leave out the LOWER() in all other cases.
487 -- TODO: verify whether we can discard the non tag/subfield/substring version
488 -- above (metabib_full_rec_isxn_caseless_idx)
489 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
490 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
491 WHERE tag IN ('020', '022', '024');
494 CREATE TRIGGER metabib_full_rec_fti_trigger
495 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
496 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
498 CREATE OR REPLACE VIEW metabib.full_rec AS
505 SUBSTRING(value,1,1024) AS value,
507 FROM metabib.real_full_rec;
509 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
510 AS ON INSERT TO metabib.full_rec
512 INSERT INTO metabib.real_full_rec VALUES (
513 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
523 CREATE OR REPLACE RULE metabib_full_rec_update_rule
524 AS ON UPDATE TO metabib.full_rec
526 UPDATE metabib.real_full_rec SET
532 subfield = NEW.subfield,
534 index_vector = NEW.index_vector
537 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
538 AS ON DELETE TO metabib.full_rec
540 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
542 CREATE TABLE metabib.metarecord_source_map (
543 id BIGSERIAL PRIMARY KEY,
544 metarecord BIGINT NOT NULL,
545 source BIGINT NOT NULL
547 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
548 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
550 CREATE TYPE metabib.field_entry_template AS (
563 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
565 bib biblio.record_entry%ROWTYPE;
566 idx config.metabib_field%ROWTYPE;
567 xfrm config.xml_transform%ROWTYPE;
569 transformed_xml TEXT;
571 xml_node_list TEXT[];
577 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
579 authority_link BIGINT;
580 output_row metabib.field_entry_template%ROWTYPE;
583 -- Start out with no field-use bools set
584 output_row.browse_field = FALSE;
585 output_row.facet_field = FALSE;
586 output_row.search_field = FALSE;
589 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
591 -- Loop over the indexing entries
592 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
594 joiner := COALESCE(idx.joiner, default_joiner);
596 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
598 -- See if we can skip the XSLT ... it's expensive
599 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
600 -- Can't skip the transform
601 IF xfrm.xslt <> '---' THEN
602 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
604 transformed_xml := bib.marc;
607 prev_xfrm := xfrm.name;
610 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
613 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
614 CONTINUE WHEN xml_node !~ E'^\\s*<';
616 -- XXX much of this should be moved into oils_xpath_string...
617 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
618 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
619 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
620 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
624 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
626 IF raw_text IS NOT NULL THEN
627 raw_text := raw_text || joiner;
630 raw_text := COALESCE(raw_text,'') || curr_text;
632 -- autosuggest/metabib.browse_entry
633 IF idx.browse_field THEN
635 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
636 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
638 browse_text := curr_text;
641 IF idx.browse_sort_xpath IS NOT NULL AND
642 idx.browse_sort_xpath <> '' THEN
644 sort_value := oils_xpath_string(
645 idx.browse_sort_xpath, xml_node, joiner,
646 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
649 sort_value := browse_text;
652 output_row.field_class = idx.field_class;
653 output_row.field = idx.id;
654 output_row.source = rid;
655 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
656 output_row.sort_value :=
657 public.naco_normalize(sort_value);
659 output_row.authority := NULL;
661 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
662 authority_text := oils_xpath_string(
663 idx.authority_xpath, xml_node, joiner,
665 ARRAY[xfrm.prefix, xfrm.namespace_uri],
666 ARRAY['xlink','http://www.w3.org/1999/xlink']
670 IF authority_text ~ '^\d+$' THEN
671 authority_link := authority_text::BIGINT;
672 PERFORM * FROM authority.record_entry WHERE id = authority_link;
674 output_row.authority := authority_link;
680 output_row.browse_field = TRUE;
681 -- Returning browse rows with search_field = true for search+browse
682 -- configs allows us to retain granularity of being able to search
683 -- browse fields with "starts with" type operators (for example, for
684 -- titles of songs in music albums)
685 IF idx.search_field THEN
686 output_row.search_field = TRUE;
688 RETURN NEXT output_row;
689 output_row.browse_field = FALSE;
690 output_row.search_field = FALSE;
691 output_row.sort_value := NULL;
694 -- insert raw node text for faceting
695 IF idx.facet_field THEN
697 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
698 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
700 facet_text := curr_text;
703 output_row.field_class = idx.field_class;
704 output_row.field = -1 * idx.id;
705 output_row.source = rid;
706 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
708 output_row.facet_field = TRUE;
709 RETURN NEXT output_row;
710 output_row.facet_field = FALSE;
715 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
717 -- insert combined node text for searching
718 IF idx.search_field THEN
719 output_row.field_class = idx.field_class;
720 output_row.field = idx.id;
721 output_row.source = rid;
722 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
724 output_row.search_field = TRUE;
725 RETURN NEXT output_row;
726 output_row.search_field = FALSE;
733 $func$ LANGUAGE PLPGSQL;
735 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
737 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
738 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
739 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
740 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
741 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
742 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
743 FROM metabib.keyword_field_entry WHERE source = bib_id;
745 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
746 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
747 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
748 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
749 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
750 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
751 FROM metabib.title_field_entry WHERE source = bib_id;
753 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
754 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
755 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
756 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
757 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
758 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
759 FROM metabib.author_field_entry WHERE source = bib_id;
761 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
762 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
763 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
764 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
765 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
766 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
767 FROM metabib.subject_field_entry WHERE source = bib_id;
769 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
770 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
771 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
772 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
773 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
774 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
775 FROM metabib.series_field_entry WHERE source = bib_id;
777 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
778 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
779 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
780 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
781 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
782 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
783 FROM metabib.identifier_field_entry WHERE source = bib_id;
786 $func$ LANGUAGE PLPGSQL;
788 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
791 ind_data metabib.field_entry_template%ROWTYPE;
792 mbe_row metabib.browse_entry%ROWTYPE;
800 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
801 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
802 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
804 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
806 IF NOT b_skip_search THEN
807 FOR fclass IN SELECT * FROM config.metabib_class LOOP
808 -- RAISE NOTICE 'Emptying out %', fclass.name;
809 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
812 IF NOT b_skip_facet THEN
813 DELETE FROM metabib.facet_entry WHERE source = bib_id;
815 IF NOT b_skip_browse THEN
816 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
820 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
822 -- don't store what has been normalized away
823 CONTINUE WHEN ind_data.value IS NULL;
825 IF ind_data.field < 0 THEN
826 ind_data.field = -1 * ind_data.field;
829 IF ind_data.facet_field AND NOT b_skip_facet THEN
830 INSERT INTO metabib.facet_entry (field, source, value)
831 VALUES (ind_data.field, ind_data.source, ind_data.value);
834 IF ind_data.browse_field AND NOT b_skip_browse THEN
835 -- A caveat about this SELECT: this should take care of replacing
836 -- old mbe rows when data changes, but not if normalization (by
837 -- which I mean specifically the output of
838 -- evergreen.oils_tsearch2()) changes. It may or may not be
839 -- expensive to add a comparison of index_vector to index_vector
840 -- to the WHERE clause below.
842 CONTINUE WHEN ind_data.sort_value IS NULL;
844 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
845 SELECT INTO mbe_row * FROM metabib.browse_entry
846 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
849 mbe_id := mbe_row.id;
851 INSERT INTO metabib.browse_entry
852 ( value, sort_value ) VALUES
853 ( value_prepped, ind_data.sort_value );
855 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
858 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
859 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
862 IF ind_data.search_field AND NOT b_skip_search THEN
863 -- Avoid inserting duplicate rows
864 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
865 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
866 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
867 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
868 IF mbe_id IS NULL THEN
870 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
872 quote_literal(ind_data.field) || $$, $$ ||
873 quote_literal(ind_data.source) || $$, $$ ||
874 quote_literal(ind_data.value) ||
881 IF NOT b_skip_search THEN
882 PERFORM metabib.update_combined_index_vectors(bib_id);
887 $func$ LANGUAGE PLPGSQL;
889 -- default to a space joiner
890 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
891 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
894 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
896 auth authority.record_entry%ROWTYPE;
897 output authority.full_rec%ROWTYPE;
900 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
902 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
903 output.record := rid;
904 output.ind1 := field.ind1;
905 output.ind2 := field.ind2;
906 output.tag := field.tag;
907 output.subfield := field.subfield;
908 output.value := field.value;
913 $func$ LANGUAGE PLPGSQL;
915 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
917 bib biblio.record_entry%ROWTYPE;
918 output metabib.full_rec%ROWTYPE;
921 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
923 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
924 output.record := rid;
925 output.ind1 := field.ind1;
926 output.ind2 := field.ind2;
927 output.tag := field.tag;
928 output.subfield := field.subfield;
929 output.value := field.value;
934 $func$ LANGUAGE PLPGSQL;
936 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
937 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
940 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
941 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
944 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
945 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
948 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
949 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
952 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
965 IF marc IS NULL OR marc = '' THEN
969 -- First, the count of tags
970 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
972 -- now go through a bunch of pain to get the record type
973 IF best_type IS NOT NULL THEN
974 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
976 IF ldr IS NOT NULL THEN
977 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
978 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
981 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
982 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
984 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
986 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
988 IF type_map.code IS NOT NULL THEN
989 IF best_type = type_map.code THEN
990 qual := qual + qual / 2;
993 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
994 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
995 IF ff_tag_data = best_lang THEN
1003 -- Now look for some quality metrics
1005 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1010 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1017 $func$ LANGUAGE PLPGSQL;
1019 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1021 idx config.biblio_fingerprint%ROWTYPE;
1022 xfrm config.xml_transform%ROWTYPE;
1024 transformed_xml TEXT;
1026 xml_node_list TEXT[];
1028 output_text TEXT := '';
1031 IF marc IS NULL OR marc = '' THEN
1035 -- Loop over the indexing entries
1036 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1038 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1040 -- See if we can skip the XSLT ... it's expensive
1041 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1042 -- Can't skip the transform
1043 IF xfrm.xslt <> '---' THEN
1044 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1046 transformed_xml := marc;
1049 prev_xfrm := xfrm.name;
1052 raw_text := COALESCE(
1060 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1069 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1070 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1072 IF idx.first_word IS TRUE THEN
1073 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1076 output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
1083 $func$ LANGUAGE PLPGSQL;
1085 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1086 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1089 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1091 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1095 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1096 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1101 $func$ LANGUAGE PLPGSQL;
1103 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1105 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1107 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1109 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1110 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1114 $func$ LANGUAGE PLPGSQL;
1116 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1123 uri_owner_list TEXT[];
1131 -- Clear any URI mappings and call numbers for this bib.
1132 -- This leads to acn / auricnm inflation, but also enables
1133 -- old acn/auricnm's to go away and for bibs to be deleted.
1134 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1135 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1136 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1139 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1140 IF ARRAY_UPPER(uris,1) > 0 THEN
1141 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1142 -- First we pull info out of the 856
1145 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1146 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1147 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1149 IF uri_label IS NULL THEN
1150 uri_label := uri_href;
1152 CONTINUE WHEN uri_href IS NULL;
1154 -- Get the distinct list of libraries wanting to use
1156 DISTINCT REGEXP_REPLACE(
1158 $re$^.*?\((\w+)\).*$$re$,
1161 ) INTO uri_owner_list
1164 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1169 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1171 -- look for a matching uri
1172 IF uri_use IS NULL THEN
1173 SELECT id INTO uri_id
1175 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1176 ORDER BY id LIMIT 1;
1177 IF NOT FOUND THEN -- create one
1178 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1179 SELECT id INTO uri_id
1181 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1184 SELECT id INTO uri_id
1186 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1187 ORDER BY id LIMIT 1;
1188 IF NOT FOUND THEN -- create one
1189 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1190 SELECT id INTO uri_id
1192 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1196 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1197 uri_owner := uri_owner_list[j];
1199 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1200 CONTINUE WHEN NOT FOUND;
1202 -- we need a call number to link through
1203 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1205 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1206 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1207 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1210 -- now, link them if they're not already
1211 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1213 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1225 $func$ LANGUAGE PLPGSQL;
1227 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1229 new_mapping BOOL := TRUE;
1232 tmp_mr metabib.metarecord%ROWTYPE;
1233 deleted_mrs BIGINT[];
1236 -- We need to make sure we're not a deleted master record of an MR
1237 IF bib_is_deleted THEN
1238 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1240 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1241 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1244 -- Now, are there any more sources on this MR?
1245 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1247 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1248 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1249 DELETE FROM metabib.metarecord WHERE id = old_mr;
1251 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1252 UPDATE metabib.metarecord
1254 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1259 ELSE -- insert or update
1261 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1263 -- Find the first fingerprint-matching
1264 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1265 old_mr := tmp_mr.id;
1266 new_mapping := FALSE;
1268 ELSE -- Our fingerprint changed ... maybe remove the old MR
1269 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1270 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1271 IF source_count = 0 THEN -- No other records
1272 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1273 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1279 -- we found no suitable, preexisting MR based on old source maps
1280 IF old_mr IS NULL THEN
1281 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1283 IF old_mr IS NULL THEN -- nope, create one and grab its id
1284 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1285 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1287 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1288 UPDATE metabib.metarecord
1290 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1294 ELSE -- there was one we already attached to, update its mods cache and master_record
1295 UPDATE metabib.metarecord
1297 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1302 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1307 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1308 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1314 $func$ LANGUAGE PLPGSQL;
1317 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1318 DELETE FROM authority.bib_linking WHERE bib = $1;
1319 INSERT INTO authority.bib_linking (bib, authority)
1322 FROM ( SELECT DISTINCT $1 AS bib,
1323 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1324 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1325 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1326 ) y JOIN authority.record_entry r ON r.id = y.authority;
1328 $func$ LANGUAGE SQL;
1330 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1332 transformed_xml TEXT;
1333 rmarc TEXT := prmarc;
1337 xfrm config.xml_transform%ROWTYPE;
1338 attr_vector INT[] := '{}'::INT[];
1339 attr_vector_tmp INT[];
1340 attr_list TEXT[] := pattr_list;
1342 norm_attr_value TEXT[];
1344 attr_def config.record_attr_definition%ROWTYPE;
1345 ccvm_row config.coded_value_map%ROWTYPE;
1348 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1349 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1352 fixed_field IS NOT NULL OR
1353 xpath IS NOT NULL OR
1354 phys_char_sf IS NOT NULL OR
1361 IF rmarc IS NULL THEN
1362 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1365 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1367 attr_value := '{}'::TEXT[];
1368 norm_attr_value := '{}'::TEXT[];
1369 attr_vector_tmp := '{}'::INT[];
1371 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1373 -- tag+sf attrs only support SVF
1374 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1375 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1376 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1378 AND tag LIKE attr_def.tag
1380 WHEN attr_def.sf_list IS NOT NULL
1381 THEN POSITION(subfield IN attr_def.sf_list) > 0
1388 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1389 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1391 IF NOT attr_def.multi THEN
1392 attr_value := ARRAY[attr_value[1]];
1395 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1397 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1399 -- See if we can skip the XSLT ... it's expensive
1400 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1401 -- Can't skip the transform
1402 IF xfrm.xslt <> '---' THEN
1403 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1405 transformed_xml := rmarc;
1408 prev_xfrm := xfrm.name;
1411 IF xfrm.name IS NULL THEN
1412 -- just grab the marcxml (empty) transform
1413 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1414 prev_xfrm := xfrm.name;
1417 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1418 tmp_val := oils_xpath_string(
1421 COALESCE(attr_def.joiner,' '),
1422 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1424 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1425 attr_value := attr_value || tmp_val;
1426 EXIT WHEN NOT attr_def.multi;
1430 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1431 SELECT ARRAY_AGG(m.value) INTO attr_value
1432 FROM vandelay.marc21_physical_characteristics(rmarc) v
1433 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1434 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1435 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1437 IF NOT attr_def.multi THEN
1438 attr_value := ARRAY[attr_value[1]];
1443 -- apply index normalizers to attr_value
1444 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1446 SELECT n.func AS func,
1447 n.param_count AS param_count,
1449 FROM config.index_normalizer n
1450 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1451 WHERE attr = attr_def.name
1453 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1454 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1456 WHEN normalizer.param_count > 0
1457 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1463 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1464 -- note that a string that contains only blanks
1465 -- is a valid value for some attributes
1466 norm_attr_value := norm_attr_value || tmp_val;
1470 IF attr_def.filter THEN
1471 -- Create unknown uncontrolled values and find the IDs of the values
1472 IF ccvm_row.id IS NULL THEN
1473 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1474 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1475 BEGIN -- use subtransaction to isolate unique constraint violations
1476 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1477 EXCEPTION WHEN unique_violation THEN END;
1481 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1483 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1486 -- Add the new value to the vector
1487 attr_vector := attr_vector || attr_vector_tmp;
1490 IF attr_def.sorter THEN
1491 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1492 IF norm_attr_value[1] IS NOT NULL THEN
1493 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1499 /* We may need to rewrite the vlist to contain
1500 the intersection of new values for requested
1501 attrs and old values for ignored attrs. To
1502 do this, we take the old attr vlist and
1503 subtract any values that are valid for the
1504 requested attrs, and then add back the new
1505 set of attr values. */
1507 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1508 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1509 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1510 attr_vector := attr_vector || attr_vector_tmp;
1513 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1514 -- attributes can depend on earlier ones.
1515 PERFORM metabib.compile_composite_attr_cache_init();
1516 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1518 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1520 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1521 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1523 IF attr_def.filter THEN
1524 IF attr_vector @@ tmp_val::query_int THEN
1525 attr_vector = attr_vector + intset(ccvm_row.id);
1526 EXIT WHEN NOT attr_def.multi;
1530 IF attr_def.sorter THEN
1531 IF attr_vector @@ tmp_val THEN
1532 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1533 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1541 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1542 IF rdeleted THEN -- initial insert OR revivication
1543 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1544 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1546 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1552 $func$ LANGUAGE PLPGSQL;
1555 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1556 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1561 IF NEW.deleted THEN -- If this bib is deleted
1563 PERFORM * FROM config.internal_flag WHERE
1564 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1566 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1568 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1570 IF NOT tmp_bool THEN
1571 -- One needs to keep these around to support searches
1572 -- with the #deleted modifier, so one should turn on the named
1573 -- internal flag for that functionality.
1574 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1577 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1578 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1579 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1580 RETURN NEW; -- and we're done
1583 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1584 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1586 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1591 -- Record authority linking
1592 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1594 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1597 -- Flatten and insert the mfr data
1598 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1600 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1602 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1603 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1605 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1609 -- Gather and insert the field entry data
1610 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1612 -- Located URI magic
1613 IF TG_OP = 'INSERT' THEN
1614 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1616 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1619 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1621 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1625 -- (re)map metarecord-bib linking
1626 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1627 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1629 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1631 ELSE -- we're doing an update, and we're not deleted, remap
1632 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1634 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1640 $func$ LANGUAGE PLPGSQL;
1642 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1648 SELECT n.func AS func,
1649 n.param_count AS param_count,
1651 FROM config.index_normalizer n
1652 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1653 WHERE m.field = mapped_field AND m.pos < 0
1656 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1657 quote_literal( facet_text ) ||
1659 WHEN normalizer.param_count > 0
1660 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1663 ')' INTO facet_text;
1670 $$ LANGUAGE PLPGSQL;
1673 -- This mimics a specific part of QueryParser, turning the first part of a
1674 -- classed search (search_class) into a set of classes and possibly fields.
1675 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1676 -- or "au" or "au|corporate|personal" or anything like that, where the first
1677 -- element of the list you get by separating on the "|" character is either
1678 -- a registered class (config.metabib_class) or an alias
1679 -- (config.metabib_search_alias), and the rest of any such elements are
1680 -- fields (config.metabib_field).
1682 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1683 RETURNS SETOF RECORD AS $func$
1685 search_parts TEXT[];
1687 search_part_count INTEGER;
1689 registered_class config.metabib_class%ROWTYPE;
1690 registered_alias config.metabib_search_alias%ROWTYPE;
1691 registered_field config.metabib_field%ROWTYPE;
1693 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1695 search_part_count := ARRAY_LENGTH(search_parts, 1);
1696 IF search_part_count = 0 THEN
1699 SELECT INTO registered_class
1700 * FROM config.metabib_class WHERE name = search_parts[1];
1702 IF search_part_count < 2 THEN -- all fields
1703 rec := (registered_class.name, NULL::INTEGER);
1707 FOR field_name IN SELECT *
1708 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1709 SELECT INTO registered_field
1710 * FROM config.metabib_field
1711 WHERE name = field_name AND
1712 field_class = registered_class.name;
1714 rec := (registered_class.name, registered_field.id);
1719 -- maybe we have an alias?
1720 SELECT INTO registered_alias
1721 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1725 IF search_part_count < 2 THEN -- return w/e the alias says
1727 registered_alias.field_class, registered_alias.field
1732 FOR field_name IN SELECT *
1733 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1734 SELECT INTO registered_field
1735 * FROM config.metabib_field
1736 WHERE name = field_name AND
1737 field_class = registered_alias.field_class;
1740 registered_alias.field_class,
1751 $func$ LANGUAGE PLPGSQL ROWS 1;
1754 -- Given a string such as a user might type into a search box, prepare
1755 -- two changed variants for TO_TSQUERY(). See
1756 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1757 -- The first variant is normalized to match indexed documents regardless
1758 -- of diacritics. The second variant keeps its diacritics for proper
1759 -- highlighting via TS_HEADLINE().
1761 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1764 orig_ended_in_space BOOLEAN;
1769 orig_ended_in_space := orig ~ E'\\s$';
1771 orig := ARRAY_TO_STRING(
1772 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1775 normalized := public.naco_normalize(orig); -- also trim()s
1776 plain := trim(orig);
1778 IF NOT orig_ended_in_space THEN
1779 plain := plain || ':*';
1780 normalized := normalized || ':*';
1783 plain := ARRAY_TO_STRING(
1784 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1786 normalized := ARRAY_TO_STRING(
1787 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1790 RETURN ARRAY[normalized, plain];
1792 $$ LANGUAGE PLPGSQL;
1796 FUNCTION metabib.suggest_browse_entries(
1797 raw_query_text TEXT, -- actually typed by humans at the UI level
1798 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
1799 headline_opts TEXT, -- markup options for ts_headline()
1800 visibility_org INTEGER,-- null if you don't want opac visibility test
1801 query_limit INTEGER,-- use in LIMIT clause of interal query
1802 normalization INTEGER -- argument to TS_RANK_CD()
1804 value TEXT, -- plain
1806 buoyant_and_class_match BOOL,
1808 field_weight INTEGER,
1811 match TEXT -- marked up
1814 prepared_query_texts TEXT[];
1816 plain_query TSQUERY;
1817 opac_visibility_join TEXT;
1818 search_class_join TEXT;
1821 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
1823 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
1824 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
1826 visibility_org := NULLIF(visibility_org,-1);
1827 IF visibility_org IS NOT NULL THEN
1828 opac_visibility_join := '
1829 JOIN asset.opac_visible_copies aovc ON (
1830 aovc.record = x.source AND
1831 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
1834 opac_visibility_join := '';
1837 -- The following determines whether we only provide suggestsons matching
1838 -- the user's selected search_class, or whether we show other suggestions
1839 -- too. The reason for MIN() is that for search_classes like
1840 -- 'title|proper|uniform' you would otherwise get multiple rows. The
1841 -- implication is that if title as a class doesn't have restrict,
1842 -- nor does the proper field, but the uniform field does, you're going
1843 -- to get 'false' for your overall evaluation of 'should we restrict?'
1844 -- To invert that, change from MIN() to MAX().
1848 MIN(cmc.restrict::INT) AS restrict_class,
1849 MIN(cmf.restrict::INT) AS restrict_field
1850 FROM metabib.search_class_to_registered_components(search_class)
1851 AS _registered (field_class TEXT, field INT)
1853 config.metabib_class cmc ON (cmc.name = _registered.field_class)
1855 config.metabib_field cmf ON (cmf.id = _registered.field);
1857 -- evaluate 'should we restrict?'
1858 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
1859 search_class_join := '
1861 metabib.search_class_to_registered_components($2)
1862 AS _registered (field_class TEXT, field INT) ON (
1863 (_registered.field IS NULL AND
1864 _registered.field_class = cmf.field_class) OR
1865 (_registered.field = cmf.id)
1869 search_class_join := '
1871 metabib.search_class_to_registered_components($2)
1872 AS _registered (field_class TEXT, field INT) ON (
1873 _registered.field_class = cmc.name
1878 RETURN QUERY EXECUTE '
1887 TS_HEADLINE(value, $7, $3)
1888 FROM (SELECT DISTINCT
1891 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
1892 _registered.field = cmf.id AS restrict,
1894 TS_RANK_CD(mbe.index_vector, $1, $6),
1897 FROM metabib.browse_entry_def_map mbedm
1898 JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
1899 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
1900 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
1901 ' || search_class_join || '
1902 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1904 ' || opac_visibility_join || '
1905 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1907 ' -- sic, repeat the order by clause in the outer select too
1909 query, search_class, headline_opts,
1910 visibility_org, query_limit, normalization, plain_query
1914 -- buoyant AND chosen class = match class
1915 -- chosen field = match field
1922 $func$ LANGUAGE PLPGSQL;
1924 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1928 temp_vector TEXT := '';
1934 NEW.index_vector = ''::tsvector;
1936 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1938 SELECT n.func AS func,
1939 n.param_count AS param_count,
1941 FROM config.index_normalizer n
1942 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1943 WHERE field = NEW.field AND m.pos < 0
1945 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1946 quote_literal( value ) ||
1948 WHEN normalizer.param_count > 0
1949 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1959 SELECT n.func AS func,
1960 n.param_count AS param_count,
1962 FROM config.index_normalizer n
1963 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1964 WHERE field = NEW.field AND m.pos >= 0
1966 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1967 quote_literal( value ) ||
1969 WHEN normalizer.param_count > 0
1970 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1978 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
1980 value := ARRAY_TO_STRING(
1981 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
1983 value := public.search_normalize(value);
1984 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
1986 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1989 SELECT DISTINCT m.ts_config, m.index_weight
1990 FROM config.metabib_class_ts_map m
1991 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1992 LEFT JOIN config.coded_value_map ccvm ON (
1993 ccvm.ctype IN ('item_lang', 'language') AND
1994 ccvm.code = m.index_lang AND
1995 r.vlist @> intset(ccvm.id)
1997 WHERE m.field_class = TG_ARGV[0]
1999 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
2000 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2002 SELECT DISTINCT m.ts_config, m.index_weight
2003 FROM config.metabib_field_ts_map m
2004 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
2005 LEFT JOIN config.coded_value_map ccvm ON (
2006 ccvm.ctype IN ('item_lang', 'language') AND
2007 ccvm.code = m.index_lang AND
2008 r.vlist @> intset(ccvm.id)
2010 WHERE m.metabib_field = NEW.field
2012 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2013 ORDER BY index_weight ASC
2017 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2018 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2022 cur_weight = ts_rec.index_weight;
2023 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2026 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2028 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2033 $$ LANGUAGE PLPGSQL;
2036 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2037 browse_entry BIGINT,
2042 sources INT, -- visible ones, that is
2043 asources INT, -- visible ones, that is
2044 row_number INT, -- internal use, sort of
2045 accurate BOOL, -- Count in sources field is accurate? Not
2046 -- if we had more than a browse superpage
2047 -- of records to look at.
2048 aaccurate BOOL, -- See previous comment...
2053 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2056 ) RETURNS BIGINT AS $p$
2058 FROM metabib.browse_entry mbe
2059 JOIN metabib.browse_entry_def_map mbedm ON (
2060 mbedm.entry = mbe.id
2061 AND mbedm.def = ANY($1)
2063 WHERE mbe.sort_value >= public.naco_normalize($2)
2064 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2065 $p$ LANGUAGE SQL STABLE;
2067 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2070 ) RETURNS BIGINT AS $p$
2072 FROM metabib.browse_entry mbe
2073 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2074 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2075 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2076 ash.atag = map.authority_field
2077 AND map.metabib_field = ANY($1)
2079 WHERE mbe.sort_value >= public.naco_normalize($2)
2080 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2081 $p$ LANGUAGE SQL STABLE;
2083 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2086 ) RETURNS BIGINT AS $p$
2088 FROM metabib.browse_entry mbe
2089 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2090 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2091 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2092 ash.atag = map.authority_field
2093 AND map.metabib_field = ANY($1)
2095 WHERE mbe.sort_value >= public.naco_normalize($2)
2096 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2097 $p$ LANGUAGE SQL STABLE;
2099 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2102 ) RETURNS BIGINT AS $p$
2103 SELECT id FROM metabib.browse_entry
2105 metabib.browse_bib_pivot($1, $2),
2106 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2108 ORDER BY sort_value, value LIMIT 1;
2109 $p$ LANGUAGE SQL STABLE;
2112 CREATE OR REPLACE FUNCTION metabib.staged_browse(
2116 context_locations INT[],
2118 browse_superpage_size INT,
2119 count_up_from_zero BOOL, -- if false, count down from -1
2122 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2130 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
2131 results_skipped INT := 0;
2132 row_counter INT := 0;
2137 all_records BIGINT[];
2138 all_brecords BIGINT[];
2139 all_arecords BIGINT[];
2140 superpage_of_records BIGINT[];
2143 IF count_up_from_zero THEN
2149 OPEN curs FOR EXECUTE query;
2152 FETCH curs INTO rec;
2154 IF result_row.pivot_point IS NOT NULL THEN
2155 RETURN NEXT result_row;
2161 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
2162 SELECT INTO all_arecords, result_row.sees, afields
2163 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
2164 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
2165 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
2167 FROM metabib.browse_entry_simple_heading_map mbeshm
2168 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2169 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
2170 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
2171 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2172 ash.atag = map.authority_field
2173 AND map.metabib_field = ANY(fields)
2175 WHERE mbeshm.entry = rec.id;
2178 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
2179 SELECT INTO all_brecords, result_row.authorities, bfields
2180 ARRAY_AGG(DISTINCT source),
2181 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
2182 ARRAY_AGG(DISTINCT def)
2183 FROM metabib.browse_entry_def_map
2184 WHERE entry = rec.id
2185 AND def = ANY(fields);
2187 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
2189 result_row.sources := 0;
2190 result_row.asources := 0;
2192 -- Bib-linked vis checking
2193 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
2195 full_end := ARRAY_LENGTH(all_brecords, 1);
2196 superpage_size := COALESCE(browse_superpage_size, full_end);
2198 slice_end := superpage_size;
2200 WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
2201 superpage_of_records := all_brecords[slice_start:slice_end];
2203 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2204 'NULL AS badges, NULL::NUMERIC AS popularity, ' ||
2205 '1::NUMERIC AS rel FROM (SELECT UNNEST(' ||
2206 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2208 -- We use search.query_parser_fts() for visibility testing.
2209 -- We're calling it once per browse-superpage worth of records
2210 -- out of the set of records related to a given mbe, until we've
2211 -- either exhausted that set of records or found at least 1
2214 SELECT INTO result_row.sources visible
2215 FROM search.query_parser_fts(
2216 context_org, NULL, qpfts_query, NULL,
2217 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2219 WHERE qpfts.rel IS NULL;
2221 slice_start := slice_start + superpage_size;
2222 slice_end := slice_end + superpage_size;
2225 -- Accurate? Well, probably.
2226 result_row.accurate := browse_superpage_size IS NULL OR
2227 browse_superpage_size >= full_end;
2231 -- Authority-linked vis checking
2232 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
2234 full_end := ARRAY_LENGTH(all_arecords, 1);
2235 superpage_size := COALESCE(browse_superpage_size, full_end);
2237 slice_end := superpage_size;
2239 WHILE result_row.asources = 0 AND slice_start <= full_end LOOP
2240 superpage_of_records := all_arecords[slice_start:slice_end];
2242 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2243 'NULL AS badges, NULL::NUMERIC AS popularity, ' ||
2244 '1::NUMERIC AS rel FROM (SELECT UNNEST(' ||
2245 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2247 -- We use search.query_parser_fts() for visibility testing.
2248 -- We're calling it once per browse-superpage worth of records
2249 -- out of the set of records related to a given mbe, via
2250 -- authority until we've either exhausted that set of records
2251 -- or found at least 1 visible record.
2253 SELECT INTO result_row.asources visible
2254 FROM search.query_parser_fts(
2255 context_org, NULL, qpfts_query, NULL,
2256 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2258 WHERE qpfts.rel IS NULL;
2260 slice_start := slice_start + superpage_size;
2261 slice_end := slice_end + superpage_size;
2265 -- Accurate? Well, probably.
2266 result_row.aaccurate := browse_superpage_size IS NULL OR
2267 browse_superpage_size >= full_end;
2271 IF result_row.sources > 0 OR result_row.asources > 0 THEN
2273 -- The function that calls this function needs row_number in order
2274 -- to correctly order results from two different runs of this
2276 result_row.row_number := row_number;
2278 -- Now, if row_counter is still less than limit, return a row. If
2279 -- not, but it is less than next_pivot_pos, continue on without
2280 -- returning actual result rows until we find
2281 -- that next pivot, and return it.
2283 IF row_counter < result_limit THEN
2284 result_row.browse_entry := rec.id;
2285 result_row.value := rec.value;
2287 RETURN NEXT result_row;
2289 result_row.browse_entry := NULL;
2290 result_row.authorities := NULL;
2291 result_row.fields := NULL;
2292 result_row.value := NULL;
2293 result_row.sources := NULL;
2294 result_row.sees := NULL;
2295 result_row.accurate := NULL;
2296 result_row.aaccurate := NULL;
2297 result_row.pivot_point := rec.id;
2299 IF row_counter >= next_pivot_pos THEN
2300 RETURN NEXT result_row;
2305 IF count_up_from_zero THEN
2306 row_number := row_number + 1;
2308 row_number := row_number - 1;
2311 -- row_counter is different from row_number.
2312 -- It simply counts up from zero so that we know when
2313 -- we've reached our limit.
2314 row_counter := row_counter + 1;
2318 $p$ LANGUAGE PLPGSQL;
2321 CREATE OR REPLACE FUNCTION metabib.browse(
2324 context_org INT DEFAULT NULL,
2325 context_loc_group INT DEFAULT NULL,
2326 staff BOOL DEFAULT FALSE,
2327 pivot_id BIGINT DEFAULT NULL,
2328 result_limit INT DEFAULT 10
2329 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2334 pivot_sort_value TEXT;
2335 pivot_sort_fallback TEXT;
2336 context_locations INT[];
2337 browse_superpage_size INT;
2338 results_skipped INT := 0;
2342 forward_to_pivot INT;
2344 -- First, find the pivot if we were given a browse term but not a pivot.
2345 IF pivot_id IS NULL THEN
2346 pivot_id := metabib.browse_pivot(search_field, browse_term);
2349 SELECT INTO pivot_sort_value, pivot_sort_fallback
2350 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
2352 -- Bail if we couldn't find a pivot.
2353 IF pivot_sort_value IS NULL THEN
2357 -- Transform the context_loc_group argument (if any) (logc at the
2358 -- TPAC layer) into a form we'll be able to use.
2359 IF context_loc_group IS NOT NULL THEN
2360 SELECT INTO context_locations ARRAY_AGG(location)
2361 FROM asset.copy_location_group_map
2362 WHERE lgroup = context_loc_group;
2365 -- Get the configured size of browse superpages.
2366 SELECT INTO browse_superpage_size value -- NULL ok
2367 FROM config.global_flag
2368 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
2370 -- First we're going to search backward from the pivot, then we're going
2371 -- to search forward. In each direction, we need two limits. At the
2372 -- lesser of the two limits, we delineate the edge of the result set
2373 -- we're going to return. At the greater of the two limits, we find the
2374 -- pivot value that would represent an offset from the current pivot
2375 -- at a distance of one "page" in either direction, where a "page" is a
2376 -- result set of the size specified in the "result_limit" argument.
2378 -- The two limits in each direction make four derived values in total,
2379 -- and we calculate them now.
2380 back_limit := CEIL(result_limit::FLOAT / 2);
2381 back_to_pivot := result_limit;
2382 forward_limit := result_limit / 2;
2383 forward_to_pivot := result_limit - 1;
2385 -- This is the meat of the SQL query that finds browse entries. We'll
2386 -- pass this to a function which uses it with a cursor, so that individual
2387 -- rows may be fetched in a loop until some condition is satisfied, without
2388 -- waiting for a result set of fixed size to be collected all at once.
2393 FROM metabib.browse_entry mbe
2395 EXISTS ( -- are there any bibs using this mbe via the requested fields?
2397 FROM metabib.browse_entry_def_map mbedm
2398 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
2400 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
2402 FROM metabib.browse_entry_simple_heading_map mbeshm
2403 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2404 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2405 ash.atag = map.authority_field
2406 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
2408 WHERE mbeshm.entry = mbe.id
2412 -- This is the variant of the query for browsing backward.
2413 back_query := core_query ||
2414 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
2415 ' ORDER BY mbe.sort_value DESC, mbe.value DESC ';
2417 -- This variant browses forward.
2418 forward_query := core_query ||
2419 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
2420 ' ORDER BY mbe.sort_value, mbe.value ';
2422 -- We now call the function which applies a cursor to the provided
2423 -- queries, stopping at the appropriate limits and also giving us
2424 -- the next page's pivot.
2426 SELECT * FROM metabib.staged_browse(
2427 back_query, search_field, context_org, context_locations,
2428 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
2430 SELECT * FROM metabib.staged_browse(
2431 forward_query, search_field, context_org, context_locations,
2432 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
2433 ) ORDER BY row_number DESC;
2436 $p$ LANGUAGE PLPGSQL;
2438 CREATE OR REPLACE FUNCTION metabib.browse(
2441 context_org INT DEFAULT NULL,
2442 context_loc_group INT DEFAULT NULL,
2443 staff BOOL DEFAULT FALSE,
2444 pivot_id BIGINT DEFAULT NULL,
2445 result_limit INT DEFAULT 10
2446 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2448 RETURN QUERY SELECT * FROM metabib.browse(
2449 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
2450 FROM config.metabib_field WHERE field_class = search_class),
2459 $p$ LANGUAGE PLPGSQL;