2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.browse_entry (
187 id BIGSERIAL PRIMARY KEY,
189 index_vector tsvector,
190 sort_value TEXT NOT NULL,
191 UNIQUE(sort_value, value)
195 CREATE INDEX browse_entry_sort_value_idx
196 ON metabib.browse_entry USING BTREE (sort_value);
198 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
199 CREATE TRIGGER metabib_browse_entry_fti_trigger
200 BEFORE INSERT OR UPDATE ON metabib.browse_entry
201 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
204 CREATE TABLE metabib.browse_entry_def_map (
205 id BIGSERIAL PRIMARY KEY,
206 entry BIGINT REFERENCES metabib.browse_entry (id),
207 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
208 source BIGINT REFERENCES biblio.record_entry (id),
209 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
211 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
212 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
213 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
215 CREATE TABLE metabib.browse_entry_simple_heading_map (
216 id BIGSERIAL PRIMARY KEY,
217 entry BIGINT REFERENCES metabib.browse_entry (id),
218 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
220 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
221 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
223 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
228 facet_text := NEW.value;
231 SELECT n.func AS func,
232 n.param_count AS param_count,
234 FROM config.index_normalizer n
235 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
236 WHERE m.field = NEW.field AND m.pos < 0
239 EXECUTE 'SELECT ' || normalizer.func || '(' ||
240 quote_literal( facet_text ) ||
242 WHEN normalizer.param_count > 0
243 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
250 NEW.value = facet_text;
256 CREATE TRIGGER facet_normalize_tgr
257 BEFORE UPDATE OR INSERT ON metabib.facet_entry
258 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
260 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
262 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
267 CREATE TRIGGER facet_force_nfc_tgr
268 BEFORE UPDATE OR INSERT ON metabib.facet_entry
269 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
271 -- DECREMENTING serial starts at -1
272 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
274 CREATE TABLE metabib.uncontrolled_record_attr_value (
275 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
276 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
279 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
281 CREATE VIEW metabib.record_attr_id_map AS
282 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
284 SELECT c.id, c.ctype AS attr, c.code AS value
285 FROM config.coded_value_map c
286 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
288 CREATE VIEW metabib.composite_attr_id_map AS
289 SELECT c.id, c.ctype AS attr, c.code AS value
290 FROM config.coded_value_map c
291 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
293 CREATE VIEW metabib.full_attr_id_map AS
294 SELECT id, attr, value FROM metabib.record_attr_id_map
296 SELECT id, attr, value FROM metabib.composite_attr_id_map;
299 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
300 $_SHARED{metabib_compile_composite_attr_cache} = {}
301 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
302 return exists $_SHARED{metabib_compile_composite_attr_cache};
303 $f$ LANGUAGE PLPERLU;
305 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
306 delete $_SHARED{metabib_compile_composite_attr_cache};
307 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
308 $f$ LANGUAGE PLPERLU;
310 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
311 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
314 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
316 PERFORM metabib.compile_composite_attr_cache_invalidate();
319 $f$ LANGUAGE PLPGSQL;
321 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
323 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
328 my $def = decode_json($json);
330 die("Composite attribute definition not supplied") unless $def;
332 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
334 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
335 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
342 if (ref $d eq 'HASH') { # node or AND
343 if (exists $d->{_attr}) { # it is a node
344 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
345 my $id = spi_exec_prepared(
346 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
350 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
351 return '!' . recurse($$d{_not});
352 } else { # an AND list
353 @list = map { recurse($$d{$_}) } sort keys %$d;
355 } elsif (ref $d eq 'ARRAY') {
357 @list = map { recurse($_) } @$d;
360 @list = grep { defined && $_ ne '' } @list;
362 return '(' . join($j,@list) . ')' if @list;
366 my $val = recurse($def) || undef;
367 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
370 $func$ IMMUTABLE LANGUAGE plperlu;
372 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
373 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
374 $func$ STRICT IMMUTABLE LANGUAGE SQL;
376 CREATE TABLE metabib.record_attr_vector_list (
377 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
378 vlist INT[] NOT NULL -- stores id from ccvm AND murav
380 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
382 /* This becomes a view, and we do sorters differently ...
383 CREATE TABLE metabib.record_attr (
384 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
385 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
387 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
388 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
389 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
393 CREATE TABLE metabib.record_sorter (
394 id BIGSERIAL PRIMARY KEY,
395 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
396 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
399 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
400 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
401 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
404 CREATE TYPE metabib.record_attr_type AS (
409 -- Back-compat view ... we're moving to an INTARRAY world
410 CREATE VIEW metabib.record_attr_flat AS
411 SELECT v.source AS id,
414 FROM metabib.record_attr_vector_list v
415 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
417 SELECT v.source AS id,
420 FROM metabib.record_attr_vector_list v
421 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
423 CREATE VIEW metabib.record_attr AS
424 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs FROM metabib.record_attr_flat GROUP BY 1;
426 -- Back-back-compat view ... we use to live in an HSTORE world
427 CREATE TYPE metabib.rec_desc_type AS (
445 CREATE VIEW metabib.rec_descriptor AS
448 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
449 FROM metabib.record_attr;
451 -- Use a sequence that matches previous version, for easier upgrading.
452 CREATE SEQUENCE metabib.full_rec_id_seq;
454 CREATE TABLE metabib.real_full_rec (
455 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
456 record BIGINT NOT NULL,
457 tag CHAR(3) NOT NULL,
462 index_vector tsvector NOT NULL
464 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
466 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
467 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
468 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
469 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
470 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
471 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
472 CREATE INDEX metabib_full_rec_isxn_caseless_idx
473 ON metabib.real_full_rec (LOWER(value))
474 WHERE tag IN ('020', '022', '024');
475 -- This next index might fully supplant the one above, but leaving both for now.
476 -- (they are not too large)
477 -- The reason we need this index is to ensure that the query parser always
478 -- prefers this index over the simpler tag/subfield index, as this greatly
479 -- increases Vandelay overlay speed for these identifiers, especially when
480 -- a record has many of these fields (around > 4-6 seems like the cutoff
481 -- on at least one PG9.1 system)
482 -- A similar index could be added for other fields (e.g. 010), but one should
483 -- leave out the LOWER() in all other cases.
484 -- TODO: verify whether we can discard the non tag/subfield/substring version
485 -- above (metabib_full_rec_isxn_caseless_idx)
486 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
487 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
488 WHERE tag IN ('020', '022', '024');
491 CREATE TRIGGER metabib_full_rec_fti_trigger
492 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
493 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
495 CREATE OR REPLACE VIEW metabib.full_rec AS
502 SUBSTRING(value,1,1024) AS value,
504 FROM metabib.real_full_rec;
506 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
507 AS ON INSERT TO metabib.full_rec
509 INSERT INTO metabib.real_full_rec VALUES (
510 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
520 CREATE OR REPLACE RULE metabib_full_rec_update_rule
521 AS ON UPDATE TO metabib.full_rec
523 UPDATE metabib.real_full_rec SET
529 subfield = NEW.subfield,
531 index_vector = NEW.index_vector
534 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
535 AS ON DELETE TO metabib.full_rec
537 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
539 CREATE TABLE metabib.metarecord_source_map (
540 id BIGSERIAL PRIMARY KEY,
541 metarecord BIGINT NOT NULL,
542 source BIGINT NOT NULL
544 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
545 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
547 CREATE TYPE metabib.field_entry_template AS (
560 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
562 bib biblio.record_entry%ROWTYPE;
563 idx config.metabib_field%ROWTYPE;
564 xfrm config.xml_transform%ROWTYPE;
566 transformed_xml TEXT;
568 xml_node_list TEXT[];
574 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
576 authority_link BIGINT;
577 output_row metabib.field_entry_template%ROWTYPE;
580 -- Start out with no field-use bools set
581 output_row.browse_field = FALSE;
582 output_row.facet_field = FALSE;
583 output_row.search_field = FALSE;
586 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
588 -- Loop over the indexing entries
589 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
591 joiner := COALESCE(idx.joiner, default_joiner);
593 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
595 -- See if we can skip the XSLT ... it's expensive
596 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
597 -- Can't skip the transform
598 IF xfrm.xslt <> '---' THEN
599 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
601 transformed_xml := bib.marc;
604 prev_xfrm := xfrm.name;
607 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
610 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
611 CONTINUE WHEN xml_node !~ E'^\\s*<';
613 -- XXX much of this should be moved into oils_xpath_string...
614 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
615 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
616 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
617 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
621 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
623 IF raw_text IS NOT NULL THEN
624 raw_text := raw_text || joiner;
627 raw_text := COALESCE(raw_text,'') || curr_text;
629 -- autosuggest/metabib.browse_entry
630 IF idx.browse_field THEN
632 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
633 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
635 browse_text := curr_text;
638 IF idx.browse_sort_xpath IS NOT NULL AND
639 idx.browse_sort_xpath <> '' THEN
641 sort_value := oils_xpath_string(
642 idx.browse_sort_xpath, xml_node, joiner,
643 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
646 sort_value := browse_text;
649 output_row.field_class = idx.field_class;
650 output_row.field = idx.id;
651 output_row.source = rid;
652 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
653 output_row.sort_value :=
654 public.naco_normalize(sort_value);
656 output_row.authority := NULL;
658 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
659 authority_text := oils_xpath_string(
660 idx.authority_xpath, xml_node, joiner,
662 ARRAY[xfrm.prefix, xfrm.namespace_uri],
663 ARRAY['xlink','http://www.w3.org/1999/xlink']
667 IF authority_text ~ '^\d+$' THEN
668 authority_link := authority_text::BIGINT;
669 PERFORM * FROM authority.record_entry WHERE id = authority_link;
671 output_row.authority := authority_link;
677 output_row.browse_field = TRUE;
678 -- Returning browse rows with search_field = true for search+browse
679 -- configs allows us to retain granularity of being able to search
680 -- browse fields with "starts with" type operators (for example, for
681 -- titles of songs in music albums)
682 IF idx.search_field THEN
683 output_row.search_field = TRUE;
685 RETURN NEXT output_row;
686 output_row.browse_field = FALSE;
687 output_row.search_field = FALSE;
688 output_row.sort_value := NULL;
691 -- insert raw node text for faceting
692 IF idx.facet_field THEN
694 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
695 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
697 facet_text := curr_text;
700 output_row.field_class = idx.field_class;
701 output_row.field = -1 * idx.id;
702 output_row.source = rid;
703 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
705 output_row.facet_field = TRUE;
706 RETURN NEXT output_row;
707 output_row.facet_field = FALSE;
712 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
714 -- insert combined node text for searching
715 IF idx.search_field THEN
716 output_row.field_class = idx.field_class;
717 output_row.field = idx.id;
718 output_row.source = rid;
719 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
721 output_row.search_field = TRUE;
722 RETURN NEXT output_row;
723 output_row.search_field = FALSE;
730 $func$ LANGUAGE PLPGSQL;
732 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
734 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
735 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
736 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
737 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
738 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
739 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
740 FROM metabib.keyword_field_entry WHERE source = bib_id;
742 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
743 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
744 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
745 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
746 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
747 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
748 FROM metabib.title_field_entry WHERE source = bib_id;
750 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
751 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
752 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
753 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
754 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
755 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
756 FROM metabib.author_field_entry WHERE source = bib_id;
758 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
759 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
760 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
761 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
762 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
763 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
764 FROM metabib.subject_field_entry WHERE source = bib_id;
766 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
767 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
768 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
769 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
770 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
771 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
772 FROM metabib.series_field_entry WHERE source = bib_id;
774 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
775 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
776 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
777 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
778 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
779 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
780 FROM metabib.identifier_field_entry WHERE source = bib_id;
783 $func$ LANGUAGE PLPGSQL;
785 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
788 ind_data metabib.field_entry_template%ROWTYPE;
789 mbe_row metabib.browse_entry%ROWTYPE;
797 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
798 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
799 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
801 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
803 IF NOT b_skip_search THEN
804 FOR fclass IN SELECT * FROM config.metabib_class LOOP
805 -- RAISE NOTICE 'Emptying out %', fclass.name;
806 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
809 IF NOT b_skip_facet THEN
810 DELETE FROM metabib.facet_entry WHERE source = bib_id;
812 IF NOT b_skip_browse THEN
813 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
817 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
819 -- don't store what has been normalized away
820 CONTINUE WHEN ind_data.value IS NULL;
822 IF ind_data.field < 0 THEN
823 ind_data.field = -1 * ind_data.field;
826 IF ind_data.facet_field AND NOT b_skip_facet THEN
827 INSERT INTO metabib.facet_entry (field, source, value)
828 VALUES (ind_data.field, ind_data.source, ind_data.value);
831 IF ind_data.browse_field AND NOT b_skip_browse THEN
832 -- A caveat about this SELECT: this should take care of replacing
833 -- old mbe rows when data changes, but not if normalization (by
834 -- which I mean specifically the output of
835 -- evergreen.oils_tsearch2()) changes. It may or may not be
836 -- expensive to add a comparison of index_vector to index_vector
837 -- to the WHERE clause below.
839 CONTINUE WHEN ind_data.sort_value IS NULL;
841 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
842 SELECT INTO mbe_row * FROM metabib.browse_entry
843 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
846 mbe_id := mbe_row.id;
848 INSERT INTO metabib.browse_entry
849 ( value, sort_value ) VALUES
850 ( value_prepped, ind_data.sort_value );
852 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
855 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
856 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
859 IF ind_data.search_field AND NOT b_skip_search THEN
860 -- Avoid inserting duplicate rows
861 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
862 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
863 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
864 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
865 IF mbe_id IS NULL THEN
867 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
869 quote_literal(ind_data.field) || $$, $$ ||
870 quote_literal(ind_data.source) || $$, $$ ||
871 quote_literal(ind_data.value) ||
878 IF NOT b_skip_search THEN
879 PERFORM metabib.update_combined_index_vectors(bib_id);
884 $func$ LANGUAGE PLPGSQL;
886 -- default to a space joiner
887 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
888 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
891 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
893 auth authority.record_entry%ROWTYPE;
894 output authority.full_rec%ROWTYPE;
897 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
899 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
900 output.record := rid;
901 output.ind1 := field.ind1;
902 output.ind2 := field.ind2;
903 output.tag := field.tag;
904 output.subfield := field.subfield;
905 output.value := field.value;
910 $func$ LANGUAGE PLPGSQL;
912 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
914 bib biblio.record_entry%ROWTYPE;
915 output metabib.full_rec%ROWTYPE;
918 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
920 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
921 output.record := rid;
922 output.ind1 := field.ind1;
923 output.ind2 := field.ind2;
924 output.tag := field.tag;
925 output.subfield := field.subfield;
926 output.value := field.value;
931 $func$ LANGUAGE PLPGSQL;
933 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
934 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
937 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
938 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
941 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
942 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
945 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
946 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
949 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
962 IF marc IS NULL OR marc = '' THEN
966 -- First, the count of tags
967 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
969 -- now go through a bunch of pain to get the record type
970 IF best_type IS NOT NULL THEN
971 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
973 IF ldr IS NOT NULL THEN
974 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
975 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
978 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
979 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
981 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
983 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
985 IF type_map.code IS NOT NULL THEN
986 IF best_type = type_map.code THEN
987 qual := qual + qual / 2;
990 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
991 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
992 IF ff_tag_data = best_lang THEN
1000 -- Now look for some quality metrics
1002 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1007 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1014 $func$ LANGUAGE PLPGSQL;
1016 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1018 idx config.biblio_fingerprint%ROWTYPE;
1019 xfrm config.xml_transform%ROWTYPE;
1021 transformed_xml TEXT;
1023 xml_node_list TEXT[];
1025 output_text TEXT := '';
1028 IF marc IS NULL OR marc = '' THEN
1032 -- Loop over the indexing entries
1033 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1035 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1037 -- See if we can skip the XSLT ... it's expensive
1038 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1039 -- Can't skip the transform
1040 IF xfrm.xslt <> '---' THEN
1041 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1043 transformed_xml := marc;
1046 prev_xfrm := xfrm.name;
1049 raw_text := COALESCE(
1057 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1066 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1067 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1069 IF idx.first_word IS TRUE THEN
1070 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1073 output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
1080 $func$ LANGUAGE PLPGSQL;
1082 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1083 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1086 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1088 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1092 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1093 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1098 $func$ LANGUAGE PLPGSQL;
1100 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1102 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1104 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1106 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1107 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1111 $func$ LANGUAGE PLPGSQL;
1113 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1120 uri_owner_list TEXT[];
1128 -- Clear any URI mappings and call numbers for this bib.
1129 -- This leads to acn / auricnm inflation, but also enables
1130 -- old acn/auricnm's to go away and for bibs to be deleted.
1131 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1132 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1133 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1136 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1137 IF ARRAY_UPPER(uris,1) > 0 THEN
1138 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1139 -- First we pull info out of the 856
1142 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1143 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1144 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1146 IF uri_label IS NULL THEN
1147 uri_label := uri_href;
1149 CONTINUE WHEN uri_href IS NULL;
1151 -- Get the distinct list of libraries wanting to use
1153 DISTINCT REGEXP_REPLACE(
1155 $re$^.*?\((\w+)\).*$$re$,
1158 ) INTO uri_owner_list
1161 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1166 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1168 -- look for a matching uri
1169 IF uri_use IS NULL THEN
1170 SELECT id INTO uri_id
1172 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1173 ORDER BY id LIMIT 1;
1174 IF NOT FOUND THEN -- create one
1175 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1176 SELECT id INTO uri_id
1178 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1181 SELECT id INTO uri_id
1183 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1184 ORDER BY id LIMIT 1;
1185 IF NOT FOUND THEN -- create one
1186 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1187 SELECT id INTO uri_id
1189 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1193 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1194 uri_owner := uri_owner_list[j];
1196 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1197 CONTINUE WHEN NOT FOUND;
1199 -- we need a call number to link through
1200 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1202 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1203 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1204 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1207 -- now, link them if they're not already
1208 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1210 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1222 $func$ LANGUAGE PLPGSQL;
1224 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1226 new_mapping BOOL := TRUE;
1229 tmp_mr metabib.metarecord%ROWTYPE;
1230 deleted_mrs BIGINT[];
1233 -- We need to make sure we're not a deleted master record of an MR
1234 IF bib_is_deleted THEN
1235 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1237 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1238 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1241 -- Now, are there any more sources on this MR?
1242 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1244 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1245 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1246 DELETE FROM metabib.metarecord WHERE id = old_mr;
1248 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1249 UPDATE metabib.metarecord
1251 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1256 ELSE -- insert or update
1258 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1260 -- Find the first fingerprint-matching
1261 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1262 old_mr := tmp_mr.id;
1263 new_mapping := FALSE;
1265 ELSE -- Our fingerprint changed ... maybe remove the old MR
1266 DELETE FROM metabib.metarecord_source_map WHERE metarecord = old_mr AND source = bib_id; -- remove the old source mapping
1267 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1268 IF source_count = 0 THEN -- No other records
1269 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1270 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1276 -- we found no suitable, preexisting MR based on old source maps
1277 IF old_mr IS NULL THEN
1278 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1280 IF old_mr IS NULL THEN -- nope, create one and grab its id
1281 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1282 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1284 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1285 UPDATE metabib.metarecord
1287 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1291 ELSE -- there was one we already attached to, update its mods cache and master_record
1292 UPDATE metabib.metarecord
1294 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1299 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1304 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1305 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1311 $func$ LANGUAGE PLPGSQL;
1314 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1315 DELETE FROM authority.bib_linking WHERE bib = $1;
1316 INSERT INTO authority.bib_linking (bib, authority)
1319 FROM ( SELECT DISTINCT $1 AS bib,
1320 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1321 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1322 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1323 ) y JOIN authority.record_entry r ON r.id = y.authority;
1325 $func$ LANGUAGE SQL;
1327 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1329 transformed_xml TEXT;
1330 rmarc TEXT := prmarc;
1334 xfrm config.xml_transform%ROWTYPE;
1335 attr_vector INT[] := '{}'::INT[];
1336 attr_vector_tmp INT[];
1337 attr_list TEXT[] := pattr_list;
1339 norm_attr_value TEXT[];
1341 attr_def config.record_attr_definition%ROWTYPE;
1342 ccvm_row config.coded_value_map%ROWTYPE;
1345 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1346 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
1349 IF rmarc IS NULL THEN
1350 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1353 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1355 attr_value := '{}'::TEXT[];
1356 norm_attr_value := '{}'::TEXT[];
1357 attr_vector_tmp := '{}'::INT[];
1359 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1361 -- tag+sf attrs only support SVF
1362 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1363 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1364 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1366 AND tag LIKE attr_def.tag
1368 WHEN attr_def.sf_list IS NOT NULL
1369 THEN POSITION(subfield IN attr_def.sf_list) > 0
1376 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1377 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1379 IF NOT attr_def.multi THEN
1380 attr_value := ARRAY[attr_value[1]];
1383 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1385 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1387 -- See if we can skip the XSLT ... it's expensive
1388 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1389 -- Can't skip the transform
1390 IF xfrm.xslt <> '---' THEN
1391 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1393 transformed_xml := rmarc;
1396 prev_xfrm := xfrm.name;
1399 IF xfrm.name IS NULL THEN
1400 -- just grab the marcxml (empty) transform
1401 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1402 prev_xfrm := xfrm.name;
1405 FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
1406 tmp_val := oils_xpath_string(
1409 COALESCE(attr_def.joiner,' '),
1410 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1412 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1413 attr_value := attr_value || tmp_val;
1414 EXIT WHEN NOT attr_def.multi;
1418 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1419 SELECT ARRAY_AGG(m.value) INTO attr_value
1420 FROM vandelay.marc21_physical_characteristics(rmarc) v
1421 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1422 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1423 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1425 IF NOT attr_def.multi THEN
1426 attr_value := ARRAY[attr_value[1]];
1431 -- apply index normalizers to attr_value
1432 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1434 SELECT n.func AS func,
1435 n.param_count AS param_count,
1437 FROM config.index_normalizer n
1438 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1439 WHERE attr = attr_def.name
1441 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1442 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1444 WHEN normalizer.param_count > 0
1445 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1451 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1452 norm_attr_value := norm_attr_value || tmp_val;
1456 IF attr_def.filter THEN
1457 -- Create unknown uncontrolled values and find the IDs of the values
1458 IF ccvm_row.id IS NULL THEN
1459 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1460 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1461 BEGIN -- use subtransaction to isolate unique constraint violations
1462 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1463 EXCEPTION WHEN unique_violation THEN END;
1467 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1469 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1472 -- Add the new value to the vector
1473 attr_vector := attr_vector || attr_vector_tmp;
1476 IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
1477 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1478 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1483 /* We may need to rewrite the vlist to contain
1484 the intersection of new values for requested
1485 attrs and old values for ignored attrs. To
1486 do this, we take the old attr vlist and
1487 subtract any values that are valid for the
1488 requested attrs, and then add back the new
1489 set of attr values. */
1491 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1492 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1493 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1494 attr_vector := attr_vector || attr_vector_tmp;
1497 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1498 -- attributes can depend on earlier ones.
1499 PERFORM metabib.compile_composite_attr_cache_init();
1500 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1502 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1504 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1505 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1507 IF attr_def.filter THEN
1508 IF attr_vector @@ tmp_val::query_int THEN
1509 attr_vector = attr_vector + intset(ccvm_row.id);
1510 EXIT WHEN NOT attr_def.multi;
1514 IF attr_def.sorter THEN
1515 IF attr_vector @@ tmp_val THEN
1516 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1517 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1525 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1526 IF rdeleted THEN -- initial insert OR revivication
1527 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1528 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1530 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1536 $func$ LANGUAGE PLPGSQL;
1539 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1540 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1545 IF NEW.deleted THEN -- If this bib is deleted
1547 PERFORM * FROM config.internal_flag WHERE
1548 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1550 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1552 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1554 IF NOT tmp_bool THEN
1555 -- One needs to keep these around to support searches
1556 -- with the #deleted modifier, so one should turn on the named
1557 -- internal flag for that functionality.
1558 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1561 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1562 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1563 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1564 RETURN NEW; -- and we're done
1567 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1568 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1570 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1575 -- Record authority linking
1576 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1578 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1581 -- Flatten and insert the mfr data
1582 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1584 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1586 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1587 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1589 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1593 -- Gather and insert the field entry data
1594 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1596 -- Located URI magic
1597 IF TG_OP = 'INSERT' THEN
1598 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1600 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1603 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1605 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1609 -- (re)map metarecord-bib linking
1610 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1611 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1613 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1615 ELSE -- we're doing an update, and we're not deleted, remap
1616 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1618 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1624 $func$ LANGUAGE PLPGSQL;
1626 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1632 SELECT n.func AS func,
1633 n.param_count AS param_count,
1635 FROM config.index_normalizer n
1636 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1637 WHERE m.field = mapped_field AND m.pos < 0
1640 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1641 quote_literal( facet_text ) ||
1643 WHEN normalizer.param_count > 0
1644 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1647 ')' INTO facet_text;
1654 $$ LANGUAGE PLPGSQL;
1657 -- This mimics a specific part of QueryParser, turning the first part of a
1658 -- classed search (search_class) into a set of classes and possibly fields.
1659 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1660 -- or "au" or "au|corporate|personal" or anything like that, where the first
1661 -- element of the list you get by separating on the "|" character is either
1662 -- a registered class (config.metabib_class) or an alias
1663 -- (config.metabib_search_alias), and the rest of any such elements are
1664 -- fields (config.metabib_field).
1666 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1667 RETURNS SETOF RECORD AS $func$
1669 search_parts TEXT[];
1671 search_part_count INTEGER;
1673 registered_class config.metabib_class%ROWTYPE;
1674 registered_alias config.metabib_search_alias%ROWTYPE;
1675 registered_field config.metabib_field%ROWTYPE;
1677 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1679 search_part_count := ARRAY_LENGTH(search_parts, 1);
1680 IF search_part_count = 0 THEN
1683 SELECT INTO registered_class
1684 * FROM config.metabib_class WHERE name = search_parts[1];
1686 IF search_part_count < 2 THEN -- all fields
1687 rec := (registered_class.name, NULL::INTEGER);
1691 FOR field_name IN SELECT *
1692 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1693 SELECT INTO registered_field
1694 * FROM config.metabib_field
1695 WHERE name = field_name AND
1696 field_class = registered_class.name;
1698 rec := (registered_class.name, registered_field.id);
1703 -- maybe we have an alias?
1704 SELECT INTO registered_alias
1705 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1709 IF search_part_count < 2 THEN -- return w/e the alias says
1711 registered_alias.field_class, registered_alias.field
1716 FOR field_name IN SELECT *
1717 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1718 SELECT INTO registered_field
1719 * FROM config.metabib_field
1720 WHERE name = field_name AND
1721 field_class = registered_alias.field_class;
1724 registered_alias.field_class,
1735 $func$ LANGUAGE PLPGSQL ROWS 1;
1738 -- Given a string such as a user might type into a search box, prepare
1739 -- two changed variants for TO_TSQUERY(). See
1740 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1741 -- The first variant is normalized to match indexed documents regardless
1742 -- of diacritics. The second variant keeps its diacritics for proper
1743 -- highlighting via TS_HEADLINE().
1745 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1748 orig_ended_in_space BOOLEAN;
1753 orig_ended_in_space := orig ~ E'\\s$';
1755 orig := ARRAY_TO_STRING(
1756 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1759 normalized := public.naco_normalize(orig); -- also trim()s
1760 plain := trim(orig);
1762 IF NOT orig_ended_in_space THEN
1763 plain := plain || ':*';
1764 normalized := normalized || ':*';
1767 plain := ARRAY_TO_STRING(
1768 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1770 normalized := ARRAY_TO_STRING(
1771 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1774 RETURN ARRAY[normalized, plain];
1776 $$ LANGUAGE PLPGSQL;
1780 FUNCTION metabib.suggest_browse_entries(
1781 raw_query_text TEXT, -- actually typed by humans at the UI level
1782 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
1783 headline_opts TEXT, -- markup options for ts_headline()
1784 visibility_org INTEGER,-- null if you don't want opac visibility test
1785 query_limit INTEGER,-- use in LIMIT clause of interal query
1786 normalization INTEGER -- argument to TS_RANK_CD()
1788 value TEXT, -- plain
1790 buoyant_and_class_match BOOL,
1792 field_weight INTEGER,
1795 match TEXT -- marked up
1798 prepared_query_texts TEXT[];
1800 plain_query TSQUERY;
1801 opac_visibility_join TEXT;
1802 search_class_join TEXT;
1805 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
1807 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
1808 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
1810 visibility_org := NULLIF(visibility_org,-1);
1811 IF visibility_org IS NOT NULL THEN
1812 opac_visibility_join := '
1813 JOIN asset.opac_visible_copies aovc ON (
1814 aovc.record = x.source AND
1815 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
1818 opac_visibility_join := '';
1821 -- The following determines whether we only provide suggestsons matching
1822 -- the user's selected search_class, or whether we show other suggestions
1823 -- too. The reason for MIN() is that for search_classes like
1824 -- 'title|proper|uniform' you would otherwise get multiple rows. The
1825 -- implication is that if title as a class doesn't have restrict,
1826 -- nor does the proper field, but the uniform field does, you're going
1827 -- to get 'false' for your overall evaluation of 'should we restrict?'
1828 -- To invert that, change from MIN() to MAX().
1832 MIN(cmc.restrict::INT) AS restrict_class,
1833 MIN(cmf.restrict::INT) AS restrict_field
1834 FROM metabib.search_class_to_registered_components(search_class)
1835 AS _registered (field_class TEXT, field INT)
1837 config.metabib_class cmc ON (cmc.name = _registered.field_class)
1839 config.metabib_field cmf ON (cmf.id = _registered.field);
1841 -- evaluate 'should we restrict?'
1842 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
1843 search_class_join := '
1845 metabib.search_class_to_registered_components($2)
1846 AS _registered (field_class TEXT, field INT) ON (
1847 (_registered.field IS NULL AND
1848 _registered.field_class = cmf.field_class) OR
1849 (_registered.field = cmf.id)
1853 search_class_join := '
1855 metabib.search_class_to_registered_components($2)
1856 AS _registered (field_class TEXT, field INT) ON (
1857 _registered.field_class = cmc.name
1862 RETURN QUERY EXECUTE '
1871 TS_HEADLINE(value, $7, $3)
1872 FROM (SELECT DISTINCT
1875 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
1876 _registered.field = cmf.id AS restrict,
1878 TS_RANK_CD(mbe.index_vector, $1, $6),
1881 FROM metabib.browse_entry_def_map mbedm
1882 JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
1883 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
1884 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
1885 ' || search_class_join || '
1886 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1888 ' || opac_visibility_join || '
1889 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1891 ' -- sic, repeat the order by clause in the outer select too
1893 query, search_class, headline_opts,
1894 visibility_org, query_limit, normalization, plain_query
1898 -- buoyant AND chosen class = match class
1899 -- chosen field = match field
1906 $func$ LANGUAGE PLPGSQL;
1908 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1912 temp_vector TEXT := '';
1918 NEW.index_vector = ''::tsvector;
1920 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1922 SELECT n.func AS func,
1923 n.param_count AS param_count,
1925 FROM config.index_normalizer n
1926 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1927 WHERE field = NEW.field AND m.pos < 0
1929 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1930 quote_literal( value ) ||
1932 WHEN normalizer.param_count > 0
1933 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1943 SELECT n.func AS func,
1944 n.param_count AS param_count,
1946 FROM config.index_normalizer n
1947 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1948 WHERE field = NEW.field AND m.pos >= 0
1950 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1951 quote_literal( value ) ||
1953 WHEN normalizer.param_count > 0
1954 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1962 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
1964 value := ARRAY_TO_STRING(
1965 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
1967 value := public.search_normalize(value);
1968 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
1970 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1973 SELECT DISTINCT m.ts_config, m.index_weight
1974 FROM config.metabib_class_ts_map m
1975 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1976 LEFT JOIN config.coded_value_map ccvm ON (
1977 ccvm.ctype IN ('item_lang', 'language') AND
1978 ccvm.code = m.index_lang AND
1979 r.vlist @> intset(ccvm.id)
1981 WHERE m.field_class = TG_ARGV[0]
1983 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
1984 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1986 SELECT DISTINCT m.ts_config, m.index_weight
1987 FROM config.metabib_field_ts_map m
1988 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1989 LEFT JOIN config.coded_value_map ccvm ON (
1990 ccvm.ctype IN ('item_lang', 'language') AND
1991 ccvm.code = m.index_lang AND
1992 r.vlist @> intset(ccvm.id)
1994 WHERE m.metabib_field = NEW.field
1996 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1997 ORDER BY index_weight ASC
2001 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2002 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2006 cur_weight = ts_rec.index_weight;
2007 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2010 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2012 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2017 $$ LANGUAGE PLPGSQL;
2020 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2021 browse_entry BIGINT,
2026 sources INT, -- visible ones, that is
2027 asources INT, -- visible ones, that is
2028 row_number INT, -- internal use, sort of
2029 accurate BOOL, -- Count in sources field is accurate? Not
2030 -- if we had more than a browse superpage
2031 -- of records to look at.
2032 aaccurate BOOL, -- See previous comment...
2037 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2040 ) RETURNS BIGINT AS $p$
2042 FROM metabib.browse_entry mbe
2043 JOIN metabib.browse_entry_def_map mbedm ON (
2044 mbedm.entry = mbe.id
2045 AND mbedm.def = ANY($1)
2047 WHERE mbe.sort_value >= public.naco_normalize($2)
2048 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2049 $p$ LANGUAGE SQL STABLE;
2051 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2054 ) RETURNS BIGINT AS $p$
2056 FROM metabib.browse_entry mbe
2057 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2058 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2059 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2060 ash.atag = map.authority_field
2061 AND map.metabib_field = ANY($1)
2063 WHERE mbe.sort_value >= public.naco_normalize($2)
2064 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2065 $p$ LANGUAGE SQL STABLE;
2067 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2070 ) RETURNS BIGINT AS $p$
2072 FROM metabib.browse_entry mbe
2073 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2074 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2075 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2076 ash.atag = map.authority_field
2077 AND map.metabib_field = ANY($1)
2079 WHERE mbe.sort_value >= public.naco_normalize($2)
2080 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2081 $p$ LANGUAGE SQL STABLE;
2083 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2086 ) RETURNS BIGINT AS $p$
2087 SELECT id FROM metabib.browse_entry
2089 metabib.browse_bib_pivot($1, $2),
2090 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2092 ORDER BY sort_value, value LIMIT 1;
2093 $p$ LANGUAGE SQL STABLE;
2096 CREATE OR REPLACE FUNCTION metabib.staged_browse(
2100 context_locations INT[],
2102 browse_superpage_size INT,
2103 count_up_from_zero BOOL, -- if false, count down from -1
2106 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2114 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
2115 results_skipped INT := 0;
2116 row_counter INT := 0;
2121 all_records BIGINT[];
2122 all_brecords BIGINT[];
2123 all_arecords BIGINT[];
2124 superpage_of_records BIGINT[];
2127 IF count_up_from_zero THEN
2133 OPEN curs FOR EXECUTE query;
2136 FETCH curs INTO rec;
2138 IF result_row.pivot_point IS NOT NULL THEN
2139 RETURN NEXT result_row;
2145 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
2146 SELECT INTO all_arecords, result_row.sees, afields
2147 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
2148 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
2149 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
2151 FROM metabib.browse_entry_simple_heading_map mbeshm
2152 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2153 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
2154 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
2155 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2156 ash.atag = map.authority_field
2157 AND map.metabib_field = ANY(fields)
2159 WHERE mbeshm.entry = rec.id;
2162 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
2163 SELECT INTO all_brecords, result_row.authorities, bfields
2164 ARRAY_AGG(DISTINCT source),
2165 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
2166 ARRAY_AGG(DISTINCT def)
2167 FROM metabib.browse_entry_def_map
2168 WHERE entry = rec.id
2169 AND def = ANY(fields);
2171 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
2173 result_row.sources := 0;
2174 result_row.asources := 0;
2176 -- Bib-linked vis checking
2177 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
2179 full_end := ARRAY_LENGTH(all_brecords, 1);
2180 superpage_size := COALESCE(browse_superpage_size, full_end);
2182 slice_end := superpage_size;
2184 WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
2185 superpage_of_records := all_brecords[slice_start:slice_end];
2187 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2188 '1::INT AS rel FROM (SELECT UNNEST(' ||
2189 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2191 -- We use search.query_parser_fts() for visibility testing.
2192 -- We're calling it once per browse-superpage worth of records
2193 -- out of the set of records related to a given mbe, until we've
2194 -- either exhausted that set of records or found at least 1
2197 SELECT INTO result_row.sources visible
2198 FROM search.query_parser_fts(
2199 context_org, NULL, qpfts_query, NULL,
2200 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2202 WHERE qpfts.rel IS NULL;
2204 slice_start := slice_start + superpage_size;
2205 slice_end := slice_end + superpage_size;
2208 -- Accurate? Well, probably.
2209 result_row.accurate := browse_superpage_size IS NULL OR
2210 browse_superpage_size >= full_end;
2214 -- Authority-linked vis checking
2215 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
2217 full_end := ARRAY_LENGTH(all_arecords, 1);
2218 superpage_size := COALESCE(browse_superpage_size, full_end);
2220 slice_end := superpage_size;
2222 WHILE result_row.asources = 0 AND slice_start <= full_end LOOP
2223 superpage_of_records := all_arecords[slice_start:slice_end];
2225 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2226 '1::INT AS rel FROM (SELECT UNNEST(' ||
2227 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2229 -- We use search.query_parser_fts() for visibility testing.
2230 -- We're calling it once per browse-superpage worth of records
2231 -- out of the set of records related to a given mbe, via
2232 -- authority until we've either exhausted that set of records
2233 -- or found at least 1 visible record.
2235 SELECT INTO result_row.asources visible
2236 FROM search.query_parser_fts(
2237 context_org, NULL, qpfts_query, NULL,
2238 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2240 WHERE qpfts.rel IS NULL;
2242 slice_start := slice_start + superpage_size;
2243 slice_end := slice_end + superpage_size;
2247 -- Accurate? Well, probably.
2248 result_row.aaccurate := browse_superpage_size IS NULL OR
2249 browse_superpage_size >= full_end;
2253 IF result_row.sources > 0 OR result_row.asources > 0 THEN
2255 -- The function that calls this function needs row_number in order
2256 -- to correctly order results from two different runs of this
2258 result_row.row_number := row_number;
2260 -- Now, if row_counter is still less than limit, return a row. If
2261 -- not, but it is less than next_pivot_pos, continue on without
2262 -- returning actual result rows until we find
2263 -- that next pivot, and return it.
2265 IF row_counter < result_limit THEN
2266 result_row.browse_entry := rec.id;
2267 result_row.value := rec.value;
2269 RETURN NEXT result_row;
2271 result_row.browse_entry := NULL;
2272 result_row.authorities := NULL;
2273 result_row.fields := NULL;
2274 result_row.value := NULL;
2275 result_row.sources := NULL;
2276 result_row.sees := NULL;
2277 result_row.accurate := NULL;
2278 result_row.aaccurate := NULL;
2279 result_row.pivot_point := rec.id;
2281 IF row_counter >= next_pivot_pos THEN
2282 RETURN NEXT result_row;
2287 IF count_up_from_zero THEN
2288 row_number := row_number + 1;
2290 row_number := row_number - 1;
2293 -- row_counter is different from row_number.
2294 -- It simply counts up from zero so that we know when
2295 -- we've reached our limit.
2296 row_counter := row_counter + 1;
2300 $p$ LANGUAGE PLPGSQL;
2303 CREATE OR REPLACE FUNCTION metabib.browse(
2306 context_org INT DEFAULT NULL,
2307 context_loc_group INT DEFAULT NULL,
2308 staff BOOL DEFAULT FALSE,
2309 pivot_id BIGINT DEFAULT NULL,
2310 result_limit INT DEFAULT 10
2311 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2316 pivot_sort_value TEXT;
2317 pivot_sort_fallback TEXT;
2318 context_locations INT[];
2319 browse_superpage_size INT;
2320 results_skipped INT := 0;
2324 forward_to_pivot INT;
2326 -- First, find the pivot if we were given a browse term but not a pivot.
2327 IF pivot_id IS NULL THEN
2328 pivot_id := metabib.browse_pivot(search_field, browse_term);
2331 SELECT INTO pivot_sort_value, pivot_sort_fallback
2332 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
2334 -- Bail if we couldn't find a pivot.
2335 IF pivot_sort_value IS NULL THEN
2339 -- Transform the context_loc_group argument (if any) (logc at the
2340 -- TPAC layer) into a form we'll be able to use.
2341 IF context_loc_group IS NOT NULL THEN
2342 SELECT INTO context_locations ARRAY_AGG(location)
2343 FROM asset.copy_location_group_map
2344 WHERE lgroup = context_loc_group;
2347 -- Get the configured size of browse superpages.
2348 SELECT INTO browse_superpage_size value -- NULL ok
2349 FROM config.global_flag
2350 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
2352 -- First we're going to search backward from the pivot, then we're going
2353 -- to search forward. In each direction, we need two limits. At the
2354 -- lesser of the two limits, we delineate the edge of the result set
2355 -- we're going to return. At the greater of the two limits, we find the
2356 -- pivot value that would represent an offset from the current pivot
2357 -- at a distance of one "page" in either direction, where a "page" is a
2358 -- result set of the size specified in the "result_limit" argument.
2360 -- The two limits in each direction make four derived values in total,
2361 -- and we calculate them now.
2362 back_limit := CEIL(result_limit::FLOAT / 2);
2363 back_to_pivot := result_limit;
2364 forward_limit := result_limit / 2;
2365 forward_to_pivot := result_limit - 1;
2367 -- This is the meat of the SQL query that finds browse entries. We'll
2368 -- pass this to a function which uses it with a cursor, so that individual
2369 -- rows may be fetched in a loop until some condition is satisfied, without
2370 -- waiting for a result set of fixed size to be collected all at once.
2375 FROM metabib.browse_entry mbe
2377 EXISTS ( -- are there any bibs using this mbe via the requested fields?
2379 FROM metabib.browse_entry_def_map mbedm
2380 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
2382 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
2384 FROM metabib.browse_entry_simple_heading_map mbeshm
2385 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2386 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2387 ash.atag = map.authority_field
2388 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
2390 WHERE mbeshm.entry = mbe.id
2394 -- This is the variant of the query for browsing backward.
2395 back_query := core_query ||
2396 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
2397 ' ORDER BY mbe.sort_value DESC, mbe.value DESC ';
2399 -- This variant browses forward.
2400 forward_query := core_query ||
2401 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
2402 ' ORDER BY mbe.sort_value, mbe.value ';
2404 -- We now call the function which applies a cursor to the provided
2405 -- queries, stopping at the appropriate limits and also giving us
2406 -- the next page's pivot.
2408 SELECT * FROM metabib.staged_browse(
2409 back_query, search_field, context_org, context_locations,
2410 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
2412 SELECT * FROM metabib.staged_browse(
2413 forward_query, search_field, context_org, context_locations,
2414 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
2415 ) ORDER BY row_number DESC;
2418 $p$ LANGUAGE PLPGSQL;
2420 CREATE OR REPLACE FUNCTION metabib.browse(
2423 context_org INT DEFAULT NULL,
2424 context_loc_group INT DEFAULT NULL,
2425 staff BOOL DEFAULT FALSE,
2426 pivot_id BIGINT DEFAULT NULL,
2427 result_limit INT DEFAULT 10
2428 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2430 RETURN QUERY SELECT * FROM metabib.browse(
2431 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
2432 FROM config.metabib_field WHERE field_class = search_class),
2441 $p$ LANGUAGE PLPGSQL;