2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.display_entry (
187 id BIGSERIAL PRIMARY KEY,
188 source BIGINT NOT NULL,
193 CREATE INDEX metabib_display_entry_field_idx
194 ON metabib.display_entry (field);
195 CREATE INDEX metabib_display_entry_source_idx
196 ON metabib.display_entry (source);
198 CREATE TABLE metabib.browse_entry (
199 id BIGSERIAL PRIMARY KEY,
201 index_vector tsvector,
202 sort_value TEXT NOT NULL,
203 UNIQUE(sort_value, value)
207 CREATE INDEX browse_entry_sort_value_idx
208 ON metabib.browse_entry USING BTREE (sort_value);
210 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
211 CREATE TRIGGER metabib_browse_entry_fti_trigger
212 BEFORE INSERT OR UPDATE ON metabib.browse_entry
213 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
216 CREATE TABLE metabib.browse_entry_def_map (
217 id BIGSERIAL PRIMARY KEY,
218 entry BIGINT REFERENCES metabib.browse_entry (id),
219 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
220 source BIGINT REFERENCES biblio.record_entry (id),
221 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
223 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
224 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
225 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
227 CREATE TABLE metabib.browse_entry_simple_heading_map (
228 id BIGSERIAL PRIMARY KEY,
229 entry BIGINT REFERENCES metabib.browse_entry (id),
230 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
232 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
233 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
235 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
236 RETURNS TRIGGER AS $$
239 display_field_text TEXT;
241 display_field_text := NEW.value;
244 SELECT n.func AS func,
245 n.param_count AS param_count,
247 FROM config.index_normalizer n
248 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
249 WHERE m.field = NEW.field AND m.pos < 0
252 EXECUTE 'SELECT ' || normalizer.func || '(' ||
253 quote_literal( display_field_text ) ||
255 WHEN normalizer.param_count > 0
256 THEN ',' || REPLACE(REPLACE(BTRIM(
257 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
260 ')' INTO display_field_text;
264 NEW.value = display_field_text;
270 CREATE TRIGGER display_field_normalize_tgr
271 BEFORE UPDATE OR INSERT ON metabib.display_entry
272 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
274 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
275 RETURNS TRIGGER AS $$
277 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
282 CREATE TRIGGER display_field_force_nfc_tgr
283 BEFORE UPDATE OR INSERT ON metabib.display_entry
284 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
287 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
292 facet_text := NEW.value;
295 SELECT n.func AS func,
296 n.param_count AS param_count,
298 FROM config.index_normalizer n
299 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
300 WHERE m.field = NEW.field AND m.pos < 0
303 EXECUTE 'SELECT ' || normalizer.func || '(' ||
304 quote_literal( facet_text ) ||
306 WHEN normalizer.param_count > 0
307 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
314 NEW.value = facet_text;
320 CREATE TRIGGER facet_normalize_tgr
321 BEFORE UPDATE OR INSERT ON metabib.facet_entry
322 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
324 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
326 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
331 CREATE TRIGGER facet_force_nfc_tgr
332 BEFORE UPDATE OR INSERT ON metabib.facet_entry
333 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
335 -- DECREMENTING serial starts at -1
336 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
338 CREATE TABLE metabib.uncontrolled_record_attr_value (
339 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
340 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
343 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
345 CREATE VIEW metabib.record_attr_id_map AS
346 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
348 SELECT c.id, c.ctype AS attr, c.code AS value
349 FROM config.coded_value_map c
350 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
352 CREATE VIEW metabib.composite_attr_id_map AS
353 SELECT c.id, c.ctype AS attr, c.code AS value
354 FROM config.coded_value_map c
355 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
357 CREATE VIEW metabib.full_attr_id_map AS
358 SELECT id, attr, value FROM metabib.record_attr_id_map
360 SELECT id, attr, value FROM metabib.composite_attr_id_map;
363 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
364 $_SHARED{metabib_compile_composite_attr_cache} = {}
365 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
366 return exists $_SHARED{metabib_compile_composite_attr_cache};
367 $f$ LANGUAGE PLPERLU;
369 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
370 delete $_SHARED{metabib_compile_composite_attr_cache};
371 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
372 $f$ LANGUAGE PLPERLU;
374 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
375 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
378 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
380 PERFORM metabib.compile_composite_attr_cache_invalidate();
383 $f$ LANGUAGE PLPGSQL;
385 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
387 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
392 my $def = decode_json($json);
394 die("Composite attribute definition not supplied") unless $def;
396 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
398 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
399 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
406 if (ref $d eq 'HASH') { # node or AND
407 if (exists $d->{_attr}) { # it is a node
408 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
409 my $id = spi_exec_prepared(
410 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
414 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
415 return '!' . recurse($$d{_not});
416 } else { # an AND list
417 @list = map { recurse($$d{$_}) } sort keys %$d;
419 } elsif (ref $d eq 'ARRAY') {
421 @list = map { recurse($_) } @$d;
424 @list = grep { defined && $_ ne '' } @list;
426 return '(' . join($j,@list) . ')' if @list;
430 my $val = recurse($def) || undef;
431 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
434 $func$ IMMUTABLE LANGUAGE plperlu;
436 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
437 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
438 $func$ STRICT IMMUTABLE LANGUAGE SQL;
440 CREATE TABLE metabib.record_attr_vector_list (
441 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
442 vlist INT[] NOT NULL -- stores id from ccvm AND murav
444 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
446 /* This becomes a view, and we do sorters differently ...
447 CREATE TABLE metabib.record_attr (
448 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
449 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
451 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
452 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
453 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
457 CREATE TABLE metabib.record_sorter (
458 id BIGSERIAL PRIMARY KEY,
459 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
460 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
463 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
464 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
465 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
468 CREATE TYPE metabib.record_attr_type AS (
473 -- Back-compat view ... we're moving to an INTARRAY world
474 CREATE VIEW metabib.record_attr_flat AS
475 SELECT v.source AS id,
478 FROM metabib.record_attr_vector_list v
479 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
481 SELECT v.source AS id,
484 FROM metabib.record_attr_vector_list v
485 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
487 CREATE VIEW metabib.record_attr AS
488 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
489 FROM metabib.record_attr_flat
490 WHERE attr IS NOT NULL
493 -- Back-back-compat view ... we use to live in an HSTORE world
494 CREATE TYPE metabib.rec_desc_type AS (
512 CREATE VIEW metabib.rec_descriptor AS
515 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
516 FROM metabib.record_attr;
518 -- Use a sequence that matches previous version, for easier upgrading.
519 CREATE SEQUENCE metabib.full_rec_id_seq;
521 CREATE TABLE metabib.real_full_rec (
522 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
523 record BIGINT NOT NULL,
524 tag CHAR(3) NOT NULL,
529 index_vector tsvector NOT NULL
531 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
533 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
534 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
535 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
536 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
537 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
538 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
539 CREATE INDEX metabib_full_rec_isxn_caseless_idx
540 ON metabib.real_full_rec (LOWER(value))
541 WHERE tag IN ('020', '022', '024');
542 -- This next index might fully supplant the one above, but leaving both for now.
543 -- (they are not too large)
544 -- The reason we need this index is to ensure that the query parser always
545 -- prefers this index over the simpler tag/subfield index, as this greatly
546 -- increases Vandelay overlay speed for these identifiers, especially when
547 -- a record has many of these fields (around > 4-6 seems like the cutoff
548 -- on at least one PG9.1 system)
549 -- A similar index could be added for other fields (e.g. 010), but one should
550 -- leave out the LOWER() in all other cases.
551 -- TODO: verify whether we can discard the non tag/subfield/substring version
552 -- above (metabib_full_rec_isxn_caseless_idx)
553 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
554 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
555 WHERE tag IN ('020', '022', '024');
558 CREATE TRIGGER metabib_full_rec_fti_trigger
559 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
560 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
562 CREATE OR REPLACE VIEW metabib.full_rec AS
569 SUBSTRING(value,1,1024) AS value,
571 FROM metabib.real_full_rec;
573 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
574 AS ON INSERT TO metabib.full_rec
576 INSERT INTO metabib.real_full_rec VALUES (
577 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
587 CREATE OR REPLACE RULE metabib_full_rec_update_rule
588 AS ON UPDATE TO metabib.full_rec
590 UPDATE metabib.real_full_rec SET
596 subfield = NEW.subfield,
598 index_vector = NEW.index_vector
601 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
602 AS ON DELETE TO metabib.full_rec
604 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
606 CREATE TABLE metabib.metarecord_source_map (
607 id BIGSERIAL PRIMARY KEY,
608 metarecord BIGINT NOT NULL,
609 source BIGINT NOT NULL
611 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
612 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
614 CREATE TYPE metabib.field_entry_template AS (
627 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
632 ) RETURNS SETOF metabib.field_entry_template AS $func$
634 bib biblio.record_entry%ROWTYPE;
635 idx config.metabib_field%ROWTYPE;
636 xfrm config.xml_transform%ROWTYPE;
638 transformed_xml TEXT;
640 xml_node_list TEXT[];
647 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
649 authority_link BIGINT;
650 output_row metabib.field_entry_template%ROWTYPE;
654 -- Start out with no field-use bools set
655 output_row.browse_field = FALSE;
656 output_row.facet_field = FALSE;
657 output_row.display_field = FALSE;
658 output_row.search_field = FALSE;
661 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
663 -- Loop over the indexing entries
664 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
666 process_idx := FALSE;
667 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
668 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
669 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
670 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
671 CONTINUE WHEN process_idx = FALSE;
673 joiner := COALESCE(idx.joiner, default_joiner);
675 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
677 -- See if we can skip the XSLT ... it's expensive
678 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
679 -- Can't skip the transform
680 IF xfrm.xslt <> '---' THEN
681 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
683 transformed_xml := bib.marc;
686 prev_xfrm := xfrm.name;
689 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
692 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
693 CONTINUE WHEN xml_node !~ E'^\\s*<';
695 -- XXX much of this should be moved into oils_xpath_string...
696 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
697 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
698 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
699 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
703 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
705 IF raw_text IS NOT NULL THEN
706 raw_text := raw_text || joiner;
709 raw_text := COALESCE(raw_text,'') || curr_text;
711 -- autosuggest/metabib.browse_entry
712 IF idx.browse_field THEN
714 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
715 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
717 browse_text := curr_text;
720 IF idx.browse_sort_xpath IS NOT NULL AND
721 idx.browse_sort_xpath <> '' THEN
723 sort_value := oils_xpath_string(
724 idx.browse_sort_xpath, xml_node, joiner,
725 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
728 sort_value := browse_text;
731 output_row.field_class = idx.field_class;
732 output_row.field = idx.id;
733 output_row.source = rid;
734 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
735 output_row.sort_value :=
736 public.naco_normalize(sort_value);
738 output_row.authority := NULL;
740 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
741 authority_text := oils_xpath_string(
742 idx.authority_xpath, xml_node, joiner,
744 ARRAY[xfrm.prefix, xfrm.namespace_uri],
745 ARRAY['xlink','http://www.w3.org/1999/xlink']
749 IF authority_text ~ '^\d+$' THEN
750 authority_link := authority_text::BIGINT;
751 PERFORM * FROM authority.record_entry WHERE id = authority_link;
753 output_row.authority := authority_link;
759 output_row.browse_field = TRUE;
760 -- Returning browse rows with search_field = true for search+browse
761 -- configs allows us to retain granularity of being able to search
762 -- browse fields with "starts with" type operators (for example, for
763 -- titles of songs in music albums)
764 IF idx.search_field THEN
765 output_row.search_field = TRUE;
767 RETURN NEXT output_row;
768 output_row.browse_field = FALSE;
769 output_row.search_field = FALSE;
770 output_row.sort_value := NULL;
773 -- insert raw node text for faceting
774 IF idx.facet_field THEN
776 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
777 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
779 facet_text := curr_text;
782 output_row.field_class = idx.field_class;
783 output_row.field = -1 * idx.id;
784 output_row.source = rid;
785 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
787 output_row.facet_field = TRUE;
788 RETURN NEXT output_row;
789 output_row.facet_field = FALSE;
792 -- insert raw node text for display
793 IF idx.display_field THEN
795 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
796 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
798 display_text := curr_text;
801 output_row.field_class = idx.field_class;
802 output_row.field = -1 * idx.id;
803 output_row.source = rid;
804 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
806 output_row.display_field = TRUE;
807 RETURN NEXT output_row;
808 output_row.display_field = FALSE;
813 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
815 -- insert combined node text for searching
816 IF idx.search_field THEN
817 output_row.field_class = idx.field_class;
818 output_row.field = idx.id;
819 output_row.source = rid;
820 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
822 output_row.search_field = TRUE;
823 RETURN NEXT output_row;
824 output_row.search_field = FALSE;
831 $func$ LANGUAGE PLPGSQL;
833 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
835 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
836 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
837 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
838 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
839 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
840 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
841 FROM metabib.keyword_field_entry WHERE source = bib_id;
843 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
844 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
845 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
846 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
847 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
848 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
849 FROM metabib.title_field_entry WHERE source = bib_id;
851 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
852 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
853 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
854 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
855 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
856 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
857 FROM metabib.author_field_entry WHERE source = bib_id;
859 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
860 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
861 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
862 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
863 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
864 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
865 FROM metabib.subject_field_entry WHERE source = bib_id;
867 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
868 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
869 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
870 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
871 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
872 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
873 FROM metabib.series_field_entry WHERE source = bib_id;
875 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
876 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
877 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
878 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
879 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
880 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
881 FROM metabib.identifier_field_entry WHERE source = bib_id;
884 $func$ LANGUAGE PLPGSQL;
886 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
888 skip_facet BOOL DEFAULT FALSE,
889 skip_display BOOL DEFAULT FALSE,
890 skip_browse BOOL DEFAULT FALSE,
891 skip_search BOOL DEFAULT FALSE,
892 only_fields INT[] DEFAULT '{}'::INT[]
893 ) RETURNS VOID AS $func$
896 ind_data metabib.field_entry_template%ROWTYPE;
897 mbe_row metabib.browse_entry%ROWTYPE;
904 field_list INT[] := only_fields;
905 field_types TEXT[] := '{}'::TEXT[];
908 IF field_list = '{}'::INT[] THEN
909 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
912 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
913 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
914 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
915 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
917 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
918 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
919 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
920 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
922 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
924 IF NOT b_skip_search THEN
925 FOR fclass IN SELECT * FROM config.metabib_class LOOP
926 -- RAISE NOTICE 'Emptying out %', fclass.name;
927 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
930 IF NOT b_skip_facet THEN
931 DELETE FROM metabib.facet_entry WHERE source = bib_id;
933 IF NOT b_skip_display THEN
934 DELETE FROM metabib.display_entry WHERE source = bib_id;
936 IF NOT b_skip_browse THEN
937 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
941 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
943 -- don't store what has been normalized away
944 CONTINUE WHEN ind_data.value IS NULL;
946 IF ind_data.field < 0 THEN
947 ind_data.field = -1 * ind_data.field;
950 IF ind_data.facet_field AND NOT b_skip_facet THEN
951 INSERT INTO metabib.facet_entry (field, source, value)
952 VALUES (ind_data.field, ind_data.source, ind_data.value);
955 IF ind_data.display_field AND NOT b_skip_display THEN
956 INSERT INTO metabib.display_entry (field, source, value)
957 VALUES (ind_data.field, ind_data.source, ind_data.value);
961 IF ind_data.browse_field AND NOT b_skip_browse THEN
962 -- A caveat about this SELECT: this should take care of replacing
963 -- old mbe rows when data changes, but not if normalization (by
964 -- which I mean specifically the output of
965 -- evergreen.oils_tsearch2()) changes. It may or may not be
966 -- expensive to add a comparison of index_vector to index_vector
967 -- to the WHERE clause below.
969 CONTINUE WHEN ind_data.sort_value IS NULL;
971 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
972 SELECT INTO mbe_row * FROM metabib.browse_entry
973 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
976 mbe_id := mbe_row.id;
978 INSERT INTO metabib.browse_entry
979 ( value, sort_value ) VALUES
980 ( value_prepped, ind_data.sort_value );
982 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
985 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
986 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
989 IF ind_data.search_field AND NOT b_skip_search THEN
990 -- Avoid inserting duplicate rows
991 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
992 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
993 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
994 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
995 IF mbe_id IS NULL THEN
997 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
999 quote_literal(ind_data.field) || $$, $$ ||
1000 quote_literal(ind_data.source) || $$, $$ ||
1001 quote_literal(ind_data.value) ||
1008 IF NOT b_skip_search THEN
1009 PERFORM metabib.update_combined_index_vectors(bib_id);
1014 $func$ LANGUAGE PLPGSQL;
1016 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
1018 auth authority.record_entry%ROWTYPE;
1019 output authority.full_rec%ROWTYPE;
1022 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
1024 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
1025 output.record := rid;
1026 output.ind1 := field.ind1;
1027 output.ind2 := field.ind2;
1028 output.tag := field.tag;
1029 output.subfield := field.subfield;
1030 output.value := field.value;
1035 $func$ LANGUAGE PLPGSQL;
1037 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
1039 bib biblio.record_entry%ROWTYPE;
1040 output metabib.full_rec%ROWTYPE;
1043 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
1045 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
1046 output.record := rid;
1047 output.ind1 := field.ind1;
1048 output.ind2 := field.ind2;
1049 output.tag := field.tag;
1050 output.subfield := field.subfield;
1051 output.value := field.value;
1056 $func$ LANGUAGE PLPGSQL;
1058 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1059 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1060 $func$ LANGUAGE SQL;
1062 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1063 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1064 $func$ LANGUAGE SQL;
1066 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1067 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
1068 $func$ LANGUAGE SQL;
1070 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1071 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1072 $func$ LANGUAGE SQL;
1074 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1087 IF marc IS NULL OR marc = '' THEN
1091 -- First, the count of tags
1092 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1094 -- now go through a bunch of pain to get the record type
1095 IF best_type IS NOT NULL THEN
1096 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1098 IF ldr IS NOT NULL THEN
1099 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1100 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1103 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1104 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1106 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1108 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1110 IF type_map.code IS NOT NULL THEN
1111 IF best_type = type_map.code THEN
1112 qual := qual + qual / 2;
1115 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1116 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1117 IF ff_tag_data = best_lang THEN
1125 -- Now look for some quality metrics
1127 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1132 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1139 $func$ LANGUAGE PLPGSQL;
1141 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1143 idx config.biblio_fingerprint%ROWTYPE;
1144 xfrm config.xml_transform%ROWTYPE;
1146 transformed_xml TEXT;
1148 xml_node_list TEXT[];
1150 output_text TEXT := '';
1153 IF marc IS NULL OR marc = '' THEN
1157 -- Loop over the indexing entries
1158 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1160 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1162 -- See if we can skip the XSLT ... it's expensive
1163 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1164 -- Can't skip the transform
1165 IF xfrm.xslt <> '---' THEN
1166 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1168 transformed_xml := marc;
1171 prev_xfrm := xfrm.name;
1174 raw_text := COALESCE(
1182 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1191 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1192 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1194 IF idx.first_word IS TRUE THEN
1195 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1198 output_text := output_text || idx.name || ':' ||
1199 REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
1203 RETURN BTRIM(output_text);
1206 $func$ LANGUAGE PLPGSQL;
1208 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1209 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1212 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1214 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1218 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1219 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1224 $func$ LANGUAGE PLPGSQL;
1226 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1228 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1230 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1232 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1233 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1237 $func$ LANGUAGE PLPGSQL;
1239 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1246 uri_owner_list TEXT[];
1254 -- Clear any URI mappings and call numbers for this bib.
1255 -- This leads to acn / auricnm inflation, but also enables
1256 -- old acn/auricnm's to go away and for bibs to be deleted.
1257 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1258 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1259 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1262 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1263 IF ARRAY_UPPER(uris,1) > 0 THEN
1264 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1265 -- First we pull info out of the 856
1268 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1269 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1270 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1272 IF uri_label IS NULL THEN
1273 uri_label := uri_href;
1275 CONTINUE WHEN uri_href IS NULL;
1277 -- Get the distinct list of libraries wanting to use
1279 DISTINCT REGEXP_REPLACE(
1281 $re$^.*?\((\w+)\).*$$re$,
1284 ) INTO uri_owner_list
1287 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1292 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1294 -- look for a matching uri
1295 IF uri_use IS NULL THEN
1296 SELECT id INTO uri_id
1298 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1299 ORDER BY id LIMIT 1;
1300 IF NOT FOUND THEN -- create one
1301 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1302 SELECT id INTO uri_id
1304 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1307 SELECT id INTO uri_id
1309 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1310 ORDER BY id LIMIT 1;
1311 IF NOT FOUND THEN -- create one
1312 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1313 SELECT id INTO uri_id
1315 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1319 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1320 uri_owner := uri_owner_list[j];
1322 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1323 CONTINUE WHEN NOT FOUND;
1325 -- we need a call number to link through
1326 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1328 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1329 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1330 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1333 -- now, link them if they're not already
1334 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1336 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1348 $func$ LANGUAGE PLPGSQL;
1350 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1352 new_mapping BOOL := TRUE;
1355 tmp_mr metabib.metarecord%ROWTYPE;
1356 deleted_mrs BIGINT[];
1359 -- We need to make sure we're not a deleted master record of an MR
1360 IF bib_is_deleted THEN
1361 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1363 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1364 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1367 -- Now, are there any more sources on this MR?
1368 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1370 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1371 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1372 DELETE FROM metabib.metarecord WHERE id = old_mr;
1374 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1375 UPDATE metabib.metarecord
1377 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1382 ELSE -- insert or update
1384 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1386 -- Find the first fingerprint-matching
1387 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1388 old_mr := tmp_mr.id;
1389 new_mapping := FALSE;
1391 ELSE -- Our fingerprint changed ... maybe remove the old MR
1392 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1393 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1394 IF source_count = 0 THEN -- No other records
1395 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1396 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1402 -- we found no suitable, preexisting MR based on old source maps
1403 IF old_mr IS NULL THEN
1404 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1406 IF old_mr IS NULL THEN -- nope, create one and grab its id
1407 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1408 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1410 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1411 UPDATE metabib.metarecord
1413 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1417 ELSE -- there was one we already attached to, update its mods cache and master_record
1418 UPDATE metabib.metarecord
1420 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1425 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1430 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1431 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1437 $func$ LANGUAGE PLPGSQL;
1440 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1441 DELETE FROM authority.bib_linking WHERE bib = $1;
1442 INSERT INTO authority.bib_linking (bib, authority)
1445 FROM ( SELECT DISTINCT $1 AS bib,
1446 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1447 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1448 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1449 ) y JOIN authority.record_entry r ON r.id = y.authority;
1451 $func$ LANGUAGE SQL;
1453 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1455 transformed_xml TEXT;
1456 rmarc TEXT := prmarc;
1460 xfrm config.xml_transform%ROWTYPE;
1461 attr_vector INT[] := '{}'::INT[];
1462 attr_vector_tmp INT[];
1463 attr_list TEXT[] := pattr_list;
1465 norm_attr_value TEXT[];
1467 attr_def config.record_attr_definition%ROWTYPE;
1468 ccvm_row config.coded_value_map%ROWTYPE;
1471 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1472 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1475 fixed_field IS NOT NULL OR
1476 xpath IS NOT NULL OR
1477 phys_char_sf IS NOT NULL OR
1484 IF rmarc IS NULL THEN
1485 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1488 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1490 attr_value := '{}'::TEXT[];
1491 norm_attr_value := '{}'::TEXT[];
1492 attr_vector_tmp := '{}'::INT[];
1494 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1496 -- tag+sf attrs only support SVF
1497 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1498 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1499 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1501 AND tag LIKE attr_def.tag
1503 WHEN attr_def.sf_list IS NOT NULL
1504 THEN POSITION(subfield IN attr_def.sf_list) > 0
1511 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1512 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1514 IF NOT attr_def.multi THEN
1515 attr_value := ARRAY[attr_value[1]];
1518 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1520 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1522 -- See if we can skip the XSLT ... it's expensive
1523 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1524 -- Can't skip the transform
1525 IF xfrm.xslt <> '---' THEN
1526 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1528 transformed_xml := rmarc;
1531 prev_xfrm := xfrm.name;
1534 IF xfrm.name IS NULL THEN
1535 -- just grab the marcxml (empty) transform
1536 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1537 prev_xfrm := xfrm.name;
1540 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1541 tmp_val := oils_xpath_string(
1544 COALESCE(attr_def.joiner,' '),
1545 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1547 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1548 attr_value := attr_value || tmp_val;
1549 EXIT WHEN NOT attr_def.multi;
1553 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1554 SELECT ARRAY_AGG(m.value) INTO attr_value
1555 FROM vandelay.marc21_physical_characteristics(rmarc) v
1556 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1557 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1558 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1560 IF NOT attr_def.multi THEN
1561 attr_value := ARRAY[attr_value[1]];
1566 -- apply index normalizers to attr_value
1567 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1569 SELECT n.func AS func,
1570 n.param_count AS param_count,
1572 FROM config.index_normalizer n
1573 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1574 WHERE attr = attr_def.name
1576 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1577 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1579 WHEN normalizer.param_count > 0
1580 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1586 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1587 -- note that a string that contains only blanks
1588 -- is a valid value for some attributes
1589 norm_attr_value := norm_attr_value || tmp_val;
1593 IF attr_def.filter THEN
1594 -- Create unknown uncontrolled values and find the IDs of the values
1595 IF ccvm_row.id IS NULL THEN
1596 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1597 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1598 BEGIN -- use subtransaction to isolate unique constraint violations
1599 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1600 EXCEPTION WHEN unique_violation THEN END;
1604 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1606 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1609 -- Add the new value to the vector
1610 attr_vector := attr_vector || attr_vector_tmp;
1613 IF attr_def.sorter THEN
1614 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1615 IF norm_attr_value[1] IS NOT NULL THEN
1616 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1622 /* We may need to rewrite the vlist to contain
1623 the intersection of new values for requested
1624 attrs and old values for ignored attrs. To
1625 do this, we take the old attr vlist and
1626 subtract any values that are valid for the
1627 requested attrs, and then add back the new
1628 set of attr values. */
1630 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1631 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1632 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1633 attr_vector := attr_vector || attr_vector_tmp;
1636 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1637 -- attributes can depend on earlier ones.
1638 PERFORM metabib.compile_composite_attr_cache_init();
1639 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1641 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1643 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1644 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1646 IF attr_def.filter THEN
1647 IF attr_vector @@ tmp_val::query_int THEN
1648 attr_vector = attr_vector + intset(ccvm_row.id);
1649 EXIT WHEN NOT attr_def.multi;
1653 IF attr_def.sorter THEN
1654 IF attr_vector @@ tmp_val THEN
1655 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1656 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1664 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1665 IF rdeleted THEN -- initial insert OR revivication
1666 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1667 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1669 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1675 $func$ LANGUAGE PLPGSQL;
1678 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1679 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1684 IF NEW.deleted THEN -- If this bib is deleted
1686 PERFORM * FROM config.internal_flag WHERE
1687 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1689 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1691 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1693 IF NOT tmp_bool THEN
1694 -- One needs to keep these around to support searches
1695 -- with the #deleted modifier, so one should turn on the named
1696 -- internal flag for that functionality.
1697 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1700 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1701 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1702 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1703 RETURN NEW; -- and we're done
1706 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1707 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1709 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1714 -- Record authority linking
1715 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1717 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1720 -- Flatten and insert the mfr data
1721 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1723 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1725 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1726 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1728 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1732 -- Gather and insert the field entry data
1733 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1735 -- Located URI magic
1736 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1737 IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF;
1739 -- (re)map metarecord-bib linking
1740 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1741 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1743 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1745 ELSE -- we're doing an update, and we're not deleted, remap
1746 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1748 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1754 $func$ LANGUAGE PLPGSQL;
1756 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1762 SELECT n.func AS func,
1763 n.param_count AS param_count,
1765 FROM config.index_normalizer n
1766 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1767 WHERE m.field = mapped_field AND m.pos < 0
1770 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1771 quote_literal( facet_text ) ||
1773 WHEN normalizer.param_count > 0
1774 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1777 ')' INTO facet_text;
1784 $$ LANGUAGE PLPGSQL;
1787 -- This mimics a specific part of QueryParser, turning the first part of a
1788 -- classed search (search_class) into a set of classes and possibly fields.
1789 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1790 -- or "au" or "au|corporate|personal" or anything like that, where the first
1791 -- element of the list you get by separating on the "|" character is either
1792 -- a registered class (config.metabib_class) or an alias
1793 -- (config.metabib_search_alias), and the rest of any such elements are
1794 -- fields (config.metabib_field).
1796 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1797 RETURNS SETOF RECORD AS $func$
1799 search_parts TEXT[];
1801 search_part_count INTEGER;
1803 registered_class config.metabib_class%ROWTYPE;
1804 registered_alias config.metabib_search_alias%ROWTYPE;
1805 registered_field config.metabib_field%ROWTYPE;
1807 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1809 search_part_count := ARRAY_LENGTH(search_parts, 1);
1810 IF search_part_count = 0 THEN
1813 SELECT INTO registered_class
1814 * FROM config.metabib_class WHERE name = search_parts[1];
1816 IF search_part_count < 2 THEN -- all fields
1817 rec := (registered_class.name, NULL::INTEGER);
1821 FOR field_name IN SELECT *
1822 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1823 SELECT INTO registered_field
1824 * FROM config.metabib_field
1825 WHERE name = field_name AND
1826 field_class = registered_class.name;
1828 rec := (registered_class.name, registered_field.id);
1833 -- maybe we have an alias?
1834 SELECT INTO registered_alias
1835 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1839 IF search_part_count < 2 THEN -- return w/e the alias says
1841 registered_alias.field_class, registered_alias.field
1846 FOR field_name IN SELECT *
1847 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1848 SELECT INTO registered_field
1849 * FROM config.metabib_field
1850 WHERE name = field_name AND
1851 field_class = registered_alias.field_class;
1854 registered_alias.field_class,
1865 $func$ LANGUAGE PLPGSQL ROWS 1;
1868 -- Given a string such as a user might type into a search box, prepare
1869 -- two changed variants for TO_TSQUERY(). See
1870 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1871 -- The first variant is normalized to match indexed documents regardless
1872 -- of diacritics. The second variant keeps its diacritics for proper
1873 -- highlighting via TS_HEADLINE().
1875 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1878 orig_ended_in_space BOOLEAN;
1883 orig_ended_in_space := orig ~ E'\\s$';
1885 orig := ARRAY_TO_STRING(
1886 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1889 normalized := public.naco_normalize(orig); -- also trim()s
1890 plain := trim(orig);
1892 IF NOT orig_ended_in_space THEN
1893 plain := plain || ':*';
1894 normalized := normalized || ':*';
1897 plain := ARRAY_TO_STRING(
1898 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1900 normalized := ARRAY_TO_STRING(
1901 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1904 RETURN ARRAY[normalized, plain];
1906 $$ LANGUAGE PLPGSQL;
1908 -- Functions metabib.browse, metabib.staged_browse, and metabib.suggest_browse_entries
1909 -- will be created later, after internal dependencies are resolved.
1911 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1915 temp_vector TEXT := '';
1921 NEW.index_vector = ''::tsvector;
1923 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1925 SELECT n.func AS func,
1926 n.param_count AS param_count,
1928 FROM config.index_normalizer n
1929 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1930 WHERE field = NEW.field AND m.pos < 0
1932 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1933 quote_literal( value ) ||
1935 WHEN normalizer.param_count > 0
1936 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1946 SELECT n.func AS func,
1947 n.param_count AS param_count,
1949 FROM config.index_normalizer n
1950 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1951 WHERE field = NEW.field AND m.pos >= 0
1953 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1954 quote_literal( value ) ||
1956 WHEN normalizer.param_count > 0
1957 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1965 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
1967 value := ARRAY_TO_STRING(
1968 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
1970 value := public.search_normalize(value);
1971 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
1973 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1976 SELECT DISTINCT m.ts_config, m.index_weight
1977 FROM config.metabib_class_ts_map m
1978 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1979 LEFT JOIN config.coded_value_map ccvm ON (
1980 ccvm.ctype IN ('item_lang', 'language') AND
1981 ccvm.code = m.index_lang AND
1982 r.vlist @> intset(ccvm.id)
1984 WHERE m.field_class = TG_ARGV[0]
1986 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
1987 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1989 SELECT DISTINCT m.ts_config, m.index_weight
1990 FROM config.metabib_field_ts_map m
1991 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1992 LEFT JOIN config.coded_value_map ccvm ON (
1993 ccvm.ctype IN ('item_lang', 'language') AND
1994 ccvm.code = m.index_lang AND
1995 r.vlist @> intset(ccvm.id)
1997 WHERE m.metabib_field = NEW.field
1999 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2000 ORDER BY index_weight ASC
2004 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2005 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2009 cur_weight = ts_rec.index_weight;
2010 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2013 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2015 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2020 $$ LANGUAGE PLPGSQL;
2023 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2024 browse_entry BIGINT,
2029 sources INT, -- visible ones, that is
2030 asources INT, -- visible ones, that is
2031 row_number INT, -- internal use, sort of
2032 accurate BOOL, -- Count in sources field is accurate? Not
2033 -- if we had more than a browse superpage
2034 -- of records to look at.
2035 aaccurate BOOL, -- See previous comment...
2040 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2043 ) RETURNS BIGINT AS $p$
2045 FROM metabib.browse_entry mbe
2046 JOIN metabib.browse_entry_def_map mbedm ON (
2047 mbedm.entry = mbe.id
2048 AND mbedm.def = ANY($1)
2050 WHERE mbe.sort_value >= public.naco_normalize($2)
2051 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2052 $p$ LANGUAGE SQL STABLE;
2054 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2057 ) RETURNS BIGINT AS $p$
2059 FROM metabib.browse_entry mbe
2060 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2061 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2062 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2063 ash.atag = map.authority_field
2064 AND map.metabib_field = ANY($1)
2066 WHERE mbe.sort_value >= public.naco_normalize($2)
2067 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2068 $p$ LANGUAGE SQL STABLE;
2070 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2073 ) RETURNS BIGINT AS $p$
2075 FROM metabib.browse_entry mbe
2076 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2077 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2078 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2079 ash.atag = map.authority_field
2080 AND map.metabib_field = ANY($1)
2082 WHERE mbe.sort_value >= public.naco_normalize($2)
2083 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2084 $p$ LANGUAGE SQL STABLE;
2086 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2089 ) RETURNS BIGINT AS $p$
2090 SELECT id FROM metabib.browse_entry
2092 metabib.browse_bib_pivot($1, $2),
2093 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2095 ORDER BY sort_value, value LIMIT 1;
2096 $p$ LANGUAGE SQL STABLE;
2099 -- This function is used to help clean up facet labels. Due to quirks in
2100 -- MARC parsing, some facet labels may be generated with periods or commas
2101 -- at the end. This will strip a trailing commas off all the time, and
2102 -- periods when they don't look like they are part of initials.
2103 -- Smith, John => no change
2104 -- Smith, John, => Smith, John
2105 -- Smith, John. => Smith, John
2106 -- Public, John Q. => no change
2107 CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
2113 last_char = substring(result from '.$');
2115 IF last_char = ',' THEN
2116 result := substring(result from '^(.*),$');
2118 ELSIF last_char = '.' THEN
2119 IF substring(result from ' \w\.$') IS NULL THEN
2120 result := substring(result from '^(.*)\.$');
2127 $$ language 'plpgsql';