2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.display_entry (
187 id BIGSERIAL PRIMARY KEY,
188 source BIGINT NOT NULL,
193 CREATE INDEX metabib_display_entry_field_idx
194 ON metabib.display_entry (field);
195 CREATE INDEX metabib_display_entry_value_idx
196 ON metabib.display_entry (SUBSTRING(value,1,1024));
197 CREATE INDEX metabib_display_entry_source_idx
198 ON metabib.display_entry (source);
200 CREATE TABLE metabib.browse_entry (
201 id BIGSERIAL PRIMARY KEY,
203 index_vector tsvector,
204 sort_value TEXT NOT NULL,
205 UNIQUE(sort_value, value)
209 CREATE INDEX browse_entry_sort_value_idx
210 ON metabib.browse_entry USING BTREE (sort_value);
212 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
213 CREATE TRIGGER metabib_browse_entry_fti_trigger
214 BEFORE INSERT OR UPDATE ON metabib.browse_entry
215 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
218 CREATE TABLE metabib.browse_entry_def_map (
219 id BIGSERIAL PRIMARY KEY,
220 entry BIGINT REFERENCES metabib.browse_entry (id),
221 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
222 source BIGINT REFERENCES biblio.record_entry (id),
223 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
225 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
226 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
227 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
229 CREATE TABLE metabib.browse_entry_simple_heading_map (
230 id BIGSERIAL PRIMARY KEY,
231 entry BIGINT REFERENCES metabib.browse_entry (id),
232 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
234 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
235 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
237 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
238 RETURNS TRIGGER AS $$
241 display_field_text TEXT;
243 display_field_text := NEW.value;
246 SELECT n.func AS func,
247 n.param_count AS param_count,
249 FROM config.index_normalizer n
250 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
251 WHERE m.field = NEW.field AND m.pos < 0
254 EXECUTE 'SELECT ' || normalizer.func || '(' ||
255 quote_literal( display_field_text ) ||
257 WHEN normalizer.param_count > 0
258 THEN ',' || REPLACE(REPLACE(BTRIM(
259 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
262 ')' INTO display_field_text;
266 NEW.value = display_field_text;
272 CREATE TRIGGER display_field_normalize_tgr
273 BEFORE UPDATE OR INSERT ON metabib.display_entry
274 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
276 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
277 RETURNS TRIGGER AS $$
279 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
284 CREATE TRIGGER display_field_force_nfc_tgr
285 BEFORE UPDATE OR INSERT ON metabib.display_entry
286 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
289 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
294 facet_text := NEW.value;
297 SELECT n.func AS func,
298 n.param_count AS param_count,
300 FROM config.index_normalizer n
301 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
302 WHERE m.field = NEW.field AND m.pos < 0
305 EXECUTE 'SELECT ' || normalizer.func || '(' ||
306 quote_literal( facet_text ) ||
308 WHEN normalizer.param_count > 0
309 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
316 NEW.value = facet_text;
322 CREATE TRIGGER facet_normalize_tgr
323 BEFORE UPDATE OR INSERT ON metabib.facet_entry
324 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
326 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
328 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
333 CREATE TRIGGER facet_force_nfc_tgr
334 BEFORE UPDATE OR INSERT ON metabib.facet_entry
335 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
337 -- DECREMENTING serial starts at -1
338 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
340 CREATE TABLE metabib.uncontrolled_record_attr_value (
341 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
342 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
345 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
347 CREATE VIEW metabib.record_attr_id_map AS
348 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
350 SELECT c.id, c.ctype AS attr, c.code AS value
351 FROM config.coded_value_map c
352 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
354 CREATE VIEW metabib.composite_attr_id_map AS
355 SELECT c.id, c.ctype AS attr, c.code AS value
356 FROM config.coded_value_map c
357 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
359 CREATE VIEW metabib.full_attr_id_map AS
360 SELECT id, attr, value FROM metabib.record_attr_id_map
362 SELECT id, attr, value FROM metabib.composite_attr_id_map;
365 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
366 $_SHARED{metabib_compile_composite_attr_cache} = {}
367 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
368 return exists $_SHARED{metabib_compile_composite_attr_cache};
369 $f$ LANGUAGE PLPERLU;
371 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
372 delete $_SHARED{metabib_compile_composite_attr_cache};
373 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
374 $f$ LANGUAGE PLPERLU;
376 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
377 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
380 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
382 PERFORM metabib.compile_composite_attr_cache_invalidate();
385 $f$ LANGUAGE PLPGSQL;
387 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
389 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
394 my $def = decode_json($json);
396 die("Composite attribute definition not supplied") unless $def;
398 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
400 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
401 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
408 if (ref $d eq 'HASH') { # node or AND
409 if (exists $d->{_attr}) { # it is a node
410 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
411 my $id = spi_exec_prepared(
412 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
416 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
417 return '!' . recurse($$d{_not});
418 } else { # an AND list
419 @list = map { recurse($$d{$_}) } sort keys %$d;
421 } elsif (ref $d eq 'ARRAY') {
423 @list = map { recurse($_) } @$d;
426 @list = grep { defined && $_ ne '' } @list;
428 return '(' . join($j,@list) . ')' if @list;
432 my $val = recurse($def) || undef;
433 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
436 $func$ IMMUTABLE LANGUAGE plperlu;
438 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
439 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
440 $func$ STRICT IMMUTABLE LANGUAGE SQL;
442 CREATE TABLE metabib.record_attr_vector_list (
443 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
444 vlist INT[] NOT NULL -- stores id from ccvm AND murav
446 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
448 /* This becomes a view, and we do sorters differently ...
449 CREATE TABLE metabib.record_attr (
450 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
451 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
453 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
454 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
455 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
459 CREATE TABLE metabib.record_sorter (
460 id BIGSERIAL PRIMARY KEY,
461 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
462 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
465 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
466 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
467 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
470 CREATE TYPE metabib.record_attr_type AS (
475 -- Back-compat view ... we're moving to an INTARRAY world
476 CREATE VIEW metabib.record_attr_flat AS
477 SELECT v.source AS id,
480 FROM metabib.record_attr_vector_list v
481 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
483 SELECT v.source AS id,
486 FROM metabib.record_attr_vector_list v
487 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
489 CREATE VIEW metabib.record_attr AS
490 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
491 FROM metabib.record_attr_flat
492 WHERE attr IS NOT NULL
495 -- Back-back-compat view ... we use to live in an HSTORE world
496 CREATE TYPE metabib.rec_desc_type AS (
514 CREATE VIEW metabib.rec_descriptor AS
517 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
518 FROM metabib.record_attr;
520 -- Use a sequence that matches previous version, for easier upgrading.
521 CREATE SEQUENCE metabib.full_rec_id_seq;
523 CREATE TABLE metabib.real_full_rec (
524 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
525 record BIGINT NOT NULL,
526 tag CHAR(3) NOT NULL,
531 index_vector tsvector NOT NULL
533 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
535 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
536 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
537 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
538 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
539 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
540 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
541 CREATE INDEX metabib_full_rec_isxn_caseless_idx
542 ON metabib.real_full_rec (LOWER(value))
543 WHERE tag IN ('020', '022', '024');
544 -- This next index might fully supplant the one above, but leaving both for now.
545 -- (they are not too large)
546 -- The reason we need this index is to ensure that the query parser always
547 -- prefers this index over the simpler tag/subfield index, as this greatly
548 -- increases Vandelay overlay speed for these identifiers, especially when
549 -- a record has many of these fields (around > 4-6 seems like the cutoff
550 -- on at least one PG9.1 system)
551 -- A similar index could be added for other fields (e.g. 010), but one should
552 -- leave out the LOWER() in all other cases.
553 -- TODO: verify whether we can discard the non tag/subfield/substring version
554 -- above (metabib_full_rec_isxn_caseless_idx)
555 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
556 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
557 WHERE tag IN ('020', '022', '024');
560 CREATE TRIGGER metabib_full_rec_fti_trigger
561 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
562 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
564 CREATE OR REPLACE VIEW metabib.full_rec AS
571 SUBSTRING(value,1,1024) AS value,
573 FROM metabib.real_full_rec;
575 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
576 AS ON INSERT TO metabib.full_rec
578 INSERT INTO metabib.real_full_rec VALUES (
579 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
589 CREATE OR REPLACE RULE metabib_full_rec_update_rule
590 AS ON UPDATE TO metabib.full_rec
592 UPDATE metabib.real_full_rec SET
598 subfield = NEW.subfield,
600 index_vector = NEW.index_vector
603 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
604 AS ON DELETE TO metabib.full_rec
606 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
608 CREATE TABLE metabib.metarecord_source_map (
609 id BIGSERIAL PRIMARY KEY,
610 metarecord BIGINT NOT NULL,
611 source BIGINT NOT NULL
613 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
614 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
616 CREATE TYPE metabib.field_entry_template AS (
629 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
631 bib biblio.record_entry%ROWTYPE;
632 idx config.metabib_field%ROWTYPE;
633 xfrm config.xml_transform%ROWTYPE;
635 transformed_xml TEXT;
637 xml_node_list TEXT[];
644 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
646 authority_link BIGINT;
647 output_row metabib.field_entry_template%ROWTYPE;
650 -- Start out with no field-use bools set
651 output_row.browse_field = FALSE;
652 output_row.facet_field = FALSE;
653 output_row.display_field = FALSE;
654 output_row.search_field = FALSE;
657 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
659 -- Loop over the indexing entries
660 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
662 joiner := COALESCE(idx.joiner, default_joiner);
664 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
666 -- See if we can skip the XSLT ... it's expensive
667 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
668 -- Can't skip the transform
669 IF xfrm.xslt <> '---' THEN
670 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
672 transformed_xml := bib.marc;
675 prev_xfrm := xfrm.name;
678 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
681 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
682 CONTINUE WHEN xml_node !~ E'^\\s*<';
684 -- XXX much of this should be moved into oils_xpath_string...
685 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
686 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
687 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
688 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
692 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
694 IF raw_text IS NOT NULL THEN
695 raw_text := raw_text || joiner;
698 raw_text := COALESCE(raw_text,'') || curr_text;
700 -- autosuggest/metabib.browse_entry
701 IF idx.browse_field THEN
703 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
704 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
706 browse_text := curr_text;
709 IF idx.browse_sort_xpath IS NOT NULL AND
710 idx.browse_sort_xpath <> '' THEN
712 sort_value := oils_xpath_string(
713 idx.browse_sort_xpath, xml_node, joiner,
714 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
717 sort_value := browse_text;
720 output_row.field_class = idx.field_class;
721 output_row.field = idx.id;
722 output_row.source = rid;
723 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
724 output_row.sort_value :=
725 public.naco_normalize(sort_value);
727 output_row.authority := NULL;
729 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
730 authority_text := oils_xpath_string(
731 idx.authority_xpath, xml_node, joiner,
733 ARRAY[xfrm.prefix, xfrm.namespace_uri],
734 ARRAY['xlink','http://www.w3.org/1999/xlink']
738 IF authority_text ~ '^\d+$' THEN
739 authority_link := authority_text::BIGINT;
740 PERFORM * FROM authority.record_entry WHERE id = authority_link;
742 output_row.authority := authority_link;
748 output_row.browse_field = TRUE;
749 -- Returning browse rows with search_field = true for search+browse
750 -- configs allows us to retain granularity of being able to search
751 -- browse fields with "starts with" type operators (for example, for
752 -- titles of songs in music albums)
753 IF idx.search_field THEN
754 output_row.search_field = TRUE;
756 RETURN NEXT output_row;
757 output_row.browse_field = FALSE;
758 output_row.search_field = FALSE;
759 output_row.sort_value := NULL;
762 -- insert raw node text for faceting
763 IF idx.facet_field THEN
765 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
766 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
768 facet_text := curr_text;
771 output_row.field_class = idx.field_class;
772 output_row.field = -1 * idx.id;
773 output_row.source = rid;
774 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
776 output_row.facet_field = TRUE;
777 RETURN NEXT output_row;
778 output_row.facet_field = FALSE;
783 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
785 -- insert combined node text for searching
786 IF idx.search_field THEN
787 output_row.field_class = idx.field_class;
788 output_row.field = idx.id;
789 output_row.source = rid;
790 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
792 output_row.search_field = TRUE;
793 RETURN NEXT output_row;
794 output_row.search_field = FALSE;
797 IF idx.display_field THEN
798 output_row.field_class = idx.field_class;
799 output_row.field = idx.id;
800 output_row.source = rid;
801 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
803 output_row.display_field = TRUE;
804 RETURN NEXT output_row;
805 output_row.display_field = FALSE;
812 $func$ LANGUAGE PLPGSQL;
814 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
816 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
817 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
818 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
819 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
820 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
821 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
822 FROM metabib.keyword_field_entry WHERE source = bib_id;
824 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
825 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
826 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
827 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
828 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
829 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
830 FROM metabib.title_field_entry WHERE source = bib_id;
832 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
833 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
834 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
835 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
836 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
837 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
838 FROM metabib.author_field_entry WHERE source = bib_id;
840 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
841 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
842 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
843 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
844 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
845 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
846 FROM metabib.subject_field_entry WHERE source = bib_id;
848 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
849 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
850 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
851 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
852 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
853 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
854 FROM metabib.series_field_entry WHERE source = bib_id;
856 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
857 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
858 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
859 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
860 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
861 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
862 FROM metabib.identifier_field_entry WHERE source = bib_id;
865 $func$ LANGUAGE PLPGSQL;
867 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
868 bib_id BIGINT, skip_facet BOOL DEFAULT FALSE,
869 skip_display BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE,
870 skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
873 ind_data metabib.field_entry_template%ROWTYPE;
874 mbe_row metabib.browse_entry%ROWTYPE;
883 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
884 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
885 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
886 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
888 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
890 IF NOT b_skip_search THEN
891 FOR fclass IN SELECT * FROM config.metabib_class LOOP
892 -- RAISE NOTICE 'Emptying out %', fclass.name;
893 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
896 IF NOT b_skip_facet THEN
897 DELETE FROM metabib.facet_entry WHERE source = bib_id;
899 IF NOT b_skip_display THEN
900 DELETE FROM metabib.display_entry WHERE source = bib_id;
902 IF NOT b_skip_browse THEN
903 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
907 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
909 -- don't store what has been normalized away
910 CONTINUE WHEN ind_data.value IS NULL;
912 IF ind_data.field < 0 THEN
913 ind_data.field = -1 * ind_data.field;
916 IF ind_data.facet_field AND NOT b_skip_facet THEN
917 INSERT INTO metabib.facet_entry (field, source, value)
918 VALUES (ind_data.field, ind_data.source, ind_data.value);
921 IF ind_data.display_field AND NOT b_skip_display THEN
922 INSERT INTO metabib.display_entry (field, source, value)
923 VALUES (ind_data.field, ind_data.source, ind_data.value);
927 IF ind_data.browse_field AND NOT b_skip_browse THEN
928 -- A caveat about this SELECT: this should take care of replacing
929 -- old mbe rows when data changes, but not if normalization (by
930 -- which I mean specifically the output of
931 -- evergreen.oils_tsearch2()) changes. It may or may not be
932 -- expensive to add a comparison of index_vector to index_vector
933 -- to the WHERE clause below.
935 CONTINUE WHEN ind_data.sort_value IS NULL;
937 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
938 SELECT INTO mbe_row * FROM metabib.browse_entry
939 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
942 mbe_id := mbe_row.id;
944 INSERT INTO metabib.browse_entry
945 ( value, sort_value ) VALUES
946 ( value_prepped, ind_data.sort_value );
948 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
951 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
952 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
955 IF ind_data.search_field AND NOT b_skip_search THEN
956 -- Avoid inserting duplicate rows
957 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
958 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
959 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
960 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
961 IF mbe_id IS NULL THEN
963 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
965 quote_literal(ind_data.field) || $$, $$ ||
966 quote_literal(ind_data.source) || $$, $$ ||
967 quote_literal(ind_data.value) ||
974 IF NOT b_skip_search THEN
975 PERFORM metabib.update_combined_index_vectors(bib_id);
980 $func$ LANGUAGE PLPGSQL;
982 -- default to a space joiner
983 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
984 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
987 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
989 auth authority.record_entry%ROWTYPE;
990 output authority.full_rec%ROWTYPE;
993 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
995 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
996 output.record := rid;
997 output.ind1 := field.ind1;
998 output.ind2 := field.ind2;
999 output.tag := field.tag;
1000 output.subfield := field.subfield;
1001 output.value := field.value;
1006 $func$ LANGUAGE PLPGSQL;
1008 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
1010 bib biblio.record_entry%ROWTYPE;
1011 output metabib.full_rec%ROWTYPE;
1014 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
1016 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
1017 output.record := rid;
1018 output.ind1 := field.ind1;
1019 output.ind2 := field.ind2;
1020 output.tag := field.tag;
1021 output.subfield := field.subfield;
1022 output.value := field.value;
1027 $func$ LANGUAGE PLPGSQL;
1029 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1030 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1031 $func$ LANGUAGE SQL;
1033 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1034 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1035 $func$ LANGUAGE SQL;
1037 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1038 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
1039 $func$ LANGUAGE SQL;
1041 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1042 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1043 $func$ LANGUAGE SQL;
1045 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1058 IF marc IS NULL OR marc = '' THEN
1062 -- First, the count of tags
1063 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1065 -- now go through a bunch of pain to get the record type
1066 IF best_type IS NOT NULL THEN
1067 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1069 IF ldr IS NOT NULL THEN
1070 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1071 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1074 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1075 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1077 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1079 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1081 IF type_map.code IS NOT NULL THEN
1082 IF best_type = type_map.code THEN
1083 qual := qual + qual / 2;
1086 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1087 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1088 IF ff_tag_data = best_lang THEN
1096 -- Now look for some quality metrics
1098 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1103 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1110 $func$ LANGUAGE PLPGSQL;
1112 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1114 idx config.biblio_fingerprint%ROWTYPE;
1115 xfrm config.xml_transform%ROWTYPE;
1117 transformed_xml TEXT;
1119 xml_node_list TEXT[];
1121 output_text TEXT := '';
1124 IF marc IS NULL OR marc = '' THEN
1128 -- Loop over the indexing entries
1129 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1131 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1133 -- See if we can skip the XSLT ... it's expensive
1134 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1135 -- Can't skip the transform
1136 IF xfrm.xslt <> '---' THEN
1137 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1139 transformed_xml := marc;
1142 prev_xfrm := xfrm.name;
1145 raw_text := COALESCE(
1153 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1162 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1163 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1165 IF idx.first_word IS TRUE THEN
1166 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1169 output_text := output_text || idx.name || ':' ||
1170 REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
1174 RETURN BTRIM(output_text);
1177 $func$ LANGUAGE PLPGSQL;
1179 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1180 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1183 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1185 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1189 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1190 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1195 $func$ LANGUAGE PLPGSQL;
1197 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1199 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1201 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1203 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1204 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1208 $func$ LANGUAGE PLPGSQL;
1210 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1217 uri_owner_list TEXT[];
1225 -- Clear any URI mappings and call numbers for this bib.
1226 -- This leads to acn / auricnm inflation, but also enables
1227 -- old acn/auricnm's to go away and for bibs to be deleted.
1228 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1229 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1230 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1233 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1234 IF ARRAY_UPPER(uris,1) > 0 THEN
1235 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1236 -- First we pull info out of the 856
1239 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1240 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1241 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1243 IF uri_label IS NULL THEN
1244 uri_label := uri_href;
1246 CONTINUE WHEN uri_href IS NULL;
1248 -- Get the distinct list of libraries wanting to use
1250 DISTINCT REGEXP_REPLACE(
1252 $re$^.*?\((\w+)\).*$$re$,
1255 ) INTO uri_owner_list
1258 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1263 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1265 -- look for a matching uri
1266 IF uri_use IS NULL THEN
1267 SELECT id INTO uri_id
1269 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1270 ORDER BY id LIMIT 1;
1271 IF NOT FOUND THEN -- create one
1272 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1273 SELECT id INTO uri_id
1275 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1278 SELECT id INTO uri_id
1280 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1281 ORDER BY id LIMIT 1;
1282 IF NOT FOUND THEN -- create one
1283 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1284 SELECT id INTO uri_id
1286 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1290 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1291 uri_owner := uri_owner_list[j];
1293 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1294 CONTINUE WHEN NOT FOUND;
1296 -- we need a call number to link through
1297 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1299 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1300 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1301 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1304 -- now, link them if they're not already
1305 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1307 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1319 $func$ LANGUAGE PLPGSQL;
1321 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1323 new_mapping BOOL := TRUE;
1326 tmp_mr metabib.metarecord%ROWTYPE;
1327 deleted_mrs BIGINT[];
1330 -- We need to make sure we're not a deleted master record of an MR
1331 IF bib_is_deleted THEN
1332 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1334 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1335 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1338 -- Now, are there any more sources on this MR?
1339 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1341 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1342 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1343 DELETE FROM metabib.metarecord WHERE id = old_mr;
1345 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1346 UPDATE metabib.metarecord
1348 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1353 ELSE -- insert or update
1355 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1357 -- Find the first fingerprint-matching
1358 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1359 old_mr := tmp_mr.id;
1360 new_mapping := FALSE;
1362 ELSE -- Our fingerprint changed ... maybe remove the old MR
1363 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1364 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1365 IF source_count = 0 THEN -- No other records
1366 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1367 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1373 -- we found no suitable, preexisting MR based on old source maps
1374 IF old_mr IS NULL THEN
1375 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1377 IF old_mr IS NULL THEN -- nope, create one and grab its id
1378 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1379 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1381 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1382 UPDATE metabib.metarecord
1384 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1388 ELSE -- there was one we already attached to, update its mods cache and master_record
1389 UPDATE metabib.metarecord
1391 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1396 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1401 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1402 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1408 $func$ LANGUAGE PLPGSQL;
1411 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1412 DELETE FROM authority.bib_linking WHERE bib = $1;
1413 INSERT INTO authority.bib_linking (bib, authority)
1416 FROM ( SELECT DISTINCT $1 AS bib,
1417 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1418 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1419 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1420 ) y JOIN authority.record_entry r ON r.id = y.authority;
1422 $func$ LANGUAGE SQL;
1424 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1426 transformed_xml TEXT;
1427 rmarc TEXT := prmarc;
1431 xfrm config.xml_transform%ROWTYPE;
1432 attr_vector INT[] := '{}'::INT[];
1433 attr_vector_tmp INT[];
1434 attr_list TEXT[] := pattr_list;
1436 norm_attr_value TEXT[];
1438 attr_def config.record_attr_definition%ROWTYPE;
1439 ccvm_row config.coded_value_map%ROWTYPE;
1442 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1443 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1446 fixed_field IS NOT NULL OR
1447 xpath IS NOT NULL OR
1448 phys_char_sf IS NOT NULL OR
1455 IF rmarc IS NULL THEN
1456 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1459 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1461 attr_value := '{}'::TEXT[];
1462 norm_attr_value := '{}'::TEXT[];
1463 attr_vector_tmp := '{}'::INT[];
1465 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1467 -- tag+sf attrs only support SVF
1468 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1469 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1470 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1472 AND tag LIKE attr_def.tag
1474 WHEN attr_def.sf_list IS NOT NULL
1475 THEN POSITION(subfield IN attr_def.sf_list) > 0
1482 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1483 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1485 IF NOT attr_def.multi THEN
1486 attr_value := ARRAY[attr_value[1]];
1489 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1491 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1493 -- See if we can skip the XSLT ... it's expensive
1494 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1495 -- Can't skip the transform
1496 IF xfrm.xslt <> '---' THEN
1497 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1499 transformed_xml := rmarc;
1502 prev_xfrm := xfrm.name;
1505 IF xfrm.name IS NULL THEN
1506 -- just grab the marcxml (empty) transform
1507 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1508 prev_xfrm := xfrm.name;
1511 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1512 tmp_val := oils_xpath_string(
1515 COALESCE(attr_def.joiner,' '),
1516 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1518 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1519 attr_value := attr_value || tmp_val;
1520 EXIT WHEN NOT attr_def.multi;
1524 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1525 SELECT ARRAY_AGG(m.value) INTO attr_value
1526 FROM vandelay.marc21_physical_characteristics(rmarc) v
1527 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1528 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1529 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1531 IF NOT attr_def.multi THEN
1532 attr_value := ARRAY[attr_value[1]];
1537 -- apply index normalizers to attr_value
1538 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1540 SELECT n.func AS func,
1541 n.param_count AS param_count,
1543 FROM config.index_normalizer n
1544 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1545 WHERE attr = attr_def.name
1547 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1548 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1550 WHEN normalizer.param_count > 0
1551 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1557 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1558 -- note that a string that contains only blanks
1559 -- is a valid value for some attributes
1560 norm_attr_value := norm_attr_value || tmp_val;
1564 IF attr_def.filter THEN
1565 -- Create unknown uncontrolled values and find the IDs of the values
1566 IF ccvm_row.id IS NULL THEN
1567 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1568 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1569 BEGIN -- use subtransaction to isolate unique constraint violations
1570 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1571 EXCEPTION WHEN unique_violation THEN END;
1575 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1577 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1580 -- Add the new value to the vector
1581 attr_vector := attr_vector || attr_vector_tmp;
1584 IF attr_def.sorter THEN
1585 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1586 IF norm_attr_value[1] IS NOT NULL THEN
1587 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1593 /* We may need to rewrite the vlist to contain
1594 the intersection of new values for requested
1595 attrs and old values for ignored attrs. To
1596 do this, we take the old attr vlist and
1597 subtract any values that are valid for the
1598 requested attrs, and then add back the new
1599 set of attr values. */
1601 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1602 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1603 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1604 attr_vector := attr_vector || attr_vector_tmp;
1607 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1608 -- attributes can depend on earlier ones.
1609 PERFORM metabib.compile_composite_attr_cache_init();
1610 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1612 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1614 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1615 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1617 IF attr_def.filter THEN
1618 IF attr_vector @@ tmp_val::query_int THEN
1619 attr_vector = attr_vector + intset(ccvm_row.id);
1620 EXIT WHEN NOT attr_def.multi;
1624 IF attr_def.sorter THEN
1625 IF attr_vector @@ tmp_val THEN
1626 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1627 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1635 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1636 IF rdeleted THEN -- initial insert OR revivication
1637 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1638 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1640 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1646 $func$ LANGUAGE PLPGSQL;
1649 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1650 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1655 IF NEW.deleted THEN -- If this bib is deleted
1657 PERFORM * FROM config.internal_flag WHERE
1658 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1660 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1662 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1664 IF NOT tmp_bool THEN
1665 -- One needs to keep these around to support searches
1666 -- with the #deleted modifier, so one should turn on the named
1667 -- internal flag for that functionality.
1668 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1671 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1672 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1673 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1674 RETURN NEW; -- and we're done
1677 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1678 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1680 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1685 -- Record authority linking
1686 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1688 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1691 -- Flatten and insert the mfr data
1692 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1694 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1696 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1697 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1699 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1703 -- Gather and insert the field entry data
1704 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1706 -- Located URI magic
1707 IF TG_OP = 'INSERT' THEN
1708 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1710 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1713 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1715 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1719 -- (re)map metarecord-bib linking
1720 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1721 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1723 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1725 ELSE -- we're doing an update, and we're not deleted, remap
1726 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1728 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1734 $func$ LANGUAGE PLPGSQL;
1736 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1742 SELECT n.func AS func,
1743 n.param_count AS param_count,
1745 FROM config.index_normalizer n
1746 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1747 WHERE m.field = mapped_field AND m.pos < 0
1750 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1751 quote_literal( facet_text ) ||
1753 WHEN normalizer.param_count > 0
1754 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1757 ')' INTO facet_text;
1764 $$ LANGUAGE PLPGSQL;
1767 -- This mimics a specific part of QueryParser, turning the first part of a
1768 -- classed search (search_class) into a set of classes and possibly fields.
1769 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1770 -- or "au" or "au|corporate|personal" or anything like that, where the first
1771 -- element of the list you get by separating on the "|" character is either
1772 -- a registered class (config.metabib_class) or an alias
1773 -- (config.metabib_search_alias), and the rest of any such elements are
1774 -- fields (config.metabib_field).
1776 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1777 RETURNS SETOF RECORD AS $func$
1779 search_parts TEXT[];
1781 search_part_count INTEGER;
1783 registered_class config.metabib_class%ROWTYPE;
1784 registered_alias config.metabib_search_alias%ROWTYPE;
1785 registered_field config.metabib_field%ROWTYPE;
1787 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1789 search_part_count := ARRAY_LENGTH(search_parts, 1);
1790 IF search_part_count = 0 THEN
1793 SELECT INTO registered_class
1794 * FROM config.metabib_class WHERE name = search_parts[1];
1796 IF search_part_count < 2 THEN -- all fields
1797 rec := (registered_class.name, NULL::INTEGER);
1801 FOR field_name IN SELECT *
1802 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1803 SELECT INTO registered_field
1804 * FROM config.metabib_field
1805 WHERE name = field_name AND
1806 field_class = registered_class.name;
1808 rec := (registered_class.name, registered_field.id);
1813 -- maybe we have an alias?
1814 SELECT INTO registered_alias
1815 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1819 IF search_part_count < 2 THEN -- return w/e the alias says
1821 registered_alias.field_class, registered_alias.field
1826 FOR field_name IN SELECT *
1827 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1828 SELECT INTO registered_field
1829 * FROM config.metabib_field
1830 WHERE name = field_name AND
1831 field_class = registered_alias.field_class;
1834 registered_alias.field_class,
1845 $func$ LANGUAGE PLPGSQL ROWS 1;
1848 -- Given a string such as a user might type into a search box, prepare
1849 -- two changed variants for TO_TSQUERY(). See
1850 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1851 -- The first variant is normalized to match indexed documents regardless
1852 -- of diacritics. The second variant keeps its diacritics for proper
1853 -- highlighting via TS_HEADLINE().
1855 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1858 orig_ended_in_space BOOLEAN;
1863 orig_ended_in_space := orig ~ E'\\s$';
1865 orig := ARRAY_TO_STRING(
1866 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1869 normalized := public.naco_normalize(orig); -- also trim()s
1870 plain := trim(orig);
1872 IF NOT orig_ended_in_space THEN
1873 plain := plain || ':*';
1874 normalized := normalized || ':*';
1877 plain := ARRAY_TO_STRING(
1878 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1880 normalized := ARRAY_TO_STRING(
1881 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1884 RETURN ARRAY[normalized, plain];
1886 $$ LANGUAGE PLPGSQL;
1888 -- Functions metabib.browse, metabib.staged_browse, and metabib.suggest_browse_entries
1889 -- will be created later, after internal dependencies are resolved.
1891 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1895 temp_vector TEXT := '';
1901 NEW.index_vector = ''::tsvector;
1903 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1905 SELECT n.func AS func,
1906 n.param_count AS param_count,
1908 FROM config.index_normalizer n
1909 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1910 WHERE field = NEW.field AND m.pos < 0
1912 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1913 quote_literal( value ) ||
1915 WHEN normalizer.param_count > 0
1916 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1926 SELECT n.func AS func,
1927 n.param_count AS param_count,
1929 FROM config.index_normalizer n
1930 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1931 WHERE field = NEW.field AND m.pos >= 0
1933 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1934 quote_literal( value ) ||
1936 WHEN normalizer.param_count > 0
1937 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1945 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
1947 value := ARRAY_TO_STRING(
1948 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
1950 value := public.search_normalize(value);
1951 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
1953 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1956 SELECT DISTINCT m.ts_config, m.index_weight
1957 FROM config.metabib_class_ts_map m
1958 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1959 LEFT JOIN config.coded_value_map ccvm ON (
1960 ccvm.ctype IN ('item_lang', 'language') AND
1961 ccvm.code = m.index_lang AND
1962 r.vlist @> intset(ccvm.id)
1964 WHERE m.field_class = TG_ARGV[0]
1966 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
1967 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1969 SELECT DISTINCT m.ts_config, m.index_weight
1970 FROM config.metabib_field_ts_map m
1971 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1972 LEFT JOIN config.coded_value_map ccvm ON (
1973 ccvm.ctype IN ('item_lang', 'language') AND
1974 ccvm.code = m.index_lang AND
1975 r.vlist @> intset(ccvm.id)
1977 WHERE m.metabib_field = NEW.field
1979 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1980 ORDER BY index_weight ASC
1984 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
1985 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
1989 cur_weight = ts_rec.index_weight;
1990 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
1993 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
1995 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2000 $$ LANGUAGE PLPGSQL;
2003 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2004 browse_entry BIGINT,
2009 sources INT, -- visible ones, that is
2010 asources INT, -- visible ones, that is
2011 row_number INT, -- internal use, sort of
2012 accurate BOOL, -- Count in sources field is accurate? Not
2013 -- if we had more than a browse superpage
2014 -- of records to look at.
2015 aaccurate BOOL, -- See previous comment...
2020 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2023 ) RETURNS BIGINT AS $p$
2025 FROM metabib.browse_entry mbe
2026 JOIN metabib.browse_entry_def_map mbedm ON (
2027 mbedm.entry = mbe.id
2028 AND mbedm.def = ANY($1)
2030 WHERE mbe.sort_value >= public.naco_normalize($2)
2031 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2032 $p$ LANGUAGE SQL STABLE;
2034 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2037 ) RETURNS BIGINT AS $p$
2039 FROM metabib.browse_entry mbe
2040 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2041 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2042 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2043 ash.atag = map.authority_field
2044 AND map.metabib_field = ANY($1)
2046 WHERE mbe.sort_value >= public.naco_normalize($2)
2047 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2048 $p$ LANGUAGE SQL STABLE;
2050 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2053 ) RETURNS BIGINT AS $p$
2055 FROM metabib.browse_entry mbe
2056 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2057 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2058 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2059 ash.atag = map.authority_field
2060 AND map.metabib_field = ANY($1)
2062 WHERE mbe.sort_value >= public.naco_normalize($2)
2063 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2064 $p$ LANGUAGE SQL STABLE;
2066 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2069 ) RETURNS BIGINT AS $p$
2070 SELECT id FROM metabib.browse_entry
2072 metabib.browse_bib_pivot($1, $2),
2073 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2075 ORDER BY sort_value, value LIMIT 1;
2076 $p$ LANGUAGE SQL STABLE;
2079 -- This function is used to help clean up facet labels. Due to quirks in
2080 -- MARC parsing, some facet labels may be generated with periods or commas
2081 -- at the end. This will strip a trailing commas off all the time, and
2082 -- periods when they don't look like they are part of initials.
2083 -- Smith, John => no change
2084 -- Smith, John, => Smith, John
2085 -- Smith, John. => Smith, John
2086 -- Public, John Q. => no change
2087 CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
2093 last_char = substring(result from '.$');
2095 IF last_char = ',' THEN
2096 result := substring(result from '^(.*),$');
2098 ELSIF last_char = '.' THEN
2099 IF substring(result from ' \w\.$') IS NULL THEN
2100 result := substring(result from '^(.*)\.$');
2107 $$ language 'plpgsql';