2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.display_entry (
187 id BIGSERIAL PRIMARY KEY,
188 source BIGINT NOT NULL,
193 CREATE INDEX metabib_display_entry_field_idx
194 ON metabib.display_entry (field);
195 CREATE INDEX metabib_display_entry_source_idx
196 ON metabib.display_entry (source);
198 CREATE VIEW metabib.flat_display_entry AS
199 /* One row per display entry fleshed with field info */
207 FROM metabib.display_entry mde
208 JOIN config.metabib_field cmf ON (cmf.id = mde.field)
209 JOIN config.display_field_map cdfm ON (cdfm.field = mde.field)
212 CREATE VIEW metabib.compressed_display_entry AS
213 /* Like flat_display_entry except values are compressed into
214 one row per display_field_map and JSON-ified. */
222 TO_JSON(ARRAY_AGG(value))
226 FROM metabib.flat_display_entry
227 GROUP BY 1, 2, 3, 4, 5
230 CREATE VIEW metabib.wide_display_entry AS
231 /* Table-like view of well-known display fields.
232 This VIEW expands as well-known display fields are added. */
235 COALESCE(mcde_title.value, 'null') AS title,
236 COALESCE(mcde_author.value, 'null') AS author,
237 COALESCE(mcde_subject.value, 'null') AS subject,
238 COALESCE(mcde_creators.value, 'null') AS creators,
239 COALESCE(mcde_isbn.value, 'null') AS isbn
240 -- ensure one row per bre regardless of the presence of display entries
241 FROM biblio.record_entry bre
242 LEFT JOIN metabib.compressed_display_entry mcde_title
243 ON (bre.id = mcde_title.source AND mcde_title.name = 'title')
244 LEFT JOIN metabib.compressed_display_entry mcde_author
245 ON (bre.id = mcde_author.source AND mcde_author.name = 'author')
246 LEFT JOIN metabib.compressed_display_entry mcde_subject
247 ON (bre.id = mcde_subject.source AND mcde_subject.name = 'subject')
248 LEFT JOIN metabib.compressed_display_entry mcde_creators
249 ON (bre.id = mcde_creators.source AND mcde_creators.name = 'creators')
250 LEFT JOIN metabib.compressed_display_entry mcde_isbn
251 ON (bre.id = mcde_isbn.source AND mcde_isbn.name = 'isbn')
255 CREATE TABLE metabib.browse_entry (
256 id BIGSERIAL PRIMARY KEY,
258 index_vector tsvector,
259 sort_value TEXT NOT NULL,
260 UNIQUE(sort_value, value)
264 CREATE INDEX browse_entry_sort_value_idx
265 ON metabib.browse_entry USING BTREE (sort_value);
267 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
268 CREATE TRIGGER metabib_browse_entry_fti_trigger
269 BEFORE INSERT OR UPDATE ON metabib.browse_entry
270 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
273 CREATE TABLE metabib.browse_entry_def_map (
274 id BIGSERIAL PRIMARY KEY,
275 entry BIGINT REFERENCES metabib.browse_entry (id),
276 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
277 source BIGINT REFERENCES biblio.record_entry (id),
278 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
280 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
281 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
282 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
284 CREATE TABLE metabib.browse_entry_simple_heading_map (
285 id BIGSERIAL PRIMARY KEY,
286 entry BIGINT REFERENCES metabib.browse_entry (id),
287 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
289 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
290 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
292 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
293 RETURNS TRIGGER AS $$
296 display_field_text TEXT;
298 display_field_text := NEW.value;
301 SELECT n.func AS func,
302 n.param_count AS param_count,
304 FROM config.index_normalizer n
305 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
306 WHERE m.field = NEW.field AND m.pos < 0
309 EXECUTE 'SELECT ' || normalizer.func || '(' ||
310 quote_literal( display_field_text ) ||
312 WHEN normalizer.param_count > 0
313 THEN ',' || REPLACE(REPLACE(BTRIM(
314 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
317 ')' INTO display_field_text;
321 NEW.value = display_field_text;
327 CREATE TRIGGER display_field_normalize_tgr
328 BEFORE UPDATE OR INSERT ON metabib.display_entry
329 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
331 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
332 RETURNS TRIGGER AS $$
334 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
339 CREATE TRIGGER display_field_force_nfc_tgr
340 BEFORE UPDATE OR INSERT ON metabib.display_entry
341 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
344 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
349 facet_text := NEW.value;
352 SELECT n.func AS func,
353 n.param_count AS param_count,
355 FROM config.index_normalizer n
356 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
357 WHERE m.field = NEW.field AND m.pos < 0
360 EXECUTE 'SELECT ' || normalizer.func || '(' ||
361 quote_literal( facet_text ) ||
363 WHEN normalizer.param_count > 0
364 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
371 NEW.value = facet_text;
377 CREATE TRIGGER facet_normalize_tgr
378 BEFORE UPDATE OR INSERT ON metabib.facet_entry
379 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
381 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
383 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
388 CREATE TRIGGER facet_force_nfc_tgr
389 BEFORE UPDATE OR INSERT ON metabib.facet_entry
390 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
392 -- DECREMENTING serial starts at -1
393 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
395 CREATE TABLE metabib.uncontrolled_record_attr_value (
396 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
397 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
400 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
402 CREATE VIEW metabib.record_attr_id_map AS
403 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
405 SELECT c.id, c.ctype AS attr, c.code AS value
406 FROM config.coded_value_map c
407 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
409 CREATE VIEW metabib.composite_attr_id_map AS
410 SELECT c.id, c.ctype AS attr, c.code AS value
411 FROM config.coded_value_map c
412 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
414 CREATE VIEW metabib.full_attr_id_map AS
415 SELECT id, attr, value FROM metabib.record_attr_id_map
417 SELECT id, attr, value FROM metabib.composite_attr_id_map;
420 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
421 $_SHARED{metabib_compile_composite_attr_cache} = {}
422 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
423 return exists $_SHARED{metabib_compile_composite_attr_cache};
424 $f$ LANGUAGE PLPERLU;
426 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
427 delete $_SHARED{metabib_compile_composite_attr_cache};
428 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
429 $f$ LANGUAGE PLPERLU;
431 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
432 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
435 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
437 PERFORM metabib.compile_composite_attr_cache_invalidate();
440 $f$ LANGUAGE PLPGSQL;
442 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
444 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
449 my $def = decode_json($json);
451 die("Composite attribute definition not supplied") unless $def;
453 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
455 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
456 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
463 if (ref $d eq 'HASH') { # node or AND
464 if (exists $d->{_attr}) { # it is a node
465 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
466 my $id = spi_exec_prepared(
467 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
471 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
472 return '!' . recurse($$d{_not});
473 } else { # an AND list
474 @list = map { recurse($$d{$_}) } sort keys %$d;
476 } elsif (ref $d eq 'ARRAY') {
478 @list = map { recurse($_) } @$d;
481 @list = grep { defined && $_ ne '' } @list;
483 return '(' . join($j,@list) . ')' if @list;
487 my $val = recurse($def) || undef;
488 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
491 $func$ IMMUTABLE LANGUAGE plperlu;
493 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
494 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
495 $func$ STRICT IMMUTABLE LANGUAGE SQL;
497 CREATE TABLE metabib.record_attr_vector_list (
498 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
499 vlist INT[] NOT NULL -- stores id from ccvm AND murav
501 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
503 /* This becomes a view, and we do sorters differently ...
504 CREATE TABLE metabib.record_attr (
505 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
506 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
508 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
509 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
510 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
514 CREATE TABLE metabib.record_sorter (
515 id BIGSERIAL PRIMARY KEY,
516 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
517 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
520 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
521 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
522 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
525 CREATE TYPE metabib.record_attr_type AS (
530 -- Back-compat view ... we're moving to an INTARRAY world
531 CREATE VIEW metabib.record_attr_flat AS
532 SELECT v.source AS id,
535 FROM metabib.record_attr_vector_list v
536 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
538 SELECT v.source AS id,
541 FROM metabib.record_attr_vector_list v
542 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
544 CREATE VIEW metabib.record_attr AS
545 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
546 FROM metabib.record_attr_flat
547 WHERE attr IS NOT NULL
550 -- Back-back-compat view ... we use to live in an HSTORE world
551 CREATE TYPE metabib.rec_desc_type AS (
569 CREATE VIEW metabib.rec_descriptor AS
572 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
573 FROM metabib.record_attr;
575 -- Use a sequence that matches previous version, for easier upgrading.
576 CREATE SEQUENCE metabib.full_rec_id_seq;
578 CREATE TABLE metabib.real_full_rec (
579 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
580 record BIGINT NOT NULL,
581 tag CHAR(3) NOT NULL,
586 index_vector tsvector NOT NULL
588 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
590 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
591 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
592 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
593 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
594 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
595 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
596 CREATE INDEX metabib_full_rec_isxn_caseless_idx
597 ON metabib.real_full_rec (LOWER(value))
598 WHERE tag IN ('020', '022', '024');
599 -- This next index might fully supplant the one above, but leaving both for now.
600 -- (they are not too large)
601 -- The reason we need this index is to ensure that the query parser always
602 -- prefers this index over the simpler tag/subfield index, as this greatly
603 -- increases Vandelay overlay speed for these identifiers, especially when
604 -- a record has many of these fields (around > 4-6 seems like the cutoff
605 -- on at least one PG9.1 system)
606 -- A similar index could be added for other fields (e.g. 010), but one should
607 -- leave out the LOWER() in all other cases.
608 -- TODO: verify whether we can discard the non tag/subfield/substring version
609 -- above (metabib_full_rec_isxn_caseless_idx)
610 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
611 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
612 WHERE tag IN ('020', '022', '024');
615 CREATE TRIGGER metabib_full_rec_fti_trigger
616 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
617 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
619 CREATE OR REPLACE VIEW metabib.full_rec AS
626 SUBSTRING(value,1,1024) AS value,
628 FROM metabib.real_full_rec;
630 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
631 AS ON INSERT TO metabib.full_rec
633 INSERT INTO metabib.real_full_rec VALUES (
634 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
644 CREATE OR REPLACE RULE metabib_full_rec_update_rule
645 AS ON UPDATE TO metabib.full_rec
647 UPDATE metabib.real_full_rec SET
653 subfield = NEW.subfield,
655 index_vector = NEW.index_vector
658 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
659 AS ON DELETE TO metabib.full_rec
661 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
663 CREATE TABLE metabib.metarecord_source_map (
664 id BIGSERIAL PRIMARY KEY,
665 metarecord BIGINT NOT NULL,
666 source BIGINT NOT NULL
668 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
669 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
671 CREATE TYPE metabib.field_entry_template AS (
684 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
689 ) RETURNS SETOF metabib.field_entry_template AS $func$
691 bib biblio.record_entry%ROWTYPE;
692 idx config.metabib_field%ROWTYPE;
693 xfrm config.xml_transform%ROWTYPE;
695 transformed_xml TEXT;
697 xml_node_list TEXT[];
704 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
706 authority_link BIGINT;
707 output_row metabib.field_entry_template%ROWTYPE;
711 -- Start out with no field-use bools set
712 output_row.browse_field = FALSE;
713 output_row.facet_field = FALSE;
714 output_row.display_field = FALSE;
715 output_row.search_field = FALSE;
718 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
720 -- Loop over the indexing entries
721 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
723 process_idx := FALSE;
724 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
725 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
726 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
727 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
728 CONTINUE WHEN process_idx = FALSE;
730 joiner := COALESCE(idx.joiner, default_joiner);
732 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
734 -- See if we can skip the XSLT ... it's expensive
735 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
736 -- Can't skip the transform
737 IF xfrm.xslt <> '---' THEN
738 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
740 transformed_xml := bib.marc;
743 prev_xfrm := xfrm.name;
746 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
749 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
750 CONTINUE WHEN xml_node !~ E'^\\s*<';
752 -- XXX much of this should be moved into oils_xpath_string...
753 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
754 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
755 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
756 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
760 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
762 IF raw_text IS NOT NULL THEN
763 raw_text := raw_text || joiner;
766 raw_text := COALESCE(raw_text,'') || curr_text;
768 -- autosuggest/metabib.browse_entry
769 IF idx.browse_field THEN
771 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
772 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
774 browse_text := curr_text;
777 IF idx.browse_sort_xpath IS NOT NULL AND
778 idx.browse_sort_xpath <> '' THEN
780 sort_value := oils_xpath_string(
781 idx.browse_sort_xpath, xml_node, joiner,
782 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
785 sort_value := browse_text;
788 output_row.field_class = idx.field_class;
789 output_row.field = idx.id;
790 output_row.source = rid;
791 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
792 output_row.sort_value :=
793 public.naco_normalize(sort_value);
795 output_row.authority := NULL;
797 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
798 authority_text := oils_xpath_string(
799 idx.authority_xpath, xml_node, joiner,
801 ARRAY[xfrm.prefix, xfrm.namespace_uri],
802 ARRAY['xlink','http://www.w3.org/1999/xlink']
806 IF authority_text ~ '^\d+$' THEN
807 authority_link := authority_text::BIGINT;
808 PERFORM * FROM authority.record_entry WHERE id = authority_link;
810 output_row.authority := authority_link;
816 output_row.browse_field = TRUE;
817 -- Returning browse rows with search_field = true for search+browse
818 -- configs allows us to retain granularity of being able to search
819 -- browse fields with "starts with" type operators (for example, for
820 -- titles of songs in music albums)
821 IF idx.search_field THEN
822 output_row.search_field = TRUE;
824 RETURN NEXT output_row;
825 output_row.browse_field = FALSE;
826 output_row.search_field = FALSE;
827 output_row.sort_value := NULL;
830 -- insert raw node text for faceting
831 IF idx.facet_field THEN
833 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
834 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
836 facet_text := curr_text;
839 output_row.field_class = idx.field_class;
840 output_row.field = -1 * idx.id;
841 output_row.source = rid;
842 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
844 output_row.facet_field = TRUE;
845 RETURN NEXT output_row;
846 output_row.facet_field = FALSE;
849 -- insert raw node text for display
850 IF idx.display_field THEN
852 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
853 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
855 display_text := curr_text;
858 output_row.field_class = idx.field_class;
859 output_row.field = -1 * idx.id;
860 output_row.source = rid;
861 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
863 output_row.display_field = TRUE;
864 RETURN NEXT output_row;
865 output_row.display_field = FALSE;
870 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
872 -- insert combined node text for searching
873 IF idx.search_field THEN
874 output_row.field_class = idx.field_class;
875 output_row.field = idx.id;
876 output_row.source = rid;
877 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
879 output_row.search_field = TRUE;
880 RETURN NEXT output_row;
881 output_row.search_field = FALSE;
888 $func$ LANGUAGE PLPGSQL;
890 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
892 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
893 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
894 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
895 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
896 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
897 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
898 FROM metabib.keyword_field_entry WHERE source = bib_id;
900 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
901 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
902 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
903 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
904 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
905 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
906 FROM metabib.title_field_entry WHERE source = bib_id;
908 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
909 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
910 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
911 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
912 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
913 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
914 FROM metabib.author_field_entry WHERE source = bib_id;
916 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
917 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
918 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
919 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
920 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
921 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
922 FROM metabib.subject_field_entry WHERE source = bib_id;
924 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
925 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
926 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
927 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
928 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
929 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
930 FROM metabib.series_field_entry WHERE source = bib_id;
932 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
933 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
934 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
935 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
936 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
937 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
938 FROM metabib.identifier_field_entry WHERE source = bib_id;
941 $func$ LANGUAGE PLPGSQL;
943 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
945 skip_facet BOOL DEFAULT FALSE,
946 skip_display BOOL DEFAULT FALSE,
947 skip_browse BOOL DEFAULT FALSE,
948 skip_search BOOL DEFAULT FALSE,
949 only_fields INT[] DEFAULT '{}'::INT[]
950 ) RETURNS VOID AS $func$
953 ind_data metabib.field_entry_template%ROWTYPE;
954 mbe_row metabib.browse_entry%ROWTYPE;
961 field_list INT[] := only_fields;
962 field_types TEXT[] := '{}'::TEXT[];
965 IF field_list = '{}'::INT[] THEN
966 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
969 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
970 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
971 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
972 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
974 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
975 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
976 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
977 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
979 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
981 IF NOT b_skip_search THEN
982 FOR fclass IN SELECT * FROM config.metabib_class LOOP
983 -- RAISE NOTICE 'Emptying out %', fclass.name;
984 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
987 IF NOT b_skip_facet THEN
988 DELETE FROM metabib.facet_entry WHERE source = bib_id;
990 IF NOT b_skip_display THEN
991 DELETE FROM metabib.display_entry WHERE source = bib_id;
993 IF NOT b_skip_browse THEN
994 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
998 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
1000 -- don't store what has been normalized away
1001 CONTINUE WHEN ind_data.value IS NULL;
1003 IF ind_data.field < 0 THEN
1004 ind_data.field = -1 * ind_data.field;
1007 IF ind_data.facet_field AND NOT b_skip_facet THEN
1008 INSERT INTO metabib.facet_entry (field, source, value)
1009 VALUES (ind_data.field, ind_data.source, ind_data.value);
1012 IF ind_data.display_field AND NOT b_skip_display THEN
1013 INSERT INTO metabib.display_entry (field, source, value)
1014 VALUES (ind_data.field, ind_data.source, ind_data.value);
1018 IF ind_data.browse_field AND NOT b_skip_browse THEN
1019 -- A caveat about this SELECT: this should take care of replacing
1020 -- old mbe rows when data changes, but not if normalization (by
1021 -- which I mean specifically the output of
1022 -- evergreen.oils_tsearch2()) changes. It may or may not be
1023 -- expensive to add a comparison of index_vector to index_vector
1024 -- to the WHERE clause below.
1026 CONTINUE WHEN ind_data.sort_value IS NULL;
1028 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
1029 SELECT INTO mbe_row * FROM metabib.browse_entry
1030 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
1033 mbe_id := mbe_row.id;
1035 INSERT INTO metabib.browse_entry
1036 ( value, sort_value ) VALUES
1037 ( value_prepped, ind_data.sort_value );
1039 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
1042 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
1043 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
1046 IF ind_data.search_field AND NOT b_skip_search THEN
1047 -- Avoid inserting duplicate rows
1048 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
1049 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
1050 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
1051 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
1052 IF mbe_id IS NULL THEN
1054 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
1056 quote_literal(ind_data.field) || $$, $$ ||
1057 quote_literal(ind_data.source) || $$, $$ ||
1058 quote_literal(ind_data.value) ||
1065 IF NOT b_skip_search THEN
1066 PERFORM metabib.update_combined_index_vectors(bib_id);
1071 $func$ LANGUAGE PLPGSQL;
1073 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
1075 auth authority.record_entry%ROWTYPE;
1076 output authority.full_rec%ROWTYPE;
1079 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
1081 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
1082 output.record := rid;
1083 output.ind1 := field.ind1;
1084 output.ind2 := field.ind2;
1085 output.tag := field.tag;
1086 output.subfield := field.subfield;
1087 output.value := field.value;
1092 $func$ LANGUAGE PLPGSQL;
1094 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
1096 bib biblio.record_entry%ROWTYPE;
1097 output metabib.full_rec%ROWTYPE;
1100 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
1102 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
1103 output.record := rid;
1104 output.ind1 := field.ind1;
1105 output.ind2 := field.ind2;
1106 output.tag := field.tag;
1107 output.subfield := field.subfield;
1108 output.value := field.value;
1113 $func$ LANGUAGE PLPGSQL;
1115 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1116 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1117 $func$ LANGUAGE SQL;
1119 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1120 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1121 $func$ LANGUAGE SQL;
1123 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1124 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
1125 $func$ LANGUAGE SQL;
1127 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1128 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1129 $func$ LANGUAGE SQL;
1131 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1144 IF marc IS NULL OR marc = '' THEN
1148 -- First, the count of tags
1149 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1151 -- now go through a bunch of pain to get the record type
1152 IF best_type IS NOT NULL THEN
1153 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1155 IF ldr IS NOT NULL THEN
1156 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1157 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1160 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1161 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1163 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1165 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1167 IF type_map.code IS NOT NULL THEN
1168 IF best_type = type_map.code THEN
1169 qual := qual + qual / 2;
1172 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1173 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1174 IF ff_tag_data = best_lang THEN
1182 -- Now look for some quality metrics
1184 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1189 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1196 $func$ LANGUAGE PLPGSQL;
1198 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1200 idx config.biblio_fingerprint%ROWTYPE;
1201 xfrm config.xml_transform%ROWTYPE;
1203 transformed_xml TEXT;
1205 xml_node_list TEXT[];
1207 output_text TEXT := '';
1210 IF marc IS NULL OR marc = '' THEN
1214 -- Loop over the indexing entries
1215 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1217 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1219 -- See if we can skip the XSLT ... it's expensive
1220 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1221 -- Can't skip the transform
1222 IF xfrm.xslt <> '---' THEN
1223 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1225 transformed_xml := marc;
1228 prev_xfrm := xfrm.name;
1231 raw_text := COALESCE(
1239 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1248 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1249 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1251 IF idx.first_word IS TRUE THEN
1252 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1255 output_text := output_text || idx.name || ':' ||
1256 REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
1260 RETURN BTRIM(output_text);
1263 $func$ LANGUAGE PLPGSQL;
1265 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1266 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1269 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1271 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1275 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1276 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1281 $func$ LANGUAGE PLPGSQL;
1283 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1285 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1287 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1289 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1290 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1294 $func$ LANGUAGE PLPGSQL;
1296 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1303 uri_owner_list TEXT[];
1311 -- Clear any URI mappings and call numbers for this bib.
1312 -- This leads to acn / auricnm inflation, but also enables
1313 -- old acn/auricnm's to go away and for bibs to be deleted.
1314 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1315 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1316 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1319 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1320 IF ARRAY_UPPER(uris,1) > 0 THEN
1321 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1322 -- First we pull info out of the 856
1325 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1326 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1327 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1329 IF uri_label IS NULL THEN
1330 uri_label := uri_href;
1332 CONTINUE WHEN uri_href IS NULL;
1334 -- Get the distinct list of libraries wanting to use
1336 DISTINCT REGEXP_REPLACE(
1338 $re$^.*?\((\w+)\).*$$re$,
1341 ) INTO uri_owner_list
1344 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1349 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1351 -- look for a matching uri
1352 IF uri_use IS NULL THEN
1353 SELECT id INTO uri_id
1355 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1356 ORDER BY id LIMIT 1;
1357 IF NOT FOUND THEN -- create one
1358 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1359 SELECT id INTO uri_id
1361 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1364 SELECT id INTO uri_id
1366 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1367 ORDER BY id LIMIT 1;
1368 IF NOT FOUND THEN -- create one
1369 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1370 SELECT id INTO uri_id
1372 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1376 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1377 uri_owner := uri_owner_list[j];
1379 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1380 CONTINUE WHEN NOT FOUND;
1382 -- we need a call number to link through
1383 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1385 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1386 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1387 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1390 -- now, link them if they're not already
1391 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1393 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1405 $func$ LANGUAGE PLPGSQL;
1407 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1409 new_mapping BOOL := TRUE;
1412 tmp_mr metabib.metarecord%ROWTYPE;
1413 deleted_mrs BIGINT[];
1416 -- We need to make sure we're not a deleted master record of an MR
1417 IF bib_is_deleted THEN
1418 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1420 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1421 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1424 -- Now, are there any more sources on this MR?
1425 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1427 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1428 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1429 DELETE FROM metabib.metarecord WHERE id = old_mr;
1431 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1432 UPDATE metabib.metarecord
1434 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1439 ELSE -- insert or update
1441 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1443 -- Find the first fingerprint-matching
1444 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1445 old_mr := tmp_mr.id;
1446 new_mapping := FALSE;
1448 ELSE -- Our fingerprint changed ... maybe remove the old MR
1449 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1450 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1451 IF source_count = 0 THEN -- No other records
1452 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1453 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1459 -- we found no suitable, preexisting MR based on old source maps
1460 IF old_mr IS NULL THEN
1461 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1463 IF old_mr IS NULL THEN -- nope, create one and grab its id
1464 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1465 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1467 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1468 UPDATE metabib.metarecord
1470 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1474 ELSE -- there was one we already attached to, update its mods cache and master_record
1475 UPDATE metabib.metarecord
1477 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1482 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1487 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1488 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1494 $func$ LANGUAGE PLPGSQL;
1497 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1498 DELETE FROM authority.bib_linking WHERE bib = $1;
1499 INSERT INTO authority.bib_linking (bib, authority)
1502 FROM ( SELECT DISTINCT $1 AS bib,
1503 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1504 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1505 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1506 ) y JOIN authority.record_entry r ON r.id = y.authority;
1508 $func$ LANGUAGE SQL;
1510 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1512 transformed_xml TEXT;
1513 rmarc TEXT := prmarc;
1517 xfrm config.xml_transform%ROWTYPE;
1518 attr_vector INT[] := '{}'::INT[];
1519 attr_vector_tmp INT[];
1520 attr_list TEXT[] := pattr_list;
1522 norm_attr_value TEXT[];
1525 attr_def config.record_attr_definition%ROWTYPE;
1526 ccvm_row config.coded_value_map%ROWTYPE;
1530 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1531 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1534 fixed_field IS NOT NULL OR
1535 xpath IS NOT NULL OR
1536 phys_char_sf IS NOT NULL OR
1543 IF rmarc IS NULL THEN
1544 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1547 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1549 jump_past := FALSE; -- This gets set when we are non-multi and have found something
1550 attr_value := '{}'::TEXT[];
1551 norm_attr_value := '{}'::TEXT[];
1552 attr_vector_tmp := '{}'::INT[];
1554 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1556 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1557 SELECT ARRAY_AGG(value) INTO attr_value
1558 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1560 AND tag LIKE attr_def.tag
1562 WHEN attr_def.sf_list IS NOT NULL
1563 THEN POSITION(subfield IN attr_def.sf_list) > 0
1569 IF NOT attr_def.multi THEN
1570 attr_value := ARRAY[ARRAY_TO_STRING(attr_value, COALESCE(attr_def.joiner,' '))];
1575 IF NOT jump_past AND attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1576 attr_value := attr_value || vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1578 IF NOT attr_def.multi THEN
1579 attr_value := ARRAY[attr_value[1]];
1584 IF NOT jump_past AND attr_def.xpath IS NOT NULL THEN -- and xpath expression
1586 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1588 -- See if we can skip the XSLT ... it's expensive
1589 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1590 -- Can't skip the transform
1591 IF xfrm.xslt <> '---' THEN
1592 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1594 transformed_xml := rmarc;
1597 prev_xfrm := xfrm.name;
1600 IF xfrm.name IS NULL THEN
1601 -- just grab the marcxml (empty) transform
1602 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1603 prev_xfrm := xfrm.name;
1606 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1607 tmp_val := oils_xpath_string(
1610 COALESCE(attr_def.joiner,' '),
1611 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1613 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1614 attr_value := attr_value || tmp_val;
1615 EXIT WHEN NOT attr_def.multi;
1620 IF NOT jump_past AND attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1621 SELECT ARRAY_AGG(m.value) INTO tmp_array
1622 FROM vandelay.marc21_physical_characteristics(rmarc) v
1623 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1624 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1625 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1627 attr_value := attr_value || tmp_array;
1629 IF NOT attr_def.multi THEN
1630 attr_value := ARRAY[attr_value[1]];
1635 -- apply index normalizers to attr_value
1636 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1638 SELECT n.func AS func,
1639 n.param_count AS param_count,
1641 FROM config.index_normalizer n
1642 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1643 WHERE attr = attr_def.name
1645 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1646 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1648 WHEN normalizer.param_count > 0
1649 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1655 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1656 -- note that a string that contains only blanks
1657 -- is a valid value for some attributes
1658 norm_attr_value := norm_attr_value || tmp_val;
1662 IF attr_def.filter THEN
1663 -- Create unknown uncontrolled values and find the IDs of the values
1664 IF ccvm_row.id IS NULL THEN
1665 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1666 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1667 BEGIN -- use subtransaction to isolate unique constraint violations
1668 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1669 EXCEPTION WHEN unique_violation THEN END;
1673 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1675 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1678 -- Add the new value to the vector
1679 attr_vector := attr_vector || attr_vector_tmp;
1682 IF attr_def.sorter THEN
1683 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1684 IF norm_attr_value[1] IS NOT NULL THEN
1685 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1691 /* We may need to rewrite the vlist to contain
1692 the intersection of new values for requested
1693 attrs and old values for ignored attrs. To
1694 do this, we take the old attr vlist and
1695 subtract any values that are valid for the
1696 requested attrs, and then add back the new
1697 set of attr values. */
1699 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1700 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1701 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1702 attr_vector := attr_vector || attr_vector_tmp;
1705 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1706 -- attributes can depend on earlier ones.
1707 PERFORM metabib.compile_composite_attr_cache_init();
1708 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1710 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1712 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1713 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1715 IF attr_def.filter THEN
1716 IF attr_vector @@ tmp_val::query_int THEN
1717 attr_vector = attr_vector + intset(ccvm_row.id);
1718 EXIT WHEN NOT attr_def.multi;
1722 IF attr_def.sorter THEN
1723 IF attr_vector @@ tmp_val THEN
1724 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1725 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1733 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1734 IF rdeleted THEN -- initial insert OR revivication
1735 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1736 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1738 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1744 $func$ LANGUAGE PLPGSQL;
1747 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1748 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1753 IF NEW.deleted THEN -- If this bib is deleted
1755 PERFORM * FROM config.internal_flag WHERE
1756 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1758 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1760 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1762 IF NOT tmp_bool THEN
1763 -- One needs to keep these around to support searches
1764 -- with the #deleted modifier, so one should turn on the named
1765 -- internal flag for that functionality.
1766 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1769 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1770 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1771 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1772 RETURN NEW; -- and we're done
1775 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1776 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1778 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1783 -- Record authority linking
1784 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1786 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1789 -- Flatten and insert the mfr data
1790 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1792 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1794 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1795 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1797 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1801 -- Gather and insert the field entry data
1802 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1804 -- Located URI magic
1805 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1806 IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF;
1808 -- (re)map metarecord-bib linking
1809 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1810 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1812 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1814 ELSE -- we're doing an update, and we're not deleted, remap
1815 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1817 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1823 $func$ LANGUAGE PLPGSQL;
1825 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1831 SELECT n.func AS func,
1832 n.param_count AS param_count,
1834 FROM config.index_normalizer n
1835 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1836 WHERE m.field = mapped_field AND m.pos < 0
1839 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1840 quote_literal( facet_text ) ||
1842 WHEN normalizer.param_count > 0
1843 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1846 ')' INTO facet_text;
1853 $$ LANGUAGE PLPGSQL;
1856 -- This mimics a specific part of QueryParser, turning the first part of a
1857 -- classed search (search_class) into a set of classes and possibly fields.
1858 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1859 -- or "au" or "au|corporate|personal" or anything like that, where the first
1860 -- element of the list you get by separating on the "|" character is either
1861 -- a registered class (config.metabib_class) or an alias
1862 -- (config.metabib_search_alias), and the rest of any such elements are
1863 -- fields (config.metabib_field).
1865 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1866 RETURNS SETOF RECORD AS $func$
1868 search_parts TEXT[];
1870 search_part_count INTEGER;
1872 registered_class config.metabib_class%ROWTYPE;
1873 registered_alias config.metabib_search_alias%ROWTYPE;
1874 registered_field config.metabib_field%ROWTYPE;
1876 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1878 search_part_count := ARRAY_LENGTH(search_parts, 1);
1879 IF search_part_count = 0 THEN
1882 SELECT INTO registered_class
1883 * FROM config.metabib_class WHERE name = search_parts[1];
1885 IF search_part_count < 2 THEN -- all fields
1886 rec := (registered_class.name, NULL::INTEGER);
1890 FOR field_name IN SELECT *
1891 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1892 SELECT INTO registered_field
1893 * FROM config.metabib_field
1894 WHERE name = field_name AND
1895 field_class = registered_class.name;
1897 rec := (registered_class.name, registered_field.id);
1902 -- maybe we have an alias?
1903 SELECT INTO registered_alias
1904 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1908 IF search_part_count < 2 THEN -- return w/e the alias says
1910 registered_alias.field_class, registered_alias.field
1915 FOR field_name IN SELECT *
1916 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1917 SELECT INTO registered_field
1918 * FROM config.metabib_field
1919 WHERE name = field_name AND
1920 field_class = registered_alias.field_class;
1923 registered_alias.field_class,
1934 $func$ LANGUAGE PLPGSQL ROWS 1;
1937 -- Given a string such as a user might type into a search box, prepare
1938 -- two changed variants for TO_TSQUERY(). See
1939 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1940 -- The first variant is normalized to match indexed documents regardless
1941 -- of diacritics. The second variant keeps its diacritics for proper
1942 -- highlighting via TS_HEADLINE().
1944 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1947 orig_ended_in_space BOOLEAN;
1952 orig_ended_in_space := orig ~ E'\\s$';
1954 orig := ARRAY_TO_STRING(
1955 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1958 normalized := public.naco_normalize(orig); -- also trim()s
1959 plain := trim(orig);
1961 IF NOT orig_ended_in_space THEN
1962 plain := plain || ':*';
1963 normalized := normalized || ':*';
1966 plain := ARRAY_TO_STRING(
1967 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1969 normalized := ARRAY_TO_STRING(
1970 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1973 RETURN ARRAY[normalized, plain];
1975 $$ LANGUAGE PLPGSQL;
1977 -- Functions metabib.browse, metabib.staged_browse, and metabib.suggest_browse_entries
1978 -- will be created later, after internal dependencies are resolved.
1980 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1984 temp_vector TEXT := '';
1990 NEW.index_vector = ''::tsvector;
1992 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1994 SELECT n.func AS func,
1995 n.param_count AS param_count,
1997 FROM config.index_normalizer n
1998 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1999 WHERE field = NEW.field AND m.pos < 0
2001 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2002 quote_literal( value ) ||
2004 WHEN normalizer.param_count > 0
2005 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2015 SELECT n.func AS func,
2016 n.param_count AS param_count,
2018 FROM config.index_normalizer n
2019 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
2020 WHERE field = NEW.field AND m.pos >= 0
2022 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2023 quote_literal( value ) ||
2025 WHEN normalizer.param_count > 0
2026 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2034 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
2036 value := ARRAY_TO_STRING(
2037 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
2039 value := public.search_normalize(value);
2040 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2042 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
2045 SELECT DISTINCT m.ts_config, m.index_weight
2046 FROM config.metabib_class_ts_map m
2047 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
2048 LEFT JOIN config.coded_value_map ccvm ON (
2049 ccvm.ctype IN ('item_lang', 'language') AND
2050 ccvm.code = m.index_lang AND
2051 r.vlist @> intset(ccvm.id)
2053 WHERE m.field_class = TG_ARGV[0]
2055 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
2056 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2058 SELECT DISTINCT m.ts_config, m.index_weight
2059 FROM config.metabib_field_ts_map m
2060 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
2061 LEFT JOIN config.coded_value_map ccvm ON (
2062 ccvm.ctype IN ('item_lang', 'language') AND
2063 ccvm.code = m.index_lang AND
2064 r.vlist @> intset(ccvm.id)
2066 WHERE m.metabib_field = NEW.field
2068 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2069 ORDER BY index_weight ASC
2073 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2074 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2078 cur_weight = ts_rec.index_weight;
2079 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2082 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2084 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2089 $$ LANGUAGE PLPGSQL;
2092 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2093 browse_entry BIGINT,
2098 sources INT, -- visible ones, that is
2099 asources INT, -- visible ones, that is
2100 row_number INT, -- internal use, sort of
2101 accurate BOOL, -- Count in sources field is accurate? Not
2102 -- if we had more than a browse superpage
2103 -- of records to look at.
2104 aaccurate BOOL, -- See previous comment...
2109 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2112 ) RETURNS BIGINT AS $p$
2114 FROM metabib.browse_entry mbe
2115 JOIN metabib.browse_entry_def_map mbedm ON (
2116 mbedm.entry = mbe.id
2117 AND mbedm.def = ANY($1)
2119 WHERE mbe.sort_value >= public.naco_normalize($2)
2120 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2121 $p$ LANGUAGE SQL STABLE;
2123 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2126 ) RETURNS BIGINT AS $p$
2128 FROM metabib.browse_entry mbe
2129 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2130 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2131 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2132 ash.atag = map.authority_field
2133 AND map.metabib_field = ANY($1)
2135 WHERE mbe.sort_value >= public.naco_normalize($2)
2136 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2137 $p$ LANGUAGE SQL STABLE;
2139 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2142 ) RETURNS BIGINT AS $p$
2144 FROM metabib.browse_entry mbe
2145 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2146 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2147 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2148 ash.atag = map.authority_field
2149 AND map.metabib_field = ANY($1)
2151 WHERE mbe.sort_value >= public.naco_normalize($2)
2152 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2153 $p$ LANGUAGE SQL STABLE;
2155 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2158 ) RETURNS BIGINT AS $p$
2159 SELECT id FROM metabib.browse_entry
2161 metabib.browse_bib_pivot($1, $2),
2162 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2164 ORDER BY sort_value, value LIMIT 1;
2165 $p$ LANGUAGE SQL STABLE;
2168 -- This function is used to help clean up facet labels. Due to quirks in
2169 -- MARC parsing, some facet labels may be generated with periods or commas
2170 -- at the end. This will strip a trailing commas off all the time, and
2171 -- periods when they don't look like they are part of initials.
2172 -- Smith, John => no change
2173 -- Smith, John, => Smith, John
2174 -- Smith, John. => Smith, John
2175 -- Public, John Q. => no change
2176 CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
2182 last_char = substring(result from '.$');
2184 IF last_char = ',' THEN
2185 result := substring(result from '^(.*),$');
2187 ELSIF last_char = '.' THEN
2188 IF substring(result from ' \w\.$') IS NULL THEN
2189 result := substring(result from '^(.*)\.$');
2196 $$ language 'plpgsql';