2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE VIEW metabib.combined_all_field_entry AS
177 SELECT * FROM metabib.combined_title_field_entry
179 SELECT * FROM metabib.combined_author_field_entry
181 SELECT * FROM metabib.combined_subject_field_entry
183 SELECT * FROM metabib.combined_keyword_field_entry
185 SELECT * FROM metabib.combined_identifier_field_entry
187 SELECT * FROM metabib.combined_series_field_entry;
189 CREATE TABLE metabib.facet_entry (
190 id BIGSERIAL PRIMARY KEY,
191 source BIGINT NOT NULL,
195 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
196 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
197 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
199 CREATE TABLE metabib.display_entry (
200 id BIGSERIAL PRIMARY KEY,
201 source BIGINT NOT NULL,
206 CREATE INDEX metabib_display_entry_field_idx
207 ON metabib.display_entry (field);
208 CREATE INDEX metabib_display_entry_source_idx
209 ON metabib.display_entry (source);
211 CREATE VIEW metabib.flat_display_entry AS
212 /* One row per display entry fleshed with field info */
220 FROM metabib.display_entry mde
221 JOIN config.metabib_field cmf ON (cmf.id = mde.field)
222 JOIN config.display_field_map cdfm ON (cdfm.field = mde.field)
225 CREATE VIEW metabib.compressed_display_entry AS
226 /* Like flat_display_entry except values are compressed into
227 one row per display_field_map and JSON-ified. */
235 TO_JSON(ARRAY_AGG(value))
239 FROM metabib.flat_display_entry
240 GROUP BY 1, 2, 3, 4, 5
243 CREATE VIEW metabib.wide_display_entry AS
244 /* Table-like view of well-known display fields.
245 This VIEW expands as well-known display fields are added. */
248 COALESCE(mcde_title.value, 'null') AS title,
249 COALESCE(mcde_author.value, 'null') AS author,
250 COALESCE(mcde_subject_geographic.value, 'null') AS subject_geographic,
251 COALESCE(mcde_subject_name.value, 'null') AS subject_name,
252 COALESCE(mcde_subject_temporal.value, 'null') AS subject_temporal,
253 COALESCE(mcde_subject_topic.value, 'null') AS subject_topic,
254 COALESCE(mcde_creators.value, 'null') AS creators,
255 COALESCE(mcde_isbn.value, 'null') AS isbn,
256 COALESCE(mcde_issn.value, 'null') AS issn,
257 COALESCE(mcde_upc.value, 'null') AS upc,
258 COALESCE(mcde_tcn.value, 'null') AS tcn,
259 COALESCE(mcde_edition.value, 'null') AS edition,
260 COALESCE(mcde_physical_description.value, 'null') AS physical_description,
261 COALESCE(mcde_publisher.value, 'null') AS publisher,
262 COALESCE(mcde_series_title.value, 'null') AS series_title,
263 COALESCE(mcde_abstract.value, 'null') AS abstract,
264 COALESCE(mcde_toc.value, 'null') AS toc,
265 COALESCE(mcde_pubdate.value, 'null') AS pubdate,
266 COALESCE(mcde_type_of_resource.value, 'null') AS type_of_resource
267 FROM biblio.record_entry bre
268 LEFT JOIN metabib.compressed_display_entry mcde_title
269 ON (bre.id = mcde_title.source AND mcde_title.name = 'title')
270 LEFT JOIN metabib.compressed_display_entry mcde_author
271 ON (bre.id = mcde_author.source AND mcde_author.name = 'author')
272 LEFT JOIN metabib.compressed_display_entry mcde_subject
273 ON (bre.id = mcde_subject.source AND mcde_subject.name = 'subject')
274 LEFT JOIN metabib.compressed_display_entry mcde_subject_geographic
275 ON (bre.id = mcde_subject_geographic.source
276 AND mcde_subject_geographic.name = 'subject_geographic')
277 LEFT JOIN metabib.compressed_display_entry mcde_subject_name
278 ON (bre.id = mcde_subject_name.source
279 AND mcde_subject_name.name = 'subject_name')
280 LEFT JOIN metabib.compressed_display_entry mcde_subject_temporal
281 ON (bre.id = mcde_subject_temporal.source
282 AND mcde_subject_temporal.name = 'subject_temporal')
283 LEFT JOIN metabib.compressed_display_entry mcde_subject_topic
284 ON (bre.id = mcde_subject_topic.source
285 AND mcde_subject_topic.name = 'subject_topic')
286 LEFT JOIN metabib.compressed_display_entry mcde_creators
287 ON (bre.id = mcde_creators.source AND mcde_creators.name = 'creators')
288 LEFT JOIN metabib.compressed_display_entry mcde_isbn
289 ON (bre.id = mcde_isbn.source AND mcde_isbn.name = 'isbn')
290 LEFT JOIN metabib.compressed_display_entry mcde_issn
291 ON (bre.id = mcde_issn.source AND mcde_issn.name = 'issn')
292 LEFT JOIN metabib.compressed_display_entry mcde_upc
293 ON (bre.id = mcde_upc.source AND mcde_upc.name = 'upc')
294 LEFT JOIN metabib.compressed_display_entry mcde_tcn
295 ON (bre.id = mcde_tcn.source AND mcde_tcn.name = 'tcn')
296 LEFT JOIN metabib.compressed_display_entry mcde_edition
297 ON (bre.id = mcde_edition.source AND mcde_edition.name = 'edition')
298 LEFT JOIN metabib.compressed_display_entry mcde_physical_description
299 ON (bre.id = mcde_physical_description.source
300 AND mcde_physical_description.name = 'physical_description')
301 LEFT JOIN metabib.compressed_display_entry mcde_publisher
302 ON (bre.id = mcde_publisher.source AND mcde_publisher.name = 'publisher')
303 LEFT JOIN metabib.compressed_display_entry mcde_series_title
304 ON (bre.id = mcde_series_title.source AND mcde_series_title.name = 'series_title')
305 LEFT JOIN metabib.compressed_display_entry mcde_abstract
306 ON (bre.id = mcde_abstract.source AND mcde_abstract.name = 'abstract')
307 LEFT JOIN metabib.compressed_display_entry mcde_toc
308 ON (bre.id = mcde_toc.source AND mcde_toc.name = 'toc')
309 LEFT JOIN metabib.compressed_display_entry mcde_pubdate
310 ON (bre.id = mcde_pubdate.source AND mcde_pubdate.name = 'pubdate')
311 LEFT JOIN metabib.compressed_display_entry mcde_type_of_resource
312 ON (bre.id = mcde_type_of_resource.source
313 AND mcde_type_of_resource.name = 'type_of_resource')
316 CREATE TABLE metabib.browse_entry (
317 id BIGSERIAL PRIMARY KEY,
319 index_vector tsvector,
320 sort_value TEXT NOT NULL,
321 UNIQUE(sort_value, value)
325 CREATE INDEX browse_entry_sort_value_idx
326 ON metabib.browse_entry USING BTREE (sort_value);
328 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
329 CREATE TRIGGER metabib_browse_entry_fti_trigger
330 BEFORE INSERT OR UPDATE ON metabib.browse_entry
331 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
334 CREATE TABLE metabib.browse_entry_def_map (
335 id BIGSERIAL PRIMARY KEY,
336 entry BIGINT REFERENCES metabib.browse_entry (id),
337 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
338 source BIGINT REFERENCES biblio.record_entry (id),
339 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
341 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
342 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
343 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
345 CREATE TABLE metabib.browse_entry_simple_heading_map (
346 id BIGSERIAL PRIMARY KEY,
347 entry BIGINT REFERENCES metabib.browse_entry (id),
348 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
350 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
351 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
353 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
354 RETURNS TRIGGER AS $$
357 display_field_text TEXT;
359 display_field_text := NEW.value;
362 SELECT n.func AS func,
363 n.param_count AS param_count,
365 FROM config.index_normalizer n
366 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
367 WHERE m.field = NEW.field AND m.pos < 0
370 EXECUTE 'SELECT ' || normalizer.func || '(' ||
371 quote_literal( display_field_text ) ||
373 WHEN normalizer.param_count > 0
374 THEN ',' || REPLACE(REPLACE(BTRIM(
375 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
378 ')' INTO display_field_text;
382 NEW.value = display_field_text;
388 CREATE TRIGGER display_field_normalize_tgr
389 BEFORE UPDATE OR INSERT ON metabib.display_entry
390 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
392 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
393 RETURNS TRIGGER AS $$
395 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
400 CREATE TRIGGER display_field_force_nfc_tgr
401 BEFORE UPDATE OR INSERT ON metabib.display_entry
402 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
405 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
410 facet_text := NEW.value;
413 SELECT n.func AS func,
414 n.param_count AS param_count,
416 FROM config.index_normalizer n
417 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
418 WHERE m.field = NEW.field AND m.pos < 0
421 EXECUTE 'SELECT ' || normalizer.func || '(' ||
422 quote_literal( facet_text ) ||
424 WHEN normalizer.param_count > 0
425 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
432 NEW.value = facet_text;
438 CREATE TRIGGER facet_normalize_tgr
439 BEFORE UPDATE OR INSERT ON metabib.facet_entry
440 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
442 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
444 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
449 CREATE TRIGGER facet_force_nfc_tgr
450 BEFORE UPDATE OR INSERT ON metabib.facet_entry
451 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
453 -- DECREMENTING serial starts at -1
454 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
456 CREATE TABLE metabib.uncontrolled_record_attr_value (
457 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
458 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
461 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
463 CREATE VIEW metabib.record_attr_id_map AS
464 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
466 SELECT c.id, c.ctype AS attr, c.code AS value
467 FROM config.coded_value_map c
468 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
470 CREATE VIEW metabib.composite_attr_id_map AS
471 SELECT c.id, c.ctype AS attr, c.code AS value
472 FROM config.coded_value_map c
473 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
475 CREATE VIEW metabib.full_attr_id_map AS
476 SELECT id, attr, value FROM metabib.record_attr_id_map
478 SELECT id, attr, value FROM metabib.composite_attr_id_map;
481 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
482 $_SHARED{metabib_compile_composite_attr_cache} = {}
483 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
484 return exists $_SHARED{metabib_compile_composite_attr_cache};
485 $f$ LANGUAGE PLPERLU;
487 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
488 delete $_SHARED{metabib_compile_composite_attr_cache};
489 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
490 $f$ LANGUAGE PLPERLU;
492 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
493 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
496 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
498 PERFORM metabib.compile_composite_attr_cache_invalidate();
501 $f$ LANGUAGE PLPGSQL;
503 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
505 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
510 my $def = decode_json($json);
512 die("Composite attribute definition not supplied") unless $def;
514 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
516 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
517 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
524 if (ref $d eq 'HASH') { # node or AND
525 if (exists $d->{_attr}) { # it is a node
526 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
527 my $id = spi_exec_prepared(
528 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
532 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
533 return '!' . recurse($$d{_not});
534 } else { # an AND list
535 @list = map { recurse($$d{$_}) } sort keys %$d;
537 } elsif (ref $d eq 'ARRAY') {
539 @list = map { recurse($_) } @$d;
542 @list = grep { defined && $_ ne '' } @list;
544 return '(' . join($j,@list) . ')' if @list;
548 my $val = recurse($def) || undef;
549 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
552 $func$ IMMUTABLE LANGUAGE plperlu;
554 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
555 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
556 $func$ STRICT IMMUTABLE LANGUAGE SQL;
558 CREATE TABLE metabib.record_attr_vector_list (
559 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
560 vlist INT[] NOT NULL -- stores id from ccvm AND murav
562 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
564 /* This becomes a view, and we do sorters differently ...
565 CREATE TABLE metabib.record_attr (
566 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
567 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
569 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
570 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
571 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
575 CREATE TABLE metabib.record_sorter (
576 id BIGSERIAL PRIMARY KEY,
577 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
578 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
581 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
582 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
583 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
586 CREATE TYPE metabib.record_attr_type AS (
591 -- Back-compat view ... we're moving to an INTARRAY world
592 CREATE VIEW metabib.record_attr_flat AS
593 SELECT v.source AS id,
596 FROM metabib.record_attr_vector_list v
597 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
599 SELECT v.source AS id,
602 FROM metabib.record_attr_vector_list v
603 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
605 CREATE VIEW metabib.record_attr AS
606 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
607 FROM metabib.record_attr_flat
608 WHERE attr IS NOT NULL
611 -- Back-back-compat view ... we use to live in an HSTORE world
612 CREATE TYPE metabib.rec_desc_type AS (
630 CREATE VIEW metabib.rec_descriptor AS
633 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
634 FROM metabib.record_attr;
636 -- Use a sequence that matches previous version, for easier upgrading.
637 CREATE SEQUENCE metabib.full_rec_id_seq;
639 CREATE TABLE metabib.real_full_rec (
640 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
641 record BIGINT NOT NULL,
642 tag CHAR(3) NOT NULL,
647 index_vector tsvector NOT NULL
649 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
651 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
652 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
653 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
654 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
655 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
656 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
657 CREATE INDEX metabib_full_rec_isxn_caseless_idx
658 ON metabib.real_full_rec (LOWER(value))
659 WHERE tag IN ('020', '022', '024');
660 -- This next index might fully supplant the one above, but leaving both for now.
661 -- (they are not too large)
662 -- The reason we need this index is to ensure that the query parser always
663 -- prefers this index over the simpler tag/subfield index, as this greatly
664 -- increases Vandelay overlay speed for these identifiers, especially when
665 -- a record has many of these fields (around > 4-6 seems like the cutoff
666 -- on at least one PG9.1 system)
667 -- A similar index could be added for other fields (e.g. 010), but one should
668 -- leave out the LOWER() in all other cases.
669 -- TODO: verify whether we can discard the non tag/subfield/substring version
670 -- above (metabib_full_rec_isxn_caseless_idx)
671 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
672 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
673 WHERE tag IN ('020', '022', '024');
676 CREATE TRIGGER metabib_full_rec_fti_trigger
677 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
678 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
680 CREATE OR REPLACE VIEW metabib.full_rec AS
687 SUBSTRING(value,1,1024) AS value,
689 FROM metabib.real_full_rec;
691 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
692 AS ON INSERT TO metabib.full_rec
694 INSERT INTO metabib.real_full_rec VALUES (
695 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
705 CREATE OR REPLACE RULE metabib_full_rec_update_rule
706 AS ON UPDATE TO metabib.full_rec
708 UPDATE metabib.real_full_rec SET
714 subfield = NEW.subfield,
716 index_vector = NEW.index_vector
719 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
720 AS ON DELETE TO metabib.full_rec
722 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
724 CREATE TABLE metabib.metarecord_source_map (
725 id BIGSERIAL PRIMARY KEY,
726 metarecord BIGINT NOT NULL,
727 source BIGINT NOT NULL
729 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
730 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
732 CREATE TYPE metabib.field_entry_template AS (
745 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
750 ) RETURNS SETOF metabib.field_entry_template AS $func$
752 bib biblio.record_entry%ROWTYPE;
753 idx config.metabib_field%ROWTYPE;
754 xfrm config.xml_transform%ROWTYPE;
756 transformed_xml TEXT;
758 xml_node_list TEXT[];
765 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
767 authority_link BIGINT;
768 output_row metabib.field_entry_template%ROWTYPE;
772 -- Start out with no field-use bools set
773 output_row.browse_field = FALSE;
774 output_row.facet_field = FALSE;
775 output_row.display_field = FALSE;
776 output_row.search_field = FALSE;
779 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
781 -- Loop over the indexing entries
782 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
783 CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
785 process_idx := FALSE;
786 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
787 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
788 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
789 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
790 CONTINUE WHEN process_idx = FALSE; -- disabled for all types
792 joiner := COALESCE(idx.joiner, default_joiner);
794 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
796 -- See if we can skip the XSLT ... it's expensive
797 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
798 -- Can't skip the transform
799 IF xfrm.xslt <> '---' THEN
800 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
802 transformed_xml := bib.marc;
805 prev_xfrm := xfrm.name;
808 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
811 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
812 CONTINUE WHEN xml_node !~ E'^\\s*<';
814 -- XXX much of this should be moved into oils_xpath_string...
815 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
816 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
817 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
818 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
822 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
824 IF raw_text IS NOT NULL THEN
825 raw_text := raw_text || joiner;
828 raw_text := COALESCE(raw_text,'') || curr_text;
830 -- autosuggest/metabib.browse_entry
831 IF idx.browse_field THEN
833 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
834 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
836 browse_text := curr_text;
839 IF idx.browse_sort_xpath IS NOT NULL AND
840 idx.browse_sort_xpath <> '' THEN
842 sort_value := oils_xpath_string(
843 idx.browse_sort_xpath, xml_node, joiner,
844 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
847 sort_value := browse_text;
850 output_row.field_class = idx.field_class;
851 output_row.field = idx.id;
852 output_row.source = rid;
853 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
854 output_row.sort_value :=
855 public.naco_normalize(sort_value);
857 output_row.authority := NULL;
859 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
860 authority_text := oils_xpath_string(
861 idx.authority_xpath, xml_node, joiner,
863 ARRAY[xfrm.prefix, xfrm.namespace_uri],
864 ARRAY['xlink','http://www.w3.org/1999/xlink']
868 IF authority_text ~ '^\d+$' THEN
869 authority_link := authority_text::BIGINT;
870 PERFORM * FROM authority.record_entry WHERE id = authority_link;
872 output_row.authority := authority_link;
878 output_row.browse_field = TRUE;
879 -- Returning browse rows with search_field = true for search+browse
880 -- configs allows us to retain granularity of being able to search
881 -- browse fields with "starts with" type operators (for example, for
882 -- titles of songs in music albums)
883 IF idx.search_field THEN
884 output_row.search_field = TRUE;
886 RETURN NEXT output_row;
887 output_row.browse_field = FALSE;
888 output_row.search_field = FALSE;
889 output_row.sort_value := NULL;
892 -- insert raw node text for faceting
893 IF idx.facet_field THEN
895 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
896 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
898 facet_text := curr_text;
901 output_row.field_class = idx.field_class;
902 output_row.field = -1 * idx.id;
903 output_row.source = rid;
904 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
906 output_row.facet_field = TRUE;
907 RETURN NEXT output_row;
908 output_row.facet_field = FALSE;
911 -- insert raw node text for display
912 IF idx.display_field THEN
914 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
915 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
917 display_text := curr_text;
920 output_row.field_class = idx.field_class;
921 output_row.field = -1 * idx.id;
922 output_row.source = rid;
923 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
925 output_row.display_field = TRUE;
926 RETURN NEXT output_row;
927 output_row.display_field = FALSE;
932 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
934 -- insert combined node text for searching
935 IF idx.search_field THEN
936 output_row.field_class = idx.field_class;
937 output_row.field = idx.id;
938 output_row.source = rid;
939 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
941 output_row.search_field = TRUE;
942 RETURN NEXT output_row;
943 output_row.search_field = FALSE;
949 $func$ LANGUAGE PLPGSQL;
951 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
958 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
959 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
960 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
961 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
962 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
963 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
964 FROM metabib.keyword_field_entry WHERE source = bib_id;
966 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
967 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
968 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
969 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
970 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
971 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
972 FROM metabib.title_field_entry WHERE source = bib_id;
974 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
975 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
976 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
977 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
978 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
979 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
980 FROM metabib.author_field_entry WHERE source = bib_id;
982 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
983 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
984 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
985 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
986 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
987 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
988 FROM metabib.subject_field_entry WHERE source = bib_id;
990 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
991 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
992 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
993 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
994 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
995 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
996 FROM metabib.series_field_entry WHERE source = bib_id;
998 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
999 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
1000 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
1001 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
1002 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
1003 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
1004 FROM metabib.identifier_field_entry WHERE source = bib_id;
1006 -- For each virtual def, gather the data from the combined real field
1007 -- entries and append it to the virtual combined entry.
1008 FOR vfield, rfields IN SELECT virtual, ARRAY_AGG(real) FROM config.metabib_field_virtual_map GROUP BY virtual LOOP
1009 SELECT field_class INTO vclass
1010 FROM config.metabib_field
1013 SELECT string_agg(index_vector::TEXT,' ')::tsvector INTO rdata
1014 FROM metabib.combined_all_field_entry
1015 WHERE record = bib_id
1016 AND metabib_field = ANY (rfields);
1018 BEGIN -- I cannot wait for INSERT ON CONFLICT ... 9.5, though
1020 INSERT INTO metabib.combined_$$ || vclass || $$_field_entry
1021 (record, metabib_field, index_vector) VALUES ($1, $2, $3)
1022 $$ USING bib_id, vfield, rdata;
1023 EXCEPTION WHEN unique_violation THEN
1025 UPDATE metabib.combined_$$ || vclass || $$_field_entry
1026 SET index_vector = index_vector || $3
1028 AND metabib_field = $2
1029 $$ USING bib_id, vfield, rdata;
1033 $func$ LANGUAGE PLPGSQL;
1035 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
1037 skip_facet BOOL DEFAULT FALSE,
1038 skip_display BOOL DEFAULT FALSE,
1039 skip_browse BOOL DEFAULT FALSE,
1040 skip_search BOOL DEFAULT FALSE,
1041 only_fields INT[] DEFAULT '{}'::INT[]
1042 ) RETURNS VOID AS $func$
1045 ind_data metabib.field_entry_template%ROWTYPE;
1046 mbe_row metabib.browse_entry%ROWTYPE;
1049 b_skip_display BOOL;
1053 field_list INT[] := only_fields;
1054 field_types TEXT[] := '{}'::TEXT[];
1057 IF field_list = '{}'::INT[] THEN
1058 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
1061 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
1062 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
1063 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
1064 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
1066 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
1067 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
1068 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
1069 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
1071 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1073 IF NOT b_skip_search THEN
1074 FOR fclass IN SELECT * FROM config.metabib_class LOOP
1075 -- RAISE NOTICE 'Emptying out %', fclass.name;
1076 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
1079 IF NOT b_skip_facet THEN
1080 DELETE FROM metabib.facet_entry WHERE source = bib_id;
1082 IF NOT b_skip_display THEN
1083 DELETE FROM metabib.display_entry WHERE source = bib_id;
1085 IF NOT b_skip_browse THEN
1086 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
1090 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
1092 -- don't store what has been normalized away
1093 CONTINUE WHEN ind_data.value IS NULL;
1095 IF ind_data.field < 0 THEN
1096 ind_data.field = -1 * ind_data.field;
1099 IF ind_data.facet_field AND NOT b_skip_facet THEN
1100 INSERT INTO metabib.facet_entry (field, source, value)
1101 VALUES (ind_data.field, ind_data.source, ind_data.value);
1104 IF ind_data.display_field AND NOT b_skip_display THEN
1105 INSERT INTO metabib.display_entry (field, source, value)
1106 VALUES (ind_data.field, ind_data.source, ind_data.value);
1110 IF ind_data.browse_field AND NOT b_skip_browse THEN
1111 -- A caveat about this SELECT: this should take care of replacing
1112 -- old mbe rows when data changes, but not if normalization (by
1113 -- which I mean specifically the output of
1114 -- evergreen.oils_tsearch2()) changes. It may or may not be
1115 -- expensive to add a comparison of index_vector to index_vector
1116 -- to the WHERE clause below.
1118 CONTINUE WHEN ind_data.sort_value IS NULL;
1120 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
1121 SELECT INTO mbe_row * FROM metabib.browse_entry
1122 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
1125 mbe_id := mbe_row.id;
1127 INSERT INTO metabib.browse_entry
1128 ( value, sort_value ) VALUES
1129 ( value_prepped, ind_data.sort_value );
1131 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
1134 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
1135 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
1138 IF ind_data.search_field AND NOT b_skip_search THEN
1139 -- Avoid inserting duplicate rows
1140 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
1141 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
1142 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
1143 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
1144 IF mbe_id IS NULL THEN
1146 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
1148 quote_literal(ind_data.field) || $$, $$ ||
1149 quote_literal(ind_data.source) || $$, $$ ||
1150 quote_literal(ind_data.value) ||
1157 IF NOT b_skip_search THEN
1158 PERFORM metabib.update_combined_index_vectors(bib_id);
1163 $func$ LANGUAGE PLPGSQL;
1165 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
1167 auth authority.record_entry%ROWTYPE;
1168 output authority.full_rec%ROWTYPE;
1171 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
1173 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
1174 output.record := rid;
1175 output.ind1 := field.ind1;
1176 output.ind2 := field.ind2;
1177 output.tag := field.tag;
1178 output.subfield := field.subfield;
1179 output.value := field.value;
1184 $func$ LANGUAGE PLPGSQL;
1186 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
1188 bib biblio.record_entry%ROWTYPE;
1189 output metabib.full_rec%ROWTYPE;
1192 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
1194 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
1195 output.record := rid;
1196 output.ind1 := field.ind1;
1197 output.ind2 := field.ind2;
1198 output.tag := field.tag;
1199 output.subfield := field.subfield;
1200 output.value := field.value;
1205 $func$ LANGUAGE PLPGSQL;
1207 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1208 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1209 $func$ LANGUAGE SQL;
1211 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1212 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1213 $func$ LANGUAGE SQL;
1215 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1216 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
1217 $func$ LANGUAGE SQL;
1219 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1220 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1221 $func$ LANGUAGE SQL;
1223 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1236 IF marc IS NULL OR marc = '' THEN
1240 -- First, the count of tags
1241 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1243 -- now go through a bunch of pain to get the record type
1244 IF best_type IS NOT NULL THEN
1245 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1247 IF ldr IS NOT NULL THEN
1248 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1249 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1252 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1253 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1255 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1257 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1259 IF type_map.code IS NOT NULL THEN
1260 IF best_type = type_map.code THEN
1261 qual := qual + qual / 2;
1264 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1265 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1266 IF ff_tag_data = best_lang THEN
1274 -- Now look for some quality metrics
1276 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1281 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1288 $func$ LANGUAGE PLPGSQL;
1290 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1292 idx config.biblio_fingerprint%ROWTYPE;
1293 xfrm config.xml_transform%ROWTYPE;
1295 transformed_xml TEXT;
1297 xml_node_list TEXT[];
1299 output_text TEXT := '';
1302 IF marc IS NULL OR marc = '' THEN
1306 -- Loop over the indexing entries
1307 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1309 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1311 -- See if we can skip the XSLT ... it's expensive
1312 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1313 -- Can't skip the transform
1314 IF xfrm.xslt <> '---' THEN
1315 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1317 transformed_xml := marc;
1320 prev_xfrm := xfrm.name;
1323 raw_text := COALESCE(
1331 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1340 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1341 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1343 IF idx.first_word IS TRUE THEN
1344 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1347 output_text := output_text || idx.name || ':' ||
1348 REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
1352 RETURN BTRIM(output_text);
1355 $func$ LANGUAGE PLPGSQL;
1357 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1358 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1361 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1363 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1367 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1368 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1373 $func$ LANGUAGE PLPGSQL;
1375 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1377 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1379 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1381 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1382 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1386 $func$ LANGUAGE PLPGSQL;
1388 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1395 uri_owner_list TEXT[];
1403 -- Clear any URI mappings and call numbers for this bib.
1404 -- This leads to acn / auricnm inflation, but also enables
1405 -- old acn/auricnm's to go away and for bibs to be deleted.
1406 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1407 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1408 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1411 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1412 IF ARRAY_UPPER(uris,1) > 0 THEN
1413 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1414 -- First we pull info out of the 856
1417 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1418 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1419 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1421 IF uri_label IS NULL THEN
1422 uri_label := uri_href;
1424 CONTINUE WHEN uri_href IS NULL;
1426 -- Get the distinct list of libraries wanting to use
1428 DISTINCT REGEXP_REPLACE(
1430 $re$^.*?\((\w+)\).*$$re$,
1433 ) INTO uri_owner_list
1436 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1441 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1443 -- look for a matching uri
1444 IF uri_use IS NULL THEN
1445 SELECT id INTO uri_id
1447 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1448 ORDER BY id LIMIT 1;
1449 IF NOT FOUND THEN -- create one
1450 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1451 SELECT id INTO uri_id
1453 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1456 SELECT id INTO uri_id
1458 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1459 ORDER BY id LIMIT 1;
1460 IF NOT FOUND THEN -- create one
1461 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1462 SELECT id INTO uri_id
1464 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1468 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1469 uri_owner := uri_owner_list[j];
1471 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1472 CONTINUE WHEN NOT FOUND;
1474 -- we need a call number to link through
1475 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1477 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1478 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1479 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1482 -- now, link them if they're not already
1483 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1485 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1497 $func$ LANGUAGE PLPGSQL;
1499 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib(
1502 bib_is_deleted boolean DEFAULT false,
1503 retain_deleted boolean DEFAULT false
1504 ) RETURNS bigint AS $function$
1506 new_mapping BOOL := TRUE;
1509 tmp_mr metabib.metarecord%ROWTYPE;
1510 deleted_mrs BIGINT[];
1513 -- We need to make sure we're not a deleted master record of an MR
1514 IF bib_is_deleted THEN
1515 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1516 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1519 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1521 -- Now, are there any more sources on this MR?
1522 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1524 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1525 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1526 DELETE FROM metabib.metarecord WHERE id = old_mr;
1528 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1529 UPDATE metabib.metarecord
1531 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1536 ELSE -- insert or update
1538 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1540 -- Find the first fingerprint-matching
1541 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1542 old_mr := tmp_mr.id;
1543 new_mapping := FALSE;
1545 ELSE -- Our fingerprint changed ... maybe remove the old MR
1546 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1547 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1548 IF source_count = 0 THEN -- No other records
1549 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1550 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1556 -- we found no suitable, preexisting MR based on old source maps
1557 IF old_mr IS NULL THEN
1558 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1560 IF old_mr IS NULL THEN -- nope, create one and grab its id
1561 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1562 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1564 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1565 UPDATE metabib.metarecord
1567 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1571 ELSE -- there was one we already attached to, update its mods cache and master_record
1572 UPDATE metabib.metarecord
1574 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1579 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1584 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1585 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1591 $function$ LANGUAGE plpgsql;
1593 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1594 DELETE FROM authority.bib_linking WHERE bib = $1;
1595 INSERT INTO authority.bib_linking (bib, authority)
1598 FROM ( SELECT DISTINCT $1 AS bib,
1599 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1600 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1601 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1602 ) y JOIN authority.record_entry r ON r.id = y.authority;
1604 $func$ LANGUAGE SQL;
1606 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1608 transformed_xml TEXT;
1609 rmarc TEXT := prmarc;
1613 xfrm config.xml_transform%ROWTYPE;
1614 attr_vector INT[] := '{}'::INT[];
1615 attr_vector_tmp INT[];
1616 attr_list TEXT[] := pattr_list;
1618 norm_attr_value TEXT[];
1621 attr_def config.record_attr_definition%ROWTYPE;
1622 ccvm_row config.coded_value_map%ROWTYPE;
1626 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1627 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1630 fixed_field IS NOT NULL OR
1631 xpath IS NOT NULL OR
1632 phys_char_sf IS NOT NULL OR
1639 IF rmarc IS NULL THEN
1640 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1643 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1645 jump_past := FALSE; -- This gets set when we are non-multi and have found something
1646 attr_value := '{}'::TEXT[];
1647 norm_attr_value := '{}'::TEXT[];
1648 attr_vector_tmp := '{}'::INT[];
1650 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1652 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1653 SELECT ARRAY_AGG(value) INTO attr_value
1654 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1656 AND tag LIKE attr_def.tag
1658 WHEN attr_def.sf_list IS NOT NULL
1659 THEN POSITION(subfield IN attr_def.sf_list) > 0
1665 IF NOT attr_def.multi THEN
1666 attr_value := ARRAY[ARRAY_TO_STRING(attr_value, COALESCE(attr_def.joiner,' '))];
1671 IF NOT jump_past AND attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1672 attr_value := attr_value || vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1674 IF NOT attr_def.multi THEN
1675 attr_value := ARRAY[attr_value[1]];
1680 IF NOT jump_past AND attr_def.xpath IS NOT NULL THEN -- and xpath expression
1682 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1684 -- See if we can skip the XSLT ... it's expensive
1685 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1686 -- Can't skip the transform
1687 IF xfrm.xslt <> '---' THEN
1688 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1690 transformed_xml := rmarc;
1693 prev_xfrm := xfrm.name;
1696 IF xfrm.name IS NULL THEN
1697 -- just grab the marcxml (empty) transform
1698 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1699 prev_xfrm := xfrm.name;
1702 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1703 tmp_val := oils_xpath_string(
1706 COALESCE(attr_def.joiner,' '),
1707 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1709 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1710 attr_value := attr_value || tmp_val;
1711 EXIT WHEN NOT attr_def.multi;
1716 IF NOT jump_past AND attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1717 SELECT ARRAY_AGG(m.value) INTO tmp_array
1718 FROM vandelay.marc21_physical_characteristics(rmarc) v
1719 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1720 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1721 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1723 attr_value := attr_value || tmp_array;
1725 IF NOT attr_def.multi THEN
1726 attr_value := ARRAY[attr_value[1]];
1731 -- apply index normalizers to attr_value
1732 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1734 SELECT n.func AS func,
1735 n.param_count AS param_count,
1737 FROM config.index_normalizer n
1738 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1739 WHERE attr = attr_def.name
1741 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1742 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1744 WHEN normalizer.param_count > 0
1745 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1751 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1752 -- note that a string that contains only blanks
1753 -- is a valid value for some attributes
1754 norm_attr_value := norm_attr_value || tmp_val;
1758 IF attr_def.filter THEN
1759 -- Create unknown uncontrolled values and find the IDs of the values
1760 IF ccvm_row.id IS NULL THEN
1761 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1762 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1763 BEGIN -- use subtransaction to isolate unique constraint violations
1764 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1765 EXCEPTION WHEN unique_violation THEN END;
1769 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1771 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1774 -- Add the new value to the vector
1775 attr_vector := attr_vector || attr_vector_tmp;
1778 IF attr_def.sorter THEN
1779 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1780 IF norm_attr_value[1] IS NOT NULL THEN
1781 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1787 /* We may need to rewrite the vlist to contain
1788 the intersection of new values for requested
1789 attrs and old values for ignored attrs. To
1790 do this, we take the old attr vlist and
1791 subtract any values that are valid for the
1792 requested attrs, and then add back the new
1793 set of attr values. */
1795 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1796 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1797 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1798 attr_vector := attr_vector || attr_vector_tmp;
1801 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1802 -- attributes can depend on earlier ones.
1803 PERFORM metabib.compile_composite_attr_cache_init();
1804 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1806 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1808 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1809 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1811 IF attr_def.filter THEN
1812 IF attr_vector @@ tmp_val::query_int THEN
1813 attr_vector = attr_vector + intset(ccvm_row.id);
1814 EXIT WHEN NOT attr_def.multi;
1818 IF attr_def.sorter THEN
1819 IF attr_vector @@ tmp_val THEN
1820 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1821 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1829 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1830 IF rdeleted THEN -- initial insert OR revivication
1831 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1832 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1834 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1840 $func$ LANGUAGE PLPGSQL;
1843 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1844 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1849 IF NEW.deleted THEN -- If this bib is deleted
1851 PERFORM * FROM config.internal_flag WHERE
1852 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1854 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1856 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1858 IF NOT tmp_bool THEN
1859 -- One needs to keep these around to support searches
1860 -- with the #deleted modifier, so one should turn on the named
1861 -- internal flag for that functionality.
1862 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1865 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1866 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1867 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1868 RETURN NEW; -- and we're done
1871 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1872 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1874 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1879 -- Record authority linking
1880 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1882 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1885 -- Flatten and insert the mfr data
1886 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1888 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1890 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1891 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1893 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1897 -- Gather and insert the field entry data
1898 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1900 -- Located URI magic
1901 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1902 IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF;
1904 -- (re)map metarecord-bib linking
1905 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1906 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1908 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1910 ELSE -- we're doing an update, and we're not deleted, remap
1911 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1913 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1919 $func$ LANGUAGE PLPGSQL;
1921 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1927 SELECT n.func AS func,
1928 n.param_count AS param_count,
1930 FROM config.index_normalizer n
1931 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1932 WHERE m.field = mapped_field AND m.pos < 0
1935 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1936 quote_literal( facet_text ) ||
1938 WHEN normalizer.param_count > 0
1939 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1942 ')' INTO facet_text;
1949 $$ LANGUAGE PLPGSQL;
1952 -- This mimics a specific part of QueryParser, turning the first part of a
1953 -- classed search (search_class) into a set of classes and possibly fields.
1954 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1955 -- or "au" or "au|corporate|personal" or anything like that, where the first
1956 -- element of the list you get by separating on the "|" character is either
1957 -- a registered class (config.metabib_class) or an alias
1958 -- (config.metabib_search_alias), and the rest of any such elements are
1959 -- fields (config.metabib_field).
1961 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1962 RETURNS SETOF RECORD AS $func$
1964 search_parts TEXT[];
1966 search_part_count INTEGER;
1968 registered_class config.metabib_class%ROWTYPE;
1969 registered_alias config.metabib_search_alias%ROWTYPE;
1970 registered_field config.metabib_field%ROWTYPE;
1972 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1974 search_part_count := ARRAY_LENGTH(search_parts, 1);
1975 IF search_part_count = 0 THEN
1978 SELECT INTO registered_class
1979 * FROM config.metabib_class WHERE name = search_parts[1];
1981 IF search_part_count < 2 THEN -- all fields
1982 rec := (registered_class.name, NULL::INTEGER);
1986 FOR field_name IN SELECT *
1987 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1988 SELECT INTO registered_field
1989 * FROM config.metabib_field
1990 WHERE name = field_name AND
1991 field_class = registered_class.name;
1993 rec := (registered_class.name, registered_field.id);
1998 -- maybe we have an alias?
1999 SELECT INTO registered_alias
2000 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
2004 IF search_part_count < 2 THEN -- return w/e the alias says
2006 registered_alias.field_class, registered_alias.field
2011 FOR field_name IN SELECT *
2012 FROM UNNEST(search_parts[2:search_part_count]) LOOP
2013 SELECT INTO registered_field
2014 * FROM config.metabib_field
2015 WHERE name = field_name AND
2016 field_class = registered_alias.field_class;
2019 registered_alias.field_class,
2030 $func$ LANGUAGE PLPGSQL ROWS 1;
2033 -- Given a string such as a user might type into a search box, prepare
2034 -- two changed variants for TO_TSQUERY(). See
2035 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
2036 -- The first variant is normalized to match indexed documents regardless
2037 -- of diacritics. The second variant keeps its diacritics for proper
2038 -- highlighting via TS_HEADLINE().
2040 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
2043 orig_ended_in_space BOOLEAN;
2048 orig_ended_in_space := orig ~ E'\\s$';
2050 orig := ARRAY_TO_STRING(
2051 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
2054 normalized := public.naco_normalize(orig); -- also trim()s
2055 plain := trim(orig);
2057 IF NOT orig_ended_in_space THEN
2058 plain := plain || ':*';
2059 normalized := normalized || ':*';
2062 plain := ARRAY_TO_STRING(
2063 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
2065 normalized := ARRAY_TO_STRING(
2066 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
2069 RETURN ARRAY[normalized, plain];
2071 $$ LANGUAGE PLPGSQL;
2073 -- Functions metabib.browse, metabib.staged_browse, and metabib.suggest_browse_entries
2074 -- will be created later, after internal dependencies are resolved.
2076 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
2080 temp_vector TEXT := '';
2086 NEW.index_vector = ''::tsvector;
2088 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
2090 SELECT n.func AS func,
2091 n.param_count AS param_count,
2093 FROM config.index_normalizer n
2094 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
2095 WHERE field = NEW.field AND m.pos < 0
2097 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2098 quote_literal( value ) ||
2100 WHEN normalizer.param_count > 0
2101 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2111 SELECT n.func AS func,
2112 n.param_count AS param_count,
2114 FROM config.index_normalizer n
2115 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
2116 WHERE field = NEW.field AND m.pos >= 0
2118 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2119 quote_literal( value ) ||
2121 WHEN normalizer.param_count > 0
2122 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2130 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
2132 value := ARRAY_TO_STRING(
2133 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
2135 value := public.search_normalize(value);
2136 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2138 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
2141 SELECT DISTINCT m.ts_config, m.index_weight
2142 FROM config.metabib_class_ts_map m
2143 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
2144 LEFT JOIN config.coded_value_map ccvm ON (
2145 ccvm.ctype IN ('item_lang', 'language') AND
2146 ccvm.code = m.index_lang AND
2147 r.vlist @> intset(ccvm.id)
2149 WHERE m.field_class = TG_ARGV[0]
2151 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
2152 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2154 SELECT DISTINCT m.ts_config, m.index_weight
2155 FROM config.metabib_field_ts_map m
2156 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
2157 LEFT JOIN config.coded_value_map ccvm ON (
2158 ccvm.ctype IN ('item_lang', 'language') AND
2159 ccvm.code = m.index_lang AND
2160 r.vlist @> intset(ccvm.id)
2162 WHERE m.metabib_field = NEW.field
2164 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
2165 ORDER BY index_weight ASC
2169 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2170 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2174 cur_weight = ts_rec.index_weight;
2175 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2178 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2180 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2185 $$ LANGUAGE PLPGSQL;
2188 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2189 browse_entry BIGINT,
2194 sources INT, -- visible ones, that is
2195 asources INT, -- visible ones, that is
2196 row_number INT, -- internal use, sort of
2197 accurate BOOL, -- Count in sources field is accurate? Not
2198 -- if we had more than a browse superpage
2199 -- of records to look at.
2200 aaccurate BOOL, -- See previous comment...
2205 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2208 ) RETURNS BIGINT AS $p$
2210 FROM metabib.browse_entry mbe
2211 JOIN metabib.browse_entry_def_map mbedm ON (
2212 mbedm.entry = mbe.id
2213 AND mbedm.def = ANY($1)
2215 WHERE mbe.sort_value >= public.naco_normalize($2)
2216 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2217 $p$ LANGUAGE SQL STABLE;
2219 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2222 ) RETURNS BIGINT AS $p$
2224 FROM metabib.browse_entry mbe
2225 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2226 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2227 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2228 ash.atag = map.authority_field
2229 AND map.metabib_field = ANY($1)
2231 WHERE mbe.sort_value >= public.naco_normalize($2)
2232 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2233 $p$ LANGUAGE SQL STABLE;
2235 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2238 ) RETURNS BIGINT AS $p$
2240 FROM metabib.browse_entry mbe
2241 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2242 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2243 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2244 ash.atag = map.authority_field
2245 AND map.metabib_field = ANY($1)
2247 WHERE mbe.sort_value >= public.naco_normalize($2)
2248 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2249 $p$ LANGUAGE SQL STABLE;
2251 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2254 ) RETURNS BIGINT AS $p$
2255 SELECT id FROM metabib.browse_entry
2257 metabib.browse_bib_pivot($1, $2),
2258 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2260 ORDER BY sort_value, value LIMIT 1;
2261 $p$ LANGUAGE SQL STABLE;
2264 -- This function is used to help clean up facet labels. Due to quirks in
2265 -- MARC parsing, some facet labels may be generated with periods or commas
2266 -- at the end. This will strip a trailing commas off all the time, and
2267 -- periods when they don't look like they are part of initials.
2268 -- Smith, John => no change
2269 -- Smith, John, => Smith, John
2270 -- Smith, John. => Smith, John
2271 -- Public, John Q. => no change
2272 CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
2278 last_char = substring(result from '.$');
2280 IF last_char = ',' THEN
2281 result := substring(result from '^(.*),$');
2283 ELSIF last_char = '.' THEN
2284 IF substring(result from ' \w\.$') IS NULL THEN
2285 result := substring(result from '^(.*)\.$');
2292 $$ language 'plpgsql';