2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.display_entry (
187 id BIGSERIAL PRIMARY KEY,
188 source BIGINT NOT NULL,
193 CREATE INDEX metabib_display_entry_field_idx
194 ON metabib.display_entry (field);
195 CREATE INDEX metabib_display_entry_source_idx
196 ON metabib.display_entry (source);
198 CREATE TABLE metabib.browse_entry (
199 id BIGSERIAL PRIMARY KEY,
201 index_vector tsvector,
202 sort_value TEXT NOT NULL,
203 UNIQUE(sort_value, value)
207 CREATE INDEX browse_entry_sort_value_idx
208 ON metabib.browse_entry USING BTREE (sort_value);
210 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
211 CREATE TRIGGER metabib_browse_entry_fti_trigger
212 BEFORE INSERT OR UPDATE ON metabib.browse_entry
213 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
216 CREATE TABLE metabib.browse_entry_def_map (
217 id BIGSERIAL PRIMARY KEY,
218 entry BIGINT REFERENCES metabib.browse_entry (id),
219 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
220 source BIGINT REFERENCES biblio.record_entry (id),
221 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
223 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
224 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
225 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
227 CREATE TABLE metabib.browse_entry_simple_heading_map (
228 id BIGSERIAL PRIMARY KEY,
229 entry BIGINT REFERENCES metabib.browse_entry (id),
230 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
232 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
233 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
235 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
236 RETURNS TRIGGER AS $$
239 display_field_text TEXT;
241 display_field_text := NEW.value;
244 SELECT n.func AS func,
245 n.param_count AS param_count,
247 FROM config.index_normalizer n
248 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
249 WHERE m.field = NEW.field AND m.pos < 0
252 EXECUTE 'SELECT ' || normalizer.func || '(' ||
253 quote_literal( display_field_text ) ||
255 WHEN normalizer.param_count > 0
256 THEN ',' || REPLACE(REPLACE(BTRIM(
257 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
260 ')' INTO display_field_text;
264 NEW.value = display_field_text;
270 CREATE TRIGGER display_field_normalize_tgr
271 BEFORE UPDATE OR INSERT ON metabib.display_entry
272 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
274 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
275 RETURNS TRIGGER AS $$
277 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
282 CREATE TRIGGER display_field_force_nfc_tgr
283 BEFORE UPDATE OR INSERT ON metabib.display_entry
284 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
287 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
292 facet_text := NEW.value;
295 SELECT n.func AS func,
296 n.param_count AS param_count,
298 FROM config.index_normalizer n
299 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
300 WHERE m.field = NEW.field AND m.pos < 0
303 EXECUTE 'SELECT ' || normalizer.func || '(' ||
304 quote_literal( facet_text ) ||
306 WHEN normalizer.param_count > 0
307 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
314 NEW.value = facet_text;
320 CREATE TRIGGER facet_normalize_tgr
321 BEFORE UPDATE OR INSERT ON metabib.facet_entry
322 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
324 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
326 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
331 CREATE TRIGGER facet_force_nfc_tgr
332 BEFORE UPDATE OR INSERT ON metabib.facet_entry
333 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
335 -- DECREMENTING serial starts at -1
336 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
338 CREATE TABLE metabib.uncontrolled_record_attr_value (
339 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
340 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
343 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
345 CREATE VIEW metabib.record_attr_id_map AS
346 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
348 SELECT c.id, c.ctype AS attr, c.code AS value
349 FROM config.coded_value_map c
350 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
352 CREATE VIEW metabib.composite_attr_id_map AS
353 SELECT c.id, c.ctype AS attr, c.code AS value
354 FROM config.coded_value_map c
355 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
357 CREATE VIEW metabib.full_attr_id_map AS
358 SELECT id, attr, value FROM metabib.record_attr_id_map
360 SELECT id, attr, value FROM metabib.composite_attr_id_map;
363 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_init () RETURNS BOOL AS $f$
364 $_SHARED{metabib_compile_composite_attr_cache} = {}
365 if ! exists $_SHARED{metabib_compile_composite_attr_cache};
366 return exists $_SHARED{metabib_compile_composite_attr_cache};
367 $f$ LANGUAGE PLPERLU;
369 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_disable () RETURNS BOOL AS $f$
370 delete $_SHARED{metabib_compile_composite_attr_cache};
371 return ! exists $_SHARED{metabib_compile_composite_attr_cache};
372 $f$ LANGUAGE PLPERLU;
374 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr_cache_invalidate () RETURNS BOOL AS $f$
375 SELECT metabib.compile_composite_attr_cache_disable() AND metabib.compile_composite_attr_cache_init();
378 CREATE OR REPLACE FUNCTION metabib.composite_attr_def_cache_inval_tgr () RETURNS TRIGGER AS $f$
380 PERFORM metabib.compile_composite_attr_cache_invalidate();
383 $f$ LANGUAGE PLPGSQL;
385 CREATE TRIGGER ccraed_cache_inval_tgr AFTER INSERT OR UPDATE OR DELETE ON config.composite_attr_entry_definition FOR EACH STATEMENT EXECUTE PROCEDURE metabib.composite_attr_def_cache_inval_tgr();
387 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
392 my $def = decode_json($json);
394 die("Composite attribute definition not supplied") unless $def;
396 my $_cache = (exists $_SHARED{metabib_compile_composite_attr_cache}) ? 1 : 0;
398 return $_SHARED{metabib_compile_composite_attr_cache}{$json}
399 if ($_cache && $_SHARED{metabib_compile_composite_attr_cache}{$json});
406 if (ref $d eq 'HASH') { # node or AND
407 if (exists $d->{_attr}) { # it is a node
408 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
409 my $id = spi_exec_prepared(
410 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
414 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
415 return '!' . recurse($$d{_not});
416 } else { # an AND list
417 @list = map { recurse($$d{$_}) } sort keys %$d;
419 } elsif (ref $d eq 'ARRAY') {
421 @list = map { recurse($_) } @$d;
424 @list = grep { defined && $_ ne '' } @list;
426 return '(' . join($j,@list) . ')' if @list;
430 my $val = recurse($def) || undef;
431 $_SHARED{metabib_compile_composite_attr_cache}{$json} = $val if $_cache;
434 $func$ IMMUTABLE LANGUAGE plperlu;
436 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
437 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
438 $func$ STRICT IMMUTABLE LANGUAGE SQL;
440 CREATE TABLE metabib.record_attr_vector_list (
441 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
442 vlist INT[] NOT NULL -- stores id from ccvm AND murav
444 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
446 /* This becomes a view, and we do sorters differently ...
447 CREATE TABLE metabib.record_attr (
448 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
449 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
451 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
452 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
453 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
457 CREATE TABLE metabib.record_sorter (
458 id BIGSERIAL PRIMARY KEY,
459 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
460 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
463 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
464 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
465 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
468 CREATE TYPE metabib.record_attr_type AS (
473 -- Back-compat view ... we're moving to an INTARRAY world
474 CREATE VIEW metabib.record_attr_flat AS
475 SELECT v.source AS id,
478 FROM metabib.record_attr_vector_list v
479 LEFT JOIN metabib.uncontrolled_record_attr_value m ON ( m.id = ANY( v.vlist ) )
481 SELECT v.source AS id,
484 FROM metabib.record_attr_vector_list v
485 LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) );
487 CREATE VIEW metabib.record_attr AS
488 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs
489 FROM metabib.record_attr_flat
490 WHERE attr IS NOT NULL
493 -- Back-back-compat view ... we use to live in an HSTORE world
494 CREATE TYPE metabib.rec_desc_type AS (
512 CREATE VIEW metabib.rec_descriptor AS
515 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
516 FROM metabib.record_attr;
518 -- Use a sequence that matches previous version, for easier upgrading.
519 CREATE SEQUENCE metabib.full_rec_id_seq;
521 CREATE TABLE metabib.real_full_rec (
522 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
523 record BIGINT NOT NULL,
524 tag CHAR(3) NOT NULL,
529 index_vector tsvector NOT NULL
531 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
533 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
534 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
535 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
536 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
537 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
538 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
539 CREATE INDEX metabib_full_rec_isxn_caseless_idx
540 ON metabib.real_full_rec (LOWER(value))
541 WHERE tag IN ('020', '022', '024');
542 -- This next index might fully supplant the one above, but leaving both for now.
543 -- (they are not too large)
544 -- The reason we need this index is to ensure that the query parser always
545 -- prefers this index over the simpler tag/subfield index, as this greatly
546 -- increases Vandelay overlay speed for these identifiers, especially when
547 -- a record has many of these fields (around > 4-6 seems like the cutoff
548 -- on at least one PG9.1 system)
549 -- A similar index could be added for other fields (e.g. 010), but one should
550 -- leave out the LOWER() in all other cases.
551 -- TODO: verify whether we can discard the non tag/subfield/substring version
552 -- above (metabib_full_rec_isxn_caseless_idx)
553 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
554 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
555 WHERE tag IN ('020', '022', '024');
558 CREATE TRIGGER metabib_full_rec_fti_trigger
559 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
560 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
562 CREATE OR REPLACE VIEW metabib.full_rec AS
569 SUBSTRING(value,1,1024) AS value,
571 FROM metabib.real_full_rec;
573 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
574 AS ON INSERT TO metabib.full_rec
576 INSERT INTO metabib.real_full_rec VALUES (
577 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
587 CREATE OR REPLACE RULE metabib_full_rec_update_rule
588 AS ON UPDATE TO metabib.full_rec
590 UPDATE metabib.real_full_rec SET
596 subfield = NEW.subfield,
598 index_vector = NEW.index_vector
601 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
602 AS ON DELETE TO metabib.full_rec
604 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
606 CREATE TABLE metabib.metarecord_source_map (
607 id BIGSERIAL PRIMARY KEY,
608 metarecord BIGINT NOT NULL,
609 source BIGINT NOT NULL
611 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
612 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
614 CREATE TYPE metabib.field_entry_template AS (
627 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
629 bib biblio.record_entry%ROWTYPE;
630 idx config.metabib_field%ROWTYPE;
631 xfrm config.xml_transform%ROWTYPE;
633 transformed_xml TEXT;
635 xml_node_list TEXT[];
642 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
644 authority_link BIGINT;
645 output_row metabib.field_entry_template%ROWTYPE;
648 -- Start out with no field-use bools set
649 output_row.browse_field = FALSE;
650 output_row.facet_field = FALSE;
651 output_row.display_field = FALSE;
652 output_row.search_field = FALSE;
655 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
657 -- Loop over the indexing entries
658 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
660 joiner := COALESCE(idx.joiner, default_joiner);
662 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
664 -- See if we can skip the XSLT ... it's expensive
665 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
666 -- Can't skip the transform
667 IF xfrm.xslt <> '---' THEN
668 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
670 transformed_xml := bib.marc;
673 prev_xfrm := xfrm.name;
676 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
679 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
680 CONTINUE WHEN xml_node !~ E'^\\s*<';
682 -- XXX much of this should be moved into oils_xpath_string...
683 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
684 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
685 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
686 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
690 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
692 IF raw_text IS NOT NULL THEN
693 raw_text := raw_text || joiner;
696 raw_text := COALESCE(raw_text,'') || curr_text;
698 -- autosuggest/metabib.browse_entry
699 IF idx.browse_field THEN
701 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
702 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
704 browse_text := curr_text;
707 IF idx.browse_sort_xpath IS NOT NULL AND
708 idx.browse_sort_xpath <> '' THEN
710 sort_value := oils_xpath_string(
711 idx.browse_sort_xpath, xml_node, joiner,
712 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
715 sort_value := browse_text;
718 output_row.field_class = idx.field_class;
719 output_row.field = idx.id;
720 output_row.source = rid;
721 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
722 output_row.sort_value :=
723 public.naco_normalize(sort_value);
725 output_row.authority := NULL;
727 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
728 authority_text := oils_xpath_string(
729 idx.authority_xpath, xml_node, joiner,
731 ARRAY[xfrm.prefix, xfrm.namespace_uri],
732 ARRAY['xlink','http://www.w3.org/1999/xlink']
736 IF authority_text ~ '^\d+$' THEN
737 authority_link := authority_text::BIGINT;
738 PERFORM * FROM authority.record_entry WHERE id = authority_link;
740 output_row.authority := authority_link;
746 output_row.browse_field = TRUE;
747 -- Returning browse rows with search_field = true for search+browse
748 -- configs allows us to retain granularity of being able to search
749 -- browse fields with "starts with" type operators (for example, for
750 -- titles of songs in music albums)
751 IF idx.search_field THEN
752 output_row.search_field = TRUE;
754 RETURN NEXT output_row;
755 output_row.browse_field = FALSE;
756 output_row.search_field = FALSE;
757 output_row.sort_value := NULL;
760 -- insert raw node text for faceting
761 IF idx.facet_field THEN
763 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
764 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
766 facet_text := curr_text;
769 output_row.field_class = idx.field_class;
770 output_row.field = -1 * idx.id;
771 output_row.source = rid;
772 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
774 output_row.facet_field = TRUE;
775 RETURN NEXT output_row;
776 output_row.facet_field = FALSE;
779 -- insert raw node text for display
780 IF idx.display_field THEN
782 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
783 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
785 display_text := curr_text;
788 output_row.field_class = idx.field_class;
789 output_row.field = -1 * idx.id;
790 output_row.source = rid;
791 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
793 output_row.display_field = TRUE;
794 RETURN NEXT output_row;
795 output_row.display_field = FALSE;
800 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
802 -- insert combined node text for searching
803 IF idx.search_field THEN
804 output_row.field_class = idx.field_class;
805 output_row.field = idx.id;
806 output_row.source = rid;
807 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
809 output_row.search_field = TRUE;
810 RETURN NEXT output_row;
811 output_row.search_field = FALSE;
818 $func$ LANGUAGE PLPGSQL;
820 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
822 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
823 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
824 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
825 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
826 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
827 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
828 FROM metabib.keyword_field_entry WHERE source = bib_id;
830 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
831 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
832 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
833 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
834 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
835 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
836 FROM metabib.title_field_entry WHERE source = bib_id;
838 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
839 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
840 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
841 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
842 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
843 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
844 FROM metabib.author_field_entry WHERE source = bib_id;
846 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
847 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
848 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
849 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
850 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
851 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
852 FROM metabib.subject_field_entry WHERE source = bib_id;
854 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
855 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
856 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
857 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
858 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
859 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
860 FROM metabib.series_field_entry WHERE source = bib_id;
862 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
863 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
864 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
865 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
866 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
867 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
868 FROM metabib.identifier_field_entry WHERE source = bib_id;
871 $func$ LANGUAGE PLPGSQL;
873 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
874 bib_id BIGINT, skip_facet BOOL DEFAULT FALSE,
875 skip_display BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE,
876 skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
879 ind_data metabib.field_entry_template%ROWTYPE;
880 mbe_row metabib.browse_entry%ROWTYPE;
889 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
890 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
891 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
892 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
894 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
896 IF NOT b_skip_search THEN
897 FOR fclass IN SELECT * FROM config.metabib_class LOOP
898 -- RAISE NOTICE 'Emptying out %', fclass.name;
899 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
902 IF NOT b_skip_facet THEN
903 DELETE FROM metabib.facet_entry WHERE source = bib_id;
905 IF NOT b_skip_display THEN
906 DELETE FROM metabib.display_entry WHERE source = bib_id;
908 IF NOT b_skip_browse THEN
909 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
913 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
915 -- don't store what has been normalized away
916 CONTINUE WHEN ind_data.value IS NULL;
918 IF ind_data.field < 0 THEN
919 ind_data.field = -1 * ind_data.field;
922 IF ind_data.facet_field AND NOT b_skip_facet THEN
923 INSERT INTO metabib.facet_entry (field, source, value)
924 VALUES (ind_data.field, ind_data.source, ind_data.value);
927 IF ind_data.display_field AND NOT b_skip_display THEN
928 INSERT INTO metabib.display_entry (field, source, value)
929 VALUES (ind_data.field, ind_data.source, ind_data.value);
933 IF ind_data.browse_field AND NOT b_skip_browse THEN
934 -- A caveat about this SELECT: this should take care of replacing
935 -- old mbe rows when data changes, but not if normalization (by
936 -- which I mean specifically the output of
937 -- evergreen.oils_tsearch2()) changes. It may or may not be
938 -- expensive to add a comparison of index_vector to index_vector
939 -- to the WHERE clause below.
941 CONTINUE WHEN ind_data.sort_value IS NULL;
943 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
944 SELECT INTO mbe_row * FROM metabib.browse_entry
945 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
948 mbe_id := mbe_row.id;
950 INSERT INTO metabib.browse_entry
951 ( value, sort_value ) VALUES
952 ( value_prepped, ind_data.sort_value );
954 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
957 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
958 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
961 IF ind_data.search_field AND NOT b_skip_search THEN
962 -- Avoid inserting duplicate rows
963 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
964 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
965 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
966 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
967 IF mbe_id IS NULL THEN
969 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
971 quote_literal(ind_data.field) || $$, $$ ||
972 quote_literal(ind_data.source) || $$, $$ ||
973 quote_literal(ind_data.value) ||
980 IF NOT b_skip_search THEN
981 PERFORM metabib.update_combined_index_vectors(bib_id);
986 $func$ LANGUAGE PLPGSQL;
988 -- default to a space joiner
989 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
990 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
993 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
995 auth authority.record_entry%ROWTYPE;
996 output authority.full_rec%ROWTYPE;
999 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
1001 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
1002 output.record := rid;
1003 output.ind1 := field.ind1;
1004 output.ind2 := field.ind2;
1005 output.tag := field.tag;
1006 output.subfield := field.subfield;
1007 output.value := field.value;
1012 $func$ LANGUAGE PLPGSQL;
1014 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
1016 bib biblio.record_entry%ROWTYPE;
1017 output metabib.full_rec%ROWTYPE;
1020 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
1022 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
1023 output.record := rid;
1024 output.ind1 := field.ind1;
1025 output.ind2 := field.ind2;
1026 output.tag := field.tag;
1027 output.subfield := field.subfield;
1028 output.value := field.value;
1033 $func$ LANGUAGE PLPGSQL;
1035 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1036 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1037 $func$ LANGUAGE SQL;
1039 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1040 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE );
1041 $func$ LANGUAGE SQL;
1043 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1044 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE );
1045 $func$ LANGUAGE SQL;
1047 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1048 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1049 $func$ LANGUAGE SQL;
1051 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1064 IF marc IS NULL OR marc = '' THEN
1068 -- First, the count of tags
1069 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1071 -- now go through a bunch of pain to get the record type
1072 IF best_type IS NOT NULL THEN
1073 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1075 IF ldr IS NOT NULL THEN
1076 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1077 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1080 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1081 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1083 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1085 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1087 IF type_map.code IS NOT NULL THEN
1088 IF best_type = type_map.code THEN
1089 qual := qual + qual / 2;
1092 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1093 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1094 IF ff_tag_data = best_lang THEN
1102 -- Now look for some quality metrics
1104 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1109 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1116 $func$ LANGUAGE PLPGSQL;
1118 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1120 idx config.biblio_fingerprint%ROWTYPE;
1121 xfrm config.xml_transform%ROWTYPE;
1123 transformed_xml TEXT;
1125 xml_node_list TEXT[];
1127 output_text TEXT := '';
1130 IF marc IS NULL OR marc = '' THEN
1134 -- Loop over the indexing entries
1135 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1137 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1139 -- See if we can skip the XSLT ... it's expensive
1140 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1141 -- Can't skip the transform
1142 IF xfrm.xslt <> '---' THEN
1143 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1145 transformed_xml := marc;
1148 prev_xfrm := xfrm.name;
1151 raw_text := COALESCE(
1159 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1168 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1169 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1171 IF idx.first_word IS TRUE THEN
1172 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1175 output_text := output_text || idx.name || ':' ||
1176 REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
1180 RETURN BTRIM(output_text);
1183 $func$ LANGUAGE PLPGSQL;
1185 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1186 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1189 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1191 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1195 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1196 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1201 $func$ LANGUAGE PLPGSQL;
1203 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1205 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1207 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1209 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1210 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1214 $func$ LANGUAGE PLPGSQL;
1216 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1223 uri_owner_list TEXT[];
1231 -- Clear any URI mappings and call numbers for this bib.
1232 -- This leads to acn / auricnm inflation, but also enables
1233 -- old acn/auricnm's to go away and for bibs to be deleted.
1234 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1235 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1236 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1239 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1240 IF ARRAY_UPPER(uris,1) > 0 THEN
1241 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1242 -- First we pull info out of the 856
1245 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1246 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1247 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1249 IF uri_label IS NULL THEN
1250 uri_label := uri_href;
1252 CONTINUE WHEN uri_href IS NULL;
1254 -- Get the distinct list of libraries wanting to use
1256 DISTINCT REGEXP_REPLACE(
1258 $re$^.*?\((\w+)\).*$$re$,
1261 ) INTO uri_owner_list
1264 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1269 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1271 -- look for a matching uri
1272 IF uri_use IS NULL THEN
1273 SELECT id INTO uri_id
1275 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1276 ORDER BY id LIMIT 1;
1277 IF NOT FOUND THEN -- create one
1278 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1279 SELECT id INTO uri_id
1281 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1284 SELECT id INTO uri_id
1286 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1287 ORDER BY id LIMIT 1;
1288 IF NOT FOUND THEN -- create one
1289 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1290 SELECT id INTO uri_id
1292 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1296 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1297 uri_owner := uri_owner_list[j];
1299 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1300 CONTINUE WHEN NOT FOUND;
1302 -- we need a call number to link through
1303 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1305 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1306 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1307 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1310 -- now, link them if they're not already
1311 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1313 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1325 $func$ LANGUAGE PLPGSQL;
1327 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$
1329 new_mapping BOOL := TRUE;
1332 tmp_mr metabib.metarecord%ROWTYPE;
1333 deleted_mrs BIGINT[];
1336 -- We need to make sure we're not a deleted master record of an MR
1337 IF bib_is_deleted THEN
1338 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
1340 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
1341 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
1344 -- Now, are there any more sources on this MR?
1345 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
1347 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
1348 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
1349 DELETE FROM metabib.metarecord WHERE id = old_mr;
1351 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
1352 UPDATE metabib.metarecord
1354 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1359 ELSE -- insert or update
1361 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1363 -- Find the first fingerprint-matching
1364 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
1365 old_mr := tmp_mr.id;
1366 new_mapping := FALSE;
1368 ELSE -- Our fingerprint changed ... maybe remove the old MR
1369 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
1370 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1371 IF source_count = 0 THEN -- No other records
1372 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1373 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1379 -- we found no suitable, preexisting MR based on old source maps
1380 IF old_mr IS NULL THEN
1381 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1383 IF old_mr IS NULL THEN -- nope, create one and grab its id
1384 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1385 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1387 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1388 UPDATE metabib.metarecord
1390 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1394 ELSE -- there was one we already attached to, update its mods cache and master_record
1395 UPDATE metabib.metarecord
1397 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
1402 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1407 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1408 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1414 $func$ LANGUAGE PLPGSQL;
1417 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1418 DELETE FROM authority.bib_linking WHERE bib = $1;
1419 INSERT INTO authority.bib_linking (bib, authority)
1422 FROM ( SELECT DISTINCT $1 AS bib,
1423 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1424 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1425 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1426 ) y JOIN authority.record_entry r ON r.id = y.authority;
1428 $func$ LANGUAGE SQL;
1430 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1432 transformed_xml TEXT;
1433 rmarc TEXT := prmarc;
1437 xfrm config.xml_transform%ROWTYPE;
1438 attr_vector INT[] := '{}'::INT[];
1439 attr_vector_tmp INT[];
1440 attr_list TEXT[] := pattr_list;
1442 norm_attr_value TEXT[];
1444 attr_def config.record_attr_definition%ROWTYPE;
1445 ccvm_row config.coded_value_map%ROWTYPE;
1448 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1449 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition
1452 fixed_field IS NOT NULL OR
1453 xpath IS NOT NULL OR
1454 phys_char_sf IS NOT NULL OR
1461 IF rmarc IS NULL THEN
1462 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1465 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1467 attr_value := '{}'::TEXT[];
1468 norm_attr_value := '{}'::TEXT[];
1469 attr_vector_tmp := '{}'::INT[];
1471 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1473 -- tag+sf attrs only support SVF
1474 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1475 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1476 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1478 AND tag LIKE attr_def.tag
1480 WHEN attr_def.sf_list IS NOT NULL
1481 THEN POSITION(subfield IN attr_def.sf_list) > 0
1488 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1489 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1491 IF NOT attr_def.multi THEN
1492 attr_value := ARRAY[attr_value[1]];
1495 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1497 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1499 -- See if we can skip the XSLT ... it's expensive
1500 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1501 -- Can't skip the transform
1502 IF xfrm.xslt <> '---' THEN
1503 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1505 transformed_xml := rmarc;
1508 prev_xfrm := xfrm.name;
1511 IF xfrm.name IS NULL THEN
1512 -- just grab the marcxml (empty) transform
1513 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1514 prev_xfrm := xfrm.name;
1517 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
1518 tmp_val := oils_xpath_string(
1521 COALESCE(attr_def.joiner,' '),
1522 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1524 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1525 attr_value := attr_value || tmp_val;
1526 EXIT WHEN NOT attr_def.multi;
1530 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1531 SELECT ARRAY_AGG(m.value) INTO attr_value
1532 FROM vandelay.marc21_physical_characteristics(rmarc) v
1533 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1534 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1535 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1537 IF NOT attr_def.multi THEN
1538 attr_value := ARRAY[attr_value[1]];
1543 -- apply index normalizers to attr_value
1544 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1546 SELECT n.func AS func,
1547 n.param_count AS param_count,
1549 FROM config.index_normalizer n
1550 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1551 WHERE attr = attr_def.name
1553 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1554 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1556 WHEN normalizer.param_count > 0
1557 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1563 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
1564 -- note that a string that contains only blanks
1565 -- is a valid value for some attributes
1566 norm_attr_value := norm_attr_value || tmp_val;
1570 IF attr_def.filter THEN
1571 -- Create unknown uncontrolled values and find the IDs of the values
1572 IF ccvm_row.id IS NULL THEN
1573 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1574 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1575 BEGIN -- use subtransaction to isolate unique constraint violations
1576 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1577 EXCEPTION WHEN unique_violation THEN END;
1581 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1583 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1586 -- Add the new value to the vector
1587 attr_vector := attr_vector || attr_vector_tmp;
1590 IF attr_def.sorter THEN
1591 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1592 IF norm_attr_value[1] IS NOT NULL THEN
1593 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1599 /* We may need to rewrite the vlist to contain
1600 the intersection of new values for requested
1601 attrs and old values for ignored attrs. To
1602 do this, we take the old attr vlist and
1603 subtract any values that are valid for the
1604 requested attrs, and then add back the new
1605 set of attr values. */
1607 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1608 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1609 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1610 attr_vector := attr_vector || attr_vector_tmp;
1613 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1614 -- attributes can depend on earlier ones.
1615 PERFORM metabib.compile_composite_attr_cache_init();
1616 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1618 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1620 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1621 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1623 IF attr_def.filter THEN
1624 IF attr_vector @@ tmp_val::query_int THEN
1625 attr_vector = attr_vector + intset(ccvm_row.id);
1626 EXIT WHEN NOT attr_def.multi;
1630 IF attr_def.sorter THEN
1631 IF attr_vector @@ tmp_val THEN
1632 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1633 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1641 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1642 IF rdeleted THEN -- initial insert OR revivication
1643 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1644 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1646 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1652 $func$ LANGUAGE PLPGSQL;
1655 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1656 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1661 IF NEW.deleted THEN -- If this bib is deleted
1663 PERFORM * FROM config.internal_flag WHERE
1664 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1666 tmp_bool := FOUND; -- Just in case this is changed by some other statement
1668 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
1670 IF NOT tmp_bool THEN
1671 -- One needs to keep these around to support searches
1672 -- with the #deleted modifier, so one should turn on the named
1673 -- internal flag for that functionality.
1674 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1677 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1678 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1679 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1680 RETURN NEW; -- and we're done
1683 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1684 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1686 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1691 -- Record authority linking
1692 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1694 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1697 -- Flatten and insert the mfr data
1698 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1700 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1702 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1703 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1705 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1709 -- Gather and insert the field entry data
1710 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1712 -- Located URI magic
1713 IF TG_OP = 'INSERT' THEN
1714 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1716 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1719 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1721 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1725 -- (re)map metarecord-bib linking
1726 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1727 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1729 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1731 ELSE -- we're doing an update, and we're not deleted, remap
1732 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1734 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1740 $func$ LANGUAGE PLPGSQL;
1742 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1748 SELECT n.func AS func,
1749 n.param_count AS param_count,
1751 FROM config.index_normalizer n
1752 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1753 WHERE m.field = mapped_field AND m.pos < 0
1756 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1757 quote_literal( facet_text ) ||
1759 WHEN normalizer.param_count > 0
1760 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1763 ')' INTO facet_text;
1770 $$ LANGUAGE PLPGSQL;
1773 -- This mimics a specific part of QueryParser, turning the first part of a
1774 -- classed search (search_class) into a set of classes and possibly fields.
1775 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1776 -- or "au" or "au|corporate|personal" or anything like that, where the first
1777 -- element of the list you get by separating on the "|" character is either
1778 -- a registered class (config.metabib_class) or an alias
1779 -- (config.metabib_search_alias), and the rest of any such elements are
1780 -- fields (config.metabib_field).
1782 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1783 RETURNS SETOF RECORD AS $func$
1785 search_parts TEXT[];
1787 search_part_count INTEGER;
1789 registered_class config.metabib_class%ROWTYPE;
1790 registered_alias config.metabib_search_alias%ROWTYPE;
1791 registered_field config.metabib_field%ROWTYPE;
1793 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1795 search_part_count := ARRAY_LENGTH(search_parts, 1);
1796 IF search_part_count = 0 THEN
1799 SELECT INTO registered_class
1800 * FROM config.metabib_class WHERE name = search_parts[1];
1802 IF search_part_count < 2 THEN -- all fields
1803 rec := (registered_class.name, NULL::INTEGER);
1807 FOR field_name IN SELECT *
1808 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1809 SELECT INTO registered_field
1810 * FROM config.metabib_field
1811 WHERE name = field_name AND
1812 field_class = registered_class.name;
1814 rec := (registered_class.name, registered_field.id);
1819 -- maybe we have an alias?
1820 SELECT INTO registered_alias
1821 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1825 IF search_part_count < 2 THEN -- return w/e the alias says
1827 registered_alias.field_class, registered_alias.field
1832 FOR field_name IN SELECT *
1833 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1834 SELECT INTO registered_field
1835 * FROM config.metabib_field
1836 WHERE name = field_name AND
1837 field_class = registered_alias.field_class;
1840 registered_alias.field_class,
1851 $func$ LANGUAGE PLPGSQL ROWS 1;
1854 -- Given a string such as a user might type into a search box, prepare
1855 -- two changed variants for TO_TSQUERY(). See
1856 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1857 -- The first variant is normalized to match indexed documents regardless
1858 -- of diacritics. The second variant keeps its diacritics for proper
1859 -- highlighting via TS_HEADLINE().
1861 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1864 orig_ended_in_space BOOLEAN;
1869 orig_ended_in_space := orig ~ E'\\s$';
1871 orig := ARRAY_TO_STRING(
1872 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1875 normalized := public.naco_normalize(orig); -- also trim()s
1876 plain := trim(orig);
1878 IF NOT orig_ended_in_space THEN
1879 plain := plain || ':*';
1880 normalized := normalized || ':*';
1883 plain := ARRAY_TO_STRING(
1884 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1886 normalized := ARRAY_TO_STRING(
1887 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1890 RETURN ARRAY[normalized, plain];
1892 $$ LANGUAGE PLPGSQL;
1894 -- Functions metabib.browse, metabib.staged_browse, and metabib.suggest_browse_entries
1895 -- will be created later, after internal dependencies are resolved.
1897 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1901 temp_vector TEXT := '';
1907 NEW.index_vector = ''::tsvector;
1909 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1911 SELECT n.func AS func,
1912 n.param_count AS param_count,
1914 FROM config.index_normalizer n
1915 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1916 WHERE field = NEW.field AND m.pos < 0
1918 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1919 quote_literal( value ) ||
1921 WHEN normalizer.param_count > 0
1922 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1932 SELECT n.func AS func,
1933 n.param_count AS param_count,
1935 FROM config.index_normalizer n
1936 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1937 WHERE field = NEW.field AND m.pos >= 0
1939 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1940 quote_literal( value ) ||
1942 WHEN normalizer.param_count > 0
1943 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1951 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
1953 value := ARRAY_TO_STRING(
1954 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
1956 value := public.search_normalize(value);
1957 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
1959 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1962 SELECT DISTINCT m.ts_config, m.index_weight
1963 FROM config.metabib_class_ts_map m
1964 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1965 LEFT JOIN config.coded_value_map ccvm ON (
1966 ccvm.ctype IN ('item_lang', 'language') AND
1967 ccvm.code = m.index_lang AND
1968 r.vlist @> intset(ccvm.id)
1970 WHERE m.field_class = TG_ARGV[0]
1972 AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
1973 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1975 SELECT DISTINCT m.ts_config, m.index_weight
1976 FROM config.metabib_field_ts_map m
1977 LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source)
1978 LEFT JOIN config.coded_value_map ccvm ON (
1979 ccvm.ctype IN ('item_lang', 'language') AND
1980 ccvm.code = m.index_lang AND
1981 r.vlist @> intset(ccvm.id)
1983 WHERE m.metabib_field = NEW.field
1985 AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL)
1986 ORDER BY index_weight ASC
1990 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
1991 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
1995 cur_weight = ts_rec.index_weight;
1996 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
1999 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2001 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2006 $$ LANGUAGE PLPGSQL;
2009 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2010 browse_entry BIGINT,
2015 sources INT, -- visible ones, that is
2016 asources INT, -- visible ones, that is
2017 row_number INT, -- internal use, sort of
2018 accurate BOOL, -- Count in sources field is accurate? Not
2019 -- if we had more than a browse superpage
2020 -- of records to look at.
2021 aaccurate BOOL, -- See previous comment...
2026 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2029 ) RETURNS BIGINT AS $p$
2031 FROM metabib.browse_entry mbe
2032 JOIN metabib.browse_entry_def_map mbedm ON (
2033 mbedm.entry = mbe.id
2034 AND mbedm.def = ANY($1)
2036 WHERE mbe.sort_value >= public.naco_normalize($2)
2037 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2038 $p$ LANGUAGE SQL STABLE;
2040 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2043 ) RETURNS BIGINT AS $p$
2045 FROM metabib.browse_entry mbe
2046 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2047 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2048 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2049 ash.atag = map.authority_field
2050 AND map.metabib_field = ANY($1)
2052 WHERE mbe.sort_value >= public.naco_normalize($2)
2053 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2054 $p$ LANGUAGE SQL STABLE;
2056 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2059 ) RETURNS BIGINT AS $p$
2061 FROM metabib.browse_entry mbe
2062 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2063 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2064 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2065 ash.atag = map.authority_field
2066 AND map.metabib_field = ANY($1)
2068 WHERE mbe.sort_value >= public.naco_normalize($2)
2069 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2070 $p$ LANGUAGE SQL STABLE;
2072 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2075 ) RETURNS BIGINT AS $p$
2076 SELECT id FROM metabib.browse_entry
2078 metabib.browse_bib_pivot($1, $2),
2079 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2081 ORDER BY sort_value, value LIMIT 1;
2082 $p$ LANGUAGE SQL STABLE;
2085 -- This function is used to help clean up facet labels. Due to quirks in
2086 -- MARC parsing, some facet labels may be generated with periods or commas
2087 -- at the end. This will strip a trailing commas off all the time, and
2088 -- periods when they don't look like they are part of initials.
2089 -- Smith, John => no change
2090 -- Smith, John, => Smith, John
2091 -- Smith, John. => Smith, John
2092 -- Public, John Q. => no change
2093 CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
2099 last_char = substring(result from '.$');
2101 IF last_char = ',' THEN
2102 result := substring(result from '^(.*),$');
2104 ELSIF last_char = '.' THEN
2105 IF substring(result from ' \w\.$') IS NULL THEN
2106 result := substring(result from '^(.*)\.$');
2113 $$ language 'plpgsql';