2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.browse_entry (
187 id BIGSERIAL PRIMARY KEY,
189 index_vector tsvector,
190 sort_value TEXT NOT NULL,
191 UNIQUE(sort_value, value)
195 CREATE INDEX browse_entry_sort_value_idx
196 ON metabib.browse_entry USING BTREE (sort_value);
198 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
199 CREATE TRIGGER metabib_browse_entry_fti_trigger
200 BEFORE INSERT OR UPDATE ON metabib.browse_entry
201 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
204 CREATE TABLE metabib.browse_entry_def_map (
205 id BIGSERIAL PRIMARY KEY,
206 entry BIGINT REFERENCES metabib.browse_entry (id),
207 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
208 source BIGINT REFERENCES biblio.record_entry (id),
209 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
211 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
212 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
213 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
215 CREATE TABLE metabib.browse_entry_simple_heading_map (
216 id BIGSERIAL PRIMARY KEY,
217 entry BIGINT REFERENCES metabib.browse_entry (id),
218 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
220 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
221 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
223 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
228 facet_text := NEW.value;
231 SELECT n.func AS func,
232 n.param_count AS param_count,
234 FROM config.index_normalizer n
235 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
236 WHERE m.field = NEW.field AND m.pos < 0
239 EXECUTE 'SELECT ' || normalizer.func || '(' ||
240 quote_literal( facet_text ) ||
242 WHEN normalizer.param_count > 0
243 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
250 NEW.value = facet_text;
256 CREATE TRIGGER facet_normalize_tgr
257 BEFORE UPDATE OR INSERT ON metabib.facet_entry
258 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
260 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
262 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
267 CREATE TRIGGER facet_force_nfc_tgr
268 BEFORE UPDATE OR INSERT ON metabib.facet_entry
269 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
271 -- DECREMENTING serial starts at -1
272 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
274 CREATE TABLE metabib.uncontrolled_record_attr_value (
275 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
276 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
279 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
281 CREATE VIEW metabib.record_attr_id_map AS
282 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
284 SELECT c.id, c.ctype AS attr, c.code AS value
285 FROM config.coded_value_map c
286 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
288 CREATE VIEW metabib.composite_attr_id_map AS
289 SELECT c.id, c.ctype AS attr, c.code AS value
290 FROM config.coded_value_map c
291 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
293 CREATE VIEW metabib.full_attr_id_map AS
294 SELECT id, attr, value FROM metabib.record_attr_id_map
296 SELECT id, attr, value FROM metabib.composite_attr_id_map;
299 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_def TEXT ) RETURNS query_int AS $func$
302 my $def = decode_json(shift);
304 die("Composite attribute definition not supplied") unless $def;
311 if (ref $d eq 'HASH') { # node or AND
312 if (exists $d->{_attr}) { # it is a node
313 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
314 return spi_exec_prepared(
315 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
318 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
319 return '!' . recurse($$d{_not});
320 } else { # an AND list
321 @list = map { recurse($$d{$_}) } sort keys %$d;
323 } elsif (ref $d eq 'ARRAY') {
325 @list = map { recurse($_) } @$d;
328 @list = grep { defined && $_ ne '' } @list;
330 return '(' . join($j,@list) . ')' if @list;
334 return recurse($def) || undef;
336 $func$ IMMUTABLE LANGUAGE plperlu;
338 CREATE OR REPLACE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
339 SELECT metabib.compile_composite_attr(definition) FROM config.composite_attr_entry_definition WHERE coded_value = $1;
340 $func$ STRICT IMMUTABLE LANGUAGE SQL;
342 CREATE TABLE metabib.record_attr_vector_list (
343 source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
344 vlist INT[] NOT NULL -- stores id from ccvm AND murav
346 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
348 /* This becomes a view, and we do sorters differently ...
349 CREATE TABLE metabib.record_attr (
350 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
351 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
353 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
354 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
355 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
359 CREATE TABLE metabib.record_sorter (
360 id BIGSERIAL PRIMARY KEY,
361 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
362 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
365 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
366 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
367 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
370 CREATE TYPE metabib.record_attr_type AS (
375 -- Back-compat view ... we're moving to an INTARRAY world
376 CREATE VIEW metabib.record_attr_flat AS
377 SELECT v.source AS id,
380 FROM metabib.full_attr_id_map m
381 JOIN metabib.record_attr_vector_list v ON ( m.id = ANY( v.vlist ) );
383 CREATE VIEW metabib.record_attr AS
384 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs FROM metabib.record_attr_flat GROUP BY 1;
386 -- Back-back-compat view ... we use to live in an HSTORE world
387 CREATE TYPE metabib.rec_desc_type AS (
405 CREATE VIEW metabib.rec_descriptor AS
408 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
409 FROM metabib.record_attr;
411 -- Use a sequence that matches previous version, for easier upgrading.
412 CREATE SEQUENCE metabib.full_rec_id_seq;
414 CREATE TABLE metabib.real_full_rec (
415 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
416 record BIGINT NOT NULL,
417 tag CHAR(3) NOT NULL,
422 index_vector tsvector NOT NULL
424 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
426 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
427 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
428 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
429 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
430 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
431 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
432 CREATE INDEX metabib_full_rec_isxn_caseless_idx
433 ON metabib.real_full_rec (LOWER(value))
434 WHERE tag IN ('020', '022', '024');
435 -- This next index might fully supplant the one above, but leaving both for now.
436 -- (they are not too large)
437 -- The reason we need this index is to ensure that the query parser always
438 -- prefers this index over the simpler tag/subfield index, as this greatly
439 -- increases Vandelay overlay speed for these identifiers, especially when
440 -- a record has many of these fields (around > 4-6 seems like the cutoff
441 -- on at least one PG9.1 system)
442 -- A similar index could be added for other fields (e.g. 010), but one should
443 -- leave out the LOWER() in all other cases.
444 -- TODO: verify whether we can discard the non tag/subfield/substring version
445 -- above (metabib_full_rec_isxn_caseless_idx)
446 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
447 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
448 WHERE tag IN ('020', '022', '024');
451 CREATE TRIGGER metabib_full_rec_fti_trigger
452 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
453 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
455 CREATE OR REPLACE VIEW metabib.full_rec AS
462 SUBSTRING(value,1,1024) AS value,
464 FROM metabib.real_full_rec;
466 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
467 AS ON INSERT TO metabib.full_rec
469 INSERT INTO metabib.real_full_rec VALUES (
470 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
480 CREATE OR REPLACE RULE metabib_full_rec_update_rule
481 AS ON UPDATE TO metabib.full_rec
483 UPDATE metabib.real_full_rec SET
489 subfield = NEW.subfield,
491 index_vector = NEW.index_vector
494 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
495 AS ON DELETE TO metabib.full_rec
497 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
499 CREATE TABLE metabib.metarecord_source_map (
500 id BIGSERIAL PRIMARY KEY,
501 metarecord BIGINT NOT NULL,
502 source BIGINT NOT NULL
504 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
505 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
507 CREATE TYPE metabib.field_entry_template AS (
520 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
522 bib biblio.record_entry%ROWTYPE;
523 idx config.metabib_field%ROWTYPE;
524 xfrm config.xml_transform%ROWTYPE;
526 transformed_xml TEXT;
528 xml_node_list TEXT[];
534 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
536 authority_link BIGINT;
537 output_row metabib.field_entry_template%ROWTYPE;
540 -- Start out with no field-use bools set
541 output_row.browse_field = FALSE;
542 output_row.facet_field = FALSE;
543 output_row.search_field = FALSE;
546 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
548 -- Loop over the indexing entries
549 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
551 joiner := COALESCE(idx.joiner, default_joiner);
553 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
555 -- See if we can skip the XSLT ... it's expensive
556 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
557 -- Can't skip the transform
558 IF xfrm.xslt <> '---' THEN
559 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
561 transformed_xml := bib.marc;
564 prev_xfrm := xfrm.name;
567 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
570 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
571 CONTINUE WHEN xml_node !~ E'^\\s*<';
573 -- XXX much of this should be moved into oils_xpath_string...
574 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
575 oils_xpath( '//text()',
577 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
578 REGEXP_REPLACE( -- This escapes embeded <s
580 $re$(>[^<]+)(<)([^>]+<)$re$,
596 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
598 IF raw_text IS NOT NULL THEN
599 raw_text := raw_text || joiner;
602 raw_text := COALESCE(raw_text,'') || curr_text;
604 -- autosuggest/metabib.browse_entry
605 IF idx.browse_field THEN
607 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
608 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
610 browse_text := curr_text;
613 IF idx.browse_sort_xpath IS NOT NULL AND
614 idx.browse_sort_xpath <> '' THEN
616 sort_value := oils_xpath_string(
617 idx.browse_sort_xpath, xml_node, joiner,
618 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
621 sort_value := browse_text;
624 output_row.field_class = idx.field_class;
625 output_row.field = idx.id;
626 output_row.source = rid;
627 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
628 output_row.sort_value :=
629 public.naco_normalize(sort_value);
631 output_row.authority := NULL;
633 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
634 authority_text := oils_xpath_string(
635 idx.authority_xpath, xml_node, joiner,
637 ARRAY[xfrm.prefix, xfrm.namespace_uri],
638 ARRAY['xlink','http://www.w3.org/1999/xlink']
642 IF authority_text ~ '^\d+$' THEN
643 authority_link := authority_text::BIGINT;
644 PERFORM * FROM authority.record_entry WHERE id = authority_link;
646 output_row.authority := authority_link;
652 output_row.browse_field = TRUE;
653 -- Returning browse rows with search_field = true for search+browse
654 -- configs allows us to retain granularity of being able to search
655 -- browse fields with "starts with" type operators (for example, for
656 -- titles of songs in music albums)
657 IF idx.search_field THEN
658 output_row.search_field = TRUE;
660 RETURN NEXT output_row;
661 output_row.browse_field = FALSE;
662 output_row.search_field = FALSE;
663 output_row.sort_value := NULL;
666 -- insert raw node text for faceting
667 IF idx.facet_field THEN
669 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
670 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
672 facet_text := curr_text;
675 output_row.field_class = idx.field_class;
676 output_row.field = -1 * idx.id;
677 output_row.source = rid;
678 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
680 output_row.facet_field = TRUE;
681 RETURN NEXT output_row;
682 output_row.facet_field = FALSE;
687 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
689 -- insert combined node text for searching
690 IF idx.search_field THEN
691 output_row.field_class = idx.field_class;
692 output_row.field = idx.id;
693 output_row.source = rid;
694 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
696 output_row.search_field = TRUE;
697 RETURN NEXT output_row;
698 output_row.search_field = FALSE;
705 $func$ LANGUAGE PLPGSQL;
707 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
709 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
710 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
711 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
712 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
713 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
714 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
715 FROM metabib.keyword_field_entry WHERE source = bib_id;
717 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
718 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
719 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
720 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
721 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
722 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
723 FROM metabib.title_field_entry WHERE source = bib_id;
725 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
726 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
727 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
728 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
729 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
730 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
731 FROM metabib.author_field_entry WHERE source = bib_id;
733 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
734 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
735 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
736 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
737 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
738 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
739 FROM metabib.subject_field_entry WHERE source = bib_id;
741 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
742 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
743 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
744 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
745 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
746 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
747 FROM metabib.series_field_entry WHERE source = bib_id;
749 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
750 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
751 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
752 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
753 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
754 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
755 FROM metabib.identifier_field_entry WHERE source = bib_id;
758 $func$ LANGUAGE PLPGSQL;
760 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
763 ind_data metabib.field_entry_template%ROWTYPE;
764 mbe_row metabib.browse_entry%ROWTYPE;
772 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
773 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
774 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
776 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
778 IF NOT b_skip_search THEN
779 FOR fclass IN SELECT * FROM config.metabib_class LOOP
780 -- RAISE NOTICE 'Emptying out %', fclass.name;
781 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
784 IF NOT b_skip_facet THEN
785 DELETE FROM metabib.facet_entry WHERE source = bib_id;
787 IF NOT b_skip_browse THEN
788 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
792 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
793 IF ind_data.field < 0 THEN
794 ind_data.field = -1 * ind_data.field;
797 IF ind_data.facet_field AND NOT b_skip_facet THEN
798 INSERT INTO metabib.facet_entry (field, source, value)
799 VALUES (ind_data.field, ind_data.source, ind_data.value);
802 IF ind_data.browse_field AND NOT b_skip_browse THEN
803 -- A caveat about this SELECT: this should take care of replacing
804 -- old mbe rows when data changes, but not if normalization (by
805 -- which I mean specifically the output of
806 -- evergreen.oils_tsearch2()) changes. It may or may not be
807 -- expensive to add a comparison of index_vector to index_vector
808 -- to the WHERE clause below.
810 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
811 SELECT INTO mbe_row * FROM metabib.browse_entry
812 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
815 mbe_id := mbe_row.id;
817 INSERT INTO metabib.browse_entry
818 ( value, sort_value ) VALUES
819 ( value_prepped, ind_data.sort_value );
821 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
824 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
825 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
828 IF ind_data.search_field AND NOT b_skip_search THEN
829 -- Avoid inserting duplicate rows
830 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
831 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
832 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
833 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
834 IF mbe_id IS NULL THEN
836 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
838 quote_literal(ind_data.field) || $$, $$ ||
839 quote_literal(ind_data.source) || $$, $$ ||
840 quote_literal(ind_data.value) ||
847 IF NOT b_skip_search THEN
848 PERFORM metabib.update_combined_index_vectors(bib_id);
853 $func$ LANGUAGE PLPGSQL;
855 -- default to a space joiner
856 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
857 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
860 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
862 auth authority.record_entry%ROWTYPE;
863 output authority.full_rec%ROWTYPE;
866 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
868 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
869 output.record := rid;
870 output.ind1 := field.ind1;
871 output.ind2 := field.ind2;
872 output.tag := field.tag;
873 output.subfield := field.subfield;
874 output.value := field.value;
879 $func$ LANGUAGE PLPGSQL;
881 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
883 bib biblio.record_entry%ROWTYPE;
884 output metabib.full_rec%ROWTYPE;
887 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
889 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
890 output.record := rid;
891 output.ind1 := field.ind1;
892 output.ind2 := field.ind2;
893 output.tag := field.tag;
894 output.subfield := field.subfield;
895 output.value := field.value;
900 $func$ LANGUAGE PLPGSQL;
902 CREATE OR REPLACE FUNCTION vandelay.marc21_record_type( marc TEXT ) RETURNS config.marc21_rec_type_map AS $func$
909 retval config.marc21_rec_type_map%ROWTYPE;
911 ldr := oils_xpath_string( '//*[local-name()="leader"]', marc );
913 IF ldr IS NULL OR ldr = '' THEN
914 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
918 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
919 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
922 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
923 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
925 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
927 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
930 IF retval.code IS NULL THEN
931 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
936 $func$ LANGUAGE PLPGSQL;
938 CREATE OR REPLACE FUNCTION biblio.marc21_record_type( rid BIGINT ) RETURNS config.marc21_rec_type_map AS $func$
939 SELECT * FROM vandelay.marc21_record_type( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
942 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field_list( marc TEXT, ff TEXT ) RETURNS TEXT[] AS $func$
948 collection TEXT[] := '{}'::TEXT[];
950 rtype := (vandelay.marc21_record_type( marc )).code;
951 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP
952 IF ff_pos.tag = 'ldr' THEN
953 val := oils_xpath_string('//*[local-name()="leader"]', marc);
954 IF val IS NOT NULL THEN
955 val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length );
956 collection := collection || val;
959 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
960 val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
961 collection := collection || val;
964 val := REPEAT( ff_pos.default_val, ff_pos.length );
965 collection := collection || val;
970 $func$ LANGUAGE PLPGSQL;
972 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT ) RETURNS TEXT AS $func$
979 rtype := (vandelay.marc21_record_type( marc )).code;
980 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP
981 IF ff_pos.tag = 'ldr' THEN
982 val := oils_xpath_string('//*[local-name()="leader"]', marc);
983 IF val IS NOT NULL THEN
984 val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length );
988 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
989 val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
993 val := REPEAT( ff_pos.default_val, ff_pos.length );
999 $func$ LANGUAGE PLPGSQL;
1001 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1002 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 );
1003 $func$ LANGUAGE SQL;
1005 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1006 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 );
1007 $func$ LANGUAGE SQL;
1009 -- CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT);
1010 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT ) RETURNS SETOF biblio.record_ff_map AS $func$
1015 output biblio.record_ff_map%ROWTYPE;
1017 rtype := (vandelay.marc21_record_type( marc )).code;
1019 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP
1020 output.ff_name := ff_pos.fixed_field;
1021 output.ff_value := NULL;
1023 IF ff_pos.tag = 'ldr' THEN
1024 output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc);
1025 IF output.ff_value IS NOT NULL THEN
1026 output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length );
1028 output.ff_value := NULL;
1031 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
1032 output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length );
1033 IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF;
1035 output.ff_value := NULL;
1043 $func$ LANGUAGE PLPGSQL;
1045 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1046 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1047 $func$ LANGUAGE SQL;
1049 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1050 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1051 $func$ LANGUAGE SQL;
1053 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1066 IF marc IS NULL OR marc = '' THEN
1070 -- First, the count of tags
1071 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1073 -- now go through a bunch of pain to get the record type
1074 IF best_type IS NOT NULL THEN
1075 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1077 IF ldr IS NOT NULL THEN
1078 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1079 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1082 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1083 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1085 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1087 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1089 IF type_map.code IS NOT NULL THEN
1090 IF best_type = type_map.code THEN
1091 qual := qual + qual / 2;
1094 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1095 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1096 IF ff_tag_data = best_lang THEN
1104 -- Now look for some quality metrics
1106 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1111 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1118 $func$ LANGUAGE PLPGSQL;
1120 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1122 idx config.biblio_fingerprint%ROWTYPE;
1123 xfrm config.xml_transform%ROWTYPE;
1125 transformed_xml TEXT;
1127 xml_node_list TEXT[];
1129 output_text TEXT := '';
1132 IF marc IS NULL OR marc = '' THEN
1136 -- Loop over the indexing entries
1137 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1139 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1141 -- See if we can skip the XSLT ... it's expensive
1142 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1143 -- Can't skip the transform
1144 IF xfrm.xslt <> '---' THEN
1145 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1147 transformed_xml := marc;
1150 prev_xfrm := xfrm.name;
1153 raw_text := COALESCE(
1161 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1170 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1171 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1173 IF idx.first_word IS TRUE THEN
1174 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1177 output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
1184 $func$ LANGUAGE PLPGSQL;
1186 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1187 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1190 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1192 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1196 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1197 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1202 $func$ LANGUAGE PLPGSQL;
1204 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1206 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1208 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1210 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1211 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1215 $func$ LANGUAGE PLPGSQL;
1217 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1224 uri_owner_list TEXT[];
1232 -- Clear any URI mappings and call numbers for this bib.
1233 -- This leads to acn / auricnm inflation, but also enables
1234 -- old acn/auricnm's to go away and for bibs to be deleted.
1235 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1236 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1237 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1240 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1241 IF ARRAY_UPPER(uris,1) > 0 THEN
1242 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1243 -- First we pull info out of the 856
1246 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1247 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1248 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1250 IF uri_label IS NULL THEN
1251 uri_label := uri_href;
1253 CONTINUE WHEN uri_href IS NULL;
1255 -- Get the distinct list of libraries wanting to use
1257 DISTINCT REGEXP_REPLACE(
1259 $re$^.*?\((\w+)\).*$$re$,
1262 ) INTO uri_owner_list
1265 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1270 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1272 -- look for a matching uri
1273 IF uri_use IS NULL THEN
1274 SELECT id INTO uri_id
1276 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1277 ORDER BY id LIMIT 1;
1278 IF NOT FOUND THEN -- create one
1279 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1280 SELECT id INTO uri_id
1282 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1285 SELECT id INTO uri_id
1287 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1288 ORDER BY id LIMIT 1;
1289 IF NOT FOUND THEN -- create one
1290 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1291 SELECT id INTO uri_id
1293 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1297 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1298 uri_owner := uri_owner_list[j];
1300 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1301 CONTINUE WHEN NOT FOUND;
1303 -- we need a call number to link through
1304 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1306 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1307 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1308 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1311 -- now, link them if they're not already
1312 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1314 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1326 $func$ LANGUAGE PLPGSQL;
1328 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT ) RETURNS BIGINT AS $func$
1332 tmp_mr metabib.metarecord%ROWTYPE;
1333 deleted_mrs BIGINT[];
1336 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id; -- Rid ourselves of the search-estimate-killing linkage
1338 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1340 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN -- Find the first fingerprint-matching
1341 old_mr := tmp_mr.id;
1343 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1344 IF source_count = 0 THEN -- No other records
1345 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1346 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1352 IF old_mr IS NULL THEN -- we found no suitable, preexisting MR based on old source maps
1353 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1354 IF old_mr IS NULL THEN -- nope, create one and grab its id
1355 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1356 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1357 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1358 UPDATE metabib.metarecord
1360 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1)
1363 ELSE -- there was one we already attached to, update its mods cache and master_record
1364 UPDATE metabib.metarecord
1366 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1)
1370 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1372 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1373 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1379 $func$ LANGUAGE PLPGSQL;
1381 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1382 DELETE FROM authority.bib_linking WHERE bib = $1;
1383 INSERT INTO authority.bib_linking (bib, authority)
1386 FROM ( SELECT DISTINCT $1 AS bib,
1387 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1388 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1389 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1390 ) y JOIN authority.record_entry r ON r.id = y.authority;
1392 $func$ LANGUAGE SQL;
1394 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1396 transformed_xml TEXT;
1397 rmarc TEXT := prmarc;
1401 xfrm config.xml_transform%ROWTYPE;
1402 attr_vector INT[] := '{}'::INT[];
1403 attr_vector_tmp INT[];
1404 attr_list TEXT[] := pattr_list;
1406 norm_attr_value TEXT[];
1408 attr_def config.record_attr_definition%ROWTYPE;
1409 ccvm_row config.coded_value_map%ROWTYPE;
1412 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1413 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
1416 IF rmarc IS NULL THEN
1417 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1420 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1422 attr_value := '{}'::TEXT[];
1423 norm_attr_value := '{}'::TEXT[];
1424 attr_vector_tmp := '{}'::INT[];
1426 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
1428 -- tag+sf attrs only support SVF
1429 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1430 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1431 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1433 AND tag LIKE attr_def.tag
1435 WHEN attr_def.sf_list IS NOT NULL
1436 THEN POSITION(subfield IN attr_def.sf_list) > 0
1443 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1444 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1446 IF NOT attr_def.multi THEN
1447 attr_value := ARRAY[attr_value[1]];
1450 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1452 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1454 -- See if we can skip the XSLT ... it's expensive
1455 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1456 -- Can't skip the transform
1457 IF xfrm.xslt <> '---' THEN
1458 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1460 transformed_xml := rmarc;
1463 prev_xfrm := xfrm.name;
1466 IF xfrm.name IS NULL THEN
1467 -- just grab the marcxml (empty) transform
1468 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1469 prev_xfrm := xfrm.name;
1472 FOR tmp_xml IN SELECT XPATH(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
1473 tmp_val := oils_xpath_string(
1476 COALESCE(attr_def.joiner,' '),
1477 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1479 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1480 attr_value := attr_value || tmp_val;
1481 EXIT WHEN NOT attr_def.multi;
1485 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1486 SELECT ARRAY_AGG(m.value) INTO attr_value
1487 FROM vandelay.marc21_physical_characteristics(rmarc) v
1488 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1489 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
1490 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
1492 IF NOT attr_def.multi THEN
1493 attr_value := ARRAY[attr_value[1]];
1498 -- apply index normalizers to attr_value
1499 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
1501 SELECT n.func AS func,
1502 n.param_count AS param_count,
1504 FROM config.index_normalizer n
1505 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1506 WHERE attr = attr_def.name
1508 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1509 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1511 WHEN normalizer.param_count > 0
1512 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1518 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1519 norm_attr_value := norm_attr_value || tmp_val;
1523 IF attr_def.filter THEN
1524 -- Create unknown uncontrolled values and find the IDs of the values
1525 IF ccvm_row.id IS NULL THEN
1526 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1527 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
1528 BEGIN -- use subtransaction to isolate unique constraint violations
1529 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1530 EXCEPTION WHEN unique_violation THEN END;
1534 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1536 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
1539 -- Add the new value to the vector
1540 attr_vector := attr_vector || attr_vector_tmp;
1543 IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
1544 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1545 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1550 /* We may need to rewrite the vlist to contain
1551 the intersection of new values for requested
1552 attrs and old values for ignored attrs. To
1553 do this, we take the old attr vlist and
1554 subtract any values that are valid for the
1555 requested attrs, and then add back the new
1556 set of attr values. */
1558 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1559 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1560 SELECT attr_vector_tmp - ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1561 attr_vector := attr_vector || attr_vector_tmp;
1564 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1565 -- attributes can depend on earlier ones.
1566 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1568 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1570 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1571 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1573 IF attr_def.filter THEN
1574 IF attr_vector @@ tmp_val::query_int THEN
1575 attr_vector = attr_vector + intset(ccvm_row.id);
1576 EXIT WHEN NOT attr_def.multi;
1580 IF attr_def.sorter THEN
1581 IF attr_vector ~~ tmp_val THEN
1582 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1583 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1591 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1592 IF rdeleted THEN -- initial insert OR revivication
1593 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1594 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1596 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1602 $func$ LANGUAGE PLPGSQL;
1605 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1606 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1609 IF NEW.deleted THEN -- If this bib is deleted
1610 PERFORM * FROM config.internal_flag WHERE
1611 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1613 -- One needs to keep these around to support searches
1614 -- with the #deleted modifier, so one should turn on the named
1615 -- internal flag for that functionality.
1616 DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id;
1617 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1620 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1621 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1622 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1623 RETURN NEW; -- and we're done
1626 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1627 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1629 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1634 -- Record authority linking
1635 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1637 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1640 -- Flatten and insert the mfr data
1641 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1643 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1645 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1646 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1648 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1652 -- Gather and insert the field entry data
1653 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1655 -- Located URI magic
1656 IF TG_OP = 'INSERT' THEN
1657 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1659 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1662 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1664 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1668 -- (re)map metarecord-bib linking
1669 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1670 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1672 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1674 ELSE -- we're doing an update, and we're not deleted, remap
1675 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1677 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1683 $func$ LANGUAGE PLPGSQL;
1685 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1691 SELECT n.func AS func,
1692 n.param_count AS param_count,
1694 FROM config.index_normalizer n
1695 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1696 WHERE m.field = mapped_field AND m.pos < 0
1699 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1700 quote_literal( facet_text ) ||
1702 WHEN normalizer.param_count > 0
1703 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1706 ')' INTO facet_text;
1713 $$ LANGUAGE PLPGSQL;
1716 -- This mimics a specific part of QueryParser, turning the first part of a
1717 -- classed search (search_class) into a set of classes and possibly fields.
1718 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1719 -- or "au" or "au|corporate|personal" or anything like that, where the first
1720 -- element of the list you get by separating on the "|" character is either
1721 -- a registered class (config.metabib_class) or an alias
1722 -- (config.metabib_search_alias), and the rest of any such elements are
1723 -- fields (config.metabib_field).
1725 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1726 RETURNS SETOF RECORD AS $func$
1728 search_parts TEXT[];
1730 search_part_count INTEGER;
1732 registered_class config.metabib_class%ROWTYPE;
1733 registered_alias config.metabib_search_alias%ROWTYPE;
1734 registered_field config.metabib_field%ROWTYPE;
1736 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1738 search_part_count := ARRAY_LENGTH(search_parts, 1);
1739 IF search_part_count = 0 THEN
1742 SELECT INTO registered_class
1743 * FROM config.metabib_class WHERE name = search_parts[1];
1745 IF search_part_count < 2 THEN -- all fields
1746 rec := (registered_class.name, NULL::INTEGER);
1750 FOR field_name IN SELECT *
1751 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1752 SELECT INTO registered_field
1753 * FROM config.metabib_field
1754 WHERE name = field_name AND
1755 field_class = registered_class.name;
1757 rec := (registered_class.name, registered_field.id);
1762 -- maybe we have an alias?
1763 SELECT INTO registered_alias
1764 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1768 IF search_part_count < 2 THEN -- return w/e the alias says
1770 registered_alias.field_class, registered_alias.field
1775 FOR field_name IN SELECT *
1776 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1777 SELECT INTO registered_field
1778 * FROM config.metabib_field
1779 WHERE name = field_name AND
1780 field_class = registered_alias.field_class;
1783 registered_alias.field_class,
1794 $func$ LANGUAGE PLPGSQL ROWS 1;
1797 -- Given a string such as a user might type into a search box, prepare
1798 -- two changed variants for TO_TSQUERY(). See
1799 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1800 -- The first variant is normalized to match indexed documents regardless
1801 -- of diacritics. The second variant keeps its diacritics for proper
1802 -- highlighting via TS_HEADLINE().
1804 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1807 orig_ended_in_space BOOLEAN;
1812 orig_ended_in_space := orig ~ E'\\s$';
1814 orig := ARRAY_TO_STRING(
1815 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1818 normalized := public.naco_normalize(orig); -- also trim()s
1819 plain := trim(orig);
1821 IF NOT orig_ended_in_space THEN
1822 plain := plain || ':*';
1823 normalized := normalized || ':*';
1826 plain := ARRAY_TO_STRING(
1827 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1829 normalized := ARRAY_TO_STRING(
1830 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1833 RETURN ARRAY[normalized, plain];
1835 $$ LANGUAGE PLPGSQL;
1839 FUNCTION metabib.suggest_browse_entries(
1840 raw_query_text TEXT, -- actually typed by humans at the UI level
1841 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
1842 headline_opts TEXT, -- markup options for ts_headline()
1843 visibility_org INTEGER,-- null if you don't want opac visibility test
1844 query_limit INTEGER,-- use in LIMIT clause of interal query
1845 normalization INTEGER -- argument to TS_RANK_CD()
1847 value TEXT, -- plain
1849 buoyant_and_class_match BOOL,
1851 field_weight INTEGER,
1854 match TEXT -- marked up
1857 prepared_query_texts TEXT[];
1859 plain_query TSQUERY;
1860 opac_visibility_join TEXT;
1861 search_class_join TEXT;
1864 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
1866 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
1867 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
1869 visibility_org := NULLIF(visibility_org,-1);
1870 IF visibility_org IS NOT NULL THEN
1871 opac_visibility_join := '
1872 JOIN asset.opac_visible_copies aovc ON (
1873 aovc.record = x.source AND
1874 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
1877 opac_visibility_join := '';
1880 -- The following determines whether we only provide suggestsons matching
1881 -- the user's selected search_class, or whether we show other suggestions
1882 -- too. The reason for MIN() is that for search_classes like
1883 -- 'title|proper|uniform' you would otherwise get multiple rows. The
1884 -- implication is that if title as a class doesn't have restrict,
1885 -- nor does the proper field, but the uniform field does, you're going
1886 -- to get 'false' for your overall evaluation of 'should we restrict?'
1887 -- To invert that, change from MIN() to MAX().
1891 MIN(cmc.restrict::INT) AS restrict_class,
1892 MIN(cmf.restrict::INT) AS restrict_field
1893 FROM metabib.search_class_to_registered_components(search_class)
1894 AS _registered (field_class TEXT, field INT)
1896 config.metabib_class cmc ON (cmc.name = _registered.field_class)
1898 config.metabib_field cmf ON (cmf.id = _registered.field);
1900 -- evaluate 'should we restrict?'
1901 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
1902 search_class_join := '
1904 metabib.search_class_to_registered_components($2)
1905 AS _registered (field_class TEXT, field INT) ON (
1906 (_registered.field IS NULL AND
1907 _registered.field_class = cmf.field_class) OR
1908 (_registered.field = cmf.id)
1912 search_class_join := '
1914 metabib.search_class_to_registered_components($2)
1915 AS _registered (field_class TEXT, field INT) ON (
1916 _registered.field_class = cmc.name
1921 RETURN QUERY EXECUTE '
1930 TS_HEADLINE(value, $7, $3)
1931 FROM (SELECT DISTINCT
1934 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
1935 _registered.field = cmf.id AS restrict,
1937 TS_RANK_CD(mbe.index_vector, $1, $6),
1940 FROM metabib.browse_entry_def_map mbedm
1941 JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
1942 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
1943 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
1944 ' || search_class_join || '
1945 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1947 ' || opac_visibility_join || '
1948 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1950 ' -- sic, repeat the order by clause in the outer select too
1952 query, search_class, headline_opts,
1953 visibility_org, query_limit, normalization, plain_query
1957 -- buoyant AND chosen class = match class
1958 -- chosen field = match field
1965 $func$ LANGUAGE PLPGSQL;
1967 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1971 temp_vector TEXT := '';
1977 NEW.index_vector = ''::tsvector;
1979 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1981 SELECT n.func AS func,
1982 n.param_count AS param_count,
1984 FROM config.index_normalizer n
1985 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1986 WHERE field = NEW.field AND m.pos < 0
1988 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1989 quote_literal( value ) ||
1991 WHEN normalizer.param_count > 0
1992 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2002 SELECT n.func AS func,
2003 n.param_count AS param_count,
2005 FROM config.index_normalizer n
2006 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
2007 WHERE field = NEW.field AND m.pos >= 0
2009 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2010 quote_literal( value ) ||
2012 WHEN normalizer.param_count > 0
2013 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2021 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
2022 value := ARRAY_TO_STRING(
2023 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
2025 value := public.search_normalize(value);
2026 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2027 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
2029 SELECT ts_config, index_weight
2030 FROM config.metabib_class_ts_map
2031 WHERE field_class = TG_ARGV[0]
2032 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
2033 AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
2035 SELECT ts_config, index_weight
2036 FROM config.metabib_field_ts_map
2037 WHERE metabib_field = NEW.field
2038 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
2039 ORDER BY index_weight ASC
2041 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2042 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2045 cur_weight = ts_rec.index_weight;
2046 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2048 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2050 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2055 $$ LANGUAGE PLPGSQL;
2058 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2059 browse_entry BIGINT,
2064 sources INT, -- visible ones, that is
2065 asources INT, -- visible ones, that is
2066 row_number INT, -- internal use, sort of
2067 accurate BOOL, -- Count in sources field is accurate? Not
2068 -- if we had more than a browse superpage
2069 -- of records to look at.
2070 aaccurate BOOL, -- See previous comment...
2075 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2078 ) RETURNS BIGINT AS $p$
2080 FROM metabib.browse_entry mbe
2081 JOIN metabib.browse_entry_def_map mbedm ON (
2082 mbedm.entry = mbe.id
2083 AND mbedm.def = ANY($1)
2085 WHERE mbe.sort_value >= public.naco_normalize($2)
2086 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2087 $p$ LANGUAGE SQL STABLE;
2089 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2092 ) RETURNS BIGINT AS $p$
2094 FROM metabib.browse_entry mbe
2095 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2096 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2097 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2098 ash.atag = map.authority_field
2099 AND map.metabib_field = ANY($1)
2101 WHERE mbe.sort_value >= public.naco_normalize($2)
2102 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2103 $p$ LANGUAGE SQL STABLE;
2105 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2108 ) RETURNS BIGINT AS $p$
2110 FROM metabib.browse_entry mbe
2111 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2112 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2113 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2114 ash.atag = map.authority_field
2115 AND map.metabib_field = ANY($1)
2117 WHERE mbe.sort_value >= public.naco_normalize($2)
2118 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2119 $p$ LANGUAGE SQL STABLE;
2121 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2124 ) RETURNS BIGINT AS $p$
2125 SELECT id FROM metabib.browse_entry
2127 metabib.browse_bib_pivot($1, $2),
2128 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2130 ORDER BY sort_value, value LIMIT 1;
2131 $p$ LANGUAGE SQL STABLE;
2134 CREATE OR REPLACE FUNCTION metabib.staged_browse(
2138 context_locations INT[],
2140 browse_superpage_size INT,
2141 count_up_from_zero BOOL, -- if false, count down from -1
2144 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2152 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
2153 results_skipped INT := 0;
2154 row_counter INT := 0;
2159 all_records BIGINT[];
2160 all_brecords BIGINT[];
2161 all_arecords BIGINT[];
2162 superpage_of_records BIGINT[];
2165 IF count_up_from_zero THEN
2171 OPEN curs FOR EXECUTE query;
2174 FETCH curs INTO rec;
2176 IF result_row.pivot_point IS NOT NULL THEN
2177 RETURN NEXT result_row;
2183 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
2184 SELECT INTO all_arecords, result_row.sees, afields
2185 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
2186 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
2187 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
2189 FROM metabib.browse_entry_simple_heading_map mbeshm
2190 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2191 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
2192 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
2193 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2194 ash.atag = map.authority_field
2195 AND map.metabib_field = ANY(fields)
2197 WHERE mbeshm.entry = rec.id;
2200 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
2201 SELECT INTO all_brecords, result_row.authorities, bfields
2202 ARRAY_AGG(DISTINCT source),
2203 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
2204 ARRAY_AGG(DISTINCT def)
2205 FROM metabib.browse_entry_def_map
2206 WHERE entry = rec.id
2207 AND def = ANY(fields);
2209 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
2211 result_row.sources := 0;
2212 result_row.asources := 0;
2214 -- Bib-linked vis checking
2215 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
2217 full_end := ARRAY_LENGTH(all_brecords, 1);
2218 superpage_size := COALESCE(browse_superpage_size, full_end);
2220 slice_end := superpage_size;
2222 WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
2223 superpage_of_records := all_brecords[slice_start:slice_end];
2225 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2226 '1::INT AS rel FROM (SELECT UNNEST(' ||
2227 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2229 -- We use search.query_parser_fts() for visibility testing.
2230 -- We're calling it once per browse-superpage worth of records
2231 -- out of the set of records related to a given mbe, until we've
2232 -- either exhausted that set of records or found at least 1
2235 SELECT INTO result_row.sources visible
2236 FROM search.query_parser_fts(
2237 context_org, NULL, qpfts_query, NULL,
2238 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2240 WHERE qpfts.rel IS NULL;
2242 slice_start := slice_start + superpage_size;
2243 slice_end := slice_end + superpage_size;
2246 -- Accurate? Well, probably.
2247 result_row.accurate := browse_superpage_size IS NULL OR
2248 browse_superpage_size >= full_end;
2252 -- Authority-linked vis checking
2253 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
2255 full_end := ARRAY_LENGTH(all_arecords, 1);
2256 superpage_size := COALESCE(browse_superpage_size, full_end);
2258 slice_end := superpage_size;
2260 WHILE result_row.asources = 0 AND slice_start <= full_end LOOP
2261 superpage_of_records := all_arecords[slice_start:slice_end];
2263 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2264 '1::INT AS rel FROM (SELECT UNNEST(' ||
2265 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2267 -- We use search.query_parser_fts() for visibility testing.
2268 -- We're calling it once per browse-superpage worth of records
2269 -- out of the set of records related to a given mbe, via
2270 -- authority until we've either exhausted that set of records
2271 -- or found at least 1 visible record.
2273 SELECT INTO result_row.asources visible
2274 FROM search.query_parser_fts(
2275 context_org, NULL, qpfts_query, NULL,
2276 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2278 WHERE qpfts.rel IS NULL;
2280 slice_start := slice_start + superpage_size;
2281 slice_end := slice_end + superpage_size;
2285 -- Accurate? Well, probably.
2286 result_row.aaccurate := browse_superpage_size IS NULL OR
2287 browse_superpage_size >= full_end;
2291 IF result_row.sources > 0 OR result_row.asources > 0 THEN
2293 -- The function that calls this function needs row_number in order
2294 -- to correctly order results from two different runs of this
2296 result_row.row_number := row_number;
2298 -- Now, if row_counter is still less than limit, return a row. If
2299 -- not, but it is less than next_pivot_pos, continue on without
2300 -- returning actual result rows until we find
2301 -- that next pivot, and return it.
2303 IF row_counter < result_limit THEN
2304 result_row.browse_entry := rec.id;
2305 result_row.value := rec.value;
2307 RETURN NEXT result_row;
2309 result_row.browse_entry := NULL;
2310 result_row.authorities := NULL;
2311 result_row.fields := NULL;
2312 result_row.value := NULL;
2313 result_row.sources := NULL;
2314 result_row.sees := NULL;
2315 result_row.accurate := NULL;
2316 result_row.aaccurate := NULL;
2317 result_row.pivot_point := rec.id;
2319 IF row_counter >= next_pivot_pos THEN
2320 RETURN NEXT result_row;
2325 IF count_up_from_zero THEN
2326 row_number := row_number + 1;
2328 row_number := row_number - 1;
2331 -- row_counter is different from row_number.
2332 -- It simply counts up from zero so that we know when
2333 -- we've reached our limit.
2334 row_counter := row_counter + 1;
2338 $p$ LANGUAGE PLPGSQL;
2341 CREATE OR REPLACE FUNCTION metabib.browse(
2344 context_org INT DEFAULT NULL,
2345 context_loc_group INT DEFAULT NULL,
2346 staff BOOL DEFAULT FALSE,
2347 pivot_id BIGINT DEFAULT NULL,
2348 result_limit INT DEFAULT 10
2349 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2354 pivot_sort_value TEXT;
2355 pivot_sort_fallback TEXT;
2356 context_locations INT[];
2357 browse_superpage_size INT;
2358 results_skipped INT := 0;
2362 forward_to_pivot INT;
2364 -- First, find the pivot if we were given a browse term but not a pivot.
2365 IF pivot_id IS NULL THEN
2366 pivot_id := metabib.browse_pivot(search_field, browse_term);
2369 SELECT INTO pivot_sort_value, pivot_sort_fallback
2370 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
2372 -- Bail if we couldn't find a pivot.
2373 IF pivot_sort_value IS NULL THEN
2377 -- Transform the context_loc_group argument (if any) (logc at the
2378 -- TPAC layer) into a form we'll be able to use.
2379 IF context_loc_group IS NOT NULL THEN
2380 SELECT INTO context_locations ARRAY_AGG(location)
2381 FROM asset.copy_location_group_map
2382 WHERE lgroup = context_loc_group;
2385 -- Get the configured size of browse superpages.
2386 SELECT INTO browse_superpage_size value -- NULL ok
2387 FROM config.global_flag
2388 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
2390 -- First we're going to search backward from the pivot, then we're going
2391 -- to search forward. In each direction, we need two limits. At the
2392 -- lesser of the two limits, we delineate the edge of the result set
2393 -- we're going to return. At the greater of the two limits, we find the
2394 -- pivot value that would represent an offset from the current pivot
2395 -- at a distance of one "page" in either direction, where a "page" is a
2396 -- result set of the size specified in the "result_limit" argument.
2398 -- The two limits in each direction make four derived values in total,
2399 -- and we calculate them now.
2400 back_limit := CEIL(result_limit::FLOAT / 2);
2401 back_to_pivot := result_limit;
2402 forward_limit := result_limit / 2;
2403 forward_to_pivot := result_limit - 1;
2405 -- This is the meat of the SQL query that finds browse entries. We'll
2406 -- pass this to a function which uses it with a cursor, so that individual
2407 -- rows may be fetched in a loop until some condition is satisfied, without
2408 -- waiting for a result set of fixed size to be collected all at once.
2413 FROM metabib.browse_entry mbe
2415 EXISTS ( -- are there any bibs using this mbe via the requested fields?
2417 FROM metabib.browse_entry_def_map mbedm
2418 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
2420 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
2422 FROM metabib.browse_entry_simple_heading_map mbeshm
2423 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2424 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2425 ash.atag = map.authority_field
2426 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
2428 WHERE mbeshm.entry = mbe.id
2432 -- This is the variant of the query for browsing backward.
2433 back_query := core_query ||
2434 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
2435 ' ORDER BY mbe.sort_value DESC, mbe.value DESC ';
2437 -- This variant browses forward.
2438 forward_query := core_query ||
2439 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
2440 ' ORDER BY mbe.sort_value, mbe.value ';
2442 -- We now call the function which applies a cursor to the provided
2443 -- queries, stopping at the appropriate limits and also giving us
2444 -- the next page's pivot.
2446 SELECT * FROM metabib.staged_browse(
2447 back_query, search_field, context_org, context_locations,
2448 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
2450 SELECT * FROM metabib.staged_browse(
2451 forward_query, search_field, context_org, context_locations,
2452 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
2453 ) ORDER BY row_number DESC;
2456 $p$ LANGUAGE PLPGSQL;
2458 CREATE OR REPLACE FUNCTION metabib.browse(
2461 context_org INT DEFAULT NULL,
2462 context_loc_group INT DEFAULT NULL,
2463 staff BOOL DEFAULT FALSE,
2464 pivot_id BIGINT DEFAULT NULL,
2465 result_limit INT DEFAULT 10
2466 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2468 RETURN QUERY SELECT * FROM metabib.browse(
2469 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
2470 FROM config.metabib_field WHERE field_class = search_class),
2479 $p$ LANGUAGE PLPGSQL;