2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2007-2008 Equinox Software, Inc.
4 * Mike Rylander <miker@esilibrary.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS metabib CASCADE;
21 CREATE SCHEMA metabib;
23 CREATE TABLE metabib.metarecord (
24 id BIGSERIAL PRIMARY KEY,
25 fingerprint TEXT NOT NULL,
29 CREATE INDEX metabib_metarecord_master_record_idx ON metabib.metarecord (master_record);
30 CREATE INDEX metabib_metarecord_fingerprint_idx ON metabib.metarecord (fingerprint);
32 CREATE TABLE metabib.identifier_field_entry (
33 id BIGSERIAL PRIMARY KEY,
34 source BIGINT NOT NULL,
37 index_vector tsvector NOT NULL
39 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
40 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
41 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
43 CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifier_field_entry USING GIST (index_vector);
44 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
45 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
47 CREATE TABLE metabib.combined_identifier_field_entry (
48 record BIGINT NOT NULL,
49 metabib_field INT NULL,
50 index_vector tsvector NOT NULL
52 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
53 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
54 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
56 CREATE TABLE metabib.title_field_entry (
57 id BIGSERIAL PRIMARY KEY,
58 source BIGINT NOT NULL,
61 index_vector tsvector NOT NULL
63 CREATE TRIGGER metabib_title_field_entry_fti_trigger
64 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
65 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
67 CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_entry USING GIST (index_vector);
68 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
69 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
71 CREATE TABLE metabib.combined_title_field_entry (
72 record BIGINT NOT NULL,
73 metabib_field INT NULL,
74 index_vector tsvector NOT NULL
76 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
77 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
78 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
80 CREATE TABLE metabib.author_field_entry (
81 id BIGSERIAL PRIMARY KEY,
82 source BIGINT NOT NULL,
85 index_vector tsvector NOT NULL
87 CREATE TRIGGER metabib_author_field_entry_fti_trigger
88 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
89 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
91 CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field_entry USING GIST (index_vector);
92 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
93 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.subject_field_entry (
105 id BIGSERIAL PRIMARY KEY,
106 source BIGINT NOT NULL,
109 index_vector tsvector NOT NULL
111 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
112 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
113 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
115 CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_field_entry USING GIST (index_vector);
116 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
117 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
119 CREATE TABLE metabib.combined_subject_field_entry (
120 record BIGINT NOT NULL,
121 metabib_field INT NULL,
122 index_vector tsvector NOT NULL
124 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
125 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
126 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
128 CREATE TABLE metabib.keyword_field_entry (
129 id BIGSERIAL PRIMARY KEY,
130 source BIGINT NOT NULL,
133 index_vector tsvector NOT NULL
135 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
136 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
137 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
139 CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_field_entry USING GIST (index_vector);
140 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
141 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
143 CREATE TABLE metabib.combined_keyword_field_entry (
144 record BIGINT NOT NULL,
145 metabib_field INT NULL,
146 index_vector tsvector NOT NULL
148 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
149 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
150 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
152 CREATE TABLE metabib.series_field_entry (
153 id BIGSERIAL PRIMARY KEY,
154 source BIGINT NOT NULL,
157 index_vector tsvector NOT NULL
159 CREATE TRIGGER metabib_series_field_entry_fti_trigger
160 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
161 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
163 CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field_entry USING GIST (index_vector);
164 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
165 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
167 CREATE TABLE metabib.combined_series_field_entry (
168 record BIGINT NOT NULL,
169 metabib_field INT NULL,
170 index_vector tsvector NOT NULL
172 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
173 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
174 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
176 CREATE TABLE metabib.facet_entry (
177 id BIGSERIAL PRIMARY KEY,
178 source BIGINT NOT NULL,
182 CREATE INDEX metabib_facet_entry_field_idx ON metabib.facet_entry (field);
183 CREATE INDEX metabib_facet_entry_value_idx ON metabib.facet_entry (SUBSTRING(value,1,1024));
184 CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source);
186 CREATE TABLE metabib.browse_entry (
187 id BIGSERIAL PRIMARY KEY,
189 index_vector tsvector,
190 sort_value TEXT NOT NULL,
191 UNIQUE(sort_value, value)
195 CREATE INDEX browse_entry_sort_value_idx
196 ON metabib.browse_entry USING BTREE (sort_value);
198 CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
199 CREATE TRIGGER metabib_browse_entry_fti_trigger
200 BEFORE INSERT OR UPDATE ON metabib.browse_entry
201 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
204 CREATE TABLE metabib.browse_entry_def_map (
205 id BIGSERIAL PRIMARY KEY,
206 entry BIGINT REFERENCES metabib.browse_entry (id),
207 def INT REFERENCES config.metabib_field (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
208 source BIGINT REFERENCES biblio.record_entry (id),
209 authority BIGINT REFERENCES authority.record_entry (id) ON DELETE SET NULL
211 CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
212 CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
213 CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
215 CREATE TABLE metabib.browse_entry_simple_heading_map (
216 id BIGSERIAL PRIMARY KEY,
217 entry BIGINT REFERENCES metabib.browse_entry (id),
218 simple_heading BIGINT REFERENCES authority.simple_heading (id) ON DELETE CASCADE
220 CREATE INDEX browse_entry_sh_map_entry_idx ON metabib.browse_entry_simple_heading_map (entry);
221 CREATE INDEX browse_entry_sh_map_sh_idx ON metabib.browse_entry_simple_heading_map (simple_heading);
223 CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
228 facet_text := NEW.value;
231 SELECT n.func AS func,
232 n.param_count AS param_count,
234 FROM config.index_normalizer n
235 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
236 WHERE m.field = NEW.field AND m.pos < 0
239 EXECUTE 'SELECT ' || normalizer.func || '(' ||
240 quote_literal( facet_text ) ||
242 WHEN normalizer.param_count > 0
243 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
250 NEW.value = facet_text;
256 CREATE TRIGGER facet_normalize_tgr
257 BEFORE UPDATE OR INSERT ON metabib.facet_entry
258 FOR EACH ROW EXECUTE PROCEDURE metabib.facet_normalize_trigger();
260 CREATE OR REPLACE FUNCTION evergreen.facet_force_nfc() RETURNS TRIGGER AS $$
262 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
267 CREATE TRIGGER facet_force_nfc_tgr
268 BEFORE UPDATE OR INSERT ON metabib.facet_entry
269 FOR EACH ROW EXECUTE PROCEDURE evergreen.facet_force_nfc();
271 -- DECREMENTING serial starts at -1
272 CREATE SEQUENCE metabib.uncontrolled_record_attr_value_id_seq INCREMENT BY -1;
274 CREATE TABLE metabib.uncontrolled_record_attr_value (
275 id BIGINT PRIMARY KEY DEFAULT nextval('metabib.uncontrolled_record_attr_value_id_seq'),
276 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name),
279 CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
281 CREATE VIEW metabib.record_attr_id_map AS
282 SELECT id, attr, value FROM metabib.uncontrolled_record_attr_value
284 SELECT c.id, c.ctype AS attr, c.code AS value
285 FROM config.coded_value_map c
286 JOIN config.record_attr_definition d ON (d.name = c.ctype AND NOT d.composite);
288 CREATE VIEW metabib.composite_attr_id_map AS
289 SELECT c.id, c.ctype AS attr, c.code AS value
290 FROM config.coded_value_map c
291 JOIN config.record_attr_definition d ON (d.name = c.ctype AND d.composite);
293 CREATE VIEW metabib.full_attr_id_map AS
294 SELECT id, attr, value FROM metabib.record_attr_id_map
296 SELECT id, attr, value FROM metabib.composite_attr_id_map;
299 CREATE FUNCTION metabib.compile_composite_attr ( cattr_id INT ) RETURNS query_int AS $func$
304 my $cattr = spi_exec_query(
305 "SELECT * FROM config.composite_attr_entry_defintion WHERE id = $cid"
308 die("Composite attribute not found with an id of $cid") unless $cattr;
310 my $plan = spi_prepare('SELECT * FROM metabib.full_attr_id_map WHERE attr = $1 AND value = $2', qw/TEXT TEXT/);
311 my $def = from_json $cattr->{definition};
318 if (ref $d eq 'HASH') { # node or AND
319 if (exists $d->{_attr}) { # it is a node
320 return spi_query_prepared(
321 $plan, {limit => 1}, $d->{_attr}, $d->{_val}
323 } elsif (exists $d->{_not} && scalar(keys(%$d)) == 1) { # it is a NOT
324 return '!' . recurse($$d{_not});
325 } else { # an AND list
326 @list = map { recurse($$d{$_}) } sort keys %$d;
328 } elsif (ref $d eq 'ARRAY')
330 @list = map { recurse($_) } @$d;
332 return '(' . join($j,@list) . ')' if @list;
336 return recurse($def);
338 $func$ STRICT STABLE IMMUTABLE LANGUAGE plperlu;
340 CREATE TABLE metabib.record_attr_vector_list (
341 source BIGINT PRIMARY KEY REFERNECES biblio.record_entry (id),
342 vlist INT[] NOT NULL -- stores id from ccvm AND murav
344 CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
346 /* This becomes a view, and we do sorters differently ...
347 CREATE TABLE metabib.record_attr (
348 id BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
349 attrs HSTORE NOT NULL DEFAULT ''::HSTORE
351 CREATE INDEX metabib_svf_attrs_idx ON metabib.record_attr USING GIST (attrs);
352 CREATE INDEX metabib_svf_date1_idx ON metabib.record_attr ((attrs->'date1'));
353 CREATE INDEX metabib_svf_dates_idx ON metabib.record_attr ((attrs->'date1'),(attrs->'date2'));
357 CREATE TABLE metabib.record_sorter (
358 id BIGSERIAL PRIMARY KEY,
359 source BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON DELETE CASCADE,
360 attr TEXT NOT NULL REFERENCES config.record_attr_definition (name) ON DELETE CASCADE,
363 CREATE INDEX metabib_sorter_source_idx ON metabib.record_sorter (source); -- we may not need one of this or the next ... stats will tell
364 CREATE INDEX metabib_sorter_s_a_idx ON metabib.record_sorter (source, attr);
365 CREATE INDEX metabib_sorter_a_v_idx ON metabib.record_sorter (attr, value);
368 CREATE TYPE metabib.record_attr_type AS (
373 -- Back-compat view ... we're moving to an INTARRAY world
374 CREATE VIEW metabib.record_attr_flat AS
375 SELECT v.source AS id,
378 FROM metabib.full_attr_id_map m
379 JOIN metabib.record_attr_vector_list v ( m.id = ANY( v.vlist ) );
381 CREATE VIEW metabib.record_attr AS
382 SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs FROM metabib.record_attr_flat GROUP BY 1;
384 -- Back-back-compat view ... we use to live in an HSTORE world
385 CREATE TYPE metabib.rec_desc_type AS (
403 CREATE VIEW metabib.rec_descriptor AS
406 (populate_record(NULL::metabib.rec_desc_type, attrs)).*
407 FROM metabib.record_attr;
409 -- Use a sequence that matches previous version, for easier upgrading.
410 CREATE SEQUENCE metabib.full_rec_id_seq;
412 CREATE TABLE metabib.real_full_rec (
413 id BIGINT NOT NULL DEFAULT NEXTVAL('metabib.full_rec_id_seq'::REGCLASS),
414 record BIGINT NOT NULL,
415 tag CHAR(3) NOT NULL,
420 index_vector tsvector NOT NULL
422 ALTER TABLE metabib.real_full_rec ADD PRIMARY KEY (id);
424 CREATE INDEX metabib_full_rec_tag_subfield_idx ON metabib.real_full_rec (tag,subfield);
425 CREATE INDEX metabib_full_rec_value_idx ON metabib.real_full_rec (substring(value,1,1024));
426 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
427 CREATE INDEX metabib_full_rec_value_tpo_index ON metabib.real_full_rec (substring(value,1,1024) text_pattern_ops);
428 CREATE INDEX metabib_full_rec_record_idx ON metabib.real_full_rec (record);
429 CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GIST (index_vector);
430 CREATE INDEX metabib_full_rec_isxn_caseless_idx
431 ON metabib.real_full_rec (LOWER(value))
432 WHERE tag IN ('020', '022', '024');
433 -- This next index might fully supplant the one above, but leaving both for now.
434 -- (they are not too large)
435 -- The reason we need this index is to ensure that the query parser always
436 -- prefers this index over the simpler tag/subfield index, as this greatly
437 -- increases Vandelay overlay speed for these identifiers, especially when
438 -- a record has many of these fields (around > 4-6 seems like the cutoff
439 -- on at least one PG9.1 system)
440 -- A similar index could be added for other fields (e.g. 010), but one should
441 -- leave out the LOWER() in all other cases.
442 -- TODO: verify whether we can discard the non tag/subfield/substring version
443 -- above (metabib_full_rec_isxn_caseless_idx)
444 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
445 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
446 WHERE tag IN ('020', '022', '024');
449 CREATE TRIGGER metabib_full_rec_fti_trigger
450 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
451 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
453 CREATE OR REPLACE VIEW metabib.full_rec AS
460 SUBSTRING(value,1,1024) AS value,
462 FROM metabib.real_full_rec;
464 CREATE OR REPLACE RULE metabib_full_rec_insert_rule
465 AS ON INSERT TO metabib.full_rec
467 INSERT INTO metabib.real_full_rec VALUES (
468 COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)),
478 CREATE OR REPLACE RULE metabib_full_rec_update_rule
479 AS ON UPDATE TO metabib.full_rec
481 UPDATE metabib.real_full_rec SET
487 subfield = NEW.subfield,
489 index_vector = NEW.index_vector
492 CREATE OR REPLACE RULE metabib_full_rec_delete_rule
493 AS ON DELETE TO metabib.full_rec
495 DELETE FROM metabib.real_full_rec WHERE id = OLD.id;
497 CREATE TABLE metabib.metarecord_source_map (
498 id BIGSERIAL PRIMARY KEY,
499 metarecord BIGINT NOT NULL,
500 source BIGINT NOT NULL
502 CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_source_map (metarecord);
503 CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source);
505 CREATE TYPE metabib.field_entry_template AS (
518 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
520 bib biblio.record_entry%ROWTYPE;
521 idx config.metabib_field%ROWTYPE;
522 xfrm config.xml_transform%ROWTYPE;
524 transformed_xml TEXT;
526 xml_node_list TEXT[];
532 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
534 authority_link BIGINT;
535 output_row metabib.field_entry_template%ROWTYPE;
538 -- Start out with no field-use bools set
539 output_row.browse_field = FALSE;
540 output_row.facet_field = FALSE;
541 output_row.search_field = FALSE;
544 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
546 -- Loop over the indexing entries
547 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
549 joiner := COALESCE(idx.joiner, default_joiner);
551 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
553 -- See if we can skip the XSLT ... it's expensive
554 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
555 -- Can't skip the transform
556 IF xfrm.xslt <> '---' THEN
557 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
559 transformed_xml := bib.marc;
562 prev_xfrm := xfrm.name;
565 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
568 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
569 CONTINUE WHEN xml_node !~ E'^\\s*<';
571 -- XXX much of this should be moved into oils_xpath_string...
572 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
573 oils_xpath( '//text()',
575 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
576 REGEXP_REPLACE( -- This escapes embeded <s
578 $re$(>[^<]+)(<)([^>]+<)$re$,
594 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
596 IF raw_text IS NOT NULL THEN
597 raw_text := raw_text || joiner;
600 raw_text := COALESCE(raw_text,'') || curr_text;
602 -- autosuggest/metabib.browse_entry
603 IF idx.browse_field THEN
605 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
606 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
608 browse_text := curr_text;
611 IF idx.browse_sort_xpath IS NOT NULL AND
612 idx.browse_sort_xpath <> '' THEN
614 sort_value := oils_xpath_string(
615 idx.browse_sort_xpath, xml_node, joiner,
616 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
619 sort_value := browse_text;
622 output_row.field_class = idx.field_class;
623 output_row.field = idx.id;
624 output_row.source = rid;
625 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
626 output_row.sort_value :=
627 public.naco_normalize(sort_value);
629 output_row.authority := NULL;
631 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
632 authority_text := oils_xpath_string(
633 idx.authority_xpath, xml_node, joiner,
635 ARRAY[xfrm.prefix, xfrm.namespace_uri],
636 ARRAY['xlink','http://www.w3.org/1999/xlink']
640 IF authority_text ~ '^\d+$' THEN
641 authority_link := authority_text::BIGINT;
642 PERFORM * FROM authority.record_entry WHERE id = authority_link;
644 output_row.authority := authority_link;
650 output_row.browse_field = TRUE;
651 -- Returning browse rows with search_field = true for search+browse
652 -- configs allows us to retain granularity of being able to search
653 -- browse fields with "starts with" type operators (for example, for
654 -- titles of songs in music albums)
655 IF idx.search_field THEN
656 output_row.search_field = TRUE;
658 RETURN NEXT output_row;
659 output_row.browse_field = FALSE;
660 output_row.search_field = FALSE;
661 output_row.sort_value := NULL;
664 -- insert raw node text for faceting
665 IF idx.facet_field THEN
667 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
668 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
670 facet_text := curr_text;
673 output_row.field_class = idx.field_class;
674 output_row.field = -1 * idx.id;
675 output_row.source = rid;
676 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
678 output_row.facet_field = TRUE;
679 RETURN NEXT output_row;
680 output_row.facet_field = FALSE;
685 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
687 -- insert combined node text for searching
688 IF idx.search_field THEN
689 output_row.field_class = idx.field_class;
690 output_row.field = idx.id;
691 output_row.source = rid;
692 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
694 output_row.search_field = TRUE;
695 RETURN NEXT output_row;
696 output_row.search_field = FALSE;
703 $func$ LANGUAGE PLPGSQL;
705 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
707 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
708 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
709 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
710 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
711 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
712 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
713 FROM metabib.keyword_field_entry WHERE source = bib_id;
715 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
716 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
717 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
718 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
719 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
720 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
721 FROM metabib.title_field_entry WHERE source = bib_id;
723 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
724 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
725 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
726 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
727 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
728 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
729 FROM metabib.author_field_entry WHERE source = bib_id;
731 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
732 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
733 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
734 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
735 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
736 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
737 FROM metabib.subject_field_entry WHERE source = bib_id;
739 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
740 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
741 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
742 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
743 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
744 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
745 FROM metabib.series_field_entry WHERE source = bib_id;
747 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
748 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
749 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
750 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
751 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
752 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
753 FROM metabib.identifier_field_entry WHERE source = bib_id;
756 $func$ LANGUAGE PLPGSQL;
758 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
761 ind_data metabib.field_entry_template%ROWTYPE;
762 mbe_row metabib.browse_entry%ROWTYPE;
770 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
771 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
772 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
774 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
776 IF NOT b_skip_search THEN
777 FOR fclass IN SELECT * FROM config.metabib_class LOOP
778 -- RAISE NOTICE 'Emptying out %', fclass.name;
779 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
782 IF NOT b_skip_facet THEN
783 DELETE FROM metabib.facet_entry WHERE source = bib_id;
785 IF NOT b_skip_browse THEN
786 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
790 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
791 IF ind_data.field < 0 THEN
792 ind_data.field = -1 * ind_data.field;
795 IF ind_data.facet_field AND NOT b_skip_facet THEN
796 INSERT INTO metabib.facet_entry (field, source, value)
797 VALUES (ind_data.field, ind_data.source, ind_data.value);
800 IF ind_data.browse_field AND NOT b_skip_browse THEN
801 -- A caveat about this SELECT: this should take care of replacing
802 -- old mbe rows when data changes, but not if normalization (by
803 -- which I mean specifically the output of
804 -- evergreen.oils_tsearch2()) changes. It may or may not be
805 -- expensive to add a comparison of index_vector to index_vector
806 -- to the WHERE clause below.
808 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
809 SELECT INTO mbe_row * FROM metabib.browse_entry
810 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
813 mbe_id := mbe_row.id;
815 INSERT INTO metabib.browse_entry
816 ( value, sort_value ) VALUES
817 ( value_prepped, ind_data.sort_value );
819 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
822 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
823 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
826 IF ind_data.search_field AND NOT b_skip_search THEN
827 -- Avoid inserting duplicate rows
828 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
829 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
830 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
831 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
832 IF mbe_id IS NULL THEN
834 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
836 quote_literal(ind_data.field) || $$, $$ ||
837 quote_literal(ind_data.source) || $$, $$ ||
838 quote_literal(ind_data.value) ||
845 IF NOT b_skip_search THEN
846 PERFORM metabib.update_combined_index_vectors(bib_id);
851 $func$ LANGUAGE PLPGSQL;
853 -- default to a space joiner
854 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
855 SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
858 CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$
860 auth authority.record_entry%ROWTYPE;
861 output authority.full_rec%ROWTYPE;
864 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
866 FOR field IN SELECT * FROM vandelay.flatten_marc( auth.marc ) LOOP
867 output.record := rid;
868 output.ind1 := field.ind1;
869 output.ind2 := field.ind2;
870 output.tag := field.tag;
871 output.subfield := field.subfield;
872 output.value := field.value;
877 $func$ LANGUAGE PLPGSQL;
879 CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
881 bib biblio.record_entry%ROWTYPE;
882 output metabib.full_rec%ROWTYPE;
885 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
887 FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP
888 output.record := rid;
889 output.ind1 := field.ind1;
890 output.ind2 := field.ind2;
891 output.tag := field.tag;
892 output.subfield := field.subfield;
893 output.value := field.value;
898 $func$ LANGUAGE PLPGSQL;
900 CREATE OR REPLACE FUNCTION vandelay.marc21_record_type( marc TEXT ) RETURNS config.marc21_rec_type_map AS $func$
907 retval config.marc21_rec_type_map%ROWTYPE;
909 ldr := oils_xpath_string( '//*[local-name()="leader"]', marc );
911 IF ldr IS NULL OR ldr = '' THEN
912 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
916 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
917 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
920 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
921 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
923 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
925 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
928 IF retval.code IS NULL THEN
929 SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
934 $func$ LANGUAGE PLPGSQL;
936 CREATE OR REPLACE FUNCTION biblio.marc21_record_type( rid BIGINT ) RETURNS config.marc21_rec_type_map AS $func$
937 SELECT * FROM vandelay.marc21_record_type( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
940 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field_list( marc TEXT, ff TEXT ) RETURNS TEXT[] AS $func$
946 collection TEXT[] := '{}'::TEXT[];
948 rtype := (vandelay.marc21_record_type( marc )).code;
949 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP
950 IF ff_pos.tag = 'ldr' THEN
951 val := oils_xpath_string('//*[local-name()="leader"]', marc);
952 IF val IS NOT NULL THEN
953 val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length );
954 collection := collection || val;
957 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
958 val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
959 collection := collection || val;
962 val := REPEAT( ff_pos.default_val, ff_pos.length );
963 collection := collection || val;
968 $func$ LANGUAGE PLPGSQL;
970 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT ) RETURNS TEXT AS $func$
977 rtype := (vandelay.marc21_record_type( marc )).code;
978 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP
979 IF ff_pos.tag = 'ldr' THEN
980 val := oils_xpath_string('//*[local-name()="leader"]', marc);
981 IF val IS NOT NULL THEN
982 val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length );
986 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
987 val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
991 val := REPEAT( ff_pos.default_val, ff_pos.length );
997 $func$ LANGUAGE PLPGSQL;
999 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$
1000 SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 );
1001 $func$ LANGUAGE SQL;
1003 CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$
1004 SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 );
1005 $func$ LANGUAGE SQL;
1007 -- CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT);
1008 CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT ) RETURNS SETOF biblio.record_ff_map AS $func$
1013 output biblio.record_ff_map%ROWTYPE;
1015 rtype := (vandelay.marc21_record_type( marc )).code;
1017 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP
1018 output.ff_name := ff_pos.fixed_field;
1019 output.ff_value := NULL;
1021 IF ff_pos.tag = 'ldr' THEN
1022 output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc);
1023 IF output.ff_value IS NOT NULL THEN
1024 output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length );
1026 output.ff_value := NULL;
1029 FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
1030 output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length );
1031 IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF;
1033 output.ff_value := NULL;
1041 $func$ LANGUAGE PLPGSQL;
1043 CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$
1044 SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1045 $func$ LANGUAGE SQL;
1047 CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
1048 SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) );
1049 $func$ LANGUAGE SQL;
1051 CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$
1064 IF marc IS NULL OR marc = '' THEN
1068 -- First, the count of tags
1069 qual := ARRAY_UPPER(oils_xpath('*[local-name()="datafield"]', marc), 1);
1071 -- now go through a bunch of pain to get the record type
1072 IF best_type IS NOT NULL THEN
1073 ldr := (oils_xpath('//*[local-name()="leader"]/text()', marc))[1];
1075 IF ldr IS NOT NULL THEN
1076 SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
1077 SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
1080 tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
1081 bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
1083 -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
1085 SELECT * INTO type_map FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
1087 IF type_map.code IS NOT NULL THEN
1088 IF best_type = type_map.code THEN
1089 qual := qual + qual / 2;
1092 FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = 'Lang' AND rec_type = type_map.code ORDER BY tag DESC LOOP
1093 ff_tag_data := SUBSTRING((oils_xpath('//*[@tag="' || ff_pos.tag || '"]/text()',marc))[1], ff_pos.start_pos + 1, ff_pos.length);
1094 IF ff_tag_data = best_lang THEN
1102 -- Now look for some quality metrics
1104 IF ARRAY_UPPER(oils_xpath('//*[@tag="040"]/*[@code="a" and contains(.,"DLC")]', marc), 1) = 1 THEN
1109 IF (oils_xpath('//*[@tag="003"]/text()', marc))[1] ~* E'oclo?c' THEN
1116 $func$ LANGUAGE PLPGSQL;
1118 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
1120 idx config.biblio_fingerprint%ROWTYPE;
1121 xfrm config.xml_transform%ROWTYPE;
1123 transformed_xml TEXT;
1125 xml_node_list TEXT[];
1127 output_text TEXT := '';
1130 IF marc IS NULL OR marc = '' THEN
1134 -- Loop over the indexing entries
1135 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
1137 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
1139 -- See if we can skip the XSLT ... it's expensive
1140 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1141 -- Can't skip the transform
1142 IF xfrm.xslt <> '---' THEN
1143 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
1145 transformed_xml := marc;
1148 prev_xfrm := xfrm.name;
1151 raw_text := COALESCE(
1159 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1168 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
1169 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
1171 IF idx.first_word IS TRUE THEN
1172 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
1175 output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
1182 $func$ LANGUAGE PLPGSQL;
1184 -- BEFORE UPDATE OR INSERT trigger for biblio.record_entry
1185 CREATE OR REPLACE FUNCTION biblio.fingerprint_trigger () RETURNS TRIGGER AS $func$
1188 -- For TG_ARGV, first param is language (like 'eng'), second is record type (like 'BKS')
1190 IF NEW.deleted IS TRUE THEN -- we don't much care, then, do we?
1194 NEW.fingerprint := biblio.extract_fingerprint(NEW.marc);
1195 NEW.quality := biblio.extract_quality(NEW.marc, TG_ARGV[0], TG_ARGV[1]);
1200 $func$ LANGUAGE PLPGSQL;
1202 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_full_rec( bib_id BIGINT ) RETURNS VOID AS $func$
1204 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
1206 DELETE FROM metabib.real_full_rec WHERE record = bib_id;
1208 INSERT INTO metabib.real_full_rec (record, tag, ind1, ind2, subfield, value)
1209 SELECT record, tag, ind1, ind2, subfield, value FROM biblio.flatten_marc( bib_id );
1213 $func$ LANGUAGE PLPGSQL;
1215 CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
1222 uri_owner_list TEXT[];
1230 -- Clear any URI mappings and call numbers for this bib.
1231 -- This leads to acn / auricnm inflation, but also enables
1232 -- old acn/auricnm's to go away and for bibs to be deleted.
1233 FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP
1234 DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id;
1235 DELETE FROM asset.call_number WHERE id = uri_cn_id;
1238 uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml);
1239 IF ARRAY_UPPER(uris,1) > 0 THEN
1240 FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP
1241 -- First we pull info out of the 856
1244 uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1];
1245 uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1];
1246 uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1];
1248 IF uri_label IS NULL THEN
1249 uri_label := uri_href;
1251 CONTINUE WHEN uri_href IS NULL;
1253 -- Get the distinct list of libraries wanting to use
1255 DISTINCT REGEXP_REPLACE(
1257 $re$^.*?\((\w+)\).*$$re$,
1260 ) INTO uri_owner_list
1263 '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()',
1268 IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN
1270 -- look for a matching uri
1271 IF uri_use IS NULL THEN
1272 SELECT id INTO uri_id
1274 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active
1275 ORDER BY id LIMIT 1;
1276 IF NOT FOUND THEN -- create one
1277 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1278 SELECT id INTO uri_id
1280 WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active;
1283 SELECT id INTO uri_id
1285 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active
1286 ORDER BY id LIMIT 1;
1287 IF NOT FOUND THEN -- create one
1288 INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use);
1289 SELECT id INTO uri_id
1291 WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active;
1295 FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP
1296 uri_owner := uri_owner_list[j];
1298 SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner;
1299 CONTINUE WHEN NOT FOUND;
1301 -- we need a call number to link through
1302 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1304 INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label)
1305 VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##');
1306 SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted;
1309 -- now, link them if they're not already
1310 SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id;
1312 INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id);
1324 $func$ LANGUAGE PLPGSQL;
1326 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT ) RETURNS BIGINT AS $func$
1330 tmp_mr metabib.metarecord%ROWTYPE;
1331 deleted_mrs BIGINT[];
1334 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id; -- Rid ourselves of the search-estimate-killing linkage
1336 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
1338 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN -- Find the first fingerprint-matching
1339 old_mr := tmp_mr.id;
1341 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
1342 IF source_count = 0 THEN -- No other records
1343 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
1344 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
1350 IF old_mr IS NULL THEN -- we found no suitable, preexisting MR based on old source maps
1351 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
1352 IF old_mr IS NULL THEN -- nope, create one and grab its id
1353 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
1354 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
1355 ELSE -- indeed there is. update it with a null cache and recalcualated master record
1356 UPDATE metabib.metarecord
1358 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1)
1361 ELSE -- there was one we already attached to, update its mods cache and master_record
1362 UPDATE metabib.metarecord
1364 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1)
1368 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
1370 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
1371 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
1377 $func$ LANGUAGE PLPGSQL;
1379 CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$
1380 DELETE FROM authority.bib_linking WHERE bib = $1;
1381 INSERT INTO authority.bib_linking (bib, authority)
1384 FROM ( SELECT DISTINCT $1 AS bib,
1385 BTRIM(remove_paren_substring(txt))::BIGINT AS authority
1386 FROM unnest(oils_xpath('//*[@code="0"]/text()',$2)) x(txt)
1387 WHERE BTRIM(remove_paren_substring(txt)) ~ $re$^\d+$$re$
1388 ) y JOIN authority.record_entry r ON r.id = y.authority;
1390 $func$ LANGUAGE SQL;
1392 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
1394 transformed_xml TEXT;
1395 rmarc TEXT := prmarc;
1399 xfrm config.xml_transform%ROWTYPE;
1400 attr_vector INT[] := '{}'::INT[];
1401 attr_vector_tmp INT[];
1402 attr_list TEXT[] := pattr_list;
1404 norm_attr_value TEXT[];
1406 attr_def config.record_attr_definition%ROWTYPE;
1407 ccvm_row config.code_value_map%ROWTYPE;
1410 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
1411 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
1414 IF rmarc IS NULL THEN
1415 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
1418 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
1420 attr_value := '{}'::TEXT[]
1421 norm_attr_value := '{}'::TEXT[]
1422 attr_vector_tmp := '{}'::INT[]
1424 SELECT * INTO ccvm_row FROM config.code_value_map c WHERE c.ctype = attr_def.name;
1426 -- tag+sf attrs only support SVF
1427 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
1428 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
1429 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
1431 AND tag LIKE attr_def.tag
1433 WHEN attr_def.sf_list IS NOT NULL
1434 THEN POSITION(subfield IN attr_def.sf_list) > 0
1441 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
1442 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
1444 IF NOT attr_def.multi THEN
1445 attr_value := ARRAY[attr_value[1]];
1448 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
1450 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
1452 -- See if we can skip the XSLT ... it's expensive
1453 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
1454 -- Can't skip the transform
1455 IF xfrm.xslt <> '---' THEN
1456 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
1458 transformed_xml := rmarc;
1461 prev_xfrm := xfrm.name;
1464 IF xfrm.name IS NULL THEN
1465 -- just grab the marcxml (empty) transform
1466 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
1467 prev_xfrm := xfrm.name;
1470 FOR tmp_xml IN SELECT XPATH(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
1471 attr_value := attr_value ||
1475 COALESCE(attr_def.joiner,' '),
1476 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
1478 EXIT WHEN NOT attr_def.multi;
1481 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
1482 SELECT ARRAY_AGG(m.value) INTO attr_vlue
1483 FROM vandelay.marc21_physical_characteristics(rmarc) v
1484 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
1485 WHERE v.subfield = attr_def.phys_char_sf
1486 AND ( ccvm.id IS NULL OR ( ccvm.id IS NOT NULL AND v.id IS NOT NULL) );
1488 IF NOT attr_def.multi THEN
1489 attr_value := ARRAY[attr_value[1]];
1494 -- apply index normalizers to attr_value
1495 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value);
1497 SELECT n.func AS func,
1498 n.param_count AS param_count,
1500 FROM config.index_normalizer n
1501 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
1502 WHERE attr = attr_def.name
1504 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1505 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
1507 WHEN normalizer.param_count > 0
1508 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1514 norm_attr_value := norm_attr_value || tmp_val;
1517 IF attr_def.filter THEN
1518 -- Create unknown uncontrolled values and find the IDs of the values
1519 IF ccvm.id IS NULL THEN
1520 FOR tmp_val FROM SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
1521 BEGIN; -- use subtransaction to isolate unique constraint violations
1522 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
1523 EXCEPTION WHEN unique_violation THEN END;
1526 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
1528 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND value = ANY( norm_attr_value );
1531 -- Add the new value to the vector
1532 attr_vector := attr_vector || attr_vector_tmp;
1535 IF attr_def.sorter THEN
1536 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1537 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
1542 /* We may need to rewrite the vlist to contain
1543 the intersection of new values for requested
1544 attrs and old values for ignored attrs. To
1545 do this, we take the old attr vlist and
1546 subtract any values that are valid for the
1547 requested attrs, and then add back the new
1548 set of attr values. */
1550 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
1551 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
1552 SELECT attr_vector_tmp - ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
1553 attr_vector := attr_vector || attr_vector_tmp;
1556 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
1557 -- attributes can depend on earlier ones.
1558 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
1560 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
1562 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
1563 NEXT WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
1565 IF attr_def.filter THEN
1566 IF attr_vector @@ tmp_val::query_int THEN
1567 attr_vector = attr_vector + intset(ccvm_row.id);
1568 EXIT WHEN NOT attr_def.multi;
1572 IF attr_def.sorter THEN
1573 IF attr_vector ~~ tmp_val THEN
1574 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
1575 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
1583 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
1584 IF rdeleted THEN -- initial insert OR revivication
1585 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
1586 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
1588 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
1594 $$ LANGUAGE PLPGSQL;
1597 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
1598 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
1601 IF NEW.deleted IS TRUE THEN -- If this bib is deleted
1602 PERFORM * FROM config.internal_flag WHERE
1603 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
1605 -- One needs to keep these around to support searches
1606 -- with the #deleted modifier, so one should turn on the named
1607 -- internal flag for that functionality.
1608 DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id;
1609 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
1612 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
1613 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
1614 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
1615 RETURN NEW; -- and we're done
1618 IF TG_OP = 'UPDATE' THEN -- re-ingest?
1619 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
1621 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
1626 -- Record authority linking
1627 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
1629 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
1632 -- Flatten and insert the mfr data
1633 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
1635 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
1637 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
1638 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
1640 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
1644 -- Gather and insert the field entry data
1645 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
1647 -- Located URI magic
1648 IF TG_OP = 'INSERT' THEN
1649 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1651 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1654 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
1656 PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor );
1660 -- (re)map metarecord-bib linking
1661 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
1662 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
1664 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1666 ELSE -- we're doing an update, and we're not deleted, remap
1667 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
1669 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
1675 $func$ LANGUAGE PLPGSQL;
1677 CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
1683 SELECT n.func AS func,
1684 n.param_count AS param_count,
1686 FROM config.index_normalizer n
1687 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1688 WHERE m.field = mapped_field AND m.pos < 0
1691 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1692 quote_literal( facet_text ) ||
1694 WHEN normalizer.param_count > 0
1695 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1698 ')' INTO facet_text;
1705 $$ LANGUAGE PLPGSQL;
1708 -- This mimics a specific part of QueryParser, turning the first part of a
1709 -- classed search (search_class) into a set of classes and possibly fields.
1710 -- search_class might look like "author" or "title|proper" or "ti|uniform"
1711 -- or "au" or "au|corporate|personal" or anything like that, where the first
1712 -- element of the list you get by separating on the "|" character is either
1713 -- a registered class (config.metabib_class) or an alias
1714 -- (config.metabib_search_alias), and the rest of any such elements are
1715 -- fields (config.metabib_field).
1717 FUNCTION metabib.search_class_to_registered_components(search_class TEXT)
1718 RETURNS SETOF RECORD AS $func$
1720 search_parts TEXT[];
1722 search_part_count INTEGER;
1724 registered_class config.metabib_class%ROWTYPE;
1725 registered_alias config.metabib_search_alias%ROWTYPE;
1726 registered_field config.metabib_field%ROWTYPE;
1728 search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|');
1730 search_part_count := ARRAY_LENGTH(search_parts, 1);
1731 IF search_part_count = 0 THEN
1734 SELECT INTO registered_class
1735 * FROM config.metabib_class WHERE name = search_parts[1];
1737 IF search_part_count < 2 THEN -- all fields
1738 rec := (registered_class.name, NULL::INTEGER);
1742 FOR field_name IN SELECT *
1743 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1744 SELECT INTO registered_field
1745 * FROM config.metabib_field
1746 WHERE name = field_name AND
1747 field_class = registered_class.name;
1749 rec := (registered_class.name, registered_field.id);
1754 -- maybe we have an alias?
1755 SELECT INTO registered_alias
1756 * FROM config.metabib_search_alias WHERE alias=search_parts[1];
1760 IF search_part_count < 2 THEN -- return w/e the alias says
1762 registered_alias.field_class, registered_alias.field
1767 FOR field_name IN SELECT *
1768 FROM UNNEST(search_parts[2:search_part_count]) LOOP
1769 SELECT INTO registered_field
1770 * FROM config.metabib_field
1771 WHERE name = field_name AND
1772 field_class = registered_alias.field_class;
1775 registered_alias.field_class,
1786 $func$ LANGUAGE PLPGSQL ROWS 1;
1789 -- Given a string such as a user might type into a search box, prepare
1790 -- two changed variants for TO_TSQUERY(). See
1791 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
1792 -- The first variant is normalized to match indexed documents regardless
1793 -- of diacritics. The second variant keeps its diacritics for proper
1794 -- highlighting via TS_HEADLINE().
1796 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
1799 orig_ended_in_space BOOLEAN;
1804 orig_ended_in_space := orig ~ E'\\s$';
1806 orig := ARRAY_TO_STRING(
1807 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
1810 normalized := public.naco_normalize(orig); -- also trim()s
1811 plain := trim(orig);
1813 IF NOT orig_ended_in_space THEN
1814 plain := plain || ':*';
1815 normalized := normalized || ':*';
1818 plain := ARRAY_TO_STRING(
1819 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
1821 normalized := ARRAY_TO_STRING(
1822 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
1825 RETURN ARRAY[normalized, plain];
1827 $$ LANGUAGE PLPGSQL;
1831 FUNCTION metabib.suggest_browse_entries(
1832 raw_query_text TEXT, -- actually typed by humans at the UI level
1833 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
1834 headline_opts TEXT, -- markup options for ts_headline()
1835 visibility_org INTEGER,-- null if you don't want opac visibility test
1836 query_limit INTEGER,-- use in LIMIT clause of interal query
1837 normalization INTEGER -- argument to TS_RANK_CD()
1839 value TEXT, -- plain
1841 buoyant_and_class_match BOOL,
1843 field_weight INTEGER,
1846 match TEXT -- marked up
1849 prepared_query_texts TEXT[];
1851 plain_query TSQUERY;
1852 opac_visibility_join TEXT;
1853 search_class_join TEXT;
1856 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
1858 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
1859 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
1861 visibility_org := NULLIF(visibility_org,-1);
1862 IF visibility_org IS NOT NULL THEN
1863 opac_visibility_join := '
1864 JOIN asset.opac_visible_copies aovc ON (
1865 aovc.record = x.source AND
1866 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
1869 opac_visibility_join := '';
1872 -- The following determines whether we only provide suggestsons matching
1873 -- the user's selected search_class, or whether we show other suggestions
1874 -- too. The reason for MIN() is that for search_classes like
1875 -- 'title|proper|uniform' you would otherwise get multiple rows. The
1876 -- implication is that if title as a class doesn't have restrict,
1877 -- nor does the proper field, but the uniform field does, you're going
1878 -- to get 'false' for your overall evaluation of 'should we restrict?'
1879 -- To invert that, change from MIN() to MAX().
1883 MIN(cmc.restrict::INT) AS restrict_class,
1884 MIN(cmf.restrict::INT) AS restrict_field
1885 FROM metabib.search_class_to_registered_components(search_class)
1886 AS _registered (field_class TEXT, field INT)
1888 config.metabib_class cmc ON (cmc.name = _registered.field_class)
1890 config.metabib_field cmf ON (cmf.id = _registered.field);
1892 -- evaluate 'should we restrict?'
1893 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
1894 search_class_join := '
1896 metabib.search_class_to_registered_components($2)
1897 AS _registered (field_class TEXT, field INT) ON (
1898 (_registered.field IS NULL AND
1899 _registered.field_class = cmf.field_class) OR
1900 (_registered.field = cmf.id)
1904 search_class_join := '
1906 metabib.search_class_to_registered_components($2)
1907 AS _registered (field_class TEXT, field INT) ON (
1908 _registered.field_class = cmc.name
1913 RETURN QUERY EXECUTE '
1922 TS_HEADLINE(value, $7, $3)
1923 FROM (SELECT DISTINCT
1926 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
1927 _registered.field = cmf.id AS restrict,
1929 TS_RANK_CD(mbe.index_vector, $1, $6),
1932 FROM metabib.browse_entry_def_map mbedm
1933 JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
1934 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
1935 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
1936 ' || search_class_join || '
1937 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1939 ' || opac_visibility_join || '
1940 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
1942 ' -- sic, repeat the order by clause in the outer select too
1944 query, search_class, headline_opts,
1945 visibility_org, query_limit, normalization, plain_query
1949 -- buoyant AND chosen class = match class
1950 -- chosen field = match field
1957 $func$ LANGUAGE PLPGSQL;
1959 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
1963 temp_vector TEXT := '';
1969 NEW.index_vector = ''::tsvector;
1971 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
1973 SELECT n.func AS func,
1974 n.param_count AS param_count,
1976 FROM config.index_normalizer n
1977 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1978 WHERE field = NEW.field AND m.pos < 0
1980 EXECUTE 'SELECT ' || normalizer.func || '(' ||
1981 quote_literal( value ) ||
1983 WHEN normalizer.param_count > 0
1984 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
1994 SELECT n.func AS func,
1995 n.param_count AS param_count,
1997 FROM config.index_normalizer n
1998 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
1999 WHERE field = NEW.field AND m.pos >= 0
2001 EXECUTE 'SELECT ' || normalizer.func || '(' ||
2002 quote_literal( value ) ||
2004 WHEN normalizer.param_count > 0
2005 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
2013 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
2014 value := ARRAY_TO_STRING(
2015 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
2017 value := public.search_normalize(value);
2018 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2019 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
2021 SELECT ts_config, index_weight
2022 FROM config.metabib_class_ts_map
2023 WHERE field_class = TG_ARGV[0]
2024 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
2025 AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
2027 SELECT ts_config, index_weight
2028 FROM config.metabib_field_ts_map
2029 WHERE metabib_field = NEW.field
2030 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
2031 ORDER BY index_weight ASC
2033 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
2034 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2037 cur_weight = ts_rec.index_weight;
2038 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
2040 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
2042 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
2047 $$ LANGUAGE PLPGSQL;
2050 CREATE TYPE metabib.flat_browse_entry_appearance AS (
2051 browse_entry BIGINT,
2056 sources INT, -- visible ones, that is
2057 asources INT, -- visible ones, that is
2058 row_number INT, -- internal use, sort of
2059 accurate BOOL, -- Count in sources field is accurate? Not
2060 -- if we had more than a browse superpage
2061 -- of records to look at.
2062 aaccurate BOOL, -- See previous comment...
2067 CREATE OR REPLACE FUNCTION metabib.browse_bib_pivot(
2070 ) RETURNS BIGINT AS $p$
2072 FROM metabib.browse_entry mbe
2073 JOIN metabib.browse_entry_def_map mbedm ON (
2074 mbedm.entry = mbe.id
2075 AND mbedm.def = ANY($1)
2077 WHERE mbe.sort_value >= public.naco_normalize($2)
2078 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2079 $p$ LANGUAGE SQL STABLE;
2081 CREATE OR REPLACE FUNCTION metabib.browse_authority_pivot(
2084 ) RETURNS BIGINT AS $p$
2086 FROM metabib.browse_entry mbe
2087 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2088 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2089 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2090 ash.atag = map.authority_field
2091 AND map.metabib_field = ANY($1)
2093 WHERE mbe.sort_value >= public.naco_normalize($2)
2094 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2095 $p$ LANGUAGE SQL STABLE;
2097 CREATE OR REPLACE FUNCTION metabib.browse_authority_refs_pivot(
2100 ) RETURNS BIGINT AS $p$
2102 FROM metabib.browse_entry mbe
2103 JOIN metabib.browse_entry_simple_heading_map mbeshm ON ( mbeshm.entry = mbe.id )
2104 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2105 JOIN authority.control_set_auth_field_metabib_field_map_refs_only map ON (
2106 ash.atag = map.authority_field
2107 AND map.metabib_field = ANY($1)
2109 WHERE mbe.sort_value >= public.naco_normalize($2)
2110 ORDER BY mbe.sort_value, mbe.value LIMIT 1;
2111 $p$ LANGUAGE SQL STABLE;
2113 CREATE OR REPLACE FUNCTION metabib.browse_pivot(
2116 ) RETURNS BIGINT AS $p$
2117 SELECT id FROM metabib.browse_entry
2119 metabib.browse_bib_pivot($1, $2),
2120 metabib.browse_authority_refs_pivot($1,$2) -- only look in 4xx, 5xx, 7xx of authority
2122 ORDER BY sort_value, value LIMIT 1;
2123 $p$ LANGUAGE SQL STABLE;
2126 CREATE OR REPLACE FUNCTION metabib.staged_browse(
2130 context_locations INT[],
2132 browse_superpage_size INT,
2133 count_up_from_zero BOOL, -- if false, count down from -1
2136 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2144 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
2145 results_skipped INT := 0;
2146 row_counter INT := 0;
2151 all_records BIGINT[];
2152 all_brecords BIGINT[];
2153 all_arecords BIGINT[];
2154 superpage_of_records BIGINT[];
2157 IF count_up_from_zero THEN
2163 OPEN curs FOR EXECUTE query;
2166 FETCH curs INTO rec;
2168 IF result_row.pivot_point IS NOT NULL THEN
2169 RETURN NEXT result_row;
2175 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
2176 SELECT INTO all_arecords, result_row.sees, afields
2177 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
2178 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
2179 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
2181 FROM metabib.browse_entry_simple_heading_map mbeshm
2182 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2183 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
2184 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
2185 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2186 ash.atag = map.authority_field
2187 AND map.metabib_field = ANY(fields)
2189 WHERE mbeshm.entry = rec.id;
2192 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
2193 SELECT INTO all_brecords, result_row.authorities, bfields
2194 ARRAY_AGG(DISTINCT source),
2195 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
2196 ARRAY_AGG(DISTINCT def)
2197 FROM metabib.browse_entry_def_map
2198 WHERE entry = rec.id
2199 AND def = ANY(fields);
2201 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
2203 result_row.sources := 0;
2204 result_row.asources := 0;
2206 -- Bib-linked vis checking
2207 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
2209 full_end := ARRAY_LENGTH(all_brecords, 1);
2210 superpage_size := COALESCE(browse_superpage_size, full_end);
2212 slice_end := superpage_size;
2214 WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
2215 superpage_of_records := all_brecords[slice_start:slice_end];
2217 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2218 '1::INT AS rel FROM (SELECT UNNEST(' ||
2219 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2221 -- We use search.query_parser_fts() for visibility testing.
2222 -- We're calling it once per browse-superpage worth of records
2223 -- out of the set of records related to a given mbe, until we've
2224 -- either exhausted that set of records or found at least 1
2227 SELECT INTO result_row.sources visible
2228 FROM search.query_parser_fts(
2229 context_org, NULL, qpfts_query, NULL,
2230 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2232 WHERE qpfts.rel IS NULL;
2234 slice_start := slice_start + superpage_size;
2235 slice_end := slice_end + superpage_size;
2238 -- Accurate? Well, probably.
2239 result_row.accurate := browse_superpage_size IS NULL OR
2240 browse_superpage_size >= full_end;
2244 -- Authority-linked vis checking
2245 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
2247 full_end := ARRAY_LENGTH(all_arecords, 1);
2248 superpage_size := COALESCE(browse_superpage_size, full_end);
2250 slice_end := superpage_size;
2252 WHILE result_row.asources = 0 AND slice_start <= full_end LOOP
2253 superpage_of_records := all_arecords[slice_start:slice_end];
2255 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
2256 '1::INT AS rel FROM (SELECT UNNEST(' ||
2257 quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
2259 -- We use search.query_parser_fts() for visibility testing.
2260 -- We're calling it once per browse-superpage worth of records
2261 -- out of the set of records related to a given mbe, via
2262 -- authority until we've either exhausted that set of records
2263 -- or found at least 1 visible record.
2265 SELECT INTO result_row.asources visible
2266 FROM search.query_parser_fts(
2267 context_org, NULL, qpfts_query, NULL,
2268 context_locations, 0, NULL, NULL, FALSE, staff, FALSE
2270 WHERE qpfts.rel IS NULL;
2272 slice_start := slice_start + superpage_size;
2273 slice_end := slice_end + superpage_size;
2277 -- Accurate? Well, probably.
2278 result_row.aaccurate := browse_superpage_size IS NULL OR
2279 browse_superpage_size >= full_end;
2283 IF result_row.sources > 0 OR result_row.asources > 0 THEN
2285 -- The function that calls this function needs row_number in order
2286 -- to correctly order results from two different runs of this
2288 result_row.row_number := row_number;
2290 -- Now, if row_counter is still less than limit, return a row. If
2291 -- not, but it is less than next_pivot_pos, continue on without
2292 -- returning actual result rows until we find
2293 -- that next pivot, and return it.
2295 IF row_counter < result_limit THEN
2296 result_row.browse_entry := rec.id;
2297 result_row.value := rec.value;
2299 RETURN NEXT result_row;
2301 result_row.browse_entry := NULL;
2302 result_row.authorities := NULL;
2303 result_row.fields := NULL;
2304 result_row.value := NULL;
2305 result_row.sources := NULL;
2306 result_row.sees := NULL;
2307 result_row.accurate := NULL;
2308 result_row.aaccurate := NULL;
2309 result_row.pivot_point := rec.id;
2311 IF row_counter >= next_pivot_pos THEN
2312 RETURN NEXT result_row;
2317 IF count_up_from_zero THEN
2318 row_number := row_number + 1;
2320 row_number := row_number - 1;
2323 -- row_counter is different from row_number.
2324 -- It simply counts up from zero so that we know when
2325 -- we've reached our limit.
2326 row_counter := row_counter + 1;
2330 $p$ LANGUAGE PLPGSQL;
2333 CREATE OR REPLACE FUNCTION metabib.browse(
2336 context_org INT DEFAULT NULL,
2337 context_loc_group INT DEFAULT NULL,
2338 staff BOOL DEFAULT FALSE,
2339 pivot_id BIGINT DEFAULT NULL,
2340 result_limit INT DEFAULT 10
2341 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2346 pivot_sort_value TEXT;
2347 pivot_sort_fallback TEXT;
2348 context_locations INT[];
2349 browse_superpage_size INT;
2350 results_skipped INT := 0;
2354 forward_to_pivot INT;
2356 -- First, find the pivot if we were given a browse term but not a pivot.
2357 IF pivot_id IS NULL THEN
2358 pivot_id := metabib.browse_pivot(search_field, browse_term);
2361 SELECT INTO pivot_sort_value, pivot_sort_fallback
2362 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
2364 -- Bail if we couldn't find a pivot.
2365 IF pivot_sort_value IS NULL THEN
2369 -- Transform the context_loc_group argument (if any) (logc at the
2370 -- TPAC layer) into a form we'll be able to use.
2371 IF context_loc_group IS NOT NULL THEN
2372 SELECT INTO context_locations ARRAY_AGG(location)
2373 FROM asset.copy_location_group_map
2374 WHERE lgroup = context_loc_group;
2377 -- Get the configured size of browse superpages.
2378 SELECT INTO browse_superpage_size value -- NULL ok
2379 FROM config.global_flag
2380 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
2382 -- First we're going to search backward from the pivot, then we're going
2383 -- to search forward. In each direction, we need two limits. At the
2384 -- lesser of the two limits, we delineate the edge of the result set
2385 -- we're going to return. At the greater of the two limits, we find the
2386 -- pivot value that would represent an offset from the current pivot
2387 -- at a distance of one "page" in either direction, where a "page" is a
2388 -- result set of the size specified in the "result_limit" argument.
2390 -- The two limits in each direction make four derived values in total,
2391 -- and we calculate them now.
2392 back_limit := CEIL(result_limit::FLOAT / 2);
2393 back_to_pivot := result_limit;
2394 forward_limit := result_limit / 2;
2395 forward_to_pivot := result_limit - 1;
2397 -- This is the meat of the SQL query that finds browse entries. We'll
2398 -- pass this to a function which uses it with a cursor, so that individual
2399 -- rows may be fetched in a loop until some condition is satisfied, without
2400 -- waiting for a result set of fixed size to be collected all at once.
2405 FROM metabib.browse_entry mbe
2407 EXISTS ( -- are there any bibs using this mbe via the requested fields?
2409 FROM metabib.browse_entry_def_map mbedm
2410 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
2412 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
2414 FROM metabib.browse_entry_simple_heading_map mbeshm
2415 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
2416 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
2417 ash.atag = map.authority_field
2418 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
2420 WHERE mbeshm.entry = mbe.id
2424 -- This is the variant of the query for browsing backward.
2425 back_query := core_query ||
2426 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
2427 ' ORDER BY mbe.sort_value DESC, mbe.value DESC ';
2429 -- This variant browses forward.
2430 forward_query := core_query ||
2431 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
2432 ' ORDER BY mbe.sort_value, mbe.value ';
2434 -- We now call the function which applies a cursor to the provided
2435 -- queries, stopping at the appropriate limits and also giving us
2436 -- the next page's pivot.
2438 SELECT * FROM metabib.staged_browse(
2439 back_query, search_field, context_org, context_locations,
2440 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
2442 SELECT * FROM metabib.staged_browse(
2443 forward_query, search_field, context_org, context_locations,
2444 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
2445 ) ORDER BY row_number DESC;
2448 $p$ LANGUAGE PLPGSQL;
2450 CREATE OR REPLACE FUNCTION metabib.browse(
2453 context_org INT DEFAULT NULL,
2454 context_loc_group INT DEFAULT NULL,
2455 staff BOOL DEFAULT FALSE,
2456 pivot_id BIGINT DEFAULT NULL,
2457 result_limit INT DEFAULT 10
2458 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
2460 RETURN QUERY SELECT * FROM metabib.browse(
2461 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
2462 FROM config.metabib_field WHERE field_class = search_class),
2471 $p$ LANGUAGE PLPGSQL;