4 CREATE OR REPLACE FUNCTION
5 config.metabib_representative_field_is_valid(INTEGER, TEXT) RETURNS BOOLEAN AS $$
6 SELECT EXISTS (SELECT 1 FROM config.metabib_field WHERE id = $1 AND field_class = $2);
7 $$ LANGUAGE SQL STRICT IMMUTABLE;
9 COMMENT ON FUNCTION config.metabib_representative_field_is_valid(INTEGER, TEXT) IS $$
10 Ensure the field_class value on the selected representative field matches
14 ALTER TABLE config.metabib_class
15 ADD COLUMN representative_field
16 INTEGER REFERENCES config.metabib_field(id),
17 ADD CONSTRAINT rep_field_unique UNIQUE(representative_field),
18 ADD CONSTRAINT rep_field_is_valid CHECK (
19 representative_field IS NULL OR
20 config.metabib_representative_field_is_valid(representative_field, name)
24 ALTER TABLE config.metabib_field
25 ADD COLUMN display_xpath TEXT,
26 ADD COLUMN display_field BOOL NOT NULL DEFAULT FALSE;
28 CREATE TABLE config.display_field_map (
29 name TEXT PRIMARY KEY,
30 field INTEGER REFERENCES config.metabib_field (id),
31 multi BOOLEAN DEFAULT FALSE
34 CREATE TABLE metabib.display_entry (
35 id BIGSERIAL PRIMARY KEY,
36 source BIGINT NOT NULL REFERENCES biblio.record_entry (id),
37 field INT NOT NULL REFERENCES config.metabib_field (id),
41 CREATE INDEX metabib_display_entry_field_idx ON metabib.display_entry (field);
42 CREATE INDEX metabib_display_entry_source_idx ON metabib.display_entry (source);
44 -- one row per display entry fleshed with field info
45 CREATE VIEW metabib.flat_display_entry AS
53 FROM metabib.display_entry mde
54 JOIN config.metabib_field cmf ON (cmf.id = mde.field)
55 JOIN config.display_field_map cdfm ON (cdfm.field = mde.field)
58 -- like flat_display_entry except values are compressed
59 -- into one row per display_field_map and JSON-ified.
60 CREATE VIEW metabib.compressed_display_entry AS
68 TO_JSON(ARRAY_AGG(value))
72 FROM metabib.flat_display_entry
73 GROUP BY 1, 2, 3, 4, 5
76 -- TODO: expand to encompass all well-known fields
77 CREATE VIEW metabib.wide_display_entry AS
80 COALESCE(mcde_title.value, 'null') AS title,
81 COALESCE(mcde_author.value, 'null') AS author,
82 COALESCE(mcde_subject.value, 'null') AS subject,
83 COALESCE(mcde_topic_subject.value, 'null') AS topic_subject,
84 COALESCE(mcde_isbn.value, 'null') AS isbn
85 -- ensure one row per bre regardless of any display fields
86 FROM biblio.record_entry bre
87 LEFT JOIN metabib.compressed_display_entry mcde_title
88 ON (bre.id = mcde_title.source AND mcde_title.name = 'title')
89 LEFT JOIN metabib.compressed_display_entry mcde_author
90 ON (bre.id = mcde_author.source AND mcde_author.name = 'author')
91 LEFT JOIN metabib.compressed_display_entry mcde_subject
92 ON (bre.id = mcde_subject.source AND mcde_subject.name = 'subject')
93 LEFT JOIN metabib.compressed_display_entry mcde_topic_subject
94 ON (bre.id = mcde_topic_subject.source AND mcde_topic_subject.name = 'topic_subject')
95 LEFT JOIN metabib.compressed_display_entry mcde_isbn
96 ON (bre.id = mcde_isbn.source AND mcde_isbn.name = 'isbn')
100 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
101 RETURNS TRIGGER AS $$
104 display_field_text TEXT;
106 display_field_text := NEW.value;
109 SELECT n.func AS func,
110 n.param_count AS param_count,
112 FROM config.index_normalizer n
113 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
114 WHERE m.field = NEW.field AND m.pos < 0
117 EXECUTE 'SELECT ' || normalizer.func || '(' ||
118 quote_literal( display_field_text ) ||
120 WHEN normalizer.param_count > 0
121 THEN ',' || REPLACE(REPLACE(BTRIM(
122 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
125 ')' INTO display_field_text;
129 NEW.value = display_field_text;
135 CREATE TRIGGER display_field_normalize_tgr
136 BEFORE UPDATE OR INSERT ON metabib.display_entry
137 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
139 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
140 RETURNS TRIGGER AS $$
142 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
147 CREATE TRIGGER display_field_force_nfc_tgr
148 BEFORE UPDATE OR INSERT ON metabib.display_entry
149 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
151 ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE display_field BOOL;
153 DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL);
154 DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT);
155 DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT, TEXT);
157 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
162 ) RETURNS SETOF metabib.field_entry_template AS $func$
164 bib biblio.record_entry%ROWTYPE;
165 idx config.metabib_field%ROWTYPE;
166 xfrm config.xml_transform%ROWTYPE;
168 transformed_xml TEXT;
170 xml_node_list TEXT[];
177 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
179 authority_link BIGINT;
180 output_row metabib.field_entry_template%ROWTYPE;
184 -- Start out with no field-use bools set
185 output_row.browse_field = FALSE;
186 output_row.facet_field = FALSE;
187 output_row.display_field = FALSE;
188 output_row.search_field = FALSE;
191 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
193 -- Loop over the indexing entries
194 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
196 process_idx := FALSE;
197 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
198 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
199 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
200 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
201 CONTINUE WHEN process_idx = FALSE;
203 joiner := COALESCE(idx.joiner, default_joiner);
205 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
207 -- See if we can skip the XSLT ... it's expensive
208 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
209 -- Can't skip the transform
210 IF xfrm.xslt <> '---' THEN
211 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
213 transformed_xml := bib.marc;
216 prev_xfrm := xfrm.name;
219 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
222 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
223 CONTINUE WHEN xml_node !~ E'^\\s*<';
225 -- XXX much of this should be moved into oils_xpath_string...
226 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
227 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
228 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
229 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
233 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
235 IF raw_text IS NOT NULL THEN
236 raw_text := raw_text || joiner;
239 raw_text := COALESCE(raw_text,'') || curr_text;
241 -- autosuggest/metabib.browse_entry
242 IF idx.browse_field THEN
244 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
245 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
247 browse_text := curr_text;
250 IF idx.browse_sort_xpath IS NOT NULL AND
251 idx.browse_sort_xpath <> '' THEN
253 sort_value := oils_xpath_string(
254 idx.browse_sort_xpath, xml_node, joiner,
255 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
258 sort_value := browse_text;
261 output_row.field_class = idx.field_class;
262 output_row.field = idx.id;
263 output_row.source = rid;
264 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
265 output_row.sort_value :=
266 public.naco_normalize(sort_value);
268 output_row.authority := NULL;
270 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
271 authority_text := oils_xpath_string(
272 idx.authority_xpath, xml_node, joiner,
274 ARRAY[xfrm.prefix, xfrm.namespace_uri],
275 ARRAY['xlink','http://www.w3.org/1999/xlink']
279 IF authority_text ~ '^\d+$' THEN
280 authority_link := authority_text::BIGINT;
281 PERFORM * FROM authority.record_entry WHERE id = authority_link;
283 output_row.authority := authority_link;
289 output_row.browse_field = TRUE;
290 -- Returning browse rows with search_field = true for search+browse
291 -- configs allows us to retain granularity of being able to search
292 -- browse fields with "starts with" type operators (for example, for
293 -- titles of songs in music albums)
294 IF idx.search_field THEN
295 output_row.search_field = TRUE;
297 RETURN NEXT output_row;
298 output_row.browse_field = FALSE;
299 output_row.search_field = FALSE;
300 output_row.sort_value := NULL;
303 -- insert raw node text for faceting
304 IF idx.facet_field THEN
306 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
307 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
309 facet_text := curr_text;
312 output_row.field_class = idx.field_class;
313 output_row.field = -1 * idx.id;
314 output_row.source = rid;
315 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
317 output_row.facet_field = TRUE;
318 RETURN NEXT output_row;
319 output_row.facet_field = FALSE;
322 -- insert raw node text for display
323 IF idx.display_field THEN
325 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
326 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
328 display_text := curr_text;
331 output_row.field_class = idx.field_class;
332 output_row.field = -1 * idx.id;
333 output_row.source = rid;
334 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
336 output_row.display_field = TRUE;
337 RETURN NEXT output_row;
338 output_row.display_field = FALSE;
343 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
345 -- insert combined node text for searching
346 IF idx.search_field THEN
347 output_row.field_class = idx.field_class;
348 output_row.field = idx.id;
349 output_row.source = rid;
350 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
352 output_row.search_field = TRUE;
353 RETURN NEXT output_row;
354 output_row.search_field = FALSE;
361 $func$ LANGUAGE PLPGSQL;
363 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
365 skip_facet BOOL DEFAULT FALSE,
366 skip_display BOOL DEFAULT FALSE,
367 skip_browse BOOL DEFAULT FALSE,
368 skip_search BOOL DEFAULT FALSE,
369 only_fields INT[] DEFAULT '{}'::INT[]
370 ) RETURNS VOID AS $func$
373 ind_data metabib.field_entry_template%ROWTYPE;
374 mbe_row metabib.browse_entry%ROWTYPE;
381 field_list INT[] := only_fields;
382 field_types TEXT[] := '{}'::TEXT[];
385 IF field_list = '{}'::INT[] THEN
386 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
389 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
390 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
391 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
392 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
394 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
395 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
396 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
397 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
399 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
401 IF NOT b_skip_search THEN
402 FOR fclass IN SELECT * FROM config.metabib_class LOOP
403 -- RAISE NOTICE 'Emptying out %', fclass.name;
404 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
407 IF NOT b_skip_facet THEN
408 DELETE FROM metabib.facet_entry WHERE source = bib_id;
410 IF NOT b_skip_display THEN
411 DELETE FROM metabib.display_entry WHERE source = bib_id;
413 IF NOT b_skip_browse THEN
414 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
418 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
420 -- don't store what has been normalized away
421 CONTINUE WHEN ind_data.value IS NULL;
423 IF ind_data.field < 0 THEN
424 ind_data.field = -1 * ind_data.field;
427 IF ind_data.facet_field AND NOT b_skip_facet THEN
428 INSERT INTO metabib.facet_entry (field, source, value)
429 VALUES (ind_data.field, ind_data.source, ind_data.value);
432 IF ind_data.display_field AND NOT b_skip_display THEN
433 INSERT INTO metabib.display_entry (field, source, value)
434 VALUES (ind_data.field, ind_data.source, ind_data.value);
438 IF ind_data.browse_field AND NOT b_skip_browse THEN
439 -- A caveat about this SELECT: this should take care of replacing
440 -- old mbe rows when data changes, but not if normalization (by
441 -- which I mean specifically the output of
442 -- evergreen.oils_tsearch2()) changes. It may or may not be
443 -- expensive to add a comparison of index_vector to index_vector
444 -- to the WHERE clause below.
446 CONTINUE WHEN ind_data.sort_value IS NULL;
448 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
449 SELECT INTO mbe_row * FROM metabib.browse_entry
450 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
453 mbe_id := mbe_row.id;
455 INSERT INTO metabib.browse_entry
456 ( value, sort_value ) VALUES
457 ( value_prepped, ind_data.sort_value );
459 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
462 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
463 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
466 IF ind_data.search_field AND NOT b_skip_search THEN
467 -- Avoid inserting duplicate rows
468 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
469 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
470 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
471 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
472 IF mbe_id IS NULL THEN
474 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
476 quote_literal(ind_data.field) || $$, $$ ||
477 quote_literal(ind_data.source) || $$, $$ ||
478 quote_literal(ind_data.value) ||
485 IF NOT b_skip_search THEN
486 PERFORM metabib.update_combined_index_vectors(bib_id);
491 $func$ LANGUAGE PLPGSQL;
493 -- AFTER UPDATE OR INSERT trigger for biblio.record_entry
494 CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
499 IF NEW.deleted THEN -- If this bib is deleted
501 PERFORM * FROM config.internal_flag WHERE
502 name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
504 tmp_bool := FOUND; -- Just in case this is changed by some other statement
506 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool );
509 -- One needs to keep these around to support searches
510 -- with the #deleted modifier, so one should turn on the named
511 -- internal flag for that functionality.
512 DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id;
515 DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible
516 DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items
517 DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs
518 RETURN NEW; -- and we're done
521 IF TG_OP = 'UPDATE' THEN -- re-ingest?
522 PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled;
524 IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change
529 -- Record authority linking
530 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled;
532 PERFORM biblio.map_authority_linking( NEW.id, NEW.marc );
535 -- Flatten and insert the mfr data
536 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled;
538 PERFORM metabib.reingest_metabib_full_rec(NEW.id);
540 -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields
541 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled;
543 PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted);
547 -- Gather and insert the field entry data
548 PERFORM metabib.reingest_metabib_field_entries(NEW.id);
551 PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled;
552 IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF;
554 -- (re)map metarecord-bib linking
555 IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag
556 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled;
558 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
560 ELSE -- we're doing an update, and we're not deleted, remap
561 PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled;
563 PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint );
569 $func$ LANGUAGE PLPGSQL;