-- Display-field support on index definitions:
--   display_xpath  optional XPath; when set and non-empty, the extractor
--                  evaluates it against the matched node instead of using
--                  the node's plain text.
--   display_field  marks the definition as producing display entries.
--                  Defaults to TRUE, so rows that already exist are opted
--                  in when the column is added.
ALTER TABLE config.metabib_field
    ADD COLUMN display_xpath TEXT,
    ADD COLUMN display_field BOOLEAN NOT NULL DEFAULT TRUE;
-- Returns TRUE when config.metabib_field contains a row whose id is $1 and
-- whose field_class is $2, and FALSE when no such row exists.
--
-- STRICT: a NULL in either argument yields NULL without running the query.
--
-- Declared STABLE rather than IMMUTABLE because the answer is read from a
-- table.  IMMUTABLE would allow the planner to pre-evaluate a call with
-- constant arguments and keep reusing that answer after
-- config.metabib_field has changed.
CREATE OR REPLACE FUNCTION
    config.metabib_representative_field_is_valid(INTEGER, TEXT) RETURNS BOOLEAN AS $$
    SELECT EXISTS (
        SELECT 1
          FROM config.metabib_field AS cmf
         WHERE cmf.id = $1
           AND cmf.field_class = $2
    );
$$ LANGUAGE SQL STRICT STABLE;
13 COMMENT ON FUNCTION config.metabib_representative_field_is_valid(INTEGER, TEXT) IS $$
14 Ensure the field_class value on the selected representative field matches
18 ALTER TABLE config.metabib_class
19 ADD COLUMN representative_field
20 INTEGER REFERENCES config.metabib_field(id),
21 ADD CONSTRAINT rep_field_unique UNIQUE(representative_field),
22 ADD CONSTRAINT rep_field_is_valid CHECK (
23 representative_field IS NULL OR
24 config.metabib_representative_field_is_valid(representative_field, name)
27 CREATE TABLE metabib.display_entry (
28 id BIGSERIAL PRIMARY KEY,
29 source BIGINT NOT NULL,
-- Single-column lookup indexes on metabib.display_entry.
-- btree is PostgreSQL's default access method; it is only spelled out here.

-- By index definition.
CREATE INDEX metabib_display_entry_field_idx
    ON metabib.display_entry USING btree (field);

-- By bib record; reingest deletes a record's display rows by source.
CREATE INDEX metabib_display_entry_source_idx
    ON metabib.display_entry USING btree (source);
39 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
43 display_field_text TEXT;
45 display_field_text := NEW.value;
48 SELECT n.func AS func,
49 n.param_count AS param_count,
51 FROM config.index_normalizer n
52 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
53 WHERE m.field = NEW.field AND m.pos < 0
56 EXECUTE 'SELECT ' || normalizer.func || '(' ||
57 quote_literal( display_field_text ) ||
59 WHEN normalizer.param_count > 0
60 THEN ',' || REPLACE(REPLACE(BTRIM(
61 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
64 ')' INTO display_field_text;
68 NEW.value = display_field_text;
-- Row-level trigger: before every insert or update of a
-- metabib.display_entry row, run metabib.display_field_normalize_trigger().
CREATE TRIGGER display_field_normalize_tgr
    BEFORE INSERT OR UPDATE
    ON metabib.display_entry
    FOR EACH ROW
    EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
78 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
81 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
-- Row-level trigger: before every insert or update of a
-- metabib.display_entry row, run evergreen.display_field_force_nfc().
-- PostgreSQL fires triggers for the same event in name order, so this
-- trigger runs ahead of display_field_normalize_tgr on the same table.
CREATE TRIGGER display_field_force_nfc_tgr
    BEFORE INSERT OR UPDATE
    ON metabib.display_entry
    FOR EACH ROW
    EXECUTE PROCEDURE evergreen.display_field_force_nfc();
-- Extend the composite row type returned by
-- biblio.extract_metabib_field_entry() with a flag that marks a row as a
-- display entry.
ALTER TYPE metabib.field_entry_template
    ADD ATTRIBUTE display_field BOOLEAN;
92 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
94 bib biblio.record_entry%ROWTYPE;
95 idx config.metabib_field%ROWTYPE;
96 xfrm config.xml_transform%ROWTYPE;
100 xml_node_list TEXT[];
107 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
109 authority_link BIGINT;
110 output_row metabib.field_entry_template%ROWTYPE;
113 -- Start out with no field-use bools set
114 output_row.browse_field = FALSE;
115 output_row.facet_field = FALSE;
116 output_row.display_field = FALSE;
117 output_row.search_field = FALSE;
120 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
122 -- Loop over the indexing entries
123 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
125 joiner := COALESCE(idx.joiner, default_joiner);
127 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
129 -- See if we can skip the XSLT ... it's expensive
130 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
131 -- Can't skip the transform
132 IF xfrm.xslt <> '---' THEN
133 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
135 transformed_xml := bib.marc;
138 prev_xfrm := xfrm.name;
141 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
144 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
145 CONTINUE WHEN xml_node !~ E'^\\s*<';
147 -- XXX much of this should be moved into oils_xpath_string...
148 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
149 oils_xpath( '//text()',
151 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
152 REGEXP_REPLACE( -- This escapes embeded <s
154 $re$(>[^<]+)(<)([^>]+<)$re$,
170 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
172 IF raw_text IS NOT NULL THEN
173 raw_text := raw_text || joiner;
176 raw_text := COALESCE(raw_text,'') || curr_text;
178 -- autosuggest/metabib.browse_entry
179 IF idx.browse_field THEN
181 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
182 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
184 browse_text := curr_text;
187 IF idx.browse_sort_xpath IS NOT NULL AND
188 idx.browse_sort_xpath <> '' THEN
190 sort_value := oils_xpath_string(
191 idx.browse_sort_xpath, xml_node, joiner,
192 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
195 sort_value := browse_text;
198 output_row.field_class = idx.field_class;
199 output_row.field = idx.id;
200 output_row.source = rid;
201 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
202 output_row.sort_value :=
203 public.naco_normalize(sort_value);
205 output_row.authority := NULL;
207 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
208 authority_text := oils_xpath_string(
209 idx.authority_xpath, xml_node, joiner,
211 ARRAY[xfrm.prefix, xfrm.namespace_uri],
212 ARRAY['xlink','http://www.w3.org/1999/xlink']
216 IF authority_text ~ '^\d+$' THEN
217 authority_link := authority_text::BIGINT;
218 PERFORM * FROM authority.record_entry WHERE id = authority_link;
220 output_row.authority := authority_link;
226 output_row.browse_field = TRUE;
227 -- Returning browse rows with search_field = true for search+browse
228 -- configs allows us to retain granularity of being able to search
229 -- browse fields with "starts with" type operators (for example, for
230 -- titles of songs in music albums)
231 IF idx.search_field THEN
232 output_row.search_field = TRUE;
234 RETURN NEXT output_row;
235 output_row.browse_field = FALSE;
236 output_row.search_field = FALSE;
237 output_row.sort_value := NULL;
240 -- insert raw node text for faceting
241 IF idx.facet_field THEN
243 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
244 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
246 facet_text := curr_text;
249 output_row.field_class = idx.field_class;
250 output_row.field = -1 * idx.id;
251 output_row.source = rid;
252 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
254 output_row.facet_field = TRUE;
255 RETURN NEXT output_row;
256 output_row.facet_field = FALSE;
259 -- insert raw node text for display
260 IF idx.display_field THEN
262 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
263 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
265 display_text := curr_text;
268 output_row.field_class = idx.field_class;
269 output_row.field = -1 * idx.id;
270 output_row.source = rid;
271 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
273 output_row.display_field = TRUE;
274 RETURN NEXT output_row;
275 output_row.display_field = FALSE;
280 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
282 -- insert combined node text for searching
283 IF idx.search_field THEN
284 output_row.field_class = idx.field_class;
285 output_row.field = idx.id;
286 output_row.source = rid;
287 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
289 output_row.search_field = TRUE;
290 RETURN NEXT output_row;
291 output_row.search_field = FALSE;
298 $func$ LANGUAGE PLPGSQL;
-- Remove the old four-argument signature before the five-argument
-- replacement (which adds skip_display) is created.  CREATE OR REPLACE
-- cannot change an argument list, so without this drop both overloads
-- would exist side by side.
-- IF EXISTS keeps the script from aborting when the old signature is
-- already gone, e.g. when the script is run a second time.
DROP FUNCTION IF EXISTS metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL);
302 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
303 bib_id BIGINT, skip_facet BOOL DEFAULT FALSE,
304 skip_display BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE,
305 skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
308 ind_data metabib.field_entry_template%ROWTYPE;
309 mbe_row metabib.browse_entry%ROWTYPE;
318 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
319 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
320 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
321 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
323 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
325 IF NOT b_skip_search THEN
326 FOR fclass IN SELECT * FROM config.metabib_class LOOP
327 -- RAISE NOTICE 'Emptying out %', fclass.name;
328 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
331 IF NOT b_skip_facet THEN
332 DELETE FROM metabib.facet_entry WHERE source = bib_id;
334 IF NOT b_skip_display THEN
335 DELETE FROM metabib.display_entry WHERE source = bib_id;
337 IF NOT b_skip_browse THEN
338 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
342 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
343 IF ind_data.field < 0 THEN
344 ind_data.field = -1 * ind_data.field;
347 IF ind_data.facet_field AND NOT b_skip_facet THEN
348 INSERT INTO metabib.facet_entry (field, source, value)
349 VALUES (ind_data.field, ind_data.source, ind_data.value);
352 IF ind_data.display_field AND NOT b_skip_display THEN
353 INSERT INTO metabib.display_entry (field, source, value)
354 VALUES (ind_data.field, ind_data.source, ind_data.value);
358 IF ind_data.browse_field AND NOT b_skip_browse THEN
359 -- A caveat about this SELECT: this should take care of replacing
360 -- old mbe rows when data changes, but not if normalization (by
361 -- which I mean specifically the output of
362 -- evergreen.oils_tsearch2()) changes. It may or may not be
363 -- expensive to add a comparison of index_vector to index_vector
364 -- to the WHERE clause below.
366 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
367 SELECT INTO mbe_row * FROM metabib.browse_entry
368 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
371 mbe_id := mbe_row.id;
373 INSERT INTO metabib.browse_entry
374 ( value, sort_value ) VALUES
375 ( value_prepped, ind_data.sort_value );
377 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
380 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
381 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
384 IF ind_data.search_field AND NOT b_skip_search THEN
385 -- Avoid inserting duplicate rows
386 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
387 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
388 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
389 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
390 IF mbe_id IS NULL THEN
392 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
394 quote_literal(ind_data.field) || $$, $$ ||
395 quote_literal(ind_data.source) || $$, $$ ||
396 quote_literal(ind_data.value) ||
403 IF NOT b_skip_search THEN
404 PERFORM metabib.update_combined_index_vectors(bib_id);
409 $func$ LANGUAGE PLPGSQL;
412 -- DATA --------------------------------------
-- display_field is TRUE by default; switch it off for every definition in
-- the keyword class and for any definition named 'complete'.
UPDATE config.metabib_field
   SET display_field = FALSE
 WHERE field_class = 'keyword'
    OR name = 'complete';
-- Register the flag that metabib.reingest_metabib_field_entries() consults
-- to decide whether display indexing is skipped.  It starts disabled, so
-- display entries are built unless a caller or an admin opts out.
INSERT INTO config.internal_flag
    (name, enabled)
VALUES
    ('ingest.skip_display_indexing', FALSE);
-- Assign the representative field for the author and title classes in a
-- single set-based statement.
-- NOTE(review): ids 8 and 6 are hard-coded; presumably they are the stock
-- author and title definitions in config.metabib_field -- confirm against
-- the seed data.  The rep_field_is_valid check rejects an id whose
-- field_class does not match the class name.
UPDATE config.metabib_class AS cmc
   SET representative_field = rep.field_id
  FROM (VALUES ('author', 8),
               ('title',  6)) AS rep (class_name, field_id)
 WHERE cmc.name = rep.class_name;
429 -- Ham-fisted reingest for Testing ---------------------
431 -- disable everything we can for reindexing
432 UPDATE config.internal_flag SET enabled = TRUE WHERE name IN (
433 'ingest.assume_inserts_only',
434 'ingest.disable_authority_auto_update',
435 'ingest.disable_authority_linking',
436 'ingest.disable_located_uri',
437 'ingest.disable_metabib_field_entry',
438 'ingest.disable_metabib_full_rec',
439 'ingest.disable_metabib_rec_descriptor',
440 'ingest.metarecord_mapping.preserve_on_delete',
441 'ingest.metarecord_mapping.skip_on_insert',
442 'ingest.metarecord_mapping.skip_on_update',
443 'ingest.reingest.force_on_same_marc',
444 'ingest.skip_browse_indexing',
445 'ingest.skip_facet_indexing',
446 'ingest.skip_search_indexing'
-- Force a reingest of every bib record.  The three statements must run in
-- this order.

-- 1. Turn on the flag that forces a reingest when marc is unchanged.
UPDATE config.internal_flag
   SET enabled = TRUE
 WHERE name = 'ingest.reingest.force_on_same_marc';

-- 2. Rewrite marc with its own value on every row.  The missing WHERE
--    clause is deliberate: the whole table is touched.
--    NOTE(review): presumably this fires the ingest triggers on
--    biblio.record_entry, which are defined outside this part of the
--    file -- confirm.
UPDATE biblio.record_entry
   SET marc = marc;

-- 3. Turn the flag back off.
UPDATE config.internal_flag
   SET enabled = FALSE
 WHERE name = 'ingest.reingest.force_on_same_marc';
457 -- re-enable the default ingest flags
458 UPDATE config.internal_flag SET enabled = FALSE WHERE name IN (
459 'ingest.assume_inserts_only',
460 'ingest.disable_authority_auto_update',
461 'ingest.disable_authority_linking',
462 'ingest.disable_located_uri',
463 'ingest.disable_metabib_field_entry',
464 'ingest.disable_metabib_full_rec',
465 'ingest.disable_metabib_rec_descriptor',
466 'ingest.metarecord_mapping.preserve_on_delete',
467 'ingest.metarecord_mapping.skip_on_insert',
468 'ingest.metarecord_mapping.skip_on_update',
469 'ingest.reingest.force_on_same_marc',
470 'ingest.skip_browse_indexing',
471 'ingest.skip_facet_indexing',
472 'ingest.skip_search_indexing'