-- metabib.display_entry holds the display values produced at ingest time.
-- "source" carries the bib record id: the reingest function deletes and
-- inserts rows in this table by source = bib_id.
4 CREATE TABLE metabib.display_entry (
5 id BIGSERIAL PRIMARY KEY,
6 source BIGINT NOT NULL,
-- Lookup by field definition id.
11 CREATE INDEX metabib_display_entry_field_idx
12 ON metabib.display_entry (field);
-- Expression index over only the first 1024 characters of value.
-- NOTE(review): presumably truncated to keep index entries under the btree
-- size limit for long values -- confirm. Queries must use the same
-- SUBSTRING(value,1,1024) expression for the planner to consider this index.
13 CREATE INDEX metabib_display_entry_value_idx
14 ON metabib.display_entry (SUBSTRING(value,1,1024));
-- Lookup by bib record; supports the per-record DELETE ... WHERE source = bib_id
-- issued by metabib.reingest_metabib_field_entries.
15 CREATE INDEX metabib_display_entry_source_idx
16 ON metabib.display_entry (source);
-- Row trigger function for metabib.display_entry: runs NEW.value through the
-- normalizer functions mapped to NEW.field and stores the result back in
-- NEW.value.
18 CREATE OR REPLACE FUNCTION metabib.display_field_normalize_trigger ()
22 display_field_text TEXT;
24 display_field_text := NEW.value;
-- Only normalizers mapped to this field with a negative position are selected.
27 SELECT n.func AS func,
28 n.param_count AS param_count,
30 FROM config.index_normalizer n
31 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
32 WHERE m.field = NEW.field AND m.pos < 0
-- Builds and runs "SELECT <func>(<text literal>[, <params>])"; each result is
-- written back to display_field_text, so normalizers chain on one another.
-- When param_count > 0 the params string has its surrounding [ ] trimmed,
-- single quotes backslash-escaped and double quotes turned into single
-- quotes, i.e. it is rewritten into a list of SQL string literals.
-- NOTE(review): normalizer.func is concatenated without quoting, so this
-- relies on config.index_normalizer holding trusted function names.
35 EXECUTE 'SELECT ' || normalizer.func || '(' ||
36 quote_literal( display_field_text ) ||
38 WHEN normalizer.param_count > 0
39 THEN ',' || REPLACE(REPLACE(BTRIM(
40 normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
43 ')' INTO display_field_text;
-- Hand the normalized text back to the row being written.
47 NEW.value = display_field_text;
-- Apply the configured display normalizers to every row before it is
-- inserted into or updated in metabib.display_entry.
53 CREATE TRIGGER display_field_normalize_tgr
54 BEFORE UPDATE OR INSERT ON metabib.display_entry
55 FOR EACH ROW EXECUTE PROCEDURE metabib.display_field_normalize_trigger();
-- Row trigger function: rewrites NEW.value via
-- force_unicode_normal_form(..., 'NFC') so stored display values are in
-- Unicode Normalization Form C.
57 CREATE OR REPLACE FUNCTION evergreen.display_field_force_nfc()
60 NEW.value := force_unicode_normal_form(NEW.value,'NFC');
-- Force NFC on every inserted or updated display_entry row.
-- PostgreSQL fires triggers of the same kind on the same event in
-- alphabetical order by trigger name, so this trigger runs before
-- display_field_normalize_tgr and the normalizers see NFC text.
65 CREATE TRIGGER display_field_force_nfc_tgr
66 BEFORE UPDATE OR INSERT ON metabib.display_entry
67 FOR EACH ROW EXECUTE PROCEDURE evergreen.display_field_force_nfc();
-- New per-field switch for display extraction. DEFAULT TRUE means every
-- existing field definition becomes a display field unless it is explicitly
-- turned off afterwards.
69 ALTER TABLE config.metabib_field
70 ADD COLUMN display_field BOOL NOT NULL DEFAULT TRUE;
-- The row type returned by biblio.extract_metabib_field_entry needs the
-- matching flag so callers can tell display rows apart from the others.
72 ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE display_field BOOL;
-- biblio.extract_metabib_field_entry(rid, default_joiner)
--
-- Reads bib record "rid" and, for each config.metabib_field definition,
-- returns metabib.field_entry_template rows for the browse, facet, search
-- and display uses that the definition enables.
--
--   rid            id of the biblio.record_entry row to extract from
--   default_joiner separator used between node texts when the field
--                  definition has no joiner of its own
--
-- Each returned row has the *_field flag for its use set to TRUE; the flag is
-- reset to FALSE right after RETURN NEXT because one output_row variable is
-- reused for every row. Facet rows carry a negated field id (-1 * idx.id).
74 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
76 bib biblio.record_entry%ROWTYPE;
77 idx config.metabib_field%ROWTYPE;
78 xfrm config.xml_transform%ROWTYPE;
89 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
91 authority_link BIGINT;
92 output_row metabib.field_entry_template%ROWTYPE;
95 -- Start out with no field-use bools set
96 output_row.browse_field = FALSE;
97 output_row.facet_field = FALSE;
98 output_row.display_field = FALSE;
99 output_row.search_field = FALSE;
-- Load the bib record being indexed
102 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
104 -- Loop over the indexing entries
-- Ordering by format keeps definitions that share a transform next to each
-- other, which is what lets the prev_xfrm check below reuse transformed_xml.
105 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
-- A joiner on the field definition wins over the caller's default
107 joiner := COALESCE(idx.joiner, default_joiner);
109 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
111 -- See if we can skip the XSLT ... it's expensive
112 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
113 -- Can't skip the transform
-- An xslt value of '---' means "no stylesheet": the raw MARC is used as-is
114 IF xfrm.xslt <> '---' THEN
115 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
117 transformed_xml := bib.marc;
120 prev_xfrm := xfrm.name;
-- Nodes matched by the field's xpath in the (possibly transformed) record
123 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
126 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
-- Skip results that do not start with markup (optional whitespace, then '<')
127 CONTINUE WHEN xml_node !~ E'^\\s*<';
129 -- XXX much of this should be moved into oils_xpath_string...
130 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
131 oils_xpath( '//text()',
133 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data is returned from oils_xpath (above) in UTF-8, not entity encoded
134 REGEXP_REPLACE( -- This escapes embedded <s
136 $re$(>[^<]+)(<)([^>]+<)$re$,
-- Nodes that yield no text contribute nothing
152 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
-- Accumulate this node's text into raw_text, separated by the joiner
154 IF raw_text IS NOT NULL THEN
155 raw_text := raw_text || joiner;
158 raw_text := COALESCE(raw_text,'') || curr_text;
160 -- autosuggest/metabib.browse_entry
161 IF idx.browse_field THEN
-- Browse value: the dedicated browse_xpath if configured, else the node text
163 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
164 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
166 browse_text := curr_text;
-- Sort value: the dedicated browse_sort_xpath if configured, else the browse text
169 IF idx.browse_sort_xpath IS NOT NULL AND
170 idx.browse_sort_xpath <> '' THEN
172 sort_value := oils_xpath_string(
173 idx.browse_sort_xpath, xml_node, joiner,
174 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
177 sort_value := browse_text;
180 output_row.field_class = idx.field_class;
181 output_row.field = idx.id;
182 output_row.source = rid;
-- Collapse whitespace runs to single spaces and trim both ends
183 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
184 output_row.sort_value :=
185 public.naco_normalize(sort_value);
-- Clear any authority link left over from the previous row
187 output_row.authority := NULL;
189 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
190 authority_text := oils_xpath_string(
191 idx.authority_xpath, xml_node, joiner,
193 ARRAY[xfrm.prefix, xfrm.namespace_uri],
194 ARRAY['xlink','http://www.w3.org/1999/xlink']
-- Only an all-digit value is treated as an authority record id; it is
-- looked up in authority.record_entry before being recorded on the row
198 IF authority_text ~ '^\d+$' THEN
199 authority_link := authority_text::BIGINT;
200 PERFORM * FROM authority.record_entry WHERE id = authority_link;
202 output_row.authority := authority_link;
208 output_row.browse_field = TRUE;
209 -- Returning browse rows with search_field = true for search+browse
210 -- configs allows us to retain granularity of being able to search
211 -- browse fields with "starts with" type operators (for example, for
212 -- titles of songs in music albums)
213 IF idx.search_field THEN
214 output_row.search_field = TRUE;
216 RETURN NEXT output_row;
-- Reset the per-row state so it does not leak into later rows
217 output_row.browse_field = FALSE;
218 output_row.search_field = FALSE;
219 output_row.sort_value := NULL;
222 -- insert raw node text for faceting
223 IF idx.facet_field THEN
-- Facet value: the dedicated facet_xpath if configured, else the node text
225 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
226 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
228 facet_text := curr_text;
231 output_row.field_class = idx.field_class;
-- Facet rows are emitted with a negated field id;
-- metabib.reingest_metabib_field_entries flips the sign back.
232 output_row.field = -1 * idx.id;
233 output_row.source = rid;
234 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
236 output_row.facet_field = TRUE;
237 RETURN NEXT output_row;
238 output_row.facet_field = FALSE;
-- Nothing accumulated for this field definition: no search/display rows
243 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
245 -- insert combined node text for searching
246 IF idx.search_field THEN
247 output_row.field_class = idx.field_class;
248 output_row.field = idx.id;
249 output_row.source = rid;
250 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
252 output_row.search_field = TRUE;
253 RETURN NEXT output_row;
254 output_row.search_field = FALSE;
-- Display rows use the same combined, whitespace-collapsed text as search rows
257 IF idx.display_field THEN
258 output_row.field_class = idx.field_class;
259 output_row.field = idx.id;
260 output_row.source = rid;
261 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
263 output_row.display_field = TRUE;
264 RETURN NEXT output_row;
265 output_row.display_field = FALSE;
272 $func$ LANGUAGE PLPGSQL;
-- The replacement defined next takes five arguments (skip_display is new).
-- CREATE OR REPLACE cannot change an argument list, so the old four-argument
-- version is dropped first; otherwise both overloads would coexist.
-- NOTE(review): there is no IF EXISTS, so this statement errors if the old
-- signature is absent -- confirm that is the intended guard for this upgrade.
274 DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL);
-- metabib.reingest_metabib_field_entries(bib_id, skip_facet, skip_display,
--                                        skip_browse, skip_search)
--
-- Rebuilds the facet, display, browse and search entries for one bib record
-- from the rows returned by biblio.extract_metabib_field_entry.
--
--   bib_id       bib record to reingest
--   skip_*       TRUE skips that kind of entry for this call; FALSE defers to
--                the matching 'ingest.skip_*_indexing' row in
--                config.internal_flag
--
-- Returns VOID; the effect is the DELETEs and INSERTs on the metabib tables.
276 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
277 bib_id BIGINT, skip_facet BOOL DEFAULT FALSE,
278 skip_display BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE,
279 skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
282 ind_data metabib.field_entry_template%ROWTYPE;
283 mbe_row metabib.browse_entry%ROWTYPE;
-- Effective skip switches: NULLIF turns an argument of FALSE into NULL, so
-- COALESCE falls through to "is the global flag present and enabled?".
-- An argument of TRUE therefore always wins.
292 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
293 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
294 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
295 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
-- Probe the 'ingest.assume_inserts_only' flag; PERFORM discards the row and
-- only records whether one was found.
-- NOTE(review): presumably the cleanup DELETEs below are skipped when the
-- flag is enabled -- confirm.
297 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
-- Clear out the existing entries for this record, one kind at a time,
-- honouring the skip switches.
299 IF NOT b_skip_search THEN
300 FOR fclass IN SELECT * FROM config.metabib_class LOOP
301 -- RAISE NOTICE 'Emptying out %', fclass.name;
-- One <class>_field_entry table per metabib class.
-- NOTE(review): fclass.name is concatenated into the statement unquoted;
-- this relies on config.metabib_class holding trusted identifiers.
302 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
305 IF NOT b_skip_facet THEN
306 DELETE FROM metabib.facet_entry WHERE source = bib_id;
308 IF NOT b_skip_display THEN
309 DELETE FROM metabib.display_entry WHERE source = bib_id;
311 IF NOT b_skip_browse THEN
312 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
-- Re-extract every entry for the record and file each row by its use flag.
-- NOTE(review): this is a one-argument call, while the
-- biblio.extract_metabib_field_entry defined in this file takes
-- (rid, default_joiner); presumably a one-argument overload exists -- confirm.
316 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
-- Facet rows arrive with a negated field id; restore the real id
317 IF ind_data.field < 0 THEN
318 ind_data.field = -1 * ind_data.field;
321 IF ind_data.facet_field AND NOT b_skip_facet THEN
322 INSERT INTO metabib.facet_entry (field, source, value)
323 VALUES (ind_data.field, ind_data.source, ind_data.value);
326 IF ind_data.display_field AND NOT b_skip_display THEN
327 INSERT INTO metabib.display_entry (field, source, value)
328 VALUES (ind_data.field, ind_data.source, ind_data.value);
332 IF ind_data.browse_field AND NOT b_skip_browse THEN
333 -- A caveat about this SELECT: this should take care of replacing
334 -- old mbe rows when data changes, but not if normalization (by
335 -- which I mean specifically the output of
336 -- evergreen.oils_tsearch2()) changes. It may or may not be
337 -- expensive to add a comparison of index_vector to index_vector
338 -- to the WHERE clause below.
-- Browse entries are shared: look one up by normalized value and sort value,
-- then reuse its id or insert a new entry and take the id from the sequence.
-- NOTE(review): a select-then-insert like this can race with a concurrent
-- ingest of the same value -- confirm how duplicates are prevented.
340 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
341 SELECT INTO mbe_row * FROM metabib.browse_entry
342 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
345 mbe_id := mbe_row.id;
347 INSERT INTO metabib.browse_entry
348 ( value, sort_value ) VALUES
349 ( value_prepped, ind_data.sort_value );
351 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
-- Link the browse entry to this record, field definition and authority (if any)
354 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
355 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
358 IF ind_data.search_field AND NOT b_skip_search THEN
359 -- Avoid inserting duplicate rows
-- mbe_id is reused here purely as a scratch variable: it stays NULL when no
-- identical (field, source, value) row exists in the class table yet.
360 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
361 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
362 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
363 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
364 IF mbe_id IS NULL THEN
366 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
368 quote_literal(ind_data.field) || $$, $$ ||
369 quote_literal(ind_data.source) || $$, $$ ||
370 quote_literal(ind_data.value) ||
-- Search entries changed, so refresh the record's combined index vectors
377 IF NOT b_skip_search THEN
378 PERFORM metabib.update_combined_index_vectors(bib_id);
383 $func$ LANGUAGE PLPGSQL;
386 -- DATA -------------------
388 -- "General Keywords" and "All Subjects"
-- Turn display extraction off for these two field definitions; every other
-- definition keeps the column default of TRUE.
390 UPDATE config.metabib_field SET display_field = FALSE WHERE id IN (15, 16);
-- Register the global switch read by metabib.reingest_metabib_field_entries.
-- It starts disabled, so display entries are built on ingest.
392 INSERT INTO config.internal_flag (name, enabled)
393 VALUES ('ingest.skip_display_indexing', FALSE);
395 -- TODO: targeted ingest?
-- The statements below are intentionally left commented out. Enabled, they
-- would touch marc on every biblio.record_entry row while
-- 'ingest.reingest.force_on_same_marc' is on -- presumably forcing a reingest
-- of every bib record, which populates display entries for existing records.
398 --UPDATE config.internal_flag SET enabled = TRUE
399 -- WHERE name = 'ingest.reingest.force_on_same_marc';
400 --UPDATE biblio.record_entry SET marc = marc;
401 --UPDATE config.internal_flag SET enabled = FALSE
402 -- WHERE name = 'ingest.reingest.force_on_same_marc';