-- Upgrade-script preamble. Calls evergreen.upgrade_deps_block_check for
-- upgrade id '1298' with the :eg_version psql variable.
-- NOTE(review): presumably this stops the script when the upgrade's
-- dependencies are not satisfied; confirm against the function's definition.
3 SELECT evergreen.upgrade_deps_block_check('1298', :eg_version);
-- Add a browse_nocase boolean attribute to the composite type that
-- biblio.extract_metabib_field_entry returns. CASCADE propagates the change
-- to typed tables of this type.
5 ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE browse_nocase BOOL CASCADE;
-- Add the matching setting to the field definitions. The column is NOT NULL
-- with a FALSE default.
7 ALTER TABLE config.metabib_field ADD COLUMN browse_nocase BOOL NOT NULL DEFAULT FALSE;
-- biblio.extract_metabib_field_entry
--
-- Extracts index rows from one bibliographic record for the configured
-- metabib fields and returns them as a set of metabib.field_entry_template
-- rows.
--
-- NOTE(review): this excerpt does not show the parameter list, most of the
-- DECLARE section, or the BEGIN / ELSE / END IF / END LOOP lines. The notes
-- below describe only the statements that are visible.
--
-- Inputs, as used in the body:
--   rid            - id of the biblio.record_entry row to index
--   default_joiner - joiner used when a field definition has no joiner
--   field_types    - array of requested uses; the body tests for
--                    'display', 'browse', 'search' and 'facet'
--   only_fields    - ids of the config.metabib_field rows to consider
--
-- Rows produced:
--   browse  - field = idx.id, browse_field = TRUE (search_field is TRUE as
--             well when the definition is also a search field); carries
--             sort_value, authority and browse_nocase
--   facet   - field = -1 * idx.id, facet_field = TRUE
--   display - field = -1 * idx.id, display_field = TRUE
--   search  - field = idx.id, search_field = TRUE, value built from raw_text
--
-- NOTE(review): the per-use blocks test only idx.browse_field /
-- idx.facet_field / idx.display_field / idx.search_field, not field_types,
-- so a definition that passes the process_idx gate emits rows for every use
-- it has enabled. metabib.reingest_metabib_field_entries filters them again
-- with its own skip flags.
9 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
14 ) RETURNS SETOF metabib.field_entry_template AS $func$
16 bib biblio.record_entry%ROWTYPE;
17 idx config.metabib_field%ROWTYPE;
18 xfrm config.xml_transform%ROWTYPE;
29 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
31 authority_link BIGINT;
32 output_row metabib.field_entry_template%ROWTYPE;
36 -- Start out with no field-use bools set
37 output_row.browse_nocase = FALSE;
38 output_row.browse_field = FALSE;
39 output_row.facet_field = FALSE;
40 output_row.display_field = FALSE;
41 output_row.search_field = FALSE;
-- Load the record being indexed.
44 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
46 -- Loop over the indexing entries
-- ORDER BY format keeps definitions that share a transform adjacent,
-- presumably so the prev_xfrm check below can reuse transformed_xml.
47 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
48 CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
-- Process this definition only if at least one use it has enabled was
-- requested in field_types.
-- NOTE(review): the statement that initialises process_idx is not visible
-- in this excerpt.
51 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
52 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
53 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
54 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
55 CONTINUE WHEN process_idx = FALSE; -- disabled for all types
-- A joiner set on the field definition overrides the default joiner.
57 joiner := COALESCE(idx.joiner, default_joiner);
-- Look up the transform named by the definition's format.
59 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
61 -- See if we can skip the XSLT ... it's expensive
62 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
63 -- Can't skip the transform
-- An xslt value of '---' is handled differently from a real stylesheet: one
-- assignment below runs the stylesheet, the other uses the raw MARC.
-- NOTE(review): the ELSE separating the two assignments is not visible here.
64 IF xfrm.xslt <> '---' THEN
65 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
67 transformed_xml := bib.marc;
-- Remember which transform produced transformed_xml.
70 prev_xfrm := xfrm.name;
-- Select this definition's nodes from the (possibly transformed) XML.
73 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
76 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
-- Skip results whose text does not begin with '<' (ignoring leading
-- whitespace).
77 CONTINUE WHEN xml_node !~ E'^\\s*<';
79 -- XXX much of this should be moved into oils_xpath_string...
80 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
81 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
82 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
83 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
-- Nothing usable in this node: move on to the next one.
87 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
-- Accumulate this node's text into raw_text, separated by joiner. raw_text
-- feeds the search row emitted further down.
89 IF raw_text IS NOT NULL THEN
90 raw_text := raw_text || joiner;
93 raw_text := COALESCE(raw_text,'') || curr_text;
95 -- autosuggest/metabib.browse_entry
96 IF idx.browse_field THEN
-- Carry the definition's browse_nocase setting on the browse row.
97 output_row.browse_nocase = idx.browse_nocase;
-- Browse value: from browse_xpath when one is configured; the other
-- assignment uses the node text.
99 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
100 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
102 browse_text := curr_text;
-- Sort key source: from browse_sort_xpath when one is configured; the other
-- assignment uses the browse text.
105 IF idx.browse_sort_xpath IS NOT NULL AND
106 idx.browse_sort_xpath <> '' THEN
108 sort_value := oils_xpath_string(
109 idx.browse_sort_xpath, xml_node, joiner,
110 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
113 sort_value := browse_text;
116 output_row.field_class = idx.field_class;
117 output_row.field = idx.id;
118 output_row.source = rid;
-- Collapse whitespace runs to single spaces and trim the ends.
119 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
120 output_row.sort_value :=
121 public.naco_normalize(sort_value);
-- No authority link unless the block below finds one.
123 output_row.authority := NULL;
-- Optional authority link, extracted with the xlink namespace available in
-- addition to the transform's own namespace.
125 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
126 authority_text := oils_xpath_string(
127 idx.authority_xpath, xml_node, joiner,
129 ARRAY[xfrm.prefix, xfrm.namespace_uri],
130 ARRAY['xlink','http://www.w3.org/1999/xlink']
-- Accept only an all-digit value, then look that id up in
-- authority.record_entry.
-- NOTE(review): the guard around the assignment (presumably IF FOUND) is not
-- visible in this excerpt.
134 IF authority_text ~ '^\d+$' THEN
135 authority_link := authority_text::BIGINT;
136 PERFORM * FROM authority.record_entry WHERE id = authority_link;
138 output_row.authority := authority_link;
144 output_row.browse_field = TRUE;
145 -- Returning browse rows with search_field = true for search+browse
146 -- configs allows us to retain granularity of being able to search
147 -- browse fields with "starts with" type operators (for example, for
148 -- titles of songs in music albums)
149 IF idx.search_field THEN
150 output_row.search_field = TRUE;
152 RETURN NEXT output_row;
-- Reset the browse-specific fields so later rows built from output_row do
-- not inherit them.
153 output_row.browse_nocase = FALSE;
154 output_row.browse_field = FALSE;
155 output_row.search_field = FALSE;
156 output_row.sort_value := NULL;
159 -- insert raw node text for faceting
160 IF idx.facet_field THEN
162 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
163 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
165 facet_text := curr_text;
168 output_row.field_class = idx.field_class;
-- Facet rows are emitted with the field id negated;
-- metabib.reingest_metabib_field_entries turns negative ids back to positive.
169 output_row.field = -1 * idx.id;
170 output_row.source = rid;
171 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
173 output_row.facet_field = TRUE;
174 RETURN NEXT output_row;
175 output_row.facet_field = FALSE;
178 -- insert raw node text for display
179 IF idx.display_field THEN
181 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
182 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
184 display_text := curr_text;
187 output_row.field_class = idx.field_class;
-- Display rows use the same negated-id convention as facet rows.
188 output_row.field = -1 * idx.id;
189 output_row.source = rid;
190 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
192 output_row.display_field = TRUE;
193 RETURN NEXT output_row;
194 output_row.display_field = FALSE;
-- No text was collected, so there is no search row for this definition.
199 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
201 -- insert combined node text for searching
202 IF idx.search_field THEN
203 output_row.field_class = idx.field_class;
204 output_row.field = idx.id;
205 output_row.source = rid;
206 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
208 output_row.search_field = TRUE;
209 RETURN NEXT output_row;
210 output_row.search_field = FALSE;
216 $func$ LANGUAGE PLPGSQL;
-- metabib.reingest_metabib_field_entries
--
-- Rebuilds the metabib index entries (search, facet, display, browse) for
-- one bibliographic record from the rows returned by
-- biblio.extract_metabib_field_entry.
--
-- NOTE(review): this excerpt does not show the first parameter (the body
-- uses bib_id), several DECLAREs, or the BEGIN / ELSE / END IF / END LOOP
-- lines. The notes below describe only the statements that are visible.
--
-- Parameters visible here:
--   skip_facet, skip_display, skip_browse, skip_search
--       - TRUE skips that kind of indexing; otherwise the matching
--         'ingest.skip_*_indexing' row in config.internal_flag decides
--   only_fields
--       - config.metabib_field ids to process; the default empty array means
--         every field
-- Returns: VOID.
218 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
220 skip_facet BOOL DEFAULT FALSE,
221 skip_display BOOL DEFAULT FALSE,
222 skip_browse BOOL DEFAULT FALSE,
223 skip_search BOOL DEFAULT FALSE,
224 only_fields INT[] DEFAULT '{}'::INT[]
225 ) RETURNS VOID AS $func$
228 ind_data metabib.field_entry_template%ROWTYPE;
229 mbe_row metabib.browse_entry%ROWTYPE;
236 field_list INT[] := only_fields;
237 field_types TEXT[] := '{}'::TEXT[];
-- An empty only_fields means every configured field.
240 IF field_list = '{}'::INT[] THEN
241 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
-- Effective skip flags. NULLIF turns a FALSE argument into NULL, so COALESCE
-- falls through to the config.internal_flag lookup; a TRUE argument wins
-- outright.
244 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
245 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
246 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
247 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
-- Build the list of uses to request from the extraction function.
249 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
250 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
251 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
252 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
-- PERFORM sets FOUND according to whether the flag row exists and is enabled.
-- NOTE(review): the test of FOUND is not visible in this excerpt; presumably
-- IF NOT FOUND THEN, which would make the deletes below conditional on the
-- flag being off.
254 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
256 IF NOT b_skip_search THEN
257 FOR fclass IN SELECT * FROM config.metabib_class LOOP
258 -- RAISE NOTICE 'Emptying out %', fclass.name;
-- One <class>_field_entry table per config.metabib_class row.
-- NOTE(review): fclass.name and bib_id are concatenated straight into the
-- statement. Both look like trusted values (a config table and, presumably,
-- a numeric id), but format() with %I plus USING would be the safer form.
259 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
262 IF NOT b_skip_facet THEN
263 DELETE FROM metabib.facet_entry WHERE source = bib_id;
265 IF NOT b_skip_display THEN
266 DELETE FROM metabib.display_entry WHERE source = bib_id;
-- Only the record's browse mappings are removed here; no delete of
-- metabib.browse_entry rows is visible in this excerpt.
268 IF NOT b_skip_browse THEN
269 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
-- Extract rows for the requested uses and fields, passing a single space as
-- the second argument (presumably default_joiner).
273 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
275 -- don't store what has been normalized away
276 CONTINUE WHEN ind_data.value IS NULL;
-- The extraction function emits facet and display rows with a negated field
-- id; turn it back into the real definition id.
278 IF ind_data.field < 0 THEN
279 ind_data.field = -1 * ind_data.field;
282 IF ind_data.facet_field AND NOT b_skip_facet THEN
283 INSERT INTO metabib.facet_entry (field, source, value)
284 VALUES (ind_data.field, ind_data.source, ind_data.value);
287 IF ind_data.display_field AND NOT b_skip_display THEN
288 INSERT INTO metabib.display_entry (field, source, value)
289 VALUES (ind_data.field, ind_data.source, ind_data.value);
293 IF ind_data.browse_field AND NOT b_skip_browse THEN
294 -- A caveat about this SELECT: this should take care of replacing
295 -- old mbe rows when data changes, but not if normalization (by
296 -- which I mean specifically the output of
297 -- evergreen.oils_tsearch2()) changes. It may or may not be
298 -- expensive to add a comparison of index_vector to index_vector
299 -- to the WHERE clause below.
-- A browse row without a sort value is dropped. CONTINUE leaves the whole
-- iteration, so the search-field handling further down is skipped for this
-- row as well.
301 CONTINUE WHEN ind_data.sort_value IS NULL;
-- Normalize the browse value for this field before matching or storing it.
303 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
-- Find an existing browse entry to reuse. With browse_nocase the values are
-- compared through evergreen.lowercase and one match is taken; the other
-- lookup compares the value exactly.
304 IF ind_data.browse_nocase THEN
305 SELECT INTO mbe_row * FROM metabib.browse_entry
306 WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
307 ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
309 SELECT INTO mbe_row * FROM metabib.browse_entry
310 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
-- Either reuse the matched entry's id, or insert a new entry and read its id
-- back from the sequence.
-- NOTE(review): the IF FOUND / ELSE lines that choose between the two are
-- not visible in this excerpt. A lookup followed by an insert has no visible
-- guard against a concurrent insert of the same entry.
314 mbe_id := mbe_row.id;
316 INSERT INTO metabib.browse_entry
317 ( value, sort_value ) VALUES
318 ( value_prepped, ind_data.sort_value );
320 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
-- Link the browse entry to this field definition, record and authority
-- (the authority may be NULL).
323 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
324 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
327 IF ind_data.search_field AND NOT b_skip_search THEN
328 -- Avoid inserting duplicate rows
-- Dynamic SQL because the table name depends on field_class. The lookup
-- binds its values with USING, and mbe_id is reused as the target for its
-- result. The insert that follows builds its values with quote_literal.
329 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
330 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
331 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
332 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
333 IF mbe_id IS NULL THEN
335 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
337 quote_literal(ind_data.field) || $$, $$ ||
338 quote_literal(ind_data.source) || $$, $$ ||
339 quote_literal(ind_data.value) ||
-- Refresh the record's combined index vectors unless search indexing was
-- skipped.
346 IF NOT b_skip_search THEN
347 PERFORM metabib.update_combined_index_vectors(bib_id);
352 $func$ LANGUAGE PLPGSQL;