3 SELECT evergreen.upgrade_deps_block_check('1101', :eg_version);
5 ALTER TABLE config.metabib_field ALTER COLUMN xpath DROP NOT NULL;
7 CREATE TABLE config.metabib_field_virtual_map (
9 real INT NOT NULL REFERENCES config.metabib_field (id),
10 virtual INT NOT NULL REFERENCES config.metabib_field (id),
11 weight INT NOT NULL DEFAULT 1
13 COMMENT ON TABLE config.metabib_field_virtual_map IS $$
14 Maps between real (physically extracted) index definitions
15 and virtual (target sync, no required extraction of its own)
18 The virtual side may not extract any data of its own, but
19 will collect data from all of the real fields. This reduces
20 extraction (ingest) overhead by eliminating duplcated extraction,
21 and allows for searching across novel combinations of fields, such
22 as names used as either subjects or authors. By preserving this
23 mapping rather than defining duplicate extractions, information
24 about the originating, "real" index definitions can be used
25 in interesting ways, such as highlighting in search results.
28 CREATE OR REPLACE VIEW metabib.combined_all_field_entry AS
29 SELECT * FROM metabib.combined_title_field_entry
31 SELECT * FROM metabib.combined_author_field_entry
33 SELECT * FROM metabib.combined_subject_field_entry
35 SELECT * FROM metabib.combined_keyword_field_entry
37 SELECT * FROM metabib.combined_identifier_field_entry
39 SELECT * FROM metabib.combined_series_field_entry;
42 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
47 ) RETURNS SETOF metabib.field_entry_template AS $func$
49 bib biblio.record_entry%ROWTYPE;
50 idx config.metabib_field%ROWTYPE;
51 xfrm config.xml_transform%ROWTYPE;
62 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
64 authority_link BIGINT;
65 output_row metabib.field_entry_template%ROWTYPE;
69 -- Start out with no field-use bools set
70 output_row.browse_field = FALSE;
71 output_row.facet_field = FALSE;
72 output_row.display_field = FALSE;
73 output_row.search_field = FALSE;
76 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
78 -- Loop over the indexing entries
79 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
80 CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
83 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
84 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
85 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
86 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
87 CONTINUE WHEN process_idx = FALSE; -- disabled for all types
89 joiner := COALESCE(idx.joiner, default_joiner);
91 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
93 -- See if we can skip the XSLT ... it's expensive
94 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
95 -- Can't skip the transform
96 IF xfrm.xslt <> '---' THEN
97 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
99 transformed_xml := bib.marc;
102 prev_xfrm := xfrm.name;
105 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
108 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
109 CONTINUE WHEN xml_node !~ E'^\\s*<';
111 -- XXX much of this should be moved into oils_xpath_string...
112 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
113 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
114 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
115 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
119 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
121 IF raw_text IS NOT NULL THEN
122 raw_text := raw_text || joiner;
125 raw_text := COALESCE(raw_text,'') || curr_text;
127 -- autosuggest/metabib.browse_entry
128 IF idx.browse_field THEN
130 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
131 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
133 browse_text := curr_text;
136 IF idx.browse_sort_xpath IS NOT NULL AND
137 idx.browse_sort_xpath <> '' THEN
139 sort_value := oils_xpath_string(
140 idx.browse_sort_xpath, xml_node, joiner,
141 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
144 sort_value := browse_text;
147 output_row.field_class = idx.field_class;
148 output_row.field = idx.id;
149 output_row.source = rid;
150 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
151 output_row.sort_value :=
152 public.naco_normalize(sort_value);
154 output_row.authority := NULL;
156 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
157 authority_text := oils_xpath_string(
158 idx.authority_xpath, xml_node, joiner,
160 ARRAY[xfrm.prefix, xfrm.namespace_uri],
161 ARRAY['xlink','http://www.w3.org/1999/xlink']
165 IF authority_text ~ '^\d+$' THEN
166 authority_link := authority_text::BIGINT;
167 PERFORM * FROM authority.record_entry WHERE id = authority_link;
169 output_row.authority := authority_link;
175 output_row.browse_field = TRUE;
176 -- Returning browse rows with search_field = true for search+browse
177 -- configs allows us to retain granularity of being able to search
178 -- browse fields with "starts with" type operators (for example, for
179 -- titles of songs in music albums)
180 IF idx.search_field THEN
181 output_row.search_field = TRUE;
183 RETURN NEXT output_row;
184 output_row.browse_field = FALSE;
185 output_row.search_field = FALSE;
186 output_row.sort_value := NULL;
189 -- insert raw node text for faceting
190 IF idx.facet_field THEN
192 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
193 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
195 facet_text := curr_text;
198 output_row.field_class = idx.field_class;
199 output_row.field = -1 * idx.id;
200 output_row.source = rid;
201 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
203 output_row.facet_field = TRUE;
204 RETURN NEXT output_row;
205 output_row.facet_field = FALSE;
208 -- insert raw node text for display
209 IF idx.display_field THEN
211 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
212 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
214 display_text := curr_text;
217 output_row.field_class = idx.field_class;
218 output_row.field = -1 * idx.id;
219 output_row.source = rid;
220 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
222 output_row.display_field = TRUE;
223 RETURN NEXT output_row;
224 output_row.display_field = FALSE;
229 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
231 -- insert combined node text for searching
232 IF idx.search_field THEN
233 output_row.field_class = idx.field_class;
234 output_row.field = idx.id;
235 output_row.source = rid;
236 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
238 output_row.search_field = TRUE;
239 RETURN NEXT output_row;
240 output_row.search_field = FALSE;
246 $func$ LANGUAGE PLPGSQL;
248 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
255 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
256 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
257 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
258 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
259 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
260 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
261 FROM metabib.keyword_field_entry WHERE source = bib_id;
263 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
264 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
265 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
266 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
267 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
268 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
269 FROM metabib.title_field_entry WHERE source = bib_id;
271 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
272 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
273 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
274 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
275 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
276 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
277 FROM metabib.author_field_entry WHERE source = bib_id;
279 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
280 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
281 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
282 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
283 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
284 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
285 FROM metabib.subject_field_entry WHERE source = bib_id;
287 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
288 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
289 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
290 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
291 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
292 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
293 FROM metabib.series_field_entry WHERE source = bib_id;
295 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
296 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
297 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
298 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
299 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
300 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
301 FROM metabib.identifier_field_entry WHERE source = bib_id;
303 -- For each virtual def, gather the data from the combined real field
304 -- entries and append it to the virtual combined entry.
305 FOR vfield, rfields IN SELECT virtual, ARRAY_AGG(real) FROM config.metabib_field_virtual_map GROUP BY virtual LOOP
306 SELECT field_class INTO vclass
307 FROM config.metabib_field
310 SELECT string_agg(index_vector::TEXT,' ')::tsvector INTO rdata
311 FROM metabib.combined_all_field_entry
312 WHERE record = bib_id
313 AND metabib_field = ANY (rfields);
315 BEGIN -- I cannot wait for INSERT ON CONFLICT ... 9.5, though
317 INSERT INTO metabib.combined_$$ || vclass || $$_field_entry
318 (record, metabib_field, index_vector) VALUES ($1, $2, $3)
319 $$ USING bib_id, vfield, rdata;
320 EXCEPTION WHEN unique_violation THEN
322 UPDATE metabib.combined_$$ || vclass || $$_field_entry
323 SET index_vector = index_vector || $3
325 AND metabib_field = $2
326 $$ USING bib_id, vfield, rdata;
328 -- ignore and move on
332 $func$ LANGUAGE PLPGSQL;
334 CREATE OR REPLACE VIEW search.best_tsconfig AS
336 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
337 FROM config.metabib_field m
338 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
339 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND f.index_weight = 'C');
341 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
343 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
346 field_list INT[] DEFAULT '{}'::INT[],
347 css_class TEXT DEFAULT 'oils_SH',
348 hl_all BOOL DEFAULT TRUE,
349 minwords INT DEFAULT 5,
350 maxwords INT DEFAULT 25,
351 shortwords INT DEFAULT 0,
352 maxfrags INT DEFAULT 0,
353 delimiter TEXT DEFAULT ' ... '
354 ) RETURNS SETOF search.highlight_result AS $f$
357 v_css_class TEXT := css_class;
358 v_delimiter TEXT := delimiter;
359 v_field_list INT[] := field_list;
362 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
363 v_delimiter := ' ... ';
367 opts := opts || 'MinWords=' || minwords;
368 opts := opts || ', MaxWords=' || maxwords;
369 opts := opts || ', ShortWords=' || shortwords;
370 opts := opts || ', MaxFragments=' || maxfrags;
371 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
373 opts := opts || 'HighlightAll=TRUE';
376 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
377 v_css_class := 'oils_SH';
380 opts := opts || $$, StopSel=</b>, StartSel="<b class='$$ || v_css_class; -- "
382 IF v_field_list = '{}'::INT[] THEN
383 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
392 ts_config::REGCONFIG,
393 evergreen.escape_for_html(de.value),
394 $$ || quote_literal(tsq) || $$,
395 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
397 FROM metabib.display_entry de
398 JOIN config.metabib_field mf ON (mf.id = de.field)
399 JOIN search.best_tsconfig t ON (t.id = de.field)
404 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
406 $f$ LANGUAGE PLPGSQL;
408 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
409 SELECT regexp_replace(
425 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
427 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
429 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
430 css_class TEXT DEFAULT 'oils_SH',
431 hl_all BOOL DEFAULT TRUE,
432 minwords INT DEFAULT 5,
433 maxwords INT DEFAULT 25,
434 shortwords INT DEFAULT 0,
435 maxfrags INT DEFAULT 0,
436 delimiter TEXT DEFAULT ' ... '
437 ) RETURNS SETOF search.highlight_result AS $f$
446 IF (tsq_map ILIKE 'hstore%') THEN
447 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
449 tsq_hstore := tsq_map::HSTORE;
452 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore::HSTORE) LOOP
453 SELECT ARRAY_AGG(unnest::INT) INTO afields
454 FROM unnest(regexp_split_to_array(fields,','));
455 seen := seen || afields;
458 SELECT * FROM search.highlight_display_fields_impl(
459 rid, tsq, afields, css_class, hl_all,minwords,
460 maxwords, shortwords, maxfrags, delimiter
470 FROM metabib.display_entry
472 AND NOT (field = ANY (seen));
474 $f$ LANGUAGE PLPGSQL ROWS 10;
476 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib(
479 bib_is_deleted boolean DEFAULT false,
480 retain_deleted boolean DEFAULT false
481 ) RETURNS bigint AS $function$
483 new_mapping BOOL := TRUE;
486 tmp_mr metabib.metarecord%ROWTYPE;
487 deleted_mrs BIGINT[];
490 -- We need to make sure we're not a deleted master record of an MR
491 IF bib_is_deleted THEN
492 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
493 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
496 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
498 -- Now, are there any more sources on this MR?
499 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
501 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
502 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
503 DELETE FROM metabib.metarecord WHERE id = old_mr;
505 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
506 UPDATE metabib.metarecord
508 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
513 ELSE -- insert or update
515 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
517 -- Find the first fingerprint-matching
518 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
520 new_mapping := FALSE;
522 ELSE -- Our fingerprint changed ... maybe remove the old MR
523 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
524 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
525 IF source_count = 0 THEN -- No other records
526 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
527 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
533 -- we found no suitable, preexisting MR based on old source maps
534 IF old_mr IS NULL THEN
535 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
537 IF old_mr IS NULL THEN -- nope, create one and grab its id
538 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
539 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
541 ELSE -- indeed there is. update it with a null cache and recalcualated master record
542 UPDATE metabib.metarecord
544 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
548 ELSE -- there was one we already attached to, update its mods cache and master_record
549 UPDATE metabib.metarecord
551 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
556 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
561 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
562 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
568 $function$ LANGUAGE plpgsql;
570 CREATE OR REPLACE FUNCTION evergreen.marc_to (marc text, xfrm text) RETURNS TEXT AS $$
571 SELECT evergreen.xml_pretty_print(xslt_process($1,xslt)::XML)::TEXT FROM config.xml_transform WHERE name = $2;