3 ALTER TABLE config.metabib_field ALTER COLUMN xpath DROP NOT NULL;
5 CREATE TABLE config.metabib_field_virtual_map (
7 real INT NOT NULL REFERENCES config.metabib_field (id),
8 virtual INT NOT NULL REFERENCES config.metabib_field (id),
9 weight INT NOT NULL DEFAULT 1
11 COMMENT ON TABLE config.metabib_field_virtual_map IS $$
12 Maps between real (physically extracted) index definitions
13 and virtual (target sync, no required extraction of its own)
16 The virtual side may not extract any data of its own, but
17 will collect data from all of the real fields. This reduces
18 extraction (ingest) overhead by eliminating duplcated extraction,
19 and allows for searching across novel combinations of fields, such
20 as names used as either subjects or authors. By preserving this
21 mapping rather than defining duplicate extractions, information
22 about the originating, "real" index definitions can be used
23 in interesting ways, such as highlighting in search results.
26 CREATE OR REPLACE VIEW metabib.combined_all_field_entry AS
27 SELECT * FROM metabib.combined_title_field_entry
29 SELECT * FROM metabib.combined_author_field_entry
31 SELECT * FROM metabib.combined_subject_field_entry
33 SELECT * FROM metabib.combined_keyword_field_entry
35 SELECT * FROM metabib.combined_identifier_field_entry
37 SELECT * FROM metabib.combined_series_field_entry;
40 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
45 ) RETURNS SETOF metabib.field_entry_template AS $func$
47 bib biblio.record_entry%ROWTYPE;
48 idx config.metabib_field%ROWTYPE;
49 xfrm config.xml_transform%ROWTYPE;
60 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
62 authority_link BIGINT;
63 output_row metabib.field_entry_template%ROWTYPE;
67 -- Start out with no field-use bools set
68 output_row.browse_field = FALSE;
69 output_row.facet_field = FALSE;
70 output_row.display_field = FALSE;
71 output_row.search_field = FALSE;
74 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
76 -- Loop over the indexing entries
77 FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
78 CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
81 IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
82 IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
83 IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
84 IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
85 CONTINUE WHEN process_idx = FALSE; -- disabled for all types
87 joiner := COALESCE(idx.joiner, default_joiner);
89 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
91 -- See if we can skip the XSLT ... it's expensive
92 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
93 -- Can't skip the transform
94 IF xfrm.xslt <> '---' THEN
95 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
97 transformed_xml := bib.marc;
100 prev_xfrm := xfrm.name;
103 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
106 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
107 CONTINUE WHEN xml_node !~ E'^\\s*<';
109 -- XXX much of this should be moved into oils_xpath_string...
110 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
111 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
112 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
113 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
117 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
119 IF raw_text IS NOT NULL THEN
120 raw_text := raw_text || joiner;
123 raw_text := COALESCE(raw_text,'') || curr_text;
125 -- autosuggest/metabib.browse_entry
126 IF idx.browse_field THEN
128 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
129 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
131 browse_text := curr_text;
134 IF idx.browse_sort_xpath IS NOT NULL AND
135 idx.browse_sort_xpath <> '' THEN
137 sort_value := oils_xpath_string(
138 idx.browse_sort_xpath, xml_node, joiner,
139 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
142 sort_value := browse_text;
145 output_row.field_class = idx.field_class;
146 output_row.field = idx.id;
147 output_row.source = rid;
148 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
149 output_row.sort_value :=
150 public.naco_normalize(sort_value);
152 output_row.authority := NULL;
154 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
155 authority_text := oils_xpath_string(
156 idx.authority_xpath, xml_node, joiner,
158 ARRAY[xfrm.prefix, xfrm.namespace_uri],
159 ARRAY['xlink','http://www.w3.org/1999/xlink']
163 IF authority_text ~ '^\d+$' THEN
164 authority_link := authority_text::BIGINT;
165 PERFORM * FROM authority.record_entry WHERE id = authority_link;
167 output_row.authority := authority_link;
173 output_row.browse_field = TRUE;
174 -- Returning browse rows with search_field = true for search+browse
175 -- configs allows us to retain granularity of being able to search
176 -- browse fields with "starts with" type operators (for example, for
177 -- titles of songs in music albums)
178 IF idx.search_field THEN
179 output_row.search_field = TRUE;
181 RETURN NEXT output_row;
182 output_row.browse_field = FALSE;
183 output_row.search_field = FALSE;
184 output_row.sort_value := NULL;
187 -- insert raw node text for faceting
188 IF idx.facet_field THEN
190 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
191 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
193 facet_text := curr_text;
196 output_row.field_class = idx.field_class;
197 output_row.field = -1 * idx.id;
198 output_row.source = rid;
199 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
201 output_row.facet_field = TRUE;
202 RETURN NEXT output_row;
203 output_row.facet_field = FALSE;
206 -- insert raw node text for display
207 IF idx.display_field THEN
209 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
210 display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
212 display_text := curr_text;
215 output_row.field_class = idx.field_class;
216 output_row.field = -1 * idx.id;
217 output_row.source = rid;
218 output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
220 output_row.display_field = TRUE;
221 RETURN NEXT output_row;
222 output_row.display_field = FALSE;
227 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
229 -- insert combined node text for searching
230 IF idx.search_field THEN
231 output_row.field_class = idx.field_class;
232 output_row.field = idx.id;
233 output_row.source = rid;
234 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
236 output_row.search_field = TRUE;
237 RETURN NEXT output_row;
238 output_row.search_field = FALSE;
244 $func$ LANGUAGE PLPGSQL;
246 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
253 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
254 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
255 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
256 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
257 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
258 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
259 FROM metabib.keyword_field_entry WHERE source = bib_id;
261 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
262 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
263 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
264 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
265 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
266 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
267 FROM metabib.title_field_entry WHERE source = bib_id;
269 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
270 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
271 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
272 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
273 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
274 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
275 FROM metabib.author_field_entry WHERE source = bib_id;
277 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
278 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
279 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
280 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
281 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
282 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
283 FROM metabib.subject_field_entry WHERE source = bib_id;
285 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
286 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
287 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
288 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
289 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
290 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
291 FROM metabib.series_field_entry WHERE source = bib_id;
293 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
294 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
295 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
296 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
297 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
298 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
299 FROM metabib.identifier_field_entry WHERE source = bib_id;
301 -- For each virtual def, gather the data from the combined real field
302 -- entries and append it to the virtual combined entry.
303 FOR vfield, rfields IN SELECT virtual, ARRAY_AGG(real) FROM config.metabib_field_virtual_map GROUP BY virtual LOOP
304 SELECT field_class INTO vclass
305 FROM config.metabib_field
308 SELECT string_agg(index_vector::TEXT,' ')::tsvector INTO rdata
309 FROM metabib.combined_all_field_entry
310 WHERE record = bib_id
311 AND metabib_field = ANY (rfields);
313 BEGIN -- I cannot wait for INSERT ON CONFLICT ... 9.5, though
315 INSERT INTO metabib.combined_$$ || vclass || $$_field_entry
316 (record, metabib_field, index_vector) VALUES ($1, $2, $3)
317 $$ USING bib_id, vfield, rdata;
318 EXCEPTION WHEN unique_violation THEN
320 UPDATE metabib.combined_$$ || vclass || $$_field_entry
321 SET index_vector = index_vector || $3
323 AND metabib_field = $2
324 $$ USING bib_id, vfield, rdata;
326 -- ignore and move on
330 $func$ LANGUAGE PLPGSQL;
332 CREATE OR REPLACE VIEW search.best_tsconfig AS
334 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
335 FROM config.metabib_field m
336 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
337 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND c.index_weight = 'C');
339 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
341 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
344 field_list INT[] DEFAULT '{}'::INT[],
345 css_class TEXT DEFAULT 'oils_SH',
346 hl_all BOOL DEFAULT TRUE,
347 minwords INT DEFAULT 5,
348 maxwords INT DEFAULT 25,
349 shortwords INT DEFAULT 0,
350 maxfrags INT DEFAULT 0,
351 delimiter TEXT DEFAULT ' ... '
352 ) RETURNS SETOF search.highlight_result AS $f$
355 v_css_class TEXT := css_class;
356 v_delimiter TEXT := delimiter;
357 v_field_list INT[] := field_list;
360 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
361 v_delimiter := ' ... ';
365 opts := opts || 'MinWords=' || minwords;
366 opts := opts || ', MaxWords=' || maxwords;
367 opts := opts || ', ShortWords=' || shortwords;
368 opts := opts || ', MaxFragments=' || maxfrags;
369 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
371 opts := opts || 'HighlightAll=TRUE';
374 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
375 v_css_class := 'oils_SH';
378 opts := opts || $$, StopSel=</b>, StartSel="<b class='$$ || v_css_class; -- "
380 IF v_field_list = '{}'::INT[] THEN
381 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
390 ts_config::REGCONFIG,
391 evergreen.escape_for_html(de.value),
392 $$ || quote_literal(tsq) || $$,
393 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
395 FROM metabib.display_entry de
396 JOIN config.metabib_field mf ON (mf.id = de.field)
397 JOIN search.best_tsconfig t ON (t.id = de.field)
402 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
404 $f$ LANGUAGE PLPGSQL;
406 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
407 SELECT regexp_replace(
423 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
425 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
427 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
428 css_class TEXT DEFAULT 'oils_SH',
429 hl_all BOOL DEFAULT TRUE,
430 minwords INT DEFAULT 5,
431 maxwords INT DEFAULT 25,
432 shortwords INT DEFAULT 0,
433 maxfrags INT DEFAULT 0,
434 delimiter TEXT DEFAULT ' ... '
435 ) RETURNS SETOF search.highlight_result AS $f$
444 IF (tsq_map ILIKE 'hstore%') THEN
445 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
447 tsq_hstore := tsq_map::HSTORE;
450 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore) LOOP
451 SELECT ARRAY_AGG(unnest::INT) INTO afields
452 FROM unnest(regexp_split_to_array(fields,','));
453 seen := seen || afields;
456 SELECT * FROM search.highlight_display_fields_impl(
457 rid, tsq, afields, css_class, hl_all,minwords,
458 maxwords, shortwords, maxfrags, delimiter
468 FROM metabib.display_entry
470 AND NOT (field = ANY (seen));
472 $f$ LANGUAGE PLPGSQL ROWS 10;
474 CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib(
477 bib_is_deleted boolean DEFAULT false,
478 retain_deleted boolean DEFAULT false
479 ) RETURNS bigint AS $function$
481 new_mapping BOOL := TRUE;
484 tmp_mr metabib.metarecord%ROWTYPE;
485 deleted_mrs BIGINT[];
488 -- We need to make sure we're not a deleted master record of an MR
489 IF bib_is_deleted THEN
490 IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained
491 DELETE FROM metabib.metarecord_source_map WHERE source = bib_id;
494 FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP
496 -- Now, are there any more sources on this MR?
497 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr;
499 IF source_count = 0 AND NOT retain_deleted THEN -- No other records
500 deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case...
501 DELETE FROM metabib.metarecord WHERE id = old_mr;
503 ELSE -- indeed there are. Update it with a null cache and recalcualated master record
504 UPDATE metabib.metarecord
506 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
511 ELSE -- insert or update
513 FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP
515 -- Find the first fingerprint-matching
516 IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN
518 new_mapping := FALSE;
520 ELSE -- Our fingerprint changed ... maybe remove the old MR
521 DELETE FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id AND source = bib_id; -- remove the old source mapping
522 SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id;
523 IF source_count = 0 THEN -- No other records
524 deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id);
525 DELETE FROM metabib.metarecord WHERE id = tmp_mr.id;
531 -- we found no suitable, preexisting MR based on old source maps
532 IF old_mr IS NULL THEN
533 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint?
535 IF old_mr IS NULL THEN -- nope, create one and grab its id
536 INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id );
537 SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp;
539 ELSE -- indeed there is. update it with a null cache and recalcualated master record
540 UPDATE metabib.metarecord
542 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
546 ELSE -- there was one we already attached to, update its mods cache and master_record
547 UPDATE metabib.metarecord
549 master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1)
554 INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping
559 IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN
560 UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved
566 $function$ LANGUAGE plpgsql;
568 CREATE OR REPLACE FUNCTION evergreen.marc_to (marc text, xfrm text) RETURNS TEXT AS $$
569 SELECT evergreen.xml_pretty_print(xslt_process($1,xslt)::XML)::TEXT FROM config.xml_transform WHERE name = $2;