1 -- Evergreen DB patch 0556.unnest_biblio_extract_metabib_field_entry.sql
3 -- Replace usage of custom explode_array() function with native unnest()
7 -- check whether patch can be applied
8 SELECT evergreen.upgrade_deps_block_check('0556', :eg_version);
10 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
12 bib biblio.record_entry%ROWTYPE;
13 idx config.metabib_field%ROWTYPE;
14 xfrm config.xml_transform%ROWTYPE;
22 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
23 output_row metabib.field_entry_template%ROWTYPE;
27 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
29 -- Loop over the indexing entries
30 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
32 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
34 -- See if we can skip the XSLT ... it's expensive
35 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
36 -- Can't skip the transform
37 IF xfrm.xslt <> '---' THEN
38 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
40 transformed_xml := bib.marc;
43 prev_xfrm := xfrm.name;
46 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
49 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
50 CONTINUE WHEN xml_node !~ E'^\\s*<';
52 curr_text := ARRAY_TO_STRING(
53 oils_xpath( '//text()',
54 REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
55 REGEXP_REPLACE( -- This escapes embeded <s
57 $re$(>[^<]+)(<)([^>]+<)$re$,
69 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
71 IF raw_text IS NOT NULL THEN
72 raw_text := raw_text || joiner;
75 raw_text := COALESCE(raw_text,'') || curr_text;
77 -- insert raw node text for faceting
78 IF idx.facet_field THEN
80 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
81 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
83 facet_text := curr_text;
86 output_row.field_class = idx.field_class;
87 output_row.field = -1 * idx.id;
88 output_row.source = rid;
89 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
91 RETURN NEXT output_row;
96 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
98 -- insert combined node text for searching
99 IF idx.search_field THEN
100 output_row.field_class = idx.field_class;
101 output_row.field = idx.id;
102 output_row.source = rid;
103 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
105 RETURN NEXT output_row;
111 $func$ LANGUAGE PLPGSQL;