1 -- Evergreen DB patch XXXX.schema.allow_spaces_as_ff_attr_values.sql
3 -- LP#1414112 - don't over-normalize record attribute values to
4 -- exclude all values that contain only blanks
8 -- check whether patch can be applied
9 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
11 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
18 xfrm config.xml_transform%ROWTYPE;
19 attr_vector INT[] := '{}'::INT[];
20 attr_vector_tmp INT[];
21 attr_list TEXT[] := pattr_list;
23 norm_attr_value TEXT[];
25 attr_def config.record_attr_definition%ROWTYPE;
26 ccvm_row config.coded_value_map%ROWTYPE;
29 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
30 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
34 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
37 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
39 attr_value := '{}'::TEXT[];
40 norm_attr_value := '{}'::TEXT[];
41 attr_vector_tmp := '{}'::INT[];
43 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
45 -- tag+sf attrs only support SVF
46 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
47 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
48 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
50 AND tag LIKE attr_def.tag
52 WHEN attr_def.sf_list IS NOT NULL
53 THEN POSITION(subfield IN attr_def.sf_list) > 0
60 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
61 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
63 IF NOT attr_def.multi THEN
64 attr_value := ARRAY[attr_value[1]];
67 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
69 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
71 -- See if we can skip the XSLT ... it's expensive
72 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
73 -- Can't skip the transform
74 IF xfrm.xslt <> '---' THEN
75 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
77 transformed_xml := rmarc;
80 prev_xfrm := xfrm.name;
83 IF xfrm.name IS NULL THEN
84 -- just grab the marcxml (empty) transform
85 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
86 prev_xfrm := xfrm.name;
89 FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
90 tmp_val := oils_xpath_string(
93 COALESCE(attr_def.joiner,' '),
94 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
96 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
97 attr_value := attr_value || tmp_val;
98 EXIT WHEN NOT attr_def.multi;
102 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
103 SELECT ARRAY_AGG(m.value) INTO attr_value
104 FROM vandelay.marc21_physical_characteristics(rmarc) v
105 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
106 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
107 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
109 IF NOT attr_def.multi THEN
110 attr_value := ARRAY[attr_value[1]];
115 -- apply index normalizers to attr_value
116 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
118 SELECT n.func AS func,
119 n.param_count AS param_count,
121 FROM config.index_normalizer n
122 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
123 WHERE attr = attr_def.name
125 EXECUTE 'SELECT ' || normalizer.func || '(' ||
126 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
128 WHEN normalizer.param_count > 0
129 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
135 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
136 -- note that a string that contains only blanks
137 -- is a valid value for some attributes
138 norm_attr_value := norm_attr_value || tmp_val;
142 IF attr_def.filter THEN
143 -- Create unknown uncontrolled values and find the IDs of the values
144 IF ccvm_row.id IS NULL THEN
145 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
146 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
147 BEGIN -- use subtransaction to isolate unique constraint violations
148 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
149 EXCEPTION WHEN unique_violation THEN END;
153 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
155 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
158 -- Add the new value to the vector
159 attr_vector := attr_vector || attr_vector_tmp;
162 IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
163 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
164 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
169 /* We may need to rewrite the vlist to contain
170 the intersection of new values for requested
171 attrs and old values for ignored attrs. To
172 do this, we take the old attr vlist and
173 subtract any values that are valid for the
174 requested attrs, and then add back the new
175 set of attr values. */
177 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
178 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
179 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
180 attr_vector := attr_vector || attr_vector_tmp;
183 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
184 -- attributes can depend on earlier ones.
185 PERFORM metabib.compile_composite_attr_cache_init();
186 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
188 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
190 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
191 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
193 IF attr_def.filter THEN
194 IF attr_vector @@ tmp_val::query_int THEN
195 attr_vector = attr_vector + intset(ccvm_row.id);
196 EXIT WHEN NOT attr_def.multi;
200 IF attr_def.sorter THEN
201 IF attr_vector @@ tmp_val THEN
202 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
203 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
211 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
212 IF rdeleted THEN -- initial insert OR revivication
213 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
214 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
216 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
222 $func$ LANGUAGE PLPGSQL;