1 -- Evergreen DB patch 0904.schema.allow_spaces_as_ff_attr_values.sql
3 -- LP#1414112 - don't over-normalize record attribute values to
4 -- exclude all values that contain only blanks
8 -- check whether patch can be applied
9 SELECT evergreen.upgrade_deps_block_check('0904', :eg_version);
11 -- The code for "uncoded" audience is a space, but upgraded DBs may have a blank.
12 UPDATE config.coded_value_map
14 WHERE ctype = 'audience'
18 FROM config.coded_value_map
19 WHERE ctype = 'audience'
24 FROM config.coded_value_map
25 WHERE ctype = 'audience'
27 ) UPDATE metabib.record_attr_vector_list
28 SET vlist = vlist + intset(ccvm.id)
32 FROM metabib.real_full_rec
34 AND substring(value,23,1) = ' '
37 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
44 xfrm config.xml_transform%ROWTYPE;
45 attr_vector INT[] := '{}'::INT[];
46 attr_vector_tmp INT[];
47 attr_list TEXT[] := pattr_list;
49 norm_attr_value TEXT[];
51 attr_def config.record_attr_definition%ROWTYPE;
52 ccvm_row config.coded_value_map%ROWTYPE;
55 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
56 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
60 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
63 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
65 attr_value := '{}'::TEXT[];
66 norm_attr_value := '{}'::TEXT[];
67 attr_vector_tmp := '{}'::INT[];
69 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
71 -- tag+sf attrs only support SVF
72 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
73 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
74 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
76 AND tag LIKE attr_def.tag
78 WHEN attr_def.sf_list IS NOT NULL
79 THEN POSITION(subfield IN attr_def.sf_list) > 0
86 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
87 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
89 IF NOT attr_def.multi THEN
90 attr_value := ARRAY[attr_value[1]];
93 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
95 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
97 -- See if we can skip the XSLT ... it's expensive
98 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
99 -- Can't skip the transform
100 IF xfrm.xslt <> '---' THEN
101 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
103 transformed_xml := rmarc;
106 prev_xfrm := xfrm.name;
109 IF xfrm.name IS NULL THEN
110 -- just grab the marcxml (empty) transform
111 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
112 prev_xfrm := xfrm.name;
115 FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
116 tmp_val := oils_xpath_string(
119 COALESCE(attr_def.joiner,' '),
120 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
122 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
123 attr_value := attr_value || tmp_val;
124 EXIT WHEN NOT attr_def.multi;
128 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
129 SELECT ARRAY_AGG(m.value) INTO attr_value
130 FROM vandelay.marc21_physical_characteristics(rmarc) v
131 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
132 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
133 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
135 IF NOT attr_def.multi THEN
136 attr_value := ARRAY[attr_value[1]];
141 -- apply index normalizers to attr_value
142 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
144 SELECT n.func AS func,
145 n.param_count AS param_count,
147 FROM config.index_normalizer n
148 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
149 WHERE attr = attr_def.name
151 EXECUTE 'SELECT ' || normalizer.func || '(' ||
152 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
154 WHEN normalizer.param_count > 0
155 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
161 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
162 -- note that a string that contains only blanks
163 -- is a valid value for some attributes
164 norm_attr_value := norm_attr_value || tmp_val;
168 IF attr_def.filter THEN
169 -- Create unknown uncontrolled values and find the IDs of the values
170 IF ccvm_row.id IS NULL THEN
171 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
172 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
173 BEGIN -- use subtransaction to isolate unique constraint violations
174 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
175 EXCEPTION WHEN unique_violation THEN END;
179 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
181 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
184 -- Add the new value to the vector
185 attr_vector := attr_vector || attr_vector_tmp;
188 IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
189 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
190 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
195 /* We may need to rewrite the vlist to contain
196 the intersection of new values for requested
197 attrs and old values for ignored attrs. To
198 do this, we take the old attr vlist and
199 subtract any values that are valid for the
200 requested attrs, and then add back the new
201 set of attr values. */
203 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
204 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
205 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
206 attr_vector := attr_vector || attr_vector_tmp;
209 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
210 -- attributes can depend on earlier ones.
211 PERFORM metabib.compile_composite_attr_cache_init();
212 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
214 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
216 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
217 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
219 IF attr_def.filter THEN
220 IF attr_vector @@ tmp_val::query_int THEN
221 attr_vector = attr_vector + intset(ccvm_row.id);
222 EXIT WHEN NOT attr_def.multi;
226 IF attr_def.sorter THEN
227 IF attr_vector @@ tmp_val THEN
228 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
229 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
237 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
238 IF rdeleted THEN -- initial insert OR revivication
239 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
240 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
242 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
248 $func$ LANGUAGE PLPGSQL;