3 INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0966', :eg_version); -- miker/jpringle/gmcharlt
5 -- Allow NULL post-normalization sorters
6 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
13 xfrm config.xml_transform%ROWTYPE;
14 attr_vector INT[] := '{}'::INT[];
15 attr_vector_tmp INT[];
16 attr_list TEXT[] := pattr_list;
18 norm_attr_value TEXT[];
20 attr_def config.record_attr_definition%ROWTYPE;
21 ccvm_row config.coded_value_map%ROWTYPE;
24 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
25 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
29 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
32 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
34 attr_value := '{}'::TEXT[];
35 norm_attr_value := '{}'::TEXT[];
36 attr_vector_tmp := '{}'::INT[];
38 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
40 -- tag+sf attrs only support SVF
41 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
42 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
43 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
45 AND tag LIKE attr_def.tag
47 WHEN attr_def.sf_list IS NOT NULL
48 THEN POSITION(subfield IN attr_def.sf_list) > 0
55 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
56 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
58 IF NOT attr_def.multi THEN
59 attr_value := ARRAY[attr_value[1]];
62 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
64 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
66 -- See if we can skip the XSLT ... it's expensive
67 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
68 -- Can't skip the transform
69 IF xfrm.xslt <> '---' THEN
70 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
72 transformed_xml := rmarc;
75 prev_xfrm := xfrm.name;
78 IF xfrm.name IS NULL THEN
79 -- just grab the marcxml (empty) transform
80 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
81 prev_xfrm := xfrm.name;
84 FOR tmp_xml IN SELECT UNNEST(oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]])) LOOP
85 tmp_val := oils_xpath_string(
88 COALESCE(attr_def.joiner,' '),
89 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
91 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
92 attr_value := attr_value || tmp_val;
93 EXIT WHEN NOT attr_def.multi;
97 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
98 SELECT ARRAY_AGG(m.value) INTO attr_value
99 FROM vandelay.marc21_physical_characteristics(rmarc) v
100 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
101 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
102 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
104 IF NOT attr_def.multi THEN
105 attr_value := ARRAY[attr_value[1]];
110 -- apply index normalizers to attr_value
111 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
113 SELECT n.func AS func,
114 n.param_count AS param_count,
116 FROM config.index_normalizer n
117 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
118 WHERE attr = attr_def.name
120 EXECUTE 'SELECT ' || normalizer.func || '(' ||
121 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
123 WHEN normalizer.param_count > 0
124 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
130 IF tmp_val IS NOT NULL AND tmp_val <> '' THEN
131 -- note that a string that contains only blanks
132 -- is a valid value for some attributes
133 norm_attr_value := norm_attr_value || tmp_val;
137 IF attr_def.filter THEN
138 -- Create unknown uncontrolled values and find the IDs of the values
139 IF ccvm_row.id IS NULL THEN
140 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
141 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
142 BEGIN -- use subtransaction to isolate unique constraint violations
143 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
144 EXCEPTION WHEN unique_violation THEN END;
148 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
150 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
153 -- Add the new value to the vector
154 attr_vector := attr_vector || attr_vector_tmp;
157 IF attr_def.sorter THEN
158 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
159 IF norm_attr_value[1] IS NOT NULL THEN
160 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
166 /* We may need to rewrite the vlist to contain
167 the intersection of new values for requested
168 attrs and old values for ignored attrs. To
169 do this, we take the old attr vlist and
170 subtract any values that are valid for the
171 requested attrs, and then add back the new
172 set of attr values. */
174 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
175 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
176 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
177 attr_vector := attr_vector || attr_vector_tmp;
180 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
181 -- attributes can depend on earlier ones.
182 PERFORM metabib.compile_composite_attr_cache_init();
183 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
185 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
187 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
188 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
190 IF attr_def.filter THEN
191 IF attr_vector @@ tmp_val::query_int THEN
192 attr_vector = attr_vector + intset(ccvm_row.id);
193 EXIT WHEN NOT attr_def.multi;
197 IF attr_def.sorter THEN
198 IF attr_vector @@ tmp_val THEN
199 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
200 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
208 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
209 IF rdeleted THEN -- initial insert OR revivication
210 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
211 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
213 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
219 $func$ LANGUAGE PLPGSQL;
223 -- These are allowed to fail ...
224 INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
225 'Number or NULL Normalize',
226 'Normalize the value to NULL if it is not a number',
231 INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
232 'Approximate Low Date Normalize',
233 'Normalize the value to the nearest date-ish value, rounding down',
234 'approximate_low_date',
238 INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
239 'Approximate High Date Normalize',
240 'Normalize the value to the nearest date-ish value, rounding up',
241 'approximate_high_date',
245 INSERT INTO config.record_attr_index_norm_map (attr,norm,pos)
246 SELECT m.name, i.id, 0
247 FROM config.record_attr_definition m,
248 config.index_normalizer i
249 WHERE i.func IN ('integer_or_null')
250 AND m.name IN ('date2', 'pubdate');
252 INSERT INTO config.record_attr_index_norm_map (attr,norm,pos)
253 SELECT m.name, i.id, 0
254 FROM config.record_attr_definition m,
255 config.index_normalizer i
256 WHERE i.func IN ('approximate_low_date')
257 AND m.name IN ('date1');
259 INSERT INTO config.record_attr_index_norm_map (attr,norm,pos)
260 SELECT m.name, i.id, 0
261 FROM config.record_attr_definition m,
262 config.index_normalizer i
263 WHERE i.func IN ('approximate_high_date')
264 AND m.name IN ('date2');
266 -- Get rid of bad date1 sorter values so we can avoid a reingest
267 DELETE FROM metabib.record_sorter WHERE attr = 'pubdate' AND value !~ '^\d+$';