3 SELECT evergreen.upgrade_deps_block_check('0874', :eg_version);
5 DROP FUNCTION IF EXISTS evergreen.oils_xpath( TEXT, TEXT, ANYARRAY);
6 DROP FUNCTION IF EXISTS public.oils_xpath(TEXT, TEXT, ANYARRAY);
7 DROP FUNCTION IF EXISTS public.oils_xpath(TEXT, TEXT);
8 DROP FUNCTION IF EXISTS public.oils_xslt_process(TEXT, TEXT);
10 CREATE OR REPLACE FUNCTION evergreen.xml_famous5_to_text( TEXT ) RETURNS TEXT AS $f$
15 REPLACE( $1, '<', '<'),
28 $f$ LANGUAGE SQL IMMUTABLE;
30 CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT, TEXT[] ) RETURNS TEXT[] AS $f$
32 CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
35 evergreen.xml_famous5_to_text(x)
38 FROM UNNEST(XPATH( $1, $2::XML, $3 )::TEXT[]) x;
39 $f$ LANGUAGE SQL IMMUTABLE;
41 -- Trust me, it's just simpler to duplicate these...
42 CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS $f$
44 CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
47 evergreen.xml_famous5_to_text(x)
50 FROM UNNEST(XPATH( $1, $2::XML)::TEXT[]) x;
51 $f$ LANGUAGE SQL IMMUTABLE;
53 CREATE OR REPLACE FUNCTION evergreen.oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
62 # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68
63 # methods of parsing XML documents and stylesheets, in the hopes of broader
64 # compatibility with distributions
65 my $parser = $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new();
67 # Cache the XML parser, if we do not already have one
68 $_SHARED{'_xslt_process'}{parsers}{xml} = $parser
69 unless ($_SHARED{'_xslt_process'}{parsers}{xml});
71 my $xslt_parser = $_SHARED{'_xslt_process'}{parsers}{xslt} || XML::LibXSLT->new();
73 # Cache the XSLT processor, if we do not already have one
74 $_SHARED{'_xslt_process'}{parsers}{xslt} = $xslt_parser
75 unless ($_SHARED{'_xslt_process'}{parsers}{xslt});
77 my $stylesheet = $_SHARED{'_xslt_process'}{stylesheets}{$xslt} ||
78 $xslt_parser->parse_stylesheet( $parser->parse_string($xslt) );
80 $_SHARED{'_xslt_process'}{stylesheets}{$xslt} = $stylesheet
81 unless ($_SHARED{'_xslt_process'}{stylesheets}{$xslt});
83 return $stylesheet->output_string(
84 $stylesheet->transform(
85 $parser->parse_string($doc)
89 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
91 CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
93 res authority.simple_heading%ROWTYPE;
94 acsaf authority.control_set_authority_field%ROWTYPE;
105 auth_id INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT;
108 SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
111 SELECT control_set INTO cset
112 FROM authority.control_set_authority_field
113 WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
117 res.record := auth_id;
119 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
121 res.atag := acsaf.id;
122 tag_used := acsaf.tag;
123 nfi_used := acsaf.nfi;
124 joiner_text := COALESCE(acsaf.joiner, ' ');
126 FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
128 heading_text := COALESCE(
129 oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
133 IF nfi_used IS NOT NULL THEN
135 sort_text := SUBSTRING(
140 oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
152 sort_text := heading_text;
155 IF heading_text IS NOT NULL AND heading_text <> '' THEN
156 res.value := heading_text;
157 res.sort_value := public.naco_normalize(sort_text);
158 res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
168 $func$ LANGUAGE PLPGSQL IMMUTABLE;
170 CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
178 current_selector url_verify.url_selector%ROWTYPE;
182 FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
183 current_url_pos := 1;
185 SELECT (oils_xpath(current_selector.xpath || '/text()', b.marc))[current_url_pos] INTO current_url
186 FROM biblio.record_entry b
187 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
188 WHERE c.id = item_id;
190 EXIT WHEN current_url IS NULL;
192 SELECT (oils_xpath(current_selector.xpath || '/../@tag', b.marc))[current_url_pos] INTO current_tag
193 FROM biblio.record_entry b
194 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
195 WHERE c.id = item_id;
197 IF current_tag IS NULL THEN
198 current_tag := last_seen_tag;
200 last_seen_tag := current_tag;
203 SELECT (oils_xpath(current_selector.xpath || '/@code', b.marc))[current_url_pos] INTO current_sf
204 FROM biblio.record_entry b
205 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
206 WHERE c.id = item_id;
208 INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
209 VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
211 current_url_pos := current_url_pos + 1;
212 current_ord := current_ord + 1;
216 RETURN current_ord - 1;
220 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
222 bib biblio.record_entry%ROWTYPE;
223 idx config.metabib_field%ROWTYPE;
224 xfrm config.xml_transform%ROWTYPE;
226 transformed_xml TEXT;
228 xml_node_list TEXT[];
234 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
236 authority_link BIGINT;
237 output_row metabib.field_entry_template%ROWTYPE;
240 -- Start out with no field-use bools set
241 output_row.browse_field = FALSE;
242 output_row.facet_field = FALSE;
243 output_row.search_field = FALSE;
246 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
248 -- Loop over the indexing entries
249 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
251 joiner := COALESCE(idx.joiner, default_joiner);
253 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
255 -- See if we can skip the XSLT ... it's expensive
256 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
257 -- Can't skip the transform
258 IF xfrm.xslt <> '---' THEN
259 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
261 transformed_xml := bib.marc;
264 prev_xfrm := xfrm.name;
267 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
270 FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
271 CONTINUE WHEN xml_node !~ E'^\\s*<';
273 -- XXX much of this should be moved into oils_xpath_string...
274 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
275 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
276 REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
277 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
281 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
283 IF raw_text IS NOT NULL THEN
284 raw_text := raw_text || joiner;
287 raw_text := COALESCE(raw_text,'') || curr_text;
289 -- autosuggest/metabib.browse_entry
290 IF idx.browse_field THEN
292 IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
293 browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
295 browse_text := curr_text;
298 IF idx.browse_sort_xpath IS NOT NULL AND
299 idx.browse_sort_xpath <> '' THEN
301 sort_value := oils_xpath_string(
302 idx.browse_sort_xpath, xml_node, joiner,
303 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
306 sort_value := browse_text;
309 output_row.field_class = idx.field_class;
310 output_row.field = idx.id;
311 output_row.source = rid;
312 output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
313 output_row.sort_value :=
314 public.naco_normalize(sort_value);
316 output_row.authority := NULL;
318 IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
319 authority_text := oils_xpath_string(
320 idx.authority_xpath, xml_node, joiner,
322 ARRAY[xfrm.prefix, xfrm.namespace_uri],
323 ARRAY['xlink','http://www.w3.org/1999/xlink']
327 IF authority_text ~ '^\d+$' THEN
328 authority_link := authority_text::BIGINT;
329 PERFORM * FROM authority.record_entry WHERE id = authority_link;
331 output_row.authority := authority_link;
337 output_row.browse_field = TRUE;
338 -- Returning browse rows with search_field = true for search+browse
339 -- configs allows us to retain granularity of being able to search
340 -- browse fields with "starts with" type operators (for example, for
341 -- titles of songs in music albums)
342 IF idx.search_field THEN
343 output_row.search_field = TRUE;
345 RETURN NEXT output_row;
346 output_row.browse_field = FALSE;
347 output_row.search_field = FALSE;
348 output_row.sort_value := NULL;
351 -- insert raw node text for faceting
352 IF idx.facet_field THEN
354 IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
355 facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
357 facet_text := curr_text;
360 output_row.field_class = idx.field_class;
361 output_row.field = -1 * idx.id;
362 output_row.source = rid;
363 output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
365 output_row.facet_field = TRUE;
366 RETURN NEXT output_row;
367 output_row.facet_field = FALSE;
372 CONTINUE WHEN raw_text IS NULL OR raw_text = '';
374 -- insert combined node text for searching
375 IF idx.search_field THEN
376 output_row.field_class = idx.field_class;
377 output_row.field = idx.id;
378 output_row.source = rid;
379 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
381 output_row.search_field = TRUE;
382 RETURN NEXT output_row;
383 output_row.search_field = FALSE;
390 $func$ LANGUAGE PLPGSQL;
392 CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
394 transformed_xml TEXT;
395 rmarc TEXT := prmarc;
399 xfrm config.xml_transform%ROWTYPE;
400 attr_vector INT[] := '{}'::INT[];
401 attr_vector_tmp INT[];
402 attr_list TEXT[] := pattr_list;
404 norm_attr_value TEXT[];
406 attr_def config.record_attr_definition%ROWTYPE;
407 ccvm_row config.coded_value_map%ROWTYPE;
410 IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
411 SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
414 IF rmarc IS NULL THEN
415 SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
418 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
420 attr_value := '{}'::TEXT[];
421 norm_attr_value := '{}'::TEXT[];
422 attr_vector_tmp := '{}'::INT[];
424 SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
426 -- tag+sf attrs only support SVF
427 IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
428 SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
429 FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
431 AND tag LIKE attr_def.tag
433 WHEN attr_def.sf_list IS NOT NULL
434 THEN POSITION(subfield IN attr_def.sf_list) > 0
441 ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
442 attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
444 IF NOT attr_def.multi THEN
445 attr_value := ARRAY[attr_value[1]];
448 ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
450 SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
452 -- See if we can skip the XSLT ... it's expensive
453 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
454 -- Can't skip the transform
455 IF xfrm.xslt <> '---' THEN
456 transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
458 transformed_xml := rmarc;
461 prev_xfrm := xfrm.name;
464 IF xfrm.name IS NULL THEN
465 -- just grab the marcxml (empty) transform
466 SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
467 prev_xfrm := xfrm.name;
470 FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
471 tmp_val := oils_xpath_string(
474 COALESCE(attr_def.joiner,' '),
475 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
477 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
478 attr_value := attr_value || tmp_val;
479 EXIT WHEN NOT attr_def.multi;
483 ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
484 SELECT ARRAY_AGG(m.value) INTO attr_value
485 FROM vandelay.marc21_physical_characteristics(rmarc) v
486 LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
487 WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
488 AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
490 IF NOT attr_def.multi THEN
491 attr_value := ARRAY[attr_value[1]];
496 -- apply index normalizers to attr_value
497 FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
499 SELECT n.func AS func,
500 n.param_count AS param_count,
502 FROM config.index_normalizer n
503 JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
504 WHERE attr = attr_def.name
506 EXECUTE 'SELECT ' || normalizer.func || '(' ||
507 COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
509 WHEN normalizer.param_count > 0
510 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
516 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
517 norm_attr_value := norm_attr_value || tmp_val;
521 IF attr_def.filter THEN
522 -- Create unknown uncontrolled values and find the IDs of the values
523 IF ccvm_row.id IS NULL THEN
524 FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
525 IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
526 BEGIN -- use subtransaction to isolate unique constraint violations
527 INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
528 EXCEPTION WHEN unique_violation THEN END;
532 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
534 SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
537 -- Add the new value to the vector
538 attr_vector := attr_vector || attr_vector_tmp;
541 IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
542 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
543 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
548 /* We may need to rewrite the vlist to contain
549 the intersection of new values for requested
550 attrs and old values for ignored attrs. To
551 do this, we take the old attr vlist and
552 subtract any values that are valid for the
553 requested attrs, and then add back the new
554 set of attr values. */
556 IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN
557 SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
558 SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
559 attr_vector := attr_vector || attr_vector_tmp;
562 -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite
563 -- attributes can depend on earlier ones.
564 PERFORM metabib.compile_composite_attr_cache_init();
565 FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
567 FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
569 tmp_val := metabib.compile_composite_attr( ccvm_row.id );
570 CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
572 IF attr_def.filter THEN
573 IF attr_vector @@ tmp_val::query_int THEN
574 attr_vector = attr_vector + intset(ccvm_row.id);
575 EXIT WHEN NOT attr_def.multi;
579 IF attr_def.sorter THEN
580 IF attr_vector @@ tmp_val THEN
581 DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
582 INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
590 IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
591 IF rdeleted THEN -- initial insert OR revivication
592 DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
593 INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
595 UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
601 $func$ LANGUAGE PLPGSQL;
605 \qecho This script will now attempt a "quick fix" of browse_entry only.
606 \qecho If you have issues, a browse or full reingest is recommended.
607 \qecho You may cancel now without losing the effect of the rest of the
608 \qecho upgrade script, and arrange the reingest later.
610 UPDATE metabib.browse_entry SET value=evergreen.xml_famous5_to_text(value) WHERE value LIKE '%&%';
611 UPDATE metabib.browse_entry SET sort_value=evergreen.xml_famous5_to_text(sort_value) WHERE sort_value LIKE '%&%';