4 INSERT INTO config.upgrade_log (version) VALUES ('0144'); -- miker
6 CREATE FUNCTION version_specific_xpath () RETURNS TEXT AS $wrapper_function$
11 IF REGEXP_REPLACE(VERSION(),E'^.+?(\\d+\\.\\d+).*?$',E'\\1')::FLOAT < 8.3 THEN
12 out_text := 'Creating XPath functions that work like the native XPATH function in 8.3+. contrib/xml2 is required!';
14 EXECUTE $create_82_funcs$
16 CREATE OR REPLACE FUNCTION oils_xpath ( xpath TEXT, xml TEXT, ns ANYARRAY ) RETURNS TEXT[] AS $func$
23 munged_xpath := xpath;
25 IF ns IS NOT NULL THEN
26 FOR namespace IN 1 .. array_upper(ns, 1) LOOP
27 munged_xpath := REGEXP_REPLACE(
29 E'(' || ns[namespace][1] || E'):(\\w+)',
30 E'*[local-name() = "\\2" and namespace-uri() = "' || ns[namespace][2] || E'"]',
35 munged_xpath := REGEXP_REPLACE( munged_xpath, E'\\]\\[(\\D)',E' and \\1', 'g');
38 -- RAISE NOTICE 'munged xpath: %', munged_xpath;
40 node_text := xpath_nodeset(xml, munged_xpath, 'XXX_OILS_NODESET');
41 -- RAISE NOTICE 'node_text: %', node_text;
43 IF munged_xpath ~ $re$/[^/[]*@[^/]+$$re$ THEN
44 node_text := REGEXP_REPLACE(node_text,'<XXX_OILS_NODESET>[^"]+"', '<XXX_OILS_NODESET>', 'g');
45 node_text := REGEXP_REPLACE(node_text,'"</XXX_OILS_NODESET>', '</XXX_OILS_NODESET>', 'g');
48 node_text := REGEXP_REPLACE(node_text,'^<XXX_OILS_NODESET>', '');
49 node_text := REGEXP_REPLACE(node_text,'</XXX_OILS_NODESET>$', '');
51 RETURN STRING_TO_ARRAY(node_text, '</XXX_OILS_NODESET><XXX_OILS_NODESET>');
53 $func$ LANGUAGE PLPGSQL;
55 CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS 'SELECT oils_xpath( $1, $2, NULL::TEXT[] );' LANGUAGE SQL;
57 CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $$
58 SELECT xslt_process( $1, $2 );
62 ELSIF REGEXP_REPLACE(VERSION(),E'^.+?(\\d+\\.\\d+).*?$',E'\\1')::FLOAT = 8.3 THEN
63 out_text := 'Creating XPath wrapper functions around the native XPATH function in 8.3. contrib/xml2 still required!';
65 EXECUTE $create_83_funcs$
67 CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT, ANYARRAY ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML, $3 )::TEXT[];' LANGUAGE SQL;
68 CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML )::TEXT[];' LANGUAGE SQL;
70 CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $$
71 SELECT xslt_process( $1, $2 );
77 out_text := 'Creating XPath wrapper functions around the native XPATH function in 8.4+, and plperlu-based xslt processor. No contrib/xml2 needed!';
79 EXECUTE $create_84_funcs$
81 CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT, ANYARRAY ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML, $3 )::TEXT[];' LANGUAGE SQL;
82 CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML )::TEXT[];' LANGUAGE SQL;
84 CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
93 # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68
94 # methods of parsing XML documents and stylesheets, in the hopes of broader
95 # compatibility with distributions
96 my $parser = $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new();
98 # Cache the XML parser, if we do not already have one
99 $_SHARED{'_xslt_process'}{parsers}{xml} = $parser
100 unless ($_SHARED{'_xslt_process'}{parsers}{xml});
102 my $xslt_parser = $_SHARED{'_xslt_process'}{parsers}{xslt} || XML::LibXSLT->new();
104 # Cache the XSLT processor, if we do not already have one
105 $_SHARED{'_xslt_process'}{parsers}{xslt} = $xslt_parser
106 unless ($_SHARED{'_xslt_process'}{parsers}{xslt});
108 my $stylesheet = $_SHARED{'_xslt_process'}{stylesheets}{$xslt} ||
109 $xslt_parser->parse_stylesheet( $parser->parse_string($xslt) );
111 $_SHARED{'_xslt_process'}{stylesheets}{$xslt} = $stylesheet
112 unless ($_SHARED{'_xslt_process'}{stylesheets}{$xslt});
114 return $stylesheet->output_string(
115 $stylesheet->transform(
116 $parser->parse_string($doc)
120 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
127 $wrapper_function$ LANGUAGE PLPGSQL;
129 SELECT version_specific_xpath();
130 DROP FUNCTION version_specific_xpath();
132 CREATE OR REPLACE FUNCTION oils_xpath_string ( TEXT, TEXT, TEXT, ANYARRAY ) RETURNS TEXT AS $func$
133 SELECT ARRAY_TO_STRING(
136 CASE WHEN $1 ~ $re$/[^/[]*@[^/]+$$re$ OR $1 ~ $re$text\(\)$$re$ THEN '' ELSE '//text()' END,
144 CREATE OR REPLACE FUNCTION oils_xpath_string ( TEXT, TEXT, TEXT ) RETURNS TEXT AS $func$
145 SELECT oils_xpath_string( $1, $2, $3, NULL::TEXT[] );
148 CREATE OR REPLACE FUNCTION oils_xpath_string ( TEXT, TEXT, ANYARRAY ) RETURNS TEXT AS $func$
149 SELECT oils_xpath_string( $1, $2, '', $3 );
152 CREATE OR REPLACE FUNCTION oils_xpath_string ( TEXT, TEXT ) RETURNS TEXT AS $func$
153 SELECT oils_xpath_string( $1, $2, NULL::TEXT[] );
156 CREATE OR REPLACE FUNCTION oils_xpath_table ( key TEXT, document_field TEXT, relation_name TEXT, xpaths TEXT, criteria TEXT ) RETURNS SETOF RECORD AS $func$
165 xpath_list := STRING_TO_ARRAY( xpaths, '|' );
167 select_list := ARRAY_APPEND( select_list, key || '::INT AS key' );
169 FOR i IN 1 .. ARRAY_UPPER(xpath_list,1) LOOP
170 select_list := ARRAY_APPEND(
180 WHEN xpath_list[i] ~ $re$/[^/[]*@[^/]+$$re$ OR xpath_list[i] ~ $re$text\(\)$$re$ THEN xpath_list[i]
181 ELSE xpath_list[i] || '//text()'
185 $sel$ || document_field || $sel$
193 where_list := ARRAY_APPEND(
195 'c_' || i || ' IS NOT NULL'
201 SELECT $q$ || ARRAY_TO_STRING( select_list, ', ' ) || $q$ FROM $q$ || relation_name || $q$ WHERE ($q$ || criteria || $q$)
202 )x WHERE $q$ || ARRAY_TO_STRING( where_list, ' AND ' );
203 -- RAISE NOTICE 'query: %', q;
205 FOR out_record IN EXECUTE q LOOP
206 RETURN NEXT out_record;
211 $func$ LANGUAGE PLPGSQL;
213 CREATE OR REPLACE FUNCTION extract_marc_field ( TEXT, BIGINT, TEXT, TEXT ) RETURNS TEXT AS $$
218 SELECT regexp_replace(
220 $q$ || quote_literal($3) || $q$,
224 $q$ || quote_literal($4) || $q$,
227 FROM $q$ || $1 || $q$
228 WHERE id = $q$ || $2 INTO output;
233 CREATE OR REPLACE FUNCTION extract_marc_field ( TEXT, BIGINT, TEXT ) RETURNS TEXT AS $$
234 SELECT extract_marc_field($1,$2,$3,'');
237 CREATE OR REPLACE FUNCTION asset.merge_record_assets( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
239 moved_objects INT := 0;
240 source_cn asset.call_number%ROWTYPE;
241 target_cn asset.call_number%ROWTYPE;
242 metarec metabib.metarecord%ROWTYPE;
243 hold action.hold_request%ROWTYPE;
244 ser_rec serial.record_entry%ROWTYPE;
251 -- move any 856 entries on records that have at least one MARC-mapped URI entry
252 SELECT INTO uri_count COUNT(*)
253 FROM asset.uri_call_number_map m
254 JOIN asset.call_number cn ON (m.call_number = cn.id)
255 WHERE cn.record = source_record;
257 IF uri_count > 0 THEN
259 SELECT COUNT(*) INTO counter
260 FROM oils_xpath_table(
263 'biblio.record_entry',
266 ) as t(i int,c text);
268 FOR i IN 1 .. counter LOOP
269 SELECT '<datafield xmlns="http://www.loc.gov/MARC21/slim" tag="856">' ||
272 '<subfield code="' || subfield || '">' ||
275 regexp_replace(data,'&','&','g'),
281 ) || '</datafield>' INTO uri_datafield
282 FROM oils_xpath_table(
285 'biblio.record_entry',
286 '//*[@tag="856"][position()=' || i || ']/*/@code|' ||
287 '//*[@tag="856"][position()=' || i || ']/*[@code]',
288 'id=' || source_record
289 ) as t(id int,subfield text,data text);
291 uri_text := uri_text || uri_datafield;
294 IF uri_text <> '' THEN
295 UPDATE biblio.record_entry
296 SET marc = regexp_replace(marc,'(</[^>]*record>)', uri_text || E'\\1')
297 WHERE id = target_record;
302 -- Find and move metarecords to the target record
303 SELECT INTO metarec *
304 FROM metabib.metarecord
305 WHERE master_record = source_record;
308 UPDATE metabib.metarecord
309 SET master_record = target_record,
311 WHERE id = metarec.id;
313 moved_objects := moved_objects + 1;
316 -- Find call numbers attached to the source ...
317 FOR source_cn IN SELECT * FROM asset.call_number WHERE record = source_record LOOP
319 SELECT INTO target_cn *
320 FROM asset.call_number
321 WHERE label = source_cn.label
322 AND owning_lib = source_cn.owning_lib
323 AND record = target_record;
325 -- ... and if there's a conflicting one on the target ...
328 -- ... move the copies to that, and ...
330 SET call_number = target_cn.id
331 WHERE call_number = source_cn.id;
333 -- ... move V holds to the move-target call number
334 FOR hold IN SELECT * FROM action.hold_request WHERE target = source_cn.id AND hold_type = 'V' LOOP
336 UPDATE action.hold_request
337 SET target = target_cn.id
340 moved_objects := moved_objects + 1;
345 -- ... just move the call number to the target record
346 UPDATE asset.call_number
347 SET record = target_record
348 WHERE id = source_cn.id;
351 moved_objects := moved_objects + 1;
354 -- Find T holds targeting the source record ...
355 FOR hold IN SELECT * FROM action.hold_request WHERE target = source_record AND hold_type = 'T' LOOP
357 -- ... and move them to the target record
358 UPDATE action.hold_request
359 SET target = target_record
362 moved_objects := moved_objects + 1;
365 -- Find serial records targeting the source record ...
366 FOR ser_rec IN SELECT * FROM serial.record_entry WHERE record = source_record LOOP
367 -- ... and move them to the target record
368 UPDATE serial.record_entry
369 SET record = target_record
370 WHERE id = ser_rec.id;
372 moved_objects := moved_objects + 1;
375 -- Finally, "delete" the source record
376 DELETE FROM biblio.record_entry WHERE id = source_record;
378 -- That's all, folks!
379 RETURN moved_objects;
381 $func$ LANGUAGE plpgsql;
383 CREATE OR REPLACE FUNCTION vandelay.ingest_items ( import_id BIGINT, attr_def_id BIGINT ) RETURNS SETOF vandelay.import_item AS $$
408 attr_set vandelay.import_item%ROWTYPE;
414 SELECT * INTO attr_def FROM vandelay.import_item_attr_definition WHERE id = attr_def_id;
418 attr_set.definition := attr_def.id;
420 -- Build the combined XPath
424 WHEN attr_def.owning_lib IS NULL THEN 'null()'
425 WHEN LENGTH( attr_def.owning_lib ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.owning_lib || '"]'
426 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.owning_lib
431 WHEN attr_def.circ_lib IS NULL THEN 'null()'
432 WHEN LENGTH( attr_def.circ_lib ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_lib || '"]'
433 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_lib
438 WHEN attr_def.call_number IS NULL THEN 'null()'
439 WHEN LENGTH( attr_def.call_number ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.call_number || '"]'
440 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.call_number
445 WHEN attr_def.copy_number IS NULL THEN 'null()'
446 WHEN LENGTH( attr_def.copy_number ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.copy_number || '"]'
447 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.copy_number
452 WHEN attr_def.status IS NULL THEN 'null()'
453 WHEN LENGTH( attr_def.status ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.status || '"]'
454 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.status
459 WHEN attr_def.location IS NULL THEN 'null()'
460 WHEN LENGTH( attr_def.location ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.location || '"]'
461 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.location
466 WHEN attr_def.circulate IS NULL THEN 'null()'
467 WHEN LENGTH( attr_def.circulate ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circulate || '"]'
468 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circulate
473 WHEN attr_def.deposit IS NULL THEN 'null()'
474 WHEN LENGTH( attr_def.deposit ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.deposit || '"]'
475 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.deposit
480 WHEN attr_def.deposit_amount IS NULL THEN 'null()'
481 WHEN LENGTH( attr_def.deposit_amount ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.deposit_amount || '"]'
482 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.deposit_amount
487 WHEN attr_def.ref IS NULL THEN 'null()'
488 WHEN LENGTH( attr_def.ref ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.ref || '"]'
489 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.ref
494 WHEN attr_def.holdable IS NULL THEN 'null()'
495 WHEN LENGTH( attr_def.holdable ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.holdable || '"]'
496 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.holdable
501 WHEN attr_def.price IS NULL THEN 'null()'
502 WHEN LENGTH( attr_def.price ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.price || '"]'
503 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.price
508 WHEN attr_def.barcode IS NULL THEN 'null()'
509 WHEN LENGTH( attr_def.barcode ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.barcode || '"]'
510 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.barcode
515 WHEN attr_def.circ_modifier IS NULL THEN 'null()'
516 WHEN LENGTH( attr_def.circ_modifier ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_modifier || '"]'
517 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_modifier
522 WHEN attr_def.circ_as_type IS NULL THEN 'null()'
523 WHEN LENGTH( attr_def.circ_as_type ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_as_type || '"]'
524 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_as_type
529 WHEN attr_def.alert_message IS NULL THEN 'null()'
530 WHEN LENGTH( attr_def.alert_message ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.alert_message || '"]'
531 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.alert_message
536 WHEN attr_def.opac_visible IS NULL THEN 'null()'
537 WHEN LENGTH( attr_def.opac_visible ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.opac_visible || '"]'
538 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.opac_visible
543 WHEN attr_def.pub_note IS NULL THEN 'null()'
544 WHEN LENGTH( attr_def.pub_note ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.pub_note || '"]'
545 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.pub_note
549 WHEN attr_def.priv_note IS NULL THEN 'null()'
550 WHEN LENGTH( attr_def.priv_note ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.priv_note || '"]'
551 ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.priv_note
558 call_number || '|' ||
559 copy_number || '|' ||
564 deposit_amount || '|' ||
569 circ_modifier || '|' ||
570 circ_as_type || '|' ||
571 alert_message || '|' ||
576 -- RAISE NOTICE 'XPath: %', xpath;
580 FROM oils_xpath_table( 'id', 'marc', 'vandelay.queued_bib_record', xpath, 'id = ' || import_id )
581 AS t( id INT, ol TEXT, clib TEXT, cn TEXT, cnum TEXT, cs TEXT, cl TEXT, circ TEXT,
582 dep TEXT, dep_amount TEXT, r TEXT, hold TEXT, pr TEXT, bc TEXT, circ_mod TEXT,
583 circ_as TEXT, amessage TEXT, note TEXT, pnote TEXT, opac_vis TEXT )
586 tmp_attr_set.pr = REGEXP_REPLACE(tmp_attr_set.pr, E'[^0-9\\.]', '', 'g');
587 tmp_attr_set.dep_amount = REGEXP_REPLACE(tmp_attr_set.dep_amount, E'[^0-9\\.]', '', 'g');
589 tmp_attr_set.pr := NULLIF( tmp_attr_set.pr, '' );
590 tmp_attr_set.dep_amount := NULLIF( tmp_attr_set.dep_amount, '' );
592 SELECT id INTO attr_set.owning_lib FROM actor.org_unit WHERE shortname = UPPER(tmp_attr_set.ol); -- INT
593 SELECT id INTO attr_set.circ_lib FROM actor.org_unit WHERE shortname = UPPER(tmp_attr_set.clib); -- INT
594 SELECT id INTO attr_set.status FROM config.copy_status WHERE LOWER(name) = LOWER(tmp_attr_set.cs); -- INT
596 SELECT id INTO attr_set.location
597 FROM asset.copy_location
598 WHERE LOWER(name) = LOWER(tmp_attr_set.cl)
599 AND asset.copy_location.owning_lib = COALESCE(attr_set.owning_lib, attr_set.circ_lib); -- INT
601 attr_set.circulate :=
602 LOWER( SUBSTRING( tmp_attr_set.circ, 1, 1)) IN ('t','y','1')
603 OR LOWER(tmp_attr_set.circ) = 'circulating'; -- BOOL
606 LOWER( SUBSTRING( tmp_attr_set.dep, 1, 1 ) ) IN ('t','y','1')
607 OR LOWER(tmp_attr_set.dep) = 'deposit'; -- BOOL
610 LOWER( SUBSTRING( tmp_attr_set.hold, 1, 1 ) ) IN ('t','y','1')
611 OR LOWER(tmp_attr_set.hold) = 'holdable'; -- BOOL
613 attr_set.opac_visible :=
614 LOWER( SUBSTRING( tmp_attr_set.opac_vis, 1, 1 ) ) IN ('t','y','1')
615 OR LOWER(tmp_attr_set.opac_vis) = 'visible'; -- BOOL
618 LOWER( SUBSTRING( tmp_attr_set.r, 1, 1 ) ) IN ('t','y','1')
619 OR LOWER(tmp_attr_set.r) = 'reference'; -- BOOL
621 attr_set.copy_number := tmp_attr_set.cnum::INT; -- INT,
622 attr_set.deposit_amount := tmp_attr_set.dep_amount::NUMERIC(6,2); -- NUMERIC(6,2),
623 attr_set.price := tmp_attr_set.pr::NUMERIC(8,2); -- NUMERIC(8,2),
625 attr_set.call_number := tmp_attr_set.cn; -- TEXT
626 attr_set.barcode := tmp_attr_set.bc; -- TEXT,
627 attr_set.circ_modifier := tmp_attr_set.circ_mod; -- TEXT,
628 attr_set.circ_as_type := tmp_attr_set.circ_as; -- TEXT,
629 attr_set.alert_message := tmp_attr_set.amessage; -- TEXT,
630 attr_set.pub_note := tmp_attr_set.note; -- TEXT,
631 attr_set.priv_note := tmp_attr_set.pnote; -- TEXT,
632 attr_set.alert_message := tmp_attr_set.amessage; -- TEXT,
634 RETURN NEXT attr_set;
645 CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
647 bib biblio.record_entry%ROWTYPE;
648 idx config.metabib_field%ROWTYPE;
649 xfrm config.xml_transform%ROWTYPE;
651 transformed_xml TEXT;
653 xml_node_list TEXT[];
655 joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
656 output_row metabib.field_entry_template%ROWTYPE;
660 SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
662 -- Loop over the indexing entries
663 FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
665 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
667 -- RAISE NOTICE 'idx: %, xfrm: %', idx.id, xfrm.name;
669 -- See if we can skip the XSLT ... it's expensive
670 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
671 -- Can't skip the transform
672 IF xfrm.xslt <> '---' THEN
673 transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
675 transformed_xml := bib.marc;
678 prev_xfrm := xfrm.name;
681 xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
684 FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP
685 IF raw_text IS NOT NULL THEN
686 raw_text := raw_text || joiner;
688 raw_text := COALESCE(raw_text,'') || ARRAY_TO_STRING(oils_xpath( '//text()', xml_node ), ' ');
691 CONTINUE WHEN raw_text IS NULL;
693 output_row.field_class = idx.field_class;
694 output_row.field = idx.id;
695 output_row.source = rid;
696 output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
698 RETURN NEXT output_row;
700 -- RAISE NOTICE 'output: %', output_row;
707 $func$ LANGUAGE PLPGSQL;
709 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
711 idx config.biblio_fingerprint%ROWTYPE;
712 xfrm config.xml_transform%ROWTYPE;
714 transformed_xml TEXT;
716 xml_node_list TEXT[];
718 output_text TEXT := '';
721 IF marc IS NULL OR marc = '' THEN
725 -- Loop over the indexing entries
726 FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
728 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
730 -- See if we can skip the XSLT ... it's expensive
731 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
732 -- Can't skip the transform
733 IF xfrm.xslt <> '---' THEN
734 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
736 transformed_xml := marc;
739 prev_xfrm := xfrm.name;
742 raw_text := COALESCE(
750 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
759 raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
760 raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
762 IF idx.first_word IS TRUE THEN
763 raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
766 output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
773 $func$ LANGUAGE PLPGSQL;