From 2d9c9d44a6caba0ca60799372dbfbefc72a20809 Mon Sep 17 00:00:00 2001 From: miker Date: Thu, 13 May 2010 16:44:14 +0000 Subject: [PATCH] teach the overlay functions about data matching REs; add a template-based merge instead of requiring a vandelay record, and use that for bib merging; minor logic collapsing git-svn-id: svn://svn.open-ils.org/ILS/trunk@16428 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- Open-ILS/src/sql/Pg/012.schema.vandelay.sql | 116 +++++-- .../0261.schema.vandelay.record_overlay.sql | 290 ++++++++++++++++++ 3 files changed, 381 insertions(+), 27 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index ec005fab1f..25ad274eb9 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -65,7 +65,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0260'); -- berick +INSERT INTO config.upgrade_log (version) VALUES ('0261'); -- miker CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql index eecddc657f..e3535c425c 100644 --- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql +++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql @@ -311,7 +311,6 @@ CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT my $target_xml = shift; my $source_xml = shift; my $field_spec = shift; - $field_spec =~ s/\s+//sg; my $target_r = MARC::Record->new_from_xml( $target_xml ); my $source_r = MARC::Record->new_from_xml( $source_xml ); @@ -322,8 +321,23 @@ CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT my %fields; for my $f (@field_list) { - if ($f =~ /^(.{3})(.*)$/) { - $fields{$1} = [ split('', $2) ]; + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } } } @@ -331,6 +345,9 @@ CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT if ( @{$fields{$f}} ) { for my $from_field ($source_r->field( $f )) { for my $to_field ($target_r->field( $f )) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf})); + } my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}}; $to_field->add_subfields( @new_sf ); } @@ -361,22 +378,41 @@ CREATE OR REPLACE FUNCTION vandelay.strip_field ( xml TEXT, field TEXT ) RETURNS return $xml unless ($r); my $field_spec = shift; - $field_spec =~ s/\s+//sg; - my @field_list = split(',', $field_spec); my %fields; for my $f (@field_list) { - if ($f =~ /^(.{3})(.*)$/) { - $fields{$1} = [ split('', $2) ]; + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } } } for my $f ( keys %fields) { - if ( @{$fields{$f}} ) { - $_->delete_subfield(code => $fields{$f}) for ($r->field( $f )); - } else { - $r->delete_field( $_ ) for ( $r->field( $f ) ); + for my $to_field ($r->field( $f )) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf})); + } + + if ( @{$fields{$f}} ) { + $to_field->delete_subfield(code => $fields{$f}{sf}); + } else { + $r->delete_field( $to_field ); + } } } @@ -443,7 +479,7 @@ BEGIN END; $_$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$ +CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$ DECLARE merge_profile vandelay.merge_profile%ROWTYPE; dyn_profile vandelay.compile_profile%ROWTYPE; @@ -452,7 +488,6 @@ DECLARE source_marc TEXT; target_marc TEXT; eg_marc TEXT; - v_marc TEXT; replace_rule TEXT; match_count INT; BEGIN @@ -462,13 +497,8 @@ BEGIN JOIN vandelay.bib_match m ON (m.eg_record = b.id AND m.queued_record = import_id) LIMIT 1; - SELECT q.marc INTO v_marc - FROM vandelay.queued_record q - JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id) - LIMIT 1; - IF eg_marc IS NULL OR v_marc IS NULL THEN - -- RAISE NOTICE 'no marc for vandelay or bib record'; + -- RAISE NOTICE 'no marc for template or bib record'; RETURN FALSE; END IF; @@ -503,7 +533,45 @@ BEGIN SET marc = vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule ) WHERE id = eg_id; - IF FOUND THEN + IF NOT FOUND THEN + -- RAISE NOTICE 'update of biblio.record_entry failed'; + RETURN FALSE; + END IF; + + RETURN TRUE; + +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT) RETURNS BOOL AS $$ + SELECT vandelay.template_overlay_bib_record( $1, $2, NULL); +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$ +DECLARE + merge_profile vandelay.merge_profile%ROWTYPE; + dyn_profile vandelay.compile_profile%ROWTYPE; + editor_string TEXT; + editor_id INT; + source_marc TEXT; + target_marc TEXT; + eg_marc TEXT; + v_marc TEXT; + replace_rule TEXT; + match_count INT; +BEGIN + + SELECT q.marc INTO v_marc + FROM vandelay.queued_record q + JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id) + LIMIT 1; + + IF v_marc IS NULL THEN + -- RAISE NOTICE 'no marc for vandelay or bib record'; + RETURN FALSE; + END IF; + + IF vandelay.template_overlay_bib_record( v_marc, eg_id, merge_profile_id) THEN UPDATE vandelay.queued_bib_record SET imported_as = eg_id, import_time = NOW() @@ -581,13 +649,11 @@ $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$ DECLARE queued_record vandelay.queued_bib_record%ROWTYPE; - success BOOL; BEGIN FOR queued_record IN SELECT * FROM vandelay.queued_bib_record WHERE queue = queue_id AND import_time IS NULL LOOP - success := vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id ); - IF success THEN + IF vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id ) THEN RETURN NEXT queued_record.id; END IF; @@ -1226,13 +1292,11 @@ $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$ DECLARE queued_record vandelay.queued_authority_record%ROWTYPE; - success BOOL; BEGIN FOR queued_record IN SELECT * FROM vandelay.queued_authority_record WHERE queue = queue_id AND import_time IS NULL LOOP - success := vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id ); - IF success THEN + IF vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id ) THEN RETURN NEXT queued_record.id; END IF; diff --git a/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql b/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql new file mode 100644 index 0000000000..dbd14e2ea2 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql @@ -0,0 +1,290 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0261'); --miker + +-- Teach add/delete field/subfield stored procs about data matching + +CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT ) RETURNS TEXT AS $_$ + + use MARC::Record; + use MARC::File::XML; + + my $target_xml = shift; + my $source_xml = shift; + my $field_spec = shift; + + my $target_r = MARC::Record->new_from_xml( $target_xml ); + my $source_r = MARC::Record->new_from_xml( $source_xml ); + + return $target_xml unless ($target_r && $source_r); + + my @field_list = split(',', $field_spec); + + my %fields; + for my $f (@field_list) { + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } + } + } + + for my $f ( keys %fields) { + if ( @{$fields{$f}} ) { + for my $from_field ($source_r->field( $f )) { + for my $to_field ($target_r->field( $f )) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf})); + } + my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}}; + $to_field->add_subfields( @new_sf ); + } + } + } else { + my @new_fields = map { $_->clone } $source_r->field( $f ); + $target_r->insert_fields_ordered( @new_fields ); + } + } + + $target_xml = $target_r->as_xml_record; + $target_xml =~ s/^<\?.+?\?>$//mo; + $target_xml =~ s/\n//sgo; + $target_xml =~ s/>\s+new_from_xml( $xml ); + + return $xml unless ($r); + + my $field_spec = shift; + my @field_list = split(',', $field_spec); + + my %fields; + for my $f (@field_list) { + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } + } + } + + for my $f ( keys %fields) { + for my $to_field ($r->field( $f )) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf})); + } + + if ( @{$fields{$f}} ) { + $to_field->delete_subfield(code => $fields{$f}{sf}); + } else { + $r->delete_field( $to_field ); + } + } + } + + $xml = $r->as_xml_record; + $xml =~ s/^<\?.+?\?>$//mo; + $xml =~ s/\n//sgo; + $xml =~ s/>\s+ '' AND dyn_profile.preserve_rule <> '' THEN + -- RAISE NOTICE 'both replace [%] and preserve [%] specified', dyn_profile.replace_rule, dyn_profile.preserve_rule; + RETURN FALSE; + END IF; + + IF dyn_profile.replace_rule <> '' THEN + source_marc = v_marc; + target_marc = eg_marc; + replace_rule = dyn_profile.replace_rule; + ELSE + source_marc = eg_marc; + target_marc = v_marc; + replace_rule = dyn_profile.preserve_rule; + END IF; + + UPDATE biblio.record_entry + SET marc = vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule ) + WHERE id = eg_id; + + IF NOT FOUND THEN + -- RAISE NOTICE 'update of biblio.record_entry failed'; + RETURN FALSE; + END IF; + + RETURN TRUE; + +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT) RETURNS BOOL AS $$ + SELECT vandelay.template_overlay_bib_record( $1, $2, NULL); +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$ +DECLARE + merge_profile vandelay.merge_profile%ROWTYPE; + dyn_profile vandelay.compile_profile%ROWTYPE; + editor_string TEXT; + editor_id INT; + source_marc TEXT; + target_marc TEXT; + eg_marc TEXT; + v_marc TEXT; + replace_rule TEXT; + match_count INT; +BEGIN + + SELECT q.marc INTO v_marc + FROM vandelay.queued_record q + JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id) + LIMIT 1; + + IF v_marc IS NULL THEN + -- RAISE NOTICE 'no marc for vandelay or bib record'; + RETURN FALSE; + END IF; + + IF vandelay.template_overlay_bib_record( v_marc, eg_id, merge_profile_id) THEN + UPDATE vandelay.queued_bib_record + SET imported_as = eg_id, + import_time = NOW() + WHERE id = import_id; + + editor_string := (oils_xpath('//*[@tag="905"]/*[@code="u"]/text()',v_marc))[1]; + + IF editor_string IS NOT NULL AND editor_string <> '' THEN + SELECT usr INTO editor_id FROM actor.card WHERE barcode = editor_string; + + IF editor_id IS NULL THEN + SELECT id INTO editor_id FROM actor.usr WHERE usrname = editor_string; + END IF; + + IF editor_id IS NOT NULL THEN + UPDATE biblio.record_entry SET editor = editor_id WHERE id = eg_id; + END IF; + END IF; + + RETURN TRUE; + END IF; + + -- RAISE NOTICE 'update of biblio.record_entry failed'; + + RETURN FALSE; + +END; +$$ LANGUAGE PLPGSQL; + +-- Minor cleanup of overlay test logic +CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$ +DECLARE + queued_record vandelay.queued_bib_record%ROWTYPE; +BEGIN + + FOR queued_record IN SELECT * FROM vandelay.queued_bib_record WHERE queue = queue_id AND import_time IS NULL LOOP + + IF vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id ) THEN + RETURN NEXT queued_record.id; + END IF; + + END LOOP; + + RETURN; + +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$ +DECLARE + queued_record vandelay.queued_authority_record%ROWTYPE; +BEGIN + + FOR queued_record IN SELECT * FROM vandelay.queued_authority_record WHERE queue = queue_id AND import_time IS NULL LOOP + + IF vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id ) THEN + RETURN NEXT queued_record.id; + END IF; + + END LOOP; + + RETURN; + +END; +$$ LANGUAGE PLPGSQL; + +COMMIT; + -- 2.43.2