3 SELECT evergreen.upgrade_deps_block_check('1157', :eg_version);
5 CREATE OR REPLACE FUNCTION vandelay.replace_field
6 (target_xml TEXT, source_xml TEXT, field TEXT) RETURNS TEXT AS $_$
11 use MARC::File::XML (BinaryEncoding => 'UTF-8');
14 MARC::Charset->assume_unicode(1);
16 my $target_xml = shift;
17 my $source_xml = shift;
18 my $field_spec = shift;
20 my $target_r = MARC::Record->new_from_xml($target_xml);
21 my $source_r = MARC::Record->new_from_xml($source_xml);
23 return $target_xml unless $target_r && $source_r;
25 # Extract the field_spec components into MARC tags, subfields,
26 # and regex matches. Copied wholesale from vandelay.strip_field()
28 my @field_list = split(',', $field_spec);
30 for my $f (@field_list) {
31 $f =~ s/^\s*//; $f =~ s/\s*$//;
32 if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
38 $match =~ s/^\s*//; $match =~ s/\s*$//;
39 $fields{$field} = { sf => [ split('', $sf) ] };
41 my ($msf,$mre) = split('~', $match);
42 if (length($msf) > 0 and length($mre) > 0) {
43 $msf =~ s/^\s*//; $msf =~ s/\s*$//;
44 $mre =~ s/^\s*//; $mre =~ s/\s*$//;
45 $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
51 # Returns a flat list of subfield (code, value, code, value, ...)
52 # suitable for adding to a MARC::Field.
53 sub generate_replacement_subfields {
54 my ($source_field, $target_field, @controlled_subfields) = @_;
56 # Performing a wholesale field replacment.
57 # Use the entire source field as-is.
58 return map {$_->[0], $_->[1]} $source_field->subfields
59 unless @controlled_subfields;
63 # Iterate over all target field subfields:
64 # 1. Keep uncontrolled subfields as is.
65 # 2. Replace values for controlled subfields when a
66 # replacement value exists on the source record.
67 # 3. Delete values for controlled subfields when no
68 # replacement value exists on the source record.
70 for my $target_sf ($target_field->subfields) {
71 my $subfield = $target_sf->[0];
72 my $target_val = $target_sf->[1];
74 if (grep {$_ eq $subfield} @controlled_subfields) {
75 if (my $source_val = $source_field->subfield($subfield)) {
76 # We have a replacement value
77 push(@new_subfields, $subfield, $source_val);
79 # no replacement value for controlled subfield, drop it.
82 # Field is not controlled. Copy it over as-is.
83 push(@new_subfields, $subfield, $target_val);
87 # Iterate over all subfields in the source field and back-fill
88 # any values that exist only in the source field. Insert these
89 # subfields in the same relative position they exist in the
93 for my $source_sf ($source_field->subfields) {
94 my $subfield = $source_sf->[0];
95 my $source_val = $source_sf->[1];
96 push(@seen_subfields, $subfield);
98 # target field already contains this subfield,
99 # so it would have been addressed above.
100 next if $target_field->subfield($subfield);
102 # Ignore uncontrolled subfields.
103 next unless grep {$_ eq $subfield} @controlled_subfields;
105 # Adding a new subfield. Find its relative position and add
106 # it to the list under construction. Work backwards from
107 # the list of already seen subfields to find the best slot.
110 for my $seen_sf (reverse(@seen_subfields)) {
111 my $idx = @new_subfields;
112 for my $new_sf (reverse(@new_subfields)) {
114 next if $idx % 2 == 1; # sf codes are in the even slots
116 if ($new_subfields[$idx] eq $seen_sf) {
117 splice(@new_subfields, $idx + 2, 0, $subfield, $source_val);
125 # if no slot was found, add to the end of the list.
126 push(@new_subfields, $subfield, $source_val) unless $done;
129 return @new_subfields;
133 for my $f (keys %fields) {
135 for my $target_field ($target_r->field($f)) {
137 # field spec contains a regex for this field. Confirm field on
138 # target record matches the specified regex before replacing.
139 if (exists($fields{$f}{match})) {
140 next unless (grep { $_ =~ $fields{$f}{match}{re} }
141 $target_field->subfield($fields{$f}{match}{sf}));
145 my @controlled_subfields = @{$fields{$f}{sf}};
147 # If the target record has multiple matching bib fields,
148 # replace them from matching fields on the source record
149 # in a predictable order to avoid replacing with them with
150 # same source field repeatedly.
151 my @source_fields = $source_r->field($f);
152 my $source_field = $source_fields[++$tag_idx];
154 if (!$source_field && @controlled_subfields) {
155 # When there are more target fields than source fields
156 # and we are replacing values for subfields and not
157 # performing wholesale field replacment, use the last
158 # available source field as the input for all remaining
160 $source_field = $source_fields[$#source_fields];
163 if (!$source_field) {
164 # No source field exists. Delete all affected target
165 # data. This is a little bit counterintuitive, but is
166 # backwards compatible with the previous version of this
167 # function which first deleted all affected data, then
168 # replaced values where possible.
169 if (@controlled_subfields) {
170 $target_field->delete_subfield($_) for @controlled_subfields;
172 $target_r->delete_field($target_field);
177 my @new_subfields = generate_replacement_subfields(
178 $source_field, $target_field, @controlled_subfields);
180 # Build the replacement field from scratch.
181 my $replacement_field = MARC::Field->new(
183 $target_field->indicator(1),
184 $target_field->indicator(2),
188 $target_field->replace_with($replacement_field);
192 $target_xml = $target_r->as_xml_record;
193 $target_xml =~ s/^<\?.+?\?>$//mo;
194 $target_xml =~ s/\n//sgo;
195 $target_xml =~ s/>\s+</></sgo;
199 $_$ LANGUAGE PLPERLU;