5 use MARC::File::XML (BinaryEncoding => 'UTF-8');
# Command-line handling and one-time setup.
#   -r  filter expression (required); it is later handed to findnodes() to
#       select the nodes that get removed from each record
#   -f  sets $in_enc  (default 'MARC8')
#   -t  sets $out_enc (default 'UTF8')
# NOTE(review): $in_enc/$out_enc look like source/target character encodings,
# but they are not used in the visible part of the script -- confirm where
# (or whether) they take effect.
10 my ($out_enc, $in_enc, $filter) = ('UTF8','MARC8');
11 GetOptions('r=s' => \$filter, 'f=s' => \$in_enc, 't=s' => \$out_enc);
12 die("Please specify a filter with -r!\n") unless ($filter);
# Read USMARC records from the files named in @ARGV.
14 my $batch = MARC::Batch->new( 'USMARC', @ARGV );
# Direct method call rather than the indirect-object form `new XML::LibXML`,
# which is ambiguous to the parser and is disabled under `use v5.36`.
17 my $parser = XML::LibXML->new;
# Remember the file currently being processed so that progress output can
# announce when the batch moves on to another file.
20 my $current_file = $ARGV[0];
22 print STDERR "\nWorking on file $current_file ";
# Per-record processing: fetch a record, serialise it to XML, parse that XML
# into a DOM, and remove every node selected by the user's filter.
# NOTE(review): the loop, eval and error-check lines that surround these
# statements are not visible in this excerpt; $counter is declared elsewhere.
24 my $marc = $batch->next;
# Scratch variables for the record currently being handled.
27 my ($next,$xml,$doc,@nodes);
# Serialise the MARC record as XML.
30 $xml = $marc->as_xml();
32 print STDERR "\n ARG! I couldn't parse the MARC record (number $counter): $@\n";
# Parse the XML text into a document object.
39 $doc = $parser->parse_string($xml);
# NOTE(review): this message follows the XML parse step while the message
# above follows the as_xml() step; the two texts look swapped or imprecise --
# confirm against the full script before changing them.
41 print STDERR "\n ARG! I couldn't turn the MARC record into MARCXML (number $counter): $@\n";
# Evaluate the -r filter against the document's root element.
48 @nodes = $doc->documentElement->findnodes($filter);
50 print STDERR "\n ARG! I couldn't prune the MARCXML record (number $counter): $@\n";
# Detach the node from its parent (presumably $n iterates over @nodes -- the
# loop header is not visible here).
57 $n->parentNode->removeChild($n);
# Serialise the document in canonical (C14N) form, then flatten it onto a
# single line: newlines and tabs become spaces, and whitespace between
# adjacent tags is dropped.
# The former /s and /o modifiers were removed: /s only changes what "."
# matches and /o only matters for patterns that interpolate variables;
# neither applies to these literal patterns, so the result is unchanged.
60 my $string = $doc->toStringC14N;
61 $string =~ s/\n/ /g;
62 $string =~ s/\t/ /g;
63 $string =~ s/>\s+</></g;
# Progress reporting: this branch runs when $counter is a multiple of 1000.
67 unless ($counter % 1000) {
# If the batch has advanced to a different input file, remember the new name
# and announce it on STDERR.
68 if ($current_file ne $batch->filename) {
69 $current_file = $batch->filename;
70 print STDERR "\nWorking on file $current_file ";
# NOTE(review): the lines that close the blocks above, and the condition
# that guards the message below, are not visible in this excerpt.
78 print STDERR "\n ARG! I couldn't parse the MARC record (number $counter): $@\n";