1 package MARC::File::SAX;
3 ## no POD here since you don't really want to use this module
4 ## directly. Look at MARC::File::XML instead.
6 ## MARC::File::SAX is a SAX handler for parsing XML encoded using the
7 ## MARC21slim XML schema from the Library of Congress. It builds a MARC::Record
8 ## object up from SAX events.
10 ## For more details see: http://www.loc.gov/standards/marcxml/
14 use base qw( XML::SAX::Base );
## Shared MARC::Charset object. save_space_in_utf8() calls its to_marc8()
## method on every chunk of text it converts.
19 my $charset = MARC::Charset->new();
## Looks at the name of the element described by $element and records on
## $self which field/subfield is now open. Nothing is added to the record
## here; only the tag, indicator and subfield-code slots of $self are set.
23 my ( $self, $element ) = @_;
24 my $name = $element->{ Name };
25 if ( $name eq 'leader' ) {
# No attribute is read for the leader; the literal 'LDR' stands in as its tag.
26 $self->{ tag } = 'LDR';
27 } elsif ( $name eq 'controlfield' ) {
# Attribute keys are spelled '{}tag', '{}ind1', etc.
# NOTE(review): presumably the SAX2 '{NamespaceURI}LocalName' key form with
# an empty namespace -- confirm against the parser driving this handler.
28 $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
29 } elsif ( $name eq 'datafield' ) {
# A data field carries a tag plus two indicators (stored as i1 / i2).
30 $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
31 $self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
32 $self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
33 } elsif ( $name eq 'subfield' ) {
# The subfield code is remembered until the subfield's text has been used.
34 $self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
## Dispatches on the element name in $element and moves the text gathered in
## $self->{ chars } into the record being built, then clears the per-element
## state. All text is passed through save_space_in_utf8() before it is stored.
39 my ( $self, $element ) = @_;
40 my $name = $element->{ Name };
41 if ( $name eq 'subfield' ) {
# Queue a (code, value) pair; the pairs are consumed in the 'datafield' branch.
42 push( @{ $self->{ subfields } }, $self->{ subcode }, save_space_in_utf8($self->{ chars }) );
# Reset so text from a later element is not appended to this subfield's value.
43 $self->{ chars } = '';
44 $self->{ subcode } = '';
45 } elsif ( $name eq 'controlfield' ) {
# The converted text is handed to append_fields() on $self->{ record }.
46 $self->{ record }->append_fields(
49 save_space_in_utf8($self->{ chars })
52 $self->{ chars } = '';
54 } elsif ( $name eq 'datafield' ) {
# The queued subfield code/value pairs are flattened into the argument list.
55 $self->{ record }->append_fields(
60 @{ $self->{ subfields } }
# Start the next data field with an empty subfield queue and text buffer.
66 $self->{ subfields } = [];
67 $self->{ chars } = '';
68 } elsif ( $name eq 'leader' ) {
70 my $ldr = $self->{ chars };
# Character at offset 9 of the leader equal to 'a' bumps $_is_unicode.
# NOTE(review): in MARC 21, leader/09 = 'a' denotes UCS/Unicode -- confirm.
# The flag is only incremented here; no reset is visible in this code.
71 $_is_unicode++ if (substr($ldr,9,1) eq 'a');
73 $self->{ record }->leader( save_space_in_utf8($ldr) );
74 $self->{ chars } = '';
## Converts text chunk by chunk: each run of non-whitespace characters is
## passed through $charset->to_marc8() and re-encoded with
## Encode::encode('latin1', ...), while the whitespace around it is copied
## through unchanged. The pieces are appended to $output.
## NOTE(review): despite the "utf8" in the name, the visible conversion is
## towards MARC-8 -- confirm the intended direction with callers.
80 sub save_space_in_utf8 {
# Per match: $1 = leading whitespace, $2 = non-whitespace run, $3 = trailing
# whitespace. Every part of the pattern is optional, so $2 may be empty.
83 while ($string =~ /(\s*)(\S*)(\s*)/gcsmo) {
# The conversion is unconditional: the $_is_unicode guard on this line and
# the pass-through alternative on the next line are both commented out.
84 $output .= $1 . Encode::encode('latin1',$charset->to_marc8($2)) . $3;# if ($_is_unicode);
85 #$output .= $1 . $2 . $3 unless ($_is_unicode);
## Appends the text in $chars->{ Data } to $self->{ chars }, but only while
## the handler is inside an element whose text is wanted.
91 my ( $self, $chars ) = @_;
# Text is kept when a subfield code is set, or when the current tag is 'LDR'
# or compares numerically below 10.
# NOTE(review): "below 10" presumably selects the control fields -- confirm.
# NOTE(review): the subcode test is a plain truth test, so a subfield code of
# '0' (false in Perl) does not satisfy it and that subfield's text is not
# collected for tags >= 10. Likewise a non-numeric tag makes the '<'
# comparison warn under 'use warnings'. Consider a "ne ''" test instead.
92 if ( $self->{ subcode } or ( $self->{ tag } and
93 ( $self->{ tag } eq 'LDR' or $self->{ tag } < 10 ) ) ) {
# Concatenate rather than assign, so text arriving in several pieces adds up.
94 $self->{ chars } .= $chars->{ Data };