LP1054322 - libparent-perl not needed for Ubuntu Precise
[working/Evergreen.git] / Open-ILS / src / extras / marc2html
1 #!/usr/bin/perl
2
3 use Error qw/:try/;
4 use MARC::Batch;
5 use MARC::File::XML (BinaryEncoding => 'UTF-8');
6 use XML::LibXSLT;
7 use XML::LibXML;
8 use Unicode::Normalize;
9 use Getopt::Long;
10 use FileHandle;
11
use strict;
use warnings;

# marc2html: read a file of binary MARC records and write them out as a
# series of static, cross-linked HTML pages (1.html, 2.html, ...) rendered
# through the XSLT stylesheet stored after __DATA__.
#
# Options:
#   --marc=FILE       input file, read as USMARC (required)
#   --out_dir=DIR     existing directory that receives the N.html pages
#                     (required)
#   --split=N         number of records per page (default 100)
#   --encoding=NAME   encoding MARC::Charset should assume for the input
#   --bad=FILE        collects, as raw USMARC, every record whose MARCXML
#                     rendering fails to parse

# Default namespace declared on each page's <collection> wrapper.  A
# *default* declaration is used (not a prefixed one) so that un-prefixed
# <record> fragments placed inside the wrapper inherit the namespace and are
# matched by the stylesheet's marc:record templates.
my $marc_ns = 'http://www.loc.gov/MARC21/slim';

my ($split, $enc, $marc, $out, $bad) = (100);
GetOptions(
    'split=i'    => \$split,
    'marc=s'     => \$marc,
    'encoding=s' => \$enc,
    'out_dir=s'  => \$out,
    'bad=s'      => \$bad,
);

if ($enc) {
    # NOTE(review): MARC::Charset is not loaded explicitly in this file;
    # presumably it comes in as a dependency of MARC::File::XML -- confirm.
    MARC::Charset->ignore_errors(1);
    MARC::Charset->assume_encoding($enc);
}

die "gimme some marc!\n" unless $marc;
die "gimme somewhere to put it!\n" unless $out;
# Fail before any record is read instead of part-way through the run.
die "gimme a --split of at least 1!\n" unless $split >= 1;
die "output directory '$out' does not exist!\n" unless -d $out;

my $parser     = XML::LibXML->new();
my $xslt       = XML::LibXSLT->new();
my $stylesheet = $xslt->parse_stylesheet(
    $parser->parse_string( join( '', <DATA> ) )
);

# Optional sink for records that cannot be rendered.
my $bad_fh;
if ($bad) {
    open( $bad_fh, '>:raw', $bad )
        or die "cannot open bad-record file '$bad': $!\n";
}

# Render one page.
#   $records_xml  concatenated <record> fragments (may be empty)
#   $page_no      page number; the page is written to $out/$page_no.html
#   $prev_no      number of the previous page, 0 for "no previous page"
#   $next_no      number of the next page, 0 for "no next page"
# Dies if the assembled collection document does not parse.
my $write_page = sub {
    my ( $records_xml, $page_no, $prev_no, $next_no ) = @_;

    my $page_xml =
        qq{<collection xmlns="$marc_ns">\n$records_xml\n</collection>\n};

    my $doc = eval { $parser->parse_string($page_xml) };
    unless ($doc) {
        warn "ARG! Doc failed to parse:\n$@\n-------------------------------------------\n$page_xml\n";
        die "page $page_no could not be built\n";
    }

    # Stylesheet parameters are XPath expressions, hence the inner quotes.
    my $results = $stylesheet->transform(
        $doc,
        prev => "'$prev_no'",
        next => "'$next_no'",
    );

    # output_file() serializes according to the stylesheet's xsl:output
    # (method="html"); calling toString on the result would emit XML.
    $stylesheet->output_file( $results, "$out/$page_no.html" );
};

my $batch = MARC::Batch->new( USMARC => $marc );
$batch->strict_off;
$batch->warnings_off;

my $page_xml = '';    # record fragments collected for the page being built
my $buffered = 0;     # how many good records $page_xml currently holds
my $page     = 1;     # number of the page being built
my $current  = 1;     # ordinal of the record being read (good or bad)

while ( my $record = $batch->next ) {
    # The normalization form is passed explicitly; 'C' selects NFC.
    my $rxml = entityize( MARC::File::XML::record($record), 'C' );
    $rxml =~ tr/\x00-\x1f//d;

    # Parse each record on its own first so that one malformed record is
    # skipped instead of taking the whole page down with it.
    unless ( eval { $parser->parse_string($rxml); 1 } ) {
        warn "arg ... bad record $current, skipping: $@\n";
        print {$bad_fh} $record->as_usmarc if $bad_fh;
        next;
    }

    $page_xml .= $rxml;
    $buffered++;

    # Pages are cut on the number of *good* records collected, so a bad
    # record can never cause a page boundary to be skipped.
    if ( $buffered == $split ) {
        $write_page->( $page_xml, $page, $page - 1, $page + 1 );
        $page_xml = '';
        $buffered = 0;
        $page++;
    }
}
continue {
    $current++;
}

# The last page is always written, even when it holds no records, because
# the preceding page already links to it.  A next value of 0 suppresses the
# "Next page" link.
$write_page->( $page_xml, $page, $page - 1, 0 );

if ($bad_fh) {
    close($bad_fh) or die "error closing bad-record file '$bad': $!\n";
}
98
99
# entityize($stuff [, $form])
#
# Returns $stuff Unicode-normalized, with non-ASCII characters replaced by
# numeric character references and control characters removed.
#
#   $stuff  text to clean; undef is treated as the empty string
#   $form   'D' selects NFD; any other value, or no value, selects NFC
#
# After normalization, every character in U+0080-U+FFFD or
# U+10000-U+10FFFF becomes an upper-case hexadecimal reference (U+00E9 ->
# "&#xE9;") and every character in U+0000-U+001F is deleted.  U+FFFE and
# U+FFFF are left untouched.
sub entityize {
    my ( $stuff, $form ) = @_;
    $stuff = '' unless defined $stuff;

    $stuff = ( defined $form && $form eq 'D' ) ? NFD($stuff) : NFC($stuff);

    # The range continues past the BMP so that supplementary-plane
    # characters are escaped as well instead of passing through raw.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;', ord($1))/ge;

    # Delete the whole C0 control block.  Stopping at \x19 would leave
    # U+001A-U+001F behind, which includes the MARC delimiter bytes
    # 1D/1E/1F.
    $stuff =~ tr/\x00-\x1f//d;

    return $stuff;
}
114
115
116
117 __DATA__
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Renders a MARC21 slim collection as one HTML page: a table per record
    (leader, control fields, data fields with indicators and subfields),
    with "Previous page" / "Next page" links above and below the records.

    Parameters:
        prev    number of the previous page; the link is omitted unless
                the value is greater than 0
        next    number of the next page; the link is omitted unless the
                value is greater than 0
    Links point at "N.html" in the same directory.
-->
<xsl:stylesheet version="1.0" xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="html"/>

    <!-- Declared so that $prev and $next are always bound; the caller's
         values override these defaults. -->
    <xsl:param name="prev" select="0"/>
    <xsl:param name="next" select="0"/>

    <!-- Page skeleton. -->
    <xsl:template match="/">
        <html>
            <head>

                <style>

                    .marc_table {}
                    .marc_tag_row {}
                    .marc_tag_data {}
                    .marc_tag_col {}
                    .marc_tag_ind {}
                    .marc_subfields {}
                    .marc_subfield_code { 
                        color: blue; 
                        padding-left: 5px;
                        padding-right: 5px; 
                    }

                </style>

                <link href='/css/opac_marc.css' rel='stylesheet' type='text/css'></link>
            </head>
            <body>
                <xsl:call-template name="page_nav"/>
                <hr/>
                <xsl:apply-templates select="//marc:record"/>
                <xsl:call-template name="page_nav"/>
            </body>
        </html>
    </xsl:template>

    <!-- "Previous page | Next page" bar; either link is dropped when its
         page number is not greater than 0. -->
    <xsl:template name="page_nav">
        <xsl:if test="$prev &gt; 0">
            <a>
                <xsl:attribute name="href">
                    <xsl:value-of select="concat( $prev, '.html')"/>
                </xsl:attribute>
                <xsl:text>Previous page</xsl:text>
            </a>
        </xsl:if>
        <span> | </span>
        <xsl:if test="$next &gt; 0">
            <a>
                <xsl:attribute name="href">
                    <xsl:value-of select="concat( $next, '.html')"/>
                </xsl:attribute>
                <xsl:text>Next page</xsl:text>
            </a>
        </xsl:if>
    </xsl:template>

    <!-- One table per record: the leader row first, then every control
         field and data field in document order. -->
    <xsl:template match="marc:record">
        <table class='marc_table'>
            <tr class='marc_tag_row'>
                <th class='marc_tag_col' NOWRAP="TRUE" ALIGN="RIGHT" VALIGN="middle">
                    LDR
                </th>
                <td class='marc_tag_data' COLSPAN='3'>
                    <xsl:value-of select="marc:leader"/>
                </td>
            </tr>
            <xsl:apply-templates select="marc:datafield|marc:controlfield"/>
        </table>
        <hr/>
    </xsl:template>

    <!-- Control field: tag plus its value spanning the remaining columns. -->
    <xsl:template match="marc:controlfield">
        <tr class='marc_tag_row'>
            <th class='marc_tag_col' NOWRAP="TRUE" ALIGN="RIGHT" VALIGN="middle">
                <xsl:value-of select="@tag"/>
            </th>
            <td class='marc_tag_data' COLSPAN='3'>
                <xsl:value-of select="."/>
            </td>
        </tr>
    </xsl:template>

    <!-- Data field: tag, both indicators, then the subfields. -->
    <xsl:template match="marc:datafield">
        <tr class='marc_tag_row'>
            <th class='marc_tag_col' NOWRAP="TRUE" ALIGN="RIGHT" VALIGN="middle">
                <xsl:value-of select="@tag"/>
            </th>
            <td class='marc_tag_ind'>
                <xsl:value-of select="@ind1"/>
            </td>

            <td class='marc_tag_ind' style='border-left: 1px solid #A0A0A0; padding-left: 3px;'>
                <xsl:value-of select="@ind2"/>
                <span style='color:#FFF'>.</span> 
            </td>

            <td class='marc_subfields'>
                <xsl:apply-templates select="marc:subfield"/>
            </td>
        </tr>
    </xsl:template>

    <!-- Subfield: a double-dagger marker and the code, followed by the
         subfield value. -->
    <xsl:template match="marc:subfield">
        <span class='marc_subfield_code' > 
            &#8225;<xsl:value-of select="@code"/>
        </span><xsl:value-of select="."/>       
    </xsl:template>

</xsl:stylesheet>
238