]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/extras/marcdumper/marcdumper.c
We now set nodeTab[x] to NULL to prevent libxml2 on AMD/64 from dying.
[Evergreen.git] / Open-ILS / src / extras / marcdumper / marcdumper.c
1 /*
2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
4 *
5 * $Id$
6 */
7
8 #if HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <libxml/parser.h>
13 #include <libxml/tree.h>
14
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <assert.h>
23
24 #if HAVE_LOCALE_H
25 #include <locale.h>
26 #endif
27 #if HAVE_LANGINFO_H
28 #include <langinfo.h>
29 #endif
30
31 #include <yaz/marcdisp.h>
32 #include <yaz/yaz-util.h>
33 #include <yaz/xmalloc.h>
34 #include <yaz/options.h>
35
36 #ifndef SEEK_SET
37 #define SEEK_SET 0
38 #endif
39 #ifndef SEEK_END
40 #define SEEK_END 2
41 #endif
42
43 #include <fcntl.h>
44
/*
 * Ready-made XPath expressions.  Neither is referenced by the code visible
 * in this file; presumably they are meant as values for the -r option
 * (TODO confirm against callers).
 */

/* Matches every element, anywhere in the document, whose tag attribute is "999". */
char *clean_marc_xpath =
        "//*[@tag=\"999\"]";

/*
 * Matches each child of the root element that is either a datafield whose
 * tag is neither "035" nor "999", or is not a datafield at all.
 */
char *holdings_xpath =
        "/*/*["
        "(local-name()='datafield' and (@tag!='035' and @tag!='999'))"
        " or local-name()!='datafield'"
        "]";
48
49 void prune_doc( xmlDocPtr doc, char* xpath );
50 char* _xml_to_string( xmlDocPtr doc );
51
/*
 * Print a one-line command synopsis to stderr.
 *
 * prog: program name substituted into the message (main passes argv[0]).
 */
static void usage(const char *prog) {
        fprintf (stderr, "Usage: %s -r [xpath] -c [cfile] [-f from] [-t to] [-x] [-O] [-X] [-I] [-v] file...\n", prog);
}
55
56 int main (int argc, char **argv) {
57         int counter = 0;
58
59         int r;
60         int libxml_dom_test = 0;
61         int print_offset = 0;
62         char *arg;
63         int verbose = 0;
64         FILE *inf;
65         char buf[100001];
66         char *prog = *argv;
67         int no = 0;
68         int xml = 0;
69         FILE *cfile = 0;
70         char *from = 0, *to = 0;
71         int num = 1;
72         
73         #if HAVE_LOCALE_H
74         setlocale(LC_CTYPE, "");
75         #endif
76         #if HAVE_LANGINFO_H
77         #ifdef CODESET
78         to = nl_langinfo(CODESET);
79         #endif
80         #endif
81         
82         char* prune = NULL;
83         while ((r = options("pvcr:xOeXIf:t:2", argv, argc, &arg)) != -2) {
84                         
85                 int count;
86                 no++;
87
88                 switch (r) {
89                         case 'r':
90                                 prune = arg;
91                                 xmlKeepBlanksDefault(0);
92                                 break;
93                         case 'f':
94                                 from = arg;
95                                 break;
96                         case 't':
97                                 to = arg;
98                                 break;
99                         case 'c':
100                                 if (cfile)
101                                 fclose (cfile);
102                                 cfile = fopen (arg, "w");
103                         break;
104                                 case 'x':
105                                 xml = YAZ_MARC_SIMPLEXML;
106                                 break;
107                         case 'O':
108                                 xml = YAZ_MARC_OAIMARC;
109                                 break;
110                         case 'e': /* not supported on older versions of yaz */
111                                 xml = YAZ_MARC_XCHANGE;
112                                 break;
113                         case 'X':
114                                 xml = YAZ_MARC_MARCXML;
115                                 break;
116                         case 'I':
117                                 xml = YAZ_MARC_ISO2709;
118                                 break;
119                         case 'p':
120                                 print_offset = 1;
121                                 break;
122                         case '2':
123                                 libxml_dom_test = 1;
124                                 break;
125                         case 0:
126
127                                 inf = fopen (arg, "rb");
128                                 count = 0;
129                                 if (!inf) {
130                                         fprintf (stderr, "%s: cannot open %s:%s\n",
131                                         prog, arg, strerror (errno));
132                                         exit(1);
133                                 }
134                                 if (cfile)
135                                         fprintf (cfile, "char *marc_records[] = {\n");
136
137                                 if (1) {
138                                         yaz_marc_t mt = yaz_marc_create();
139                                         yaz_iconv_t cd = 0;
140                         
141                                         if (from && to) {
142                                                 cd = yaz_iconv_open(to, from);
143                                                 if (!cd) {
144                                                         fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to);
145                                                         exit(2);
146                                                 }
147                                                 yaz_marc_iconv(mt, cd);
148                                         }
149                                         yaz_marc_xml(mt, xml);
150                                         yaz_marc_debug(mt, verbose);
151
152                                         while (1) {
153                                                 counter++;
154                                                 int len;
155                                                 char *result;
156                                                 int rlen;
157                                                 
158                                                 r = fread (buf, 1, 5, inf);
159
160                                                 if (r < 5) {
161                                                         if (r && print_offset)
162                                                                 printf ("Extra %d bytes", r);
163                                                         break;
164                                                 }
165
166                                                 if (print_offset) {
167                                                         long off = ftell(inf);
168                                                         printf ("Record %d offset %ld\n", num, (long) off);
169                                                 }
170
171                                                 len = atoi_n(buf, 5);
172
173                                                 if (len < 25 || len > 100000) break;
174
175                                                 len = len - 5;
176                                                 r = fread (buf + 5, 1, len, inf);
177                 
178                                                 if (r < len) break;
179
180                                                 r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
181                 
182                                                 if (r <= 0) break;
183                 
184
185
186                                                 if(!prune) {
187
188                                                         fwrite (result, rlen, 1, stdout);
189
190                                                 } else {
191
192
193                                                         xmlDocPtr doc = xmlParseMemory(result, rlen);
194
195                                                         if (doc) {
196                                                                 prune_doc( doc, prune );
197                                                                 char* marc = _xml_to_string(doc);
198                                                                 fprintf(stdout, "%s", marc);
199
200                                                                 free(marc);
201                                                                 xmlFreeDoc(doc);
202
203                                                         } else {
204
205                                                                 fprintf(stderr, "xmLParseMemory failed for record %d\n", counter);
206                                                         }
207
208                                                 }
209
210
211                                                 if (cfile) {
212                                 
213                                                         char *p = buf;
214                                                         int i;
215                                                         if (count)
216                                                                 fprintf (cfile, ",");
217                                                         fprintf (cfile, "\n");
218                                                         for (i = 0; i < r; i++) {
219                                                                 if ((i & 15) == 0)
220                                                                         fprintf (cfile, "  \"");
221                                                                 fprintf (cfile, "\\x%02X", p[i] & 255);
222                                         
223                                                                 if (i < r - 1 && (i & 15) == 15)
224                                                                         fprintf (cfile, "\"\n");
225                                         
226                                                         }
227                                                         fprintf (cfile, "\"\n");
228                                                 }
229                                                 num++;
230                                         }
231                                 
232                                         count++;
233                 
234                                         if (cd)
235                                                 yaz_iconv_close(cd);
236                                         yaz_marc_destroy(mt);
237                                 }
238
239
240                                 if (cfile)
241                                         fprintf (cfile, "};\n");
242                                 fclose(inf);
243                                 break;
244                         case 'v':
245                                 verbose++;
246                                 break;
247                         default:
248                                 usage(prog);
249                                 exit (1);
250                 }
251         }
252
253         if (cfile)
254                 fclose (cfile);
255         if (!no) {
256                 usage(prog);
257                 exit (1);
258         }
259
260         fprintf(stderr, "\nProcessed %d Records\n", counter - 1 );
261         exit (0);
262 }
263
264
265 void prune_doc( xmlDocPtr doc, char* xpath ) {
266
267         xmlXPathContextPtr xpathctx;
268         xmlXPathObjectPtr object;
269
270         xpathctx = xmlXPathNewContext(doc);
271         if(xpathctx == NULL) {
272                 fprintf(stderr, "XPATH FAILED");
273                 return;
274         }
275
276         object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
277         if(object == NULL) return;
278
279         int i;
280         int size = object->nodesetval->nodeNr;
281         for(i=0; i!= size; i++ ) {
282                 xmlNodePtr cur_node = (xmlNodePtr) object->nodesetval->nodeTab[i];
283                 xmlUnlinkNode( cur_node );
284                 xmlFreeNode( cur_node );
285                 object->nodesetval->nodeTab[i] = NULL;
286         }
287
288         xmlXPathFreeObject(object);
289    xmlXPathFreeContext(xpathctx);       
290
291         /* remove all comments and PI nodes */
292         xmlNodePtr cur = doc->children;
293         while(cur) {
294                 if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
295                         xmlUnlinkNode( cur );
296                         xmlFreeNode( cur );
297                 }
298                 cur = cur->next;
299         }
300
301
302 }
303
304 char* _xml_to_string( xmlDocPtr doc ) {
305         
306         int                     bufsize;
307         xmlChar*                xmlbuf;
308         xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
309
310         char* xml = strdup(xmlbuf);
311         xmlFree(xmlbuf);
312
313         /*** remove the XML declaration */
314         int len = strlen(xml);
315         char tmp[len];
316         memset( tmp, 0, len );
317         int i;
318         int found_at = 0;
319                                                 
320         /* when we reach the first >, take everything after it */
321         for( i = 0; i!= len; i++ ) {
322                 if( xml[i] == 62) { /* ascii > */
323         
324                         /* found_at holds the starting index of the rest of the doc*/
325                         found_at = i + 1; 
326                         break;
327                 }
328         }
329
330         if( found_at ) {
331
332                 /* move the shortened doc into the tmp buffer */
333                 strncpy( tmp, xml + found_at, len - found_at );
334                 /* move the tmp buffer back into the allocated space */
335                 memset( xml, 0, len );
336                 strcpy( xml, tmp );
337         }
338
339         int l = strlen(xml)-1;
340         if( xml[l] == 10 || xml[l] == 13 )
341                 xml[l] = '\0';
342
343         return xml;
344
345 }