]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/extras/marcdumper/marcdumper.c
the marcdumper! takes command line params
[OpenSRF.git] / src / extras / marcdumper / marcdumper.c
1 /*
2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
4 *
5 * $Id$
6 */
7
8 #if HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <libxml/parser.h>
13 #include <libxml/tree.h>
14
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <assert.h>
23
24 #if HAVE_LOCALE_H
25 #include <locale.h>
26 #endif
27 #if HAVE_LANGINFO_H
28 #include <langinfo.h>
29 #endif
30
31 #include <yaz/marcdisp.h>
32 #include <yaz/yaz-util.h>
33 #include <yaz/xmalloc.h>
34 #include <yaz/options.h>
35
36 #ifndef SEEK_SET
37 #define SEEK_SET 0
38 #endif
39 #ifndef SEEK_END
40 #define SEEK_END 2
41 #endif
42
43 #include <fcntl.h>
44
45 char* clean_marc_xpath  = "//*[@tag=\"999\"]";
46 char* holdings_xpath            = "/*/*[(local-name()='datafield' and "
47                                                                         "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
48
49 void prune_doc( xmlDocPtr doc, char* xpath );
50 char* _xml_to_string( xmlDocPtr doc );
51
/*
 * Print a one-line synopsis of the command-line options to stderr.
 *
 * prog: program name shown in the message (normally argv[0]).
 *
 * The option list mirrors the option string parsed in main(); the caller
 * decides whether to exit afterwards.
 */
static void usage(const char *prog) {
	fprintf (stderr, "Usage: %s [-r xpath] [-c cfile] [-f from] [-t to] "
		"[-x] [-O] [-X] [-e] [-I] [-p] [-v] file...\n", prog);
}
54
55
56 int main (int argc, char **argv) {
57         int counter = 0;
58
59         int r;
60         int libxml_dom_test = 0;
61         int print_offset = 0;
62         char *arg;
63         int verbose = 0;
64         FILE *inf;
65         char buf[100001];
66         char *prog = *argv;
67         int no = 0;
68         int xml = 0;
69         FILE *cfile = 0;
70         char *from = 0, *to = 0;
71         int num = 1;
72         
73         #if HAVE_LOCALE_H
74         setlocale(LC_CTYPE, "");
75         #endif
76         #if HAVE_LANGINFO_H
77         #ifdef CODESET
78         to = nl_langinfo(CODESET);
79         #endif
80         #endif
81         
82         char* prune = NULL;
83         while ((r = options("pvcr:xOeXIf:t:2", argv, argc, &arg)) != -2) {
84                         
85                 int count;
86                 no++;
87
88                 switch (r) {
89                         case 'r':
90                                 prune = arg;
91                                 xmlKeepBlanksDefault(0);
92                                 break;
93                         case 'f':
94                                 from = arg;
95                                 break;
96                         case 't':
97                                 to = arg;
98                                 break;
99                         case 'c':
100                                 if (cfile)
101                                 fclose (cfile);
102                                 cfile = fopen (arg, "w");
103                         break;
104                                 case 'x':
105                                 xml = YAZ_MARC_SIMPLEXML;
106                                 break;
107                         case 'O':
108                                 xml = YAZ_MARC_OAIMARC;
109                                 break;
110                         case 'e': /* not supported on older versions of yaz */
111                                 xml = YAZ_MARC_XCHANGE;
112                                 break;
113                         case 'X':
114                                 xml = YAZ_MARC_MARCXML;
115                                 break;
116                         case 'I':
117                                 xml = YAZ_MARC_ISO2709;
118                                 break;
119                         case 'p':
120                                 print_offset = 1;
121                                 break;
122                         case '2':
123                                 libxml_dom_test = 1;
124                                 break;
125                         case 0:
126
127                                 inf = fopen (arg, "rb");
128                                 count = 0;
129                                 if (!inf) {
130                                         fprintf (stderr, "%s: cannot open %s:%s\n",
131                                         prog, arg, strerror (errno));
132                                         exit(1);
133                                 }
134                                 if (cfile)
135                                         fprintf (cfile, "char *marc_records[] = {\n");
136
137                                 if (1) {
138                                         yaz_marc_t mt = yaz_marc_create();
139                                         yaz_iconv_t cd = 0;
140                         
141                                         if (from && to) {
142                                                 cd = yaz_iconv_open(to, from);
143                                                 if (!cd) {
144                                                         fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to);
145                                                         exit(2);
146                                                 }
147                                                 yaz_marc_iconv(mt, cd);
148                                         }
149                                         yaz_marc_xml(mt, xml);
150                                         yaz_marc_debug(mt, verbose);
151
152                                         while (1) {
153                                                 counter++;
154                                                 int len;
155                                                 char *result;
156                                                 int rlen;
157                                                 
158                                                 r = fread (buf, 1, 5, inf);
159
160                                                 if (r < 5) {
161                                                         if (r && print_offset)
162                                                                 printf ("Extra %d bytes", r);
163                                                         break;
164                                                 }
165
166                                                 if (print_offset) {
167                                                         long off = ftell(inf);
168                                                         printf ("Record %d offset %ld\n", num, (long) off);
169                                                 }
170
171                                                 len = atoi_n(buf, 5);
172
173                                                 if (len < 25 || len > 100000) break;
174
175                                                 len = len - 5;
176                                                 r = fread (buf + 5, 1, len, inf);
177                 
178                                                 if (r < len) break;
179
180                                                 r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
181                 
182                                                 if (r <= 0) break;
183                 
184
185
186                                                 if(!prune) {
187
188                                                         fwrite (result, rlen, 1, stdout);
189
190                                                 } else {
191
192
193                                                         xmlDocPtr doc = xmlParseMemory(result, rlen);
194
195                                                         if (!doc) {
196                                                                 fprintf(stderr, "xmLParseMemory failed\n");
197                                                                 continue;
198                                                         }
199
200                                                         //      xmlDocPtr doc_copy = xmlCopyDoc( doc, 1 );
201                                                         //char* holdings_expr = "/*/*[(local-name()='datafield' and "
202                                                         //      "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
203
204                                                         //char* marc_expr = "//*[@tag=\"999\"]";
205
206                                                         prune_doc( doc, prune );
207                                                         //prune_doc( doc_copy, holdings_expr );
208
209                                                         char* marc = _xml_to_string(doc);
210                                                         //char* holdings = _xml_to_string(doc_copy);
211
212                                                         fprintf(stdout, "%s", marc);
213                                                         //fprintf(stderr, "%s", holdings);
214
215                                                         free(marc);
216                                                         //free(holdings);
217                                                         xmlFreeDoc(doc);
218                                                         //xmlFreeDoc(doc_copy);
219
220                                                 }
221
222
223                                                 if (cfile) {
224                                 
225                                                         char *p = buf;
226                                                         int i;
227                                                         if (count)
228                                                                 fprintf (cfile, ",");
229                                                         fprintf (cfile, "\n");
230                                                         for (i = 0; i < r; i++) {
231                                                                 if ((i & 15) == 0)
232                                                                         fprintf (cfile, "  \"");
233                                                                 fprintf (cfile, "\\x%02X", p[i] & 255);
234                                         
235                                                                 if (i < r - 1 && (i & 15) == 15)
236                                                                         fprintf (cfile, "\"\n");
237                                         
238                                                         }
239                                                         fprintf (cfile, "\"\n");
240                                                 }
241                                                 num++;
242                                         }
243                                 
244                                         count++;
245                 
246                                         if (cd)
247                                                 yaz_iconv_close(cd);
248                                         yaz_marc_destroy(mt);
249                                 }
250
251
252                                 if (cfile)
253                                         fprintf (cfile, "};\n");
254                                 fclose(inf);
255                                 break;
256                         case 'v':
257                                 verbose++;
258                                 break;
259                         default:
260                                 usage(prog);
261                                 exit (1);
262                 }
263         }
264
265         if (cfile)
266                 fclose (cfile);
267         if (!no) {
268                 usage(prog);
269                 exit (1);
270         }
271
272         fprintf(stderr, "\nProcessed %d Records\n", counter - 1 );
273         exit (0);
274 }
275
276
277 void prune_doc( xmlDocPtr doc, char* xpath ) {
278
279         xmlXPathContextPtr xpathctx;
280         xmlXPathObjectPtr object;
281
282         xpathctx = xmlXPathNewContext(doc);
283         if(xpathctx == NULL) {
284                 fprintf(stderr, "XPATH FAILED");
285                 return;
286         }
287
288         object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
289         if(object == NULL) return;
290
291         int i;
292         int size = object->nodesetval->nodeNr;
293         for(i=0; i!= size; i++ ) {
294                 xmlNodePtr cur_node = (xmlNodePtr) object->nodesetval->nodeTab[i];
295                 xmlUnlinkNode( cur_node );
296                 xmlFreeNode( cur_node );
297         }
298
299         /* remove all comments and PI nodes */
300         xmlNodePtr cur = doc->children;
301         while(cur) {
302                 if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
303                         xmlUnlinkNode( cur );
304                         xmlFreeNode( cur );
305                 }
306                 cur = cur->next;
307         }
308
309         xmlXPathFreeObject(object);
310    xmlXPathFreeContext(xpathctx);       
311
312 }
313
314 char* _xml_to_string( xmlDocPtr doc ) {
315         
316         int                     bufsize;
317         xmlChar*                xmlbuf;
318         xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
319
320         char* xml = strdup(xmlbuf);
321         xmlFree(xmlbuf);
322
323         /*** remove the XML declaration */
324         int len = strlen(xml);
325         char tmp[len];
326         memset( tmp, 0, len );
327         int i;
328         int found_at = 0;
329                                                 
330         /* when we reach the first >, take everything after it */
331         for( i = 0; i!= len; i++ ) {
332                 if( xml[i] == 62) { /* ascii > */
333         
334                         /* found_at holds the starting index of the rest of the doc*/
335                         found_at = i + 1; 
336                         break;
337                 }
338         }
339
340         if( found_at ) {
341
342                 /* move the shortened doc into the tmp buffer */
343                 strncpy( tmp, xml + found_at, len - found_at );
344                 /* move the tmp buffer back into the allocated space */
345                 memset( xml, 0, len );
346                 strcpy( xml, tmp );
347         }
348
349         int l = strlen(xml)-1;
350         if( xml[l] == 10 || xml[l] == 13 )
351                 xml[l] = '\0';
352
353         return xml;
354
355 }