2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
12 #include <libxml/parser.h>
13 #include <libxml/tree.h>
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
31 #include <yaz/marcdisp.h>
32 #include <yaz/yaz-util.h>
33 #include <yaz/xmalloc.h>
34 #include <yaz/options.h>
/* XPath expression selecting every element, at any depth, whose tag
 * attribute equals "999". */
45 char* clean_marc_xpath = "//*[@tag=\"999\"]";
/* XPath expression selecting the children of the root element that are
 * either not named datafield, or are datafield elements carrying a tag
 * attribute other than 035 and 999.
 * NOTE(review): where these two expressions are chosen as the prune
 * expression is not visible in this view -- confirm against main(). */
46 char* holdings_xpath = "/*/*[(local-name()='datafield' and "
47 "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
/* Forward declarations for the helpers defined after main():
 * prune_doc() deletes the nodes of doc selected by xpath;
 * _xml_to_string() serializes doc into a heap-allocated string. */
49 void prune_doc( xmlDocPtr doc, char* xpath );
50 char* _xml_to_string( xmlDocPtr doc );
/* Print the command-line synopsis to stderr.
 * prog: program name substituted into the usage line. */
52 static void usage(const char *prog) {
53 fprintf (stderr, "Usage: %s -r [xpath] -c [cfile] [-f from] [-t to] [-x] [-O] [-X] [-I] [-v] file...\n", prog);
/*
 * Program entry point. Parses the command-line options, then reads records
 * from the input file, decodes each one with yaz_marc_decode_buf() and
 * writes the decoded result to stdout, either directly or after parsing it
 * as XML and pruning it with prune_doc(). When a C output file was opened,
 * the record bytes are additionally emitted into it as a C string array
 * named marc_records.
 * NOTE(review): many statements (case labels, loop headers, declarations)
 * are not visible in this view; the comments below describe only the
 * visible lines.
 */
56 int main (int argc, char **argv) {
60 int libxml_dom_test = 0;
70 char *from = 0, *to = 0;
/* Take the character-type locale from the environment. */
74 setlocale(LC_CTYPE, "");
/* Default the target encoding to the codeset reported for the locale. */
78 to = nl_langinfo(CODESET);
/* Option loop: keeps going until options() returns -2. */
83 while ((r = options("pvcr:xOeXIf:t:2", argv, argc, &arg)) != -2) {
/* Disable libxml2's default of keeping blank text nodes. */
91 xmlKeepBlanksDefault(0);
/* Open the option argument for writing; cfile later receives the
 * marc_records C array. */
102 cfile = fopen (arg, "w");
/* The assignments below pick the output format later handed to
 * yaz_marc_xml(). Only the 'e' case label is visible in this view. */
105 xml = YAZ_MARC_SIMPLEXML;
108 xml = YAZ_MARC_OAIMARC;
110 case 'e': /* not supported on older versions of yaz */
111 xml = YAZ_MARC_XCHANGE;
114 xml = YAZ_MARC_MARCXML;
117 xml = YAZ_MARC_ISO2709;
/* Open the input file in binary mode. */
127 inf = fopen (arg, "rb");
/* Report the failure to open the input together with the errno text. */
130 fprintf (stderr, "%s: cannot open %s:%s\n",
131 prog, arg, strerror (errno));
/* Start the C array definition in the C output file. */
135 fprintf (cfile, "char *marc_records[] = {\n");
/* Create the MARC decoder handle; it is released by yaz_marc_destroy(). */
138 yaz_marc_t mt = yaz_marc_create();
/* Open a character-set converter from the source to the target encoding
 * and attach it to the decoder. */
142 cd = yaz_iconv_open(to, from);
144 fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to);
147 yaz_marc_iconv(mt, cd);
/* Configure the decoder's output format and debug setting. */
149 yaz_marc_xml(mt, xml);
150 yaz_marc_debug(mt, verbose);
/* Read the first 5 bytes of the next record; they are parsed below as the
 * record length. */
157 r = fread (buf, 1, 5, inf);
/* A short, non-empty read is reported when offsets are being printed. */
160 if (r && print_offset)
161 printf ("Extra %d bytes", r);
/* Report the record number and the current file position. */
166 long off = ftell(inf);
167 printf ("Record %d offset %ld\n", num, (long) off);
/* Interpret the 5 bytes just read as the record length. */
170 len = atoi_n(buf, 5);
/* Stop when the length is outside the accepted range. */
172 if (len < 25 || len > 100000) break;
/* Read more of the record into the buffer after the 5 bytes already held.
 * NOTE(review): any adjustment of len between the length check and this
 * read is not visible here -- confirm that len excludes those 5 bytes. */
175 r = fread (buf + 5, 1, len, inf);
/* Decode the buffered record; result and rlen receive the output. */
179 r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
/* Write the decoded output to stdout as-is. */
188 fwrite (result, rlen, 1, stdout);
/* Alternative path: parse the decoded output as XML, prune it, serialize
 * it with _xml_to_string() and print the resulting string. */
193 xmlDocPtr doc = xmlParseMemory(result, rlen);
196 prune_doc( doc, prune );
197 char* marc = _xml_to_string(doc);
198 fprintf(stdout, "%s", marc);
/* NOTE(review): the message spells the function "xmLParseMemory" while the
 * call above is xmlParseMemory -- likely a typo in the runtime string. */
205 fprintf(stderr, "xmLParseMemory failed for record %d\n", counter);
/* Emit the record into the C output file as \xHH escapes. A string literal
 * is closed after every 16th byte and once more after the loop.
 * NOTE(review): the condition guarding the opening quote and the
 * definition of p are not visible in this view. */
216 fprintf (cfile, ",");
217 fprintf (cfile, "\n");
218 for (i = 0; i < r; i++) {
220 fprintf (cfile, " \"");
221 fprintf (cfile, "\\x%02X", p[i] & 255);
223 if (i < r - 1 && (i & 15) == 15)
224 fprintf (cfile, "\"\n");
227 fprintf (cfile, "\"\n");
/* Release the MARC decoder handle. */
236 yaz_marc_destroy(mt);
/* Close the C array definition. */
241 fprintf (cfile, "};\n");
/* Print the value of counter as the processed-record total on stderr. */
260 fprintf(stderr, "\nProcessed %d Records\n", counter );
/*
 * Remove nodes from an XML document in place.
 * Evaluates the XPath expression xpath against doc, unlinks and frees every
 * node in the resulting node set, and then unlinks comment and
 * processing-instruction nodes found starting from doc->children.
 *   doc:   document to modify
 *   xpath: XPath expression selecting the nodes to delete
 * NOTE(review): several statements of this function are not visible in this
 * view; the comments describe the visible lines only.
 */
265 void prune_doc( xmlDocPtr doc, char* xpath ) {
267 xmlXPathContextPtr xpathctx;
268 xmlXPathObjectPtr object;
/* Create an XPath evaluation context bound to doc. */
270 xpathctx = xmlXPathNewContext(doc);
271 if(xpathctx == NULL) {
272 fprintf(stderr, "XPATH FAILED");
/* Evaluate the expression; give up silently when evaluation fails.
 * NOTE(review): no xmlXPathFreeContext call is visible on this early-return
 * path -- possible leak of xpathctx, confirm. */
276 object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
277 if(object == NULL) return;
/* NOTE(review): nodesetval is dereferenced with no visible NULL check;
 * confirm it cannot be NULL for an expression that matches nothing. */
280 int size = object->nodesetval->nodeNr;
281 for(i=0; i!= size; i++ ) {
282 xmlNodePtr cur_node = (xmlNodePtr) object->nodesetval->nodeTab[i];
/* Detach the node from the tree, then free it. */
283 xmlUnlinkNode( cur_node );
284 xmlFreeNode( cur_node );
/* Clear the slot, presumably so the xmlXPathFreeObject call below does not
 * touch the node that was just freed. */
285 object->nodesetval->nodeTab[i] = NULL;
/* Release the XPath result and the evaluation context. */
288 xmlXPathFreeObject(object);
289 xmlXPathFreeContext(xpathctx);
291 /* remove all comments and PI nodes */
292 xmlNodePtr cur = doc->children;
/* NOTE(review): the traversal statements around this test are not visible;
 * the unlinked node is not visibly freed here. */
294 if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
295 xmlUnlinkNode( cur );
304 char* _xml_to_string( xmlDocPtr doc ) {
308 xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
310 char* xml = strdup(xmlbuf);
313 /*** remove the XML declaration */
314 int len = strlen(xml);
316 memset( tmp, 0, len );
320 /* when we reach the first >, take everything after it */
321 for( i = 0; i!= len; i++ ) {
322 if( xml[i] == 62) { /* ascii > */
324 /* found_at holds the starting index of the rest of the doc*/
332 /* move the shortened doc into the tmp buffer */
333 strncpy( tmp, xml + found_at, len - found_at );
334 /* move the tmp buffer back into the allocated space */
335 memset( xml, 0, len );
339 int l = strlen(xml)-1;
340 if( xml[l] == 10 || xml[l] == 13 )