2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
12 #include <libxml/parser.h>
13 #include <libxml/tree.h>
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
31 #include <yaz/marcdisp.h>
32 #include <yaz/yaz-util.h>
33 #include <yaz/xmalloc.h>
34 #include <yaz/options.h>
/*
 * File-scope XPath expressions and forward declarations.
 *
 * clean_marc_xpath matches every element whose "tag" attribute is "999".
 * holdings_xpath matches each child of the root element that is either not
 * named "datafield", or is a "datafield" whose tag is neither 035 nor 999.
 * NOTE(review): presumably these are meant to be passed to prune_doc(),
 * which unlinks and frees the nodes an expression selects; the visible call
 * passes a variable named "prune" whose assignment is not shown - confirm.
 *
 * prune_doc() removes the nodes selected by xpath from doc.
 * _xml_to_string() dumps doc to a string; its visible code then works on
 * skipping everything up to the first '>' (the XML declaration).
 */
45 char* clean_marc_xpath = "//*[@tag=\"999\"]";
46 char* holdings_xpath = "/*/*[(local-name()='datafield' and "
47 "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
49 void prune_doc( xmlDocPtr doc, char* xpath );
50 char* _xml_to_string( xmlDocPtr doc );
/*
 * Print a one-line usage summary to stderr.
 *
 * prog: program name substituted into the message.
 *
 * NOTE(review): the option string given to options() in main is
 * "pvcr:xOeXIf:t:2"; the letters p, e and 2 accepted there are not listed
 * in this message - confirm whether the omission is intentional.
 */
52 static void usage(const char *prog) {
53 fprintf (stderr, "Usage: %s -r [xpath] -c [cfile] [-f from] [-t to] [-x] [-O] [-X] [-I] [-v] file...\n", prog);
/*
 * Program entry point: parses command-line options, then for each input
 * file reads records one at a time, decodes them with yaz_marc_decode_buf,
 * and writes the result to stdout (optionally parsed with libxml2, pruned
 * with prune_doc and re-serialised). When a cfile is open, the record bytes
 * are also written to it as entries of a C array named marc_records.
 */
56 int main (int argc, char **argv) {
60 int libxml_dom_test = 0;
/* Source and target character sets for the optional iconv conversion. */
70 char *from = 0, *to = 0;
/* Adopt the environment's LC_CTYPE locale and use its codeset as the
 * initial value of the target character set. */
74 setlocale(LC_CTYPE, "");
78 to = nl_langinfo(CODESET);
/* Option loop: runs until options() returns -2.
 * NOTE(review): 'c' has no trailing ':' in the option string while r, f
 * and t do, yet the usage text shows "-c [cfile]" - confirm whether -c is
 * meant to take an argument. */
83 while ((r = options("pvcr:xOeXIf:t:2", argv, argc, &arg)) != -2) {
/* libxml2 global setting: do not keep blank (whitespace-only) text nodes. */
91 xmlKeepBlanksDefault(0);
/* Open the file named by arg for writing; it receives the C array output. */
102 cfile = fopen (arg, "w");
/* The assignments below select the output format later handed to
 * yaz_marc_xml(). */
105 xml = YAZ_MARC_SIMPLEXML;
108 xml = YAZ_MARC_OAIMARC;
110 case 'e': /* not supported on older versions of yaz */
111 xml = YAZ_MARC_XCHANGE;
114 xml = YAZ_MARC_MARCXML;
117 xml = YAZ_MARC_ISO2709;
/* Open the input file named by arg in binary mode; the message below
 * reports the errno text when it cannot be opened. */
127 inf = fopen (arg, "rb");
130 fprintf (stderr, "%s: cannot open %s:%s\n",
131 prog, arg, strerror (errno));
/* Start of the generated C array in cfile. */
135 fprintf (cfile, "char *marc_records[] = {\n");
/* Create the MARC handle, attach an iconv converter (to <- from), and
 * apply the selected output format and the verbosity level. */
138 yaz_marc_t mt = yaz_marc_create();
142 cd = yaz_iconv_open(to, from);
144 fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to);
147 yaz_marc_iconv(mt, cd);
149 yaz_marc_xml(mt, xml);
150 yaz_marc_debug(mt, verbose);
/* Read the first five bytes of the next record; they are parsed below as
 * the record length. */
158 r = fread (buf, 1, 5, inf);
161 if (r && print_offset)
162 printf ("Extra %d bytes", r);
167 long off = ftell(inf);
168 printf ("Record %d offset %ld\n", num, (long) off);
/* Parse the five length digits and stop on an implausible length. */
171 len = atoi_n(buf, 5);
173 if (len < 25 || len > 100000) break;
/* NOTE(review): this reads len bytes after the five already consumed. If
 * len counts those five bytes as well, this over-reads by five bytes and
 * could exceed buf (its size is not visible here) - confirm. */
176 r = fread (buf + 5, 1, len, inf);
/* Decode the record; result/rlen receive the converted output. */
180 r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
188 fwrite (result, rlen, 1, stdout);
/* Parse the converted output as XML so it can be pruned.
 * NOTE(review): the message below spells the function "xmLParseMemory";
 * likely a typo for xmlParseMemory. */
193 xmlDocPtr doc = xmlParseMemory(result, rlen);
196 fprintf(stderr, "xmLParseMemory failed\n");
200 // xmlDocPtr doc_copy = xmlCopyDoc( doc, 1 );
201 //char* holdings_expr = "/*/*[(local-name()='datafield' and "
202 // "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
204 //char* marc_expr = "//*[@tag=\"999\"]";
/* Remove the nodes selected by the prune expression, serialise what is
 * left, and print it to stdout. */
206 prune_doc( doc, prune );
207 //prune_doc( doc_copy, holdings_expr );
209 char* marc = _xml_to_string(doc);
210 //char* holdings = _xml_to_string(doc_copy);
212 fprintf(stdout, "%s", marc);
213 //fprintf(stderr, "%s", holdings);
218 //xmlFreeDoc(doc_copy);
/* Emit r bytes of p to cfile as \xHH escapes inside string literals,
 * closing the literal after every 16th byte (except the last byte). */
228 fprintf (cfile, ",");
229 fprintf (cfile, "\n");
230 for (i = 0; i < r; i++) {
232 fprintf (cfile, " \"");
233 fprintf (cfile, "\\x%02X", p[i] & 255);
235 if (i < r - 1 && (i & 15) == 15)
236 fprintf (cfile, "\"\n");
239 fprintf (cfile, "\"\n");
248 yaz_marc_destroy(mt);
/* Close the generated C array. */
253 fprintf (cfile, "};\n");
/* Report the record count; the printed value is counter - 1. */
272 fprintf(stderr, "\nProcessed %d Records\n", counter - 1 );
/*
 * Remove from doc every node selected by an XPath expression, then handle
 * comment and processing-instruction nodes found via doc->children.
 *
 * doc:   parsed libxml2 document, modified in place.
 * xpath: XPath expression; each node in its result set is unlinked from the
 *        tree and freed.
 */
277 void prune_doc( xmlDocPtr doc, char* xpath ) {
279 xmlXPathContextPtr xpathctx;
280 xmlXPathObjectPtr object;
/* Create an XPath evaluation context bound to doc. */
282 xpathctx = xmlXPathNewContext(doc);
283 if(xpathctx == NULL) {
284 fprintf(stderr, "XPATH FAILED");
/* NOTE(review): this early return leaves xpathctx unfreed; the context is
 * only released by xmlXPathFreeContext at the end of the function. */
288 object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
289 if(object == NULL) return;
/* NOTE(review): object->nodesetval is dereferenced without a NULL check;
 * confirm it cannot be NULL for a non-node-set or empty result. */
292 int size = object->nodesetval->nodeNr;
/* Detach each matched node from the tree and free it. */
293 for(i=0; i!= size; i++ ) {
294 xmlNodePtr cur_node = (xmlNodePtr) object->nodesetval->nodeTab[i];
295 xmlUnlinkNode( cur_node );
296 xmlFreeNode( cur_node );
299 /* remove all comments and PI nodes */
300 xmlNodePtr cur = doc->children;
/* NOTE(review): the node is unlinked here; how it is freed and how the
 * iteration advances is not visible in this excerpt - confirm cur is not
 * used to step to the next sibling after being unlinked. */
302 if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
303 xmlUnlinkNode( cur );
/* Release the XPath result object and the evaluation context. */
309 xmlXPathFreeObject(object);
310 xmlXPathFreeContext(xpathctx);
314 char* _xml_to_string( xmlDocPtr doc ) {
318 xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
320 char* xml = strdup(xmlbuf);
323 /*** remove the XML declaration */
324 int len = strlen(xml);
326 memset( tmp, 0, len );
330 /* when we reach the first >, take everything after it */
331 for( i = 0; i!= len; i++ ) {
332 if( xml[i] == 62) { /* ascii > */
334 /* found_at holds the starting index of the rest of the doc*/
342 /* move the shortened doc into the tmp buffer */
343 strncpy( tmp, xml + found_at, len - found_at );
344 /* move the tmp buffer back into the allocated space */
345 memset( xml, 0, len );
349 int l = strlen(xml)-1;
350 if( xml[l] == 10 || xml[l] == 13 )