marc dumper, added a command line param for specifying xpath to
authorerickson <erickson@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Mon, 28 Feb 2005 15:27:39 +0000 (15:27 +0000)
committererickson <erickson@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Mon, 28 Feb 2005 15:27:39 +0000 (15:27 +0000)
remove from the doc.

git-svn-id: svn://svn.open-ils.org/ILS/trunk@140 dcc99617-32d9-48b4-a31d-7c20da2025e4

Open-ILS/src/extras/marcdumper/Makefile [new file with mode: 0644]
Open-ILS/src/extras/marcdumper/marcdumper.c [new file with mode: 0644]

diff --git a/Open-ILS/src/extras/marcdumper/Makefile b/Open-ILS/src/extras/marcdumper/Makefile
new file mode 100644 (file)
index 0000000..c1158be
--- /dev/null
@@ -0,0 +1,17 @@
+#
+# Builds the marcdumper utility from marcdumper.c.
+# This utility code requires libxml2 and yaz version 2.0.34 or greater.
+#
+
+CC = gcc
+# libraries passed on the link line
+LD = -lxml2 -lyaz 
+# optimization level and the libxml2 header location
+COMP = -O2 -I /usr/include/libxml2/ 
+
+# "all" and "clean" name actions, not files: run them even if a file
+# with the same name happens to exist in this directory
+.PHONY: all clean
+
+all: marcdumper
+
+marcdumper:  marcdumper.c
+       $(CC) marcdumper.c $(COMP) $(LD) -o $@
+
+clean:
+       /bin/rm -f *.o marcdumper
+
+
diff --git a/Open-ILS/src/extras/marcdumper/marcdumper.c b/Open-ILS/src/extras/marcdumper/marcdumper.c
new file mode 100644 (file)
index 0000000..90adf62
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+* Copyright (C) 1995-2005, Index Data ApS
+* See the file LICENSE for details.
+*
+* $Id$
+*/
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#if HAVE_LOCALE_H
+#include <locale.h>
+#endif
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-util.h>
+#include <yaz/xmalloc.h>
+#include <yaz/options.h>
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+#ifndef SEEK_END
+#define SEEK_END 2
+#endif
+
+#include <fcntl.h>
+
+/* XPath selecting every node whose tag attribute is "999". */
+char *clean_marc_xpath = "//*[@tag=\"999\"]";
+
+/*
+ * XPath selecting each child of the root element that is not a datafield,
+ * plus each datafield child whose tag attribute is neither 035 nor 999.
+ */
+char *holdings_xpath =
+       "/*/*[(local-name()='datafield' and "
+       "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
+
+/* Helpers defined after main(). */
+void prune_doc(xmlDocPtr doc, char *xpath);
+char *_xml_to_string(xmlDocPtr doc);
+
+/*
+ * Prints a one-line synopsis of the command-line options to stderr.
+ * prog is the program name shown at the start of the line.
+ */
+static void usage(const char *prog) {
+       fprintf (stderr, "Usage: %s [-r xpath] [-c cfile] [-f from] [-t to] "
+               "[-x] [-O] [-e] [-X] [-I] [-p] [-v] file...\n", prog);
+}
+
+int main (int argc, char **argv) {
+       int counter = 0;
+
+       int r;
+       int libxml_dom_test = 0;
+       int print_offset = 0;
+       char *arg;
+       int verbose = 0;
+       FILE *inf;
+       char buf[100001];
+       char *prog = *argv;
+       int no = 0;
+       int xml = 0;
+       FILE *cfile = 0;
+       char *from = 0, *to = 0;
+       int num = 1;
+       
+       #if HAVE_LOCALE_H
+       setlocale(LC_CTYPE, "");
+       #endif
+       #if HAVE_LANGINFO_H
+       #ifdef CODESET
+       to = nl_langinfo(CODESET);
+       #endif
+       #endif
+       
+       char* prune = NULL;
+       while ((r = options("pvcr:xOeXIf:t:2", argv, argc, &arg)) != -2) {
+                       
+               int count;
+               no++;
+
+               switch (r) {
+                       case 'r':
+                               prune = arg;
+                               xmlKeepBlanksDefault(0);
+                               break;
+                       case 'f':
+                               from = arg;
+                               break;
+                       case 't':
+                               to = arg;
+                               break;
+                       case 'c':
+                               if (cfile)
+                               fclose (cfile);
+                               cfile = fopen (arg, "w");
+                       break;
+                               case 'x':
+                               xml = YAZ_MARC_SIMPLEXML;
+                               break;
+                       case 'O':
+                               xml = YAZ_MARC_OAIMARC;
+                               break;
+                       case 'e': /* not supported on older versions of yaz */
+                               xml = YAZ_MARC_XCHANGE;
+                               break;
+                       case 'X':
+                               xml = YAZ_MARC_MARCXML;
+                               break;
+                       case 'I':
+                               xml = YAZ_MARC_ISO2709;
+                               break;
+                       case 'p':
+                               print_offset = 1;
+                               break;
+                       case '2':
+                               libxml_dom_test = 1;
+                               break;
+                       case 0:
+
+                               inf = fopen (arg, "rb");
+                               count = 0;
+                               if (!inf) {
+                                       fprintf (stderr, "%s: cannot open %s:%s\n",
+                                       prog, arg, strerror (errno));
+                                       exit(1);
+                               }
+                               if (cfile)
+                                       fprintf (cfile, "char *marc_records[] = {\n");
+
+                               if (1) {
+                                       yaz_marc_t mt = yaz_marc_create();
+                                       yaz_iconv_t cd = 0;
+                       
+                                       if (from && to) {
+                                               cd = yaz_iconv_open(to, from);
+                                               if (!cd) {
+                                                       fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to);
+                                                       exit(2);
+                                               }
+                                               yaz_marc_iconv(mt, cd);
+                                       }
+                                       yaz_marc_xml(mt, xml);
+                                       yaz_marc_debug(mt, verbose);
+
+                                       while (1) {
+                                               counter++;
+                                               int len;
+                                               char *result;
+                                               int rlen;
+                                               
+                                               r = fread (buf, 1, 5, inf);
+
+                                               if (r < 5) {
+                                                       if (r && print_offset)
+                                                               printf ("Extra %d bytes", r);
+                                                       break;
+                                               }
+
+                                               if (print_offset) {
+                                                       long off = ftell(inf);
+                                                       printf ("Record %d offset %ld\n", num, (long) off);
+                                               }
+
+                                               len = atoi_n(buf, 5);
+
+                                               if (len < 25 || len > 100000) break;
+
+                                               len = len - 5;
+                                               r = fread (buf + 5, 1, len, inf);
+               
+                                               if (r < len) break;
+
+                                               r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
+               
+                                               if (r <= 0) break;
+               
+
+
+                                               if(!prune) {
+
+                                                       fwrite (result, rlen, 1, stdout);
+
+                                               } else {
+
+
+                                                       xmlDocPtr doc = xmlParseMemory(result, rlen);
+
+                                                       if (!doc) {
+                                                               fprintf(stderr, "xmLParseMemory failed\n");
+                                                               continue;
+                                                       }
+
+                                                       //      xmlDocPtr doc_copy = xmlCopyDoc( doc, 1 );
+                                                       //char* holdings_expr = "/*/*[(local-name()='datafield' and "
+                                                       //      "(@tag!='035' and @tag!='999')) or local-name()!='datafield']";
+
+                                                       //char* marc_expr = "//*[@tag=\"999\"]";
+
+                                                       prune_doc( doc, prune );
+                                                       //prune_doc( doc_copy, holdings_expr );
+
+                                                       char* marc = _xml_to_string(doc);
+                                                       //char* holdings = _xml_to_string(doc_copy);
+
+                                                       fprintf(stdout, "%s", marc);
+                                                       //fprintf(stderr, "%s", holdings);
+
+                                                       free(marc);
+                                                       //free(holdings);
+                                                       xmlFreeDoc(doc);
+                                                       //xmlFreeDoc(doc_copy);
+
+                                               }
+
+
+                                               if (cfile) {
+                               
+                                                       char *p = buf;
+                                                       int i;
+                                                       if (count)
+                                                               fprintf (cfile, ",");
+                                                       fprintf (cfile, "\n");
+                                                       for (i = 0; i < r; i++) {
+                                                               if ((i & 15) == 0)
+                                                                       fprintf (cfile, "  \"");
+                                                               fprintf (cfile, "\\x%02X", p[i] & 255);
+                                       
+                                                               if (i < r - 1 && (i & 15) == 15)
+                                                                       fprintf (cfile, "\"\n");
+                                       
+                                                       }
+                                                       fprintf (cfile, "\"\n");
+                                               }
+                                               num++;
+                                       }
+                               
+                                       count++;
+               
+                                       if (cd)
+                                               yaz_iconv_close(cd);
+                                       yaz_marc_destroy(mt);
+                               }
+
+
+                               if (cfile)
+                                       fprintf (cfile, "};\n");
+                               fclose(inf);
+                               break;
+                       case 'v':
+                               verbose++;
+                               break;
+                       default:
+                               usage(prog);
+                               exit (1);
+               }
+       }
+
+       if (cfile)
+               fclose (cfile);
+       if (!no) {
+               usage(prog);
+               exit (1);
+       }
+
+       fprintf(stderr, "\nProcessed %d Records\n", counter - 1 );
+       exit (0);
+}
+
+
+/*
+ * Removes from doc every node selected by the XPath expression xpath, then
+ * removes the comment and processing-instruction nodes found among the
+ * document's top-level children. Returns without touching the document if
+ * the XPath context cannot be created or the expression cannot be evaluated.
+ */
+void prune_doc( xmlDocPtr doc, char* xpath ) {
+
+       xmlXPathContextPtr xpathctx;
+       xmlXPathObjectPtr object;
+       xmlNodeSetPtr nodes;
+       xmlNodePtr cur;
+       int i;
+
+       xpathctx = xmlXPathNewContext(doc);
+       if(xpathctx == NULL) {
+               fprintf(stderr, "XPATH FAILED");
+               return;
+       }
+
+       object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
+       if(object == NULL) {
+               xmlXPathFreeContext(xpathctx);
+               return;
+       }
+
+       /* nodesetval may be NULL, e.g. when the expression did not yield a node set */
+       nodes = object->nodesetval;
+
+       /*
+        * Walk the set from the end so that a matched node nested inside another
+        * matched node is freed before its ancestor (assumes the set is in
+        * document order -- TODO confirm for the expressions in use).
+        */
+       for(i = nodes ? nodes->nodeNr - 1 : -1; i >= 0; i-- ) {
+               xmlNodePtr cur_node = nodes->nodeTab[i];
+
+               /* namespace and document nodes cannot be unlinked and freed this way */
+               if( cur_node == NULL || cur_node->type == XML_NAMESPACE_DECL
+                               || cur_node->type == XML_DOCUMENT_NODE )
+                       continue;
+
+               xmlUnlinkNode( cur_node );
+               xmlFreeNode( cur_node );
+               /* clear the slot so xmlXPathFreeObject() never looks at the freed node */
+               nodes->nodeTab[i] = NULL;
+       }
+
+       /* remove all comments and PI nodes */
+       cur = doc->children;
+       while(cur) {
+               /* fetch the sibling first: cur may be freed just below */
+               xmlNodePtr next = cur->next;
+               if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
+                       xmlUnlinkNode( cur );
+                       xmlFreeNode( cur );
+               }
+               cur = next;
+       }
+
+       xmlXPathFreeObject(object);
+       xmlXPathFreeContext(xpathctx);
+
+}
+
+/*
+ * Serializes doc into a newly allocated string, with everything up to and
+ * including the first '>' (the XML declaration) removed and one trailing
+ * newline or carriage return, if present, cut off.
+ *
+ * Returns the string, which the caller must free(), or NULL if the document
+ * could not be serialized or memory ran out.
+ */
+char* _xml_to_string( xmlDocPtr doc ) {
+
+       int                     bufsize = 0;
+       xmlChar*                xmlbuf = NULL;
+       char*                   xml;
+       char*                   gt;
+       size_t                  len;
+
+       xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
+       if( xmlbuf == NULL )
+               return NULL;
+
+       xml = strdup((const char*) xmlbuf);
+       xmlFree(xmlbuf);
+       if( xml == NULL )
+               return NULL;
+
+       /*** remove the XML declaration */
+       /* when we reach the first >, keep everything after it (shifted in place) */
+       gt = strchr(xml, '>');
+       if( gt )
+               memmove( xml, gt + 1, strlen(gt + 1) + 1 );
+
+       /* drop a single trailing line terminator; the string may be empty here */
+       len = strlen(xml);
+       if( len > 0 && (xml[len - 1] == '\n' || xml[len - 1] == '\r') )
+               xml[len - 1] = '\0';
+
+       return xml;
+
+}