From a1b904ec123615ecfdddd3d8d616a711d70fce06 Mon Sep 17 00:00:00 2001 From: miker Date: Thu, 27 Jul 2006 07:09:59 +0000 Subject: [PATCH] importer parts git-svn-id: svn://svn.open-ils.org/ILS/trunk@5117 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../extras/import/create-batgirl-usermap.pl | 18 + .../src/extras/import/drain-batgirl-bill.pl | 37 ++ .../src/extras/import/drain-batgirl-charge.pl | 37 ++ .../src/extras/import/drain-batgirl-cn.pl | 37 ++ .../src/extras/import/drain-batgirl-hold.pl | 46 +++ .../extras/import/drain-batgirl-intransit.pl | 37 ++ .../src/extras/import/drain-batgirl-item.pl | 39 +++ Evergreen/src/extras/import/lib_spliter.pl | 2 +- Evergreen/src/extras/import/load_legacy.sql | 171 ++++++++++ .../src/extras/import/parse_patron_xml.pl | 319 ++++++++++++++++++ .../src/extras/import/piece_count_loader.pl | 20 ++ Open-ILS/src/extras/import/direct_ingest.pl | 30 +- Open-ILS/src/extras/import/marc2are.pl | 135 ++++++++ .../perlmods/OpenILS/Application/Ingest.pm | 4 +- 14 files changed, 916 insertions(+), 16 deletions(-) create mode 100755 Evergreen/src/extras/import/create-batgirl-usermap.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-bill.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-charge.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-cn.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-hold.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-intransit.pl create mode 100755 Evergreen/src/extras/import/drain-batgirl-item.pl create mode 100644 Evergreen/src/extras/import/load_legacy.sql create mode 100755 Evergreen/src/extras/import/parse_patron_xml.pl create mode 100755 Evergreen/src/extras/import/piece_count_loader.pl create mode 100755 Open-ILS/src/extras/import/marc2are.pl diff --git a/Evergreen/src/extras/import/create-batgirl-usermap.pl b/Evergreen/src/extras/import/create-batgirl-usermap.pl new file mode 100755 index 0000000000..c2706ba8ab --- /dev/null +++ 
b/Evergreen/src/extras/import/create-batgirl-usermap.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +warn "going for the data..."; + +my $sth = $dbh->prepare('select USER_BARCODE,USER_KEY from USER'); +$sth->execute; + +warn "got it, writing file..."; + +while (my $data = $sth->fetchrow_arrayref) { + print join('|', @$data) . "\n"; +} + diff --git a/Evergreen/src/extras/import/drain-batgirl-bill.pl b/Evergreen/src/extras/import/drain-batgirl-bill.pl new file mode 100755 index 0000000000..a5cb3a8890 --- /dev/null +++ b/Evergreen/src/extras/import/drain-batgirl-bill.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare('select * from BILL'); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{uc($_)} } qw/bill_amount balance bill_date cat_key call_key item_key user_key paid reason library bill_key1 bill_key2/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . 
"\n"; +} + +print "\\.\n"; + + diff --git a/Evergreen/src/extras/import/drain-batgirl-charge.pl b/Evergreen/src/extras/import/drain-batgirl-charge.pl new file mode 100755 index 0000000000..1cdfc1f60e --- /dev/null +++ b/Evergreen/src/extras/import/drain-batgirl-charge.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare('select * from CHARGE'); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{uc($_)} } qw/charge_date due_date renewal_date charge_key1 charge_key2 charge_key3 charge_key4 user_key overdue library claim_return_date/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . "\n"; +} + +print "\\.\n"; + + diff --git a/Evergreen/src/extras/import/drain-batgirl-cn.pl b/Evergreen/src/extras/import/drain-batgirl-cn.pl new file mode 100755 index 0000000000..2d3f632068 --- /dev/null +++ b/Evergreen/src/extras/import/drain-batgirl-cn.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare('select * from CALLNUM'); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{$_} } qw/CALL_NUM CAT_KEY CALL_KEY SHADOW/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . 
"\n"; +} + +print "\\.\n"; + + diff --git a/Evergreen/src/extras/import/drain-batgirl-hold.pl b/Evergreen/src/extras/import/drain-batgirl-hold.pl new file mode 100755 index 0000000000..4249fb827f --- /dev/null +++ b/Evergreen/src/extras/import/drain-batgirl-hold.pl @@ -0,0 +1,46 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare("select * from HOLD where STATUS = 'ACTIVE'"); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{uc($_)} } qw/available status notified num_of_notices cat_key call_key item_key hold_key user_key hold_date hold_range pickup_lib placing_lib owning_lib inactive_date inactive_reason hold_level/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . "\n"; +} + +print <connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare("select CAT_KEY, CALL_KEY, ITEM_KEY, HOLD_KEY, DESTINATION_LIB, OWNING_LIB, STARTING_LIB, concat(substring(TRANSIT_DATE,1,8),'T',substring(TRANSIT_DATE,9,4)) AS TRANSIT_DATE, TRANSIT_REASON from INTRANSIT"); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{uc($_)} } qw/destination_lib owning_lib starting_lib transit_date transit_reason cat_key call_key item_key hold_key/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . 
"\n"; +} + +print "\\.\n"; + + diff --git a/Evergreen/src/extras/import/drain-batgirl-item.pl b/Evergreen/src/extras/import/drain-batgirl-item.pl new file mode 100755 index 0000000000..8cf7dc7d96 --- /dev/null +++ b/Evergreen/src/extras/import/drain-batgirl-item.pl @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +use strict; +use DBI; + +my $dbh = DBI->connect('DBI:mysql:database=reports;host=batgirl.gsu.edu','miker','poopie'); + +print <prepare('select * from ITEM'); +$sth->execute; + +warn "got it, writing file..."; + +while (my $cn = $sth->fetchrow_hashref) { + my @data = map { $$cn{$_} } qw/CAT_1 CREATION_DATE CAT_2 CURRENT_LOCATION ITEM_ID CAT_KEY CALL_KEY ITEM_KEY PRICE ITEM_TYPE OWNING_LIBRARY SHADOW ITEM_COMMENT LAST_IMPORT_DATE HOME_LOCATION/; + for (@data) { + if (defined($_)) { + s/\\/\\\\/go; + s/\t/ /go; + } else { + $_ = '\N'; + } + } + print join("\t", @data) . "\n"; +} + +print "\\.\n"; + + diff --git a/Evergreen/src/extras/import/lib_spliter.pl b/Evergreen/src/extras/import/lib_spliter.pl index bace05723f..186c8dbe01 100755 --- a/Evergreen/src/extras/import/lib_spliter.pl +++ b/Evergreen/src/extras/import/lib_spliter.pl @@ -23,7 +23,7 @@ while () { push @{ $libs{$sys_pol}{libs} }, {name => $lib, shortname => $policy, type => 3 }; } -my $dbh = DBI->connect("dbi:Pg:host=localhost;dbname=$ARGV[0]",'postgres'); +my $dbh = DBI->connect("dbi:Pg:host=fw4;port=5432;dbname=$ARGV[0]",'postgres'); $dbh->begin_work; diff --git a/Evergreen/src/extras/import/load_legacy.sql b/Evergreen/src/extras/import/load_legacy.sql new file mode 100644 index 0000000000..61875f1e4e --- /dev/null +++ b/Evergreen/src/extras/import/load_legacy.sql @@ -0,0 +1,171 @@ +DROP TABLE joined_legacy; +DROP TABLE legacy_copy_status_map; + +BEGIN; + +-- Build the status map ... by hand, which suxorz. 
+CREATE TABLE legacy_copy_status_map (id int, name text); -- id: value loaded into asset.copy.status; name: legacy current_location code
+COPY legacy_copy_status_map FROM STDIN;
+0	ADULT
+0	AV
+0	AWARDBOOKS
+0	BHDESK
+2	BINDERY
+0	BIOGRAPHY
+0	BOOKMOBILE
+0	BROWSING
+11	CATALOGING
+1	CHECKEDOUT
+0	DATALOAD
+13	DISCARD
+0	DISPLAY
+0	EASY
+0	EASY-RDR
+0	FANTASY
+0	FIC
+0	FIXLIB
+0	FOREIGNL
+0	GA-CIRC
+0	GENEALOGY
+0	GEORGIA
+0	GOV-DOC
+8	HOLDS
+10	ILL
+10	ILS-ILL
+0	INDEX
+5	INPROCESS
+6	INTRANSIT
+0	JUV
+0	LEASE
+0	LG-PRNT
+0	LIB-CLOSED
+0	LITERACY
+0	LONGOVRDUE
+3	LOST
+3	LOST-PAID
+0	MAG
+0	MAPS
+4	MISSING
+0	MYSTERY
+0	NEW-BKS
+0	NEWS
+0	NONFIC
+0	OFFICE
+9	ON-ORDER
+10	ONLOAN
+0	OVERSIZED
+0	PBK
+0	PICTURE
+0	REF
+11	REPAIR
+0	RESERVES
+7	RESHELVING
+0	ROTATING
+0	SCIFI
+0	SHORTSTORY
+0	SPEC-COL-R
+0	SPEC-COLL
+0	SPECNEEDS
+0	STACKS
+0	STATELIBGA
+0	STORAGE
+0	THRILLER
+0	TODDLER
+0	UNAVAILBLE
+0	UNKNOWN
+0	VRTICLFILE
+0	WEBSITE
+0	WESTERN
+0	YA
+\.
+
+
+-- First, we build shelving location
+INSERT INTO asset.copy_location (name, owning_lib)
+	SELECT	DISTINCT l.home_location, ou.id
+	  FROM	legacy_item l JOIN actor.org_unit ou
+			ON (l.owning_library = ou.shortname);
+
+
+-- Now set their flags
+UPDATE	asset.copy_location
+  SET	holdable = FALSE
+  WHERE	name IN ('BINDERY','DISCARD','GENEALOGY','GOV-DOC','INDEX',
+		 'LIB-CLOSED','LONGOVRDUE','LOST','LOST-PAID','MAG', -- LONGOVRDUE spelled as in the code list above
+		 'NEWS','ONLOAN','REF','REPAIR','SPEC-COL-R');
+
+UPDATE	asset.copy_location
+  SET	opac_visible = FALSE
+  WHERE	name IN ('DATALOAD','DISCARD','FIXLIB','LIB-CLOSED', 'LONGOVRDUE', -- DATALOAD / LONGOVRDUE spelled as in the code list above
+		 'LOST','LOST-PAID','STORAGE', 'UNKNOWN');
+
+
+-- Now the old stat-cat stuff
+INSERT INTO asset.stat_cat (owner, name) VALUES (1, 'Legacy CAT1');
+INSERT INTO asset.stat_cat_entry (stat_cat, owner, value)
+	SELECT	DISTINCT currval('asset.stat_cat_id_seq'::regclass), 1, cat_1 -- currval: id of the stat_cat row inserted just above
+	  FROM	legacy_item;
+
+INSERT INTO asset.stat_cat (owner, name) VALUES (1, 'Legacy CAT2');
+INSERT INTO asset.stat_cat_entry (stat_cat, owner, value)
+	SELECT	DISTINCT currval('asset.stat_cat_id_seq'::regclass), 1, cat_2 -- currval: id of the stat_cat row inserted just above
+	  FROM	legacy_item;
+
+
+-- Create a temp table to speed up CN and copy inserts
+CREATE TEMP TABLE joined_legacy AS
+	SELECT	i.*, c.call_num
+	  FROM	legacy_item i
+		JOIN legacy_callnum c USING (cat_key,call_key);
+
+-- Import the call numbers
+-- Getting the owning lib from the first available copy on the CN
+INSERT INTO asset.call_number (creator,editor,record,label,owning_lib)
+	SELECT	DISTINCT 1, 1, l.cat_key , l.call_num, ou.id
+	  FROM	joined_legacy l
+		JOIN biblio.record_entry b ON (cat_key = b.id) -- only call numbers whose bib record was loaded
+		JOIN actor.org_unit ou ON (l.owning_library = ou.shortname);
+
+
+
+-- Import base copy data
+-- CREATE TEMP TABLE legacy_copy_list AS
+INSERT INTO asset.copy (circ_lib,creator,editor,create_date,barcode,status,location,loan_duration,fine_level,opac_visible,price,circ_modifier,call_number,alert_message)
+	SELECT	DISTINCT ou.id AS circ_lib,
+		1 AS creator,
+		1 AS editor,
+		l.creation_date AS create_date,
+		l.item_id AS barcode,
+		s_map.id AS status,
+		cl.id AS location,
+		2 AS loan_duration,
+		2 AS fine_level,
+		CASE WHEN l.shadow IS TRUE THEN FALSE ELSE TRUE END AS opac_visible, -- shadowed legacy items are hidden from the OPAC
+		(l.price/100::numeric)::numeric(8,2) AS price, -- legacy price is divided by 100 here (presumably stored in cents)
+		l.item_type AS circ_modifier,
+		cn.id AS call_number,
+		pc.cnt || ' pieces' as alert_message -- string literal needs single quotes; NULL when no piece-count row joins
+	  FROM	joined_legacy l
+		JOIN legacy_copy_status_map s_map
+			ON (s_map.name = l.current_location)
+		JOIN actor.org_unit ou
+			ON (l.owning_library = ou.shortname)
+		JOIN asset.copy_location cl
+			ON (ou.id = cl.owning_lib AND l.home_location = cl.name)
+		JOIN asset.call_number cn
+			ON (ou.id = cn.owning_lib AND l.cat_key = cn.record AND l.call_num = cn.label)
+		LEFT JOIN legacy_piece_count pc ON (pc.barcode = l.item_id);
+
+-- Move copy notes into the notes table ... 
non-public
+INSERT INTO asset.copy_note (owning_copy,creator,title,value)
+	SELECT	cp.id,
+		1,
+		'Legacy Note',
+		l.item_comment
+	  FROM	legacy_item l
+		JOIN asset.copy cp ON (cp.barcode = l.item_id)
+	  WHERE	l.item_comment IS NOT NULL
+		AND l.item_comment <> '';
+
+COMMIT;
+
diff --git a/Evergreen/src/extras/import/parse_patron_xml.pl b/Evergreen/src/extras/import/parse_patron_xml.pl
new file mode 100755
index 0000000000..54f589ec05
--- /dev/null
+++ b/Evergreen/src/extras/import/parse_patron_xml.pl
@@ -0,0 +1,319 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use lib '/openils/lib/perl5';
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::AppSession;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::Fieldmapper;
+use Digest::MD5 qw/md5_hex/;
+use Getopt::Long;
+use JSON;
+use DateTime;
+use Time::HiRes qw/time/;
+use XML::LibXML;
+
+my ($file,$config,$profileid,$identtypeid,$default_profile,$profile_map,$usermap) = # $usermap has no default (stays undef)
+	('return_file_0623-2.xml', '/openils/conf/bootstrap.conf', 1, 1, 1, 'profile.map');
+
+GetOptions(
+	'usermap=s'		=> \$usermap,
+	'file=s'		=> \$file,
+	'config=s'		=> \$config,
+	'default_profile=i'	=> \$default_profile,
+	'profile_map=s'		=> \$profile_map,
+	'profile_statcat_id=i'	=> \$profileid,
+	'identtypeid=i'		=> \$identtypeid,
+);
+
+my %u_map; # first field of each usermap line => second field, whitespace-trimmed
+if ($usermap) {
+	open(my $usermap_fh, '<', $usermap) or die "Can't open user map '$usermap': $!\n";
+	while (my $line = <$usermap_fh>) { # one "key|value" pair per line
+		chomp($line);
+		my ($b,$i) = split(/\|/, $line);
+		$b =~ s/^\s*(\S+)\s*$/$1/o;
+		$i =~ s/^\s*(\S+)\s*$/$1/o;
+		$u_map{$b} = $i;
+	}
+	close $usermap_fh;
+}
+
+my %p_map; # first field of each profile-map line => second field, whitespace-trimmed
+if ($profile_map) {
+	open(my $profile_fh, '<', $profile_map) or die "Can't open profile map '$profile_map': $!\n";
+	while (my $line = <$profile_fh>) { # one "key|value" pair per line
+		chomp($line);
+		my ($b,$i) = split(/\|/, $line);
+		$b =~ s/^\s*(\S+)\s*$/$1/o;
+		$i =~ s/^\s*(\S+)\s*$/$1/o;
+		$p_map{$b} = $i;
+	}
+	close $profile_fh;
+}
+
+my $doc = XML::LibXML->new->parse_file($file);
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+my $cstore = OpenSRF::AppSession->create( 
'open-ils.cstore' ); + +my $profiles = $cstore->request( + 'open-ils.cstore.direct.permission.grp_tree.search.atomic', + { id => { '!=' => undef } }, +)->gather(1); + +my $orgs = $cstore->request( + 'open-ils.cstore.direct.actor.org_unit.search.atomic', + { id => { '!=' => undef } }, +)->gather(1); + +$profiles = { map { ($_->name => $_->id) } @$profiles }; +$orgs = { map { ($_->shortname => $_->id) } @$orgs }; + +my $starttime = time; +my $count = 1; +for my $patron ( $doc->documentElement->childNodes ) { + my $p = new Fieldmapper::actor::user; + my $card = new Fieldmapper::actor::card; + my $profile_sce = new Fieldmapper::actor::stat_cat_entry_user_map; + + my $old_profile = $patron->findvalue( 'user_profile' ); + + my $bc = $patron->findvalue( 'user_id' ); + + next unless $bc; + + my $uid; + if (keys %u_map) { + $uid = $u_map{$bc}; + unless ($uid) { + $count++; + next; + } + } else { + $uid = $count; + } + + unless ($uid > 1) { + $count++; + next; + } + + $card->barcode( $bc ); + $card->usr( $uid ); + $card->active( 't' ); + + $p->id( $uid ); + $p->usrname( $bc ); + $p->passwd( $patron->findvalue( 'user_pin' ) ); + + my $new_profile = $p_map{$old_profile}; + unless ($new_profile) { + $count++; + next; + } + + $p->profile( $$profiles{$new_profile} || $default_profile ); + + # some defaults + $p->standing(1); + $p->active('t'); + $p->master_account('f'); + $p->super_user('f'); + $p->usrgroup($uid); + $p->claims_returned_count(0); + $p->credit_forward_balance(0); + $p->last_xact_id('IMPORT-'.$starttime); + + $p->barred('f'); + $p->barred('t') if ( $patron->findvalue( 'user_status' ) eq 'BARRED' ); + + $p->ident_type( $identtypeid ); + my $id_val = $patron->findvalue( 'user_altid' ); + $p->ident_value( $id_val ) if ($id_val); + + $p->first_given_name( $patron->findvalue( 'firstname' ) ); + $p->second_given_name( $patron->findvalue( 'middle_name' ) ); + $p->family_name( $patron->findvalue( 'last_name' ) ); + + $p->day_phone( $patron->findvalue( 'Address/dayphone' ) 
); + $p->evening_phone( $patron->findvalue( 'Address/homephone' ) ); + $p->other_phone( $patron->findvalue( 'Address/workphone' ) ); + $p->email( $patron->findvalue( 'email' ) ); + + my $hlib = $$orgs{$patron->findvalue( 'user_library' )}; + unless ($hlib) { + $count++; + next; + } + $p->home_ou( $hlib ); + + $p->dob( parse_date( $patron->findvalue( 'birthdate' ) ) ); + $p->create_date( parse_date( $patron->findvalue( 'user_priv_granted' ) ) ); + $p->expire_date( parse_date( $patron->findvalue( 'user_priv_expires' ) ) ); + + $p->alert_message("Legacy Import Message: old profile was FIXME") + if ($old_profile eq 'FIXME'); + + my $net_access = 1; + $net_access = 2 if ($old_profile =~ /^U.I/o); + $net_access = 3 if ($old_profile =~ /^X.I/o); + + $p->net_access_level( $net_access ); + + $profile_sce->target_usr( $uid ); + $profile_sce->stat_cat( $profileid ); + $profile_sce->stat_cat_entry( $old_profile ); + + my @addresses; + my $mailing_addr_id = $patron->findvalue( 'user_mailingaddr' ); + + for my $addr ( $patron->findnodes( "Address" ) ) { + my $prefix = 'coa_'; + + my $line1 = $addr->findvalue( "${prefix}line1" ); + $prefix = 'std_' if (!$line1); + + $line1 = $addr->findvalue( "${prefix}line1" ); + next unless ($line1); + + my $a = new Fieldmapper::actor::user_address; + $a->usr( $uid ); + $a->street1( $line1 ); + $a->street2( $addr->findvalue( "${prefix}line2" ) ); + $a->city( $addr->findvalue( "${prefix}city" ) ); + $a->state( $addr->findvalue( "${prefix}state" ) ); + $a->post_code( + $addr->findvalue( "${prefix}zip" ) . + '-' . 
$addr->findvalue( "${prefix}zip4" )
+		);
+
+		$a->valid( 'f' );
+		$a->valid( 't' ) if ($prefix eq 'std_');
+
+		$a->within_city_limits( 'f' );
+		$a->country('USA');
+
+		if ($addr->getAttribute('addr_type') == $mailing_addr_id) { # NOTE(review): numeric compare; assumes both values are numeric -- confirm
+			$a->address_type( 'LEGACY MAILING' );
+		} else {
+			$a->address_type( 'LEGACY' );
+		}
+
+		push @addresses, $a;
+
+		if ($prefix eq 'coa_') { # a coa_ address was stored above; also store the std_ one if present
+			$prefix = 'std_';
+
+			$line1 = $addr->findvalue( "${prefix}line1" );
+			next unless ($line1);
+
+			$a = new Fieldmapper::actor::user_address;
+			$a->usr( $uid );
+			$a->street1( $line1 );
+			$a->street2( $addr->findvalue( "${prefix}line2" ) );
+			$a->city( $addr->findvalue( "${prefix}city" ) );
+			$a->state( $addr->findvalue( "${prefix}state" ) );
+			$a->post_code(
+				$addr->findvalue( "${prefix}zip" ) .
+				'-' . $addr->findvalue( "${prefix}zip4" )
+			);
+
+			$a->valid( 'f' );
+			$a->valid( 't' ) if ($prefix eq 'std_');
+
+			$a->within_city_limits( 'f' );
+			$a->country('USA');
+
+			$a->address_type( 'LEGACY' );
+
+			push @addresses, $a;
+		}
+	}
+
+	my @notes;
+	for my $note_field ( qw#note comment voter bus_school Address/phone1 Address/phone2# ) {
+		for my $note ( $patron->findnodes( $note_field) ) {
+			my $a = new Fieldmapper::actor::usr_note;
+
+			$a->creator(1);
+			$a->usr( $uid );
+			$a->title( "Legacy ".$note->localName );
+			$a->value( $note->textContent );
+			$a->pub( 'f' );
+			push @notes, $a;
+		}
+	}
+
+	print STDERR "\r$count ".$count/(time - $starttime) unless ($count % 100);
+	print JSON->perl2JSON( $_ )."\n" for ($p,$card,$profile_sce,@addresses,@notes);
+
+	$count++;
+}
+
+print STDERR "\n";
+# parse_date($string): normalise a legacy date string to 'YYYY-MM-DD'; returns undef when it cannot be parsed or validated.
+# 'NEVER' yields today's month/day 20 years ahead; a day/month pair that could be read either way is taken in the order each branch comment names.
+sub parse_date {
+	my $string = shift;
+	my $group = shift; # accepted but not used
+
+	my ($y,$m,$d);
+
+	if ($string eq 'NEVER') {
+		my (undef,undef,undef,$d,$m,$y) = localtime();
+		return sprintf('%04d-%02d-%02d', $y + 1920, $m + 1, $d); # localtime year is offset from 1900, so +1920 is 20 years out
+	} elsif (length($string) == 8 && $string =~ /^(\d{4})(\d{2})(\d{2})$/o) {
+		($y,$m,$d) = ($1,$2,$3);
+	} elsif ($string =~ /(\d+)\D(\d+)\D(\d+)/o) { #looks like it's parsable
+		if ( length($3) > 2 ) { # looks like mm.dd.yyyy
+			if ( $1 < 99 && $2 < 99 && $1 > 0 && $2 > 0 && $3 > 0) {
+				if ($1 > 12 && $1 < 32 && $2 < 13) { # well, actually it looks like dd.mm.yyyy
+					($y,$m,$d) = ($3,$2,$1);
+				} elsif ($1 < 13 && $2 < 32) { # default reading mm.dd.yyyy, including ambiguous pairs
+					($y,$m,$d) = ($3,$1,$2);
+				}
+			}
+		} elsif ( length($1) > 3 ) { # format probably yyyy.mm.dd
+			if ( $3 < 99 && $2 < 99 && $1 > 0 && $2 > 0 && $3 > 0) {
+				if ($2 > 12 && $2 < 32 && $3 < 13) { # well, actually it looks like yyyy.dd.mm -- why, I don't know
+					($y,$m,$d) = ($1,$3,$2);
+				} elsif ($2 < 13 && $3 < 32) { # default reading yyyy.mm.dd, including ambiguous pairs
+					($y,$m,$d) = ($1,$2,$3);
+				}
+			}
+		} elsif ( $1 < 99 && $2 < 99 && $3 < 99 && $1 > 0 && $2 > 0 && $3 > 0) {
+			if ($3 < 7) { # probably 2000 or greater, mm.dd.yy
+				$y = $3 + 2000; # assign the outer $y; a fresh "my" here left the year undefined
+				if ($1 > 12 && $1 < 32 && $2 < 13) { # well, actually it looks like dd.mm.yyyy
+					($m,$d) = ($2,$1);
+				} elsif ($1 < 13 && $2 < 32) { # default reading mm.dd.yy
+					($m,$d) = ($1,$2);
+				}
+			} else { # probably before 2000, mm.dd.yy
+				$y = $3 + 1900; # assign the outer $y; a fresh "my" here left the year undefined
+				if ($1 > 12 && $1 < 32 && $2 < 13) { # well, actually it looks like dd.mm.yyyy
+					($m,$d) = ($2,$1);
+				} elsif ($1 < 13 && $2 < 32) { # default reading mm.dd.yy
+					($m,$d) = ($1,$2);
+				}
+			}
+		}
+	}
+
+	my $date;
+	if ($y && $m && $d) {
+		try {
+			$date = sprintf('%04d-%02d-%02d',$y, $m, $d) # zero-pad the day ('%-2d' left-justified it)
+				if (DateTime->new( year => $y, month => $m, day => $d )); # DateTime throws on impossible dates
+		} otherwise {};
+	}
+
+	return $date;
+}
+
diff --git a/Evergreen/src/extras/import/piece_count_loader.pl b/Evergreen/src/extras/import/piece_count_loader.pl
new file mode 100755
index 0000000000..1fa545bcf2
--- /dev/null
+++ b/Evergreen/src/extras/import/piece_count_loader.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+print <) {
+	chomp;
+	my ($bc,$c) = split '\|';
+	$bc =~ s/\s*$//o;
+	print "$bc\t$c\n" if ($c > 1);
+}
+
+print '\.'."\n";
+print "CREATE INDEX pc_bc_idx ON legacy_piece_count (barcode);\n";
+
diff --git a/Open-ILS/src/extras/import/direct_ingest.pl b/Open-ILS/src/extras/import/direct_ingest.pl index dff72abdea..6fcae1ad1b 100755 --- 
a/Open-ILS/src/extras/import/direct_ingest.pl +++ b/Open-ILS/src/extras/import/direct_ingest.pl @@ -26,13 +26,14 @@ use MARC::Charset; MARC::Charset->ignore_errors(1); -my ($workers, $config, $prefix) = - (1, '/openils/conf/bootstrap.conf', 'marc-out-'); +my ($auth, $workers, $config, $prefix) = + (0, 1, '/openils/conf/bootstrap.conf', 'marc-out-'); GetOptions( 'threads=i' => \$workers, 'config=s' => \$config, 'prefix=s' => \$prefix, + 'authority' => \$auth, ); my @ses; @@ -78,15 +79,18 @@ sub worker { my $f = new FileHandle(">$fname"); + my $meth = 'open-ils.ingest.full.biblio.object.readonly'; + $meth = 'open-ils.ingest.full.authority.object.readonly' if ($auth); + + $meth = OpenILS::Application::Ingest->method_lookup( $meth ); + while (my $rec = <$pipe>) { my $bib = JSON->JSON2perl($rec); my $data; try { - ($data) = OpenILS::Application::Ingest - ->method_lookup( 'open-ils.ingest.full.biblio.object.readonly' ) - ->run( $bib ); + ($data) = $meth->run( $bib ); } catch Error with { my $e = shift; warn "Couldn't process record: $e\n >>> $rec\n"; @@ -124,17 +128,19 @@ sub postprocess { my $f = shift; my $bib = $data->{bib}; - my $field_entries = $data->{worm_data}->{field_entries}; + my $field_entries = $data->{worm_data}->{field_entries} unless ($auth); my $full_rec = $data->{worm_data}->{full_rec}; - my $fp = $data->{worm_data}->{fingerprint}; - my $rd = $data->{worm_data}->{descriptor}; + my $fp = $data->{worm_data}->{fingerprint} unless ($auth); + my $rd = $data->{worm_data}->{descriptor} unless ($auth); - $bib->fingerprint( $fp->{fingerprint} ); - $bib->quality( $fp->{quality} ); + $bib->fingerprint( $fp->{fingerprint} ) unless ($auth); + $bib->quality( $fp->{quality} ) unless ($auth); $f->printflush( JSON->perl2JSON($bib)."\n" ); - $f->printflush( JSON->perl2JSON($rd)."\n" ); - $f->printflush( JSON->perl2JSON($_)."\n" ) for (@$field_entries); + unless ($auth) { + $f->printflush( JSON->perl2JSON($rd)."\n" ); + $f->printflush( JSON->perl2JSON($_)."\n" ) for 
(@$field_entries); + } $f->printflush( JSON->perl2JSON($_)."\n" ) for (@$full_rec); } diff --git a/Open-ILS/src/extras/import/marc2are.pl b/Open-ILS/src/extras/import/marc2are.pl new file mode 100755 index 0000000000..1b627d9a6b --- /dev/null +++ b/Open-ILS/src/extras/import/marc2are.pl @@ -0,0 +1,135 @@ +#!/usr/bin/perl +use strict; +use warnings; + +use lib '/openils/lib/perl5/'; + +use OpenSRF::System; +use OpenSRF::Application; +use OpenSRF::EX qw/:try/; +use OpenSRF::AppSession; +use OpenSRF::MultiSession; +use OpenSRF::Utils::SettingsClient; +use OpenILS::Application::AppUtils; +use OpenILS::Utils::Fieldmapper; +use Digest::MD5 qw/md5_hex/; +use JSON; +use Data::Dumper; +use Unicode::Normalize; + +use Time::HiRes qw/time/; +use Getopt::Long; +use MARC::Batch; +use MARC::File::XML; +use MARC::Charset; +use UNIVERSAL::require; + +MARC::Charset->ignore_errors(1); + +my ($utf8, $id_field, $count, $user, $password, $config, $keyfile, @files, @trash_fields) = + (0, '998', 1, 'admin', 'open-ils', '/openils/conf/bootstrap.conf'); + +GetOptions( + 'startid=i' => \$count, + 'user=s' => \$user, + 'password=s' => \$password, + 'config=s' => \$config, + 'file=s' => \@files, +); + +@files = @ARGV if (!@files); + +my @ses; +my @req; +my %processing_cache; + +OpenSRF::System->bootstrap_client( config_file => $config ); +Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); + +$user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id; + +select STDERR; $| = 1; +select STDOUT; $| = 1; + +my $batch = new MARC::Batch ( 'USMARC', @files ); +$batch->strict_off(); +$batch->warnings_off(); + +my $starttime = time; +my $rec; +while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { + next if ($rec == -1); + my $id = $count; + + (my $xml = $rec->as_xml_record()) =~ s/\n//sog; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+id($id); + $bib->active('t'); + $bib->deleted('f'); + $bib->marc($xml); + 
$bib->creator($user);
+	$bib->create_date('now');
+	$bib->editor($user);
+	$bib->edit_date('now');
+	$bib->arn_source('LEGACY');
+	$bib->arn_value($count);
+	$bib->last_xact_id('IMPORT-'.$starttime);
+
+	print JSON->perl2JSON($bib)."\n";
+
+	$count++;
+
+	if (!($count % 20)) {
+		print STDERR "\r$count\t". $count / (time - $starttime); # progress: records so far and records per second
+	}
+}
+# login($username, $password, $type): authenticate via open-ils.auth and return the auth token; dies on any failure.
+sub login {
+	my( $username, $password, $type ) = @_;
+
+	$type ||= "staff"; # default only when no type was passed ('|=' bitwise-ORed the strings together)
+
+	my $seed = OpenILS::Application::AppUtils->simplereq(
+		'open-ils.auth',
+		'open-ils.auth.authenticate.init',
+		$username
+	);
+
+	die("No auth seed. Couldn't talk to the auth server") unless $seed;
+
+	my $response = OpenILS::Application::AppUtils->simplereq(
+		'open-ils.auth',
+		'open-ils.auth.authenticate.complete',
+		{	username => $username,
+			password => md5_hex($seed . md5_hex($password)), # the seed is hashed with the MD5 of the password
+			type => $type });
+
+	die("No auth response returned on login.") unless $response;
+
+	my $authtime = $response->{payload}->{authtime}; # read but not used below
+	my $authtoken = $response->{payload}->{authtoken};
+
+	die("Login failed for user $username!") unless $authtoken;
+
+	return $authtoken;
+}
+# entityize($stuff, $form): normalise to NFD when $form is 'D', otherwise NFC, then turn U+0080..U+FFFD characters into &#xHEX; entities.
+sub entityize {
+	my $stuff = shift;
+	my $form = shift;
+
+	if ($form and $form eq 'D') {
+		$stuff = NFD($stuff);
+	} else {
+		$stuff = NFC($stuff);
+	}
+
+	$stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+	return $stuff;
+}
+
diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm b/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm index fe215b442b..e5e27e9cc4 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm @@ -288,7 +288,7 @@ sub ro_authority_ingest_single_object { $_->record($bib->id) for (@mfr); - return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd }; + return { full_rec => \@mfr }; } __PACKAGE__->register_method( api_name => "open-ils.ingest.full.authority.object.readonly", @@ -358,7 +358,6 @@ sub ro_authority_ingest_stream_record 
{ $log->debug("Running open-ils.ingest.full.authority.record.readonly ..."); my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec); - $_->source($rec) for (@{$res->{field_entries}}); $_->record($rec) for (@{$res->{full_rec}}); $client->respond( $res ); @@ -417,7 +416,6 @@ sub rw_authority_ingest_stream_import { $log->debug("Running open-ils.ingest.full.authority.xml.readonly ..."); my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc); - $_->source($bib->id) for (@{$res->{field_entries}}); $_->record($bib->id) for (@{$res->{full_rec}}); $client->respond( $res ); -- 2.43.2