From 78a0ca402923dd6302accd354317e537c440670f Mon Sep 17 00:00:00 2001
From: Robert Soulliere
Date: Tue, 1 Feb 2011 16:00:23 -0500
Subject: [PATCH] Add bib importing information provided by Michael Peters and
 Repke De Vries.

---
 1.6/admin/migratingdata.xml | 322 +++++++++++++++++++++++++++++++-----
 1 file changed, 283 insertions(+), 39 deletions(-)

diff --git a/1.6/admin/migratingdata.xml b/1.6/admin/migratingdata.xml
index eaa51051d5..af49df8cb2 100644
--- a/1.6/admin/migratingdata.xml
+++ b/1.6/admin/migratingdata.xml
@@ -14,8 +14,8 @@
 One of the most important and challenging tasks is migrating your bibliographic records to a new system. The procedure may be different depending on the system from which you
-are migrating and the content of the marc records exported from the existing system. The proecedures in this section deal with the process once the data from the existing system
-is exporterd into marc records. It does not cover exporting data from your existing non-Evergreen system.
+are migrating and the content of the MARC records exported from the existing system. The procedures in this section deal with the process once the data from the existing system
+is exported into MARC records. It does not cover exporting data from your existing non-Evergreen system.
 Several tools for importing bibliographic records into Evergreen can be found in the Evergreen installation folder
 (/home/opensrf/Evergreen-ILS-1.6.1.6/Open-ILS/src/extras/import/) and are also available from the Evergreen repository (
@@ -123,45 +123,289 @@ http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
 Metarecords are required to place holds on items, among other actions.
+
+Migrating Bibliographic Records Using the ESI Migration Tools
+
+The following procedure explains how to migrate bibliographic records from MARC records into Evergreen. It does not cover exporting records
+from specific proprietary ILS systems. For assistance with exporting records from your current system, please refer to the manuals for your
+system or ask for help from the Evergreen community.
+
+1. Download the Evergreen migration utilities from the git repository.
+   Use the command git clone git://git.esilibrary.com/git/migration-tools.git to clone the migration tools.
+   Install the migration tools.
+
+2. Dump the MARC records into MARCXML using yaz-marcdump:
+
+      echo '<?xml version="1.0" encoding="UTF-8"?>' > imported_marc_records.xml
+      yaz-marcdump -f MARC-8 -t UTF-8 -o marcxml imported_marc_records.mrc >> imported_marc_records.xml
+
+3. Test the validity of the XML file using xmllint:
+
+      xmllint --noout imported_marc_records.xml 2> marc.xml.err
+
+4. Clean up the MARC XML file using the marc_cleanup utility:
+
+      marc_cleanup --marcfile=imported_marc_records.xml --fullauto [--renumber-from #] -ot 001
+
+   The --renumber-from option is required if you already have bibliographic records in your system. Use it to set the starting id number
+   higher than the last id in the biblio.record_entry table. The marc_cleanup command will generate a file called clean.marc.xml.
+
+5. Create a fingerprint file using the fingerprinter utility:
+
+      fingerprinter -o incumbent.fp -x incumbent.ex clean.marc.xml
+
+   fingerprinter is used for deduplication of the incumbent records. The -o option specifies the output file and the -x option specifies
+   the error output file.
+
+6. If you have bibliographic records previously imported into your system, create a fingerprint file for the existing Evergreen
+   bibliographic records as well:
+
+      fingerprinter -o production.fp -x production.fp.ex --marctype=MARC21 existing_marc_records.mrc --tag=901 --subfield=c
+
+   Here fingerprinter generates fingerprints for the existing (production) records so that they can be matched against the incumbent
+   records.
+
+7. Create a merged fingerprint file, removing duplicate records:
+
+      cat production.fp incumbent.fp | sort -r > dedupe.fp
+      match_fingerprints [-t start id] -o records.merge dedupe.fp
+
+8. Create a new import XML file using the extract_loadset utility:
+
+      extract_loadset -l 1 -i clean.marc.xml -o merged.xml records.merge
+
+9. Extract all of the currently used TCNs and generate the .bre and .ingest files to prepare for the bibliographic record load:
+
+      psql -U evergreen -c "select tcn_value from biblio.record_entry where not deleted" | perl -npe 's/^\s+//;' > used_tcns
+      marc2bre.pl --idfield 903 [--startid=#] --marctype=XML -f merged.xml --used_tcn_file=used_tcns > evergreen_bre_import_file.bre
+
+   The --startid option needs to match the start id used in earlier steps and must be higher than the largest id value in the
+   biblio.record_entry table. The --idfield option should match the MARC datafield used to store your record ids.
+
+10. Ingest the bibliographic records into the Evergreen database:
+
+      direct_ingest.pl < evergreen_bre_import_file.bre > evergreen_ingest_file.ingest
+      parallel_pg_loader.pl \
+      -or bre \
+      -or mrd \
+      -or mfr \
+      -or mtfe \
+      -or mafe \
+      -or msfe \
+      -or mkfe \
+      -or msefe \
+      -a mrd \
+      -a mfr \
+      -a mtfe \
+      -a mafe \
+      -a msfe \
+      -a mkfe \
+      -a msefe evergreen_ingest_file.ingest
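+   Before loading, it is worth confirming that the record count has stayed plausible through each stage of the conversion. A rough check
+   (a sketch only: the grep pattern assumes one <record> element per line in the MARCXML files, and that marc2bre.pl writes one record
+   per line to the .bre file):
+
+      grep -c '<record' imported_marc_records.xml
+      grep -c '<record' clean.marc.xml
+      wc -l evergreen_bre_import_file.bre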
+11. Load the records using psql and the SQL scripts generated in the previous step:
+
+      psql -U evergreen < pg_loader-output.sql > load_pg_loader-output
+      psql -U evergreen < create_metabib.sql > log.create_metabib
+
+12. Extract holdings from the MARC records for importing copies into Evergreen using the extract_holdings utility:
+
+      extract_holdings --marcfile=clean.marc.xml --holding 999 --copyid 999i --map holdings.map
+
+   This command extracts holdings based on the 999 datafield in the MARC records; the copy id is taken from subfield i of the 999
+   datafield. You may need to adjust these options based on the field used for holdings information in your MARC records.
+   The --map holdings.map option refers to a file used for mapping subfields to the holdings data you would like extracted; a sample
+   map file is sketched at the end of this section.
+
+   Running the extract_holdings script should produce an SQL file HOLDINGS.pg similar to:
+
+      BEGIN;
+
+      egid, hseq, l_call_num, l_barcode, l_location, l_owning_lib, l_circ_modifier,
+      40 0 HD3616.K853 U54 1997 30731100751928 STACKS FENNELL BOOK
+      41 1 HV6548.C3 S984 1998 30731100826613 STACKS FENNELL BOOK
+      41 2 HV6548.C3 S984 1998 30731100804958 STACKS BRANTFORD BOOK
+      ...
+
+   This file can be used for importing holdings into Evergreen. The egid column is critical: it links each volume and copy to its
+   bibliographic record. Please refer to the following section for the steps to import your holdings into Evergreen.
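+   As referenced above, here is a sketch of what a holdings.map file for the 999-based extraction might look like. The subfield
+   assignments other than the copy id (call number in 999a, location in 999l, owning library in 999m, item type in 999t) are illustrative
+   assumptions; adjust them to wherever your records actually store each value, and confirm the exact map-file syntax against the
+   migration-tools documentation:
+
+      call_num 999a
+      barcode 999i
+      location 999l
+      owning_lib 999m
+      circ_modifier 999t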
 Adding Copies to Bibliographic Records
-Once you've loaded the bibliographic records in Evergreen, you can search and view the records in the staff client, but they will not be
-visible in the catalogue. By default, bibliographic records will not be visible in the catalogue until you add a copy representing a
-physical manifestation of that resource. You can add a copy manually through the staff client via the Holdings maintenance screen, but if
-you're bulk-importing MARC records you probably want to bulk load the associated copies, call numbers, and barcodes as well.
-Importing volumes and copies from <systemitem>MARC21XML</systemitem> holdings
-There is currently no simple method for importing holdings based on the contents of the MARC holdings field (852, as specified by
-http://www.loc.gov/marc/holdings/).
-However, a more or less automated method could be built that performs the following steps:
-Create a tab-delimited file that contains your holdings information
-Required fields: bibliographic ID, barcode, and call number
-Optional fields: shelving location (text) – see the asset.copy table for possible fields to include
-Create a staging table that matches the contents of your tab-delimited file.
-Insert the contents of your tab-delimited file into the table.
-Generate SQL scripts for item import to match the staging table that you created.
-Run the SQL scripts to create the holdings in Evergreen.
-If an ILS has the concept of item categories, these may be mapped to Evergreen via statistical categories in the asset.stat_cat table.
-Note that statistical categories cannot be used as search filters; individual branches can define their own statistical categories, and
-define their own statistical category entries for individual items - best use case for statistical categories is probably for gifts.
-In 2009, Conifer placed their migration tools in the Conifer ILS-Contrib SVN repository, which might be useful samples augmenting the
-basic staging table import approach.
-In 2010, Equinox contributed a set of migration utilities
+Before bibliographic records can be found in an OPAC search, copies need to be created. It is very important to understand how the
+various tables relate to each other with regard to holdings maintenance.
+The following procedure will guide you through the process of populating Evergreen with volumes and copies. This is a very simple
+example; the SQL queries may need to be adjusted for the specific data in your holdings.
+
+1. Create a staging_items staging table to hold the holdings data:
+
+      CREATE TABLE staging_items (
+          callnum text,    -- call number label
+          hseq int,        -- holdings sequence number from HOLDINGS.pg
+          bibkey int,      -- biblio.record_entry.id
+          createdate date,
+          location text,
+          barcode text,
+          item_type text,
+          owning_lib text  -- actor.org_unit.shortname
+      );
+
+2. Log in to Evergreen using psql and run the following COPY command to load the items generated by the extract_holdings utility:
+
+      COPY staging_items (bibkey, hseq, callnum, barcode, location, owning_lib, item_type) FROM 'HOLDINGS.pg';
+
+   The file HOLDINGS.pg and/or the COPY query may need to be adjusted for your particular circumstances.
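+   Before generating locations, call numbers, and copies from the staged rows, it is worth confirming that every owning_lib value in
+   staging_items matches an actor.org_unit shortname, because rows that fail the joins in the following steps are silently skipped. A
+   minimal check (a sketch using only the tables already defined in this procedure):
+
+      -- list staged owning_lib values with no matching org unit;
+      -- an empty result means every row will join cleanly
+      SELECT DISTINCT l.owning_lib
+      FROM staging_items l
+          LEFT JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname)
+      WHERE ou.id IS NULL;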
+3. Generate shelving locations from your staging table:
+
+      INSERT INTO asset.copy_location (name, owning_lib)
+      SELECT DISTINCT l.location, ou.id
+      FROM staging_items l
+          JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname);
+
+4. Generate circulation modifiers from your staging table:
+
+      INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media)
+      SELECT DISTINCT item_type AS code,
+          item_type AS name,
+          LOWER(item_type) AS description,
+          '001' AS sip2_media_type,
+          FALSE AS magnetic_media
+      FROM staging_items
+      WHERE item_type NOT IN (SELECT code FROM config.circ_modifier);
+
+5. Generate call numbers from your staging table:
+
+      INSERT INTO asset.call_number (creator, editor, record, label, owning_lib)
+      SELECT DISTINCT 1, 1, b.id, l.callnum, ou.id
+      FROM staging_items l
+          JOIN biblio.record_entry b ON (l.bibkey = b.id)
+          JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname);
+
+6. Generate copies from your staging table:
+
+      INSERT INTO asset.copy (
+          circ_lib, creator, editor, create_date, barcode,
+          status, location, loan_duration, fine_level, circ_modifier, deposit, ref, call_number)
+      SELECT DISTINCT ou.id AS circ_lib,
+          1 AS creator,
+          1 AS editor,
+          l.createdate AS create_date,
+          l.barcode AS barcode,
+          0 AS status,
+          cl.id AS location,
+          2 AS loan_duration,
+          2 AS fine_level,
+          l.item_type AS circ_modifier,
+          FALSE AS deposit,
+          CASE
+              WHEN l.item_type = 'REFERENCE' THEN TRUE
+              ELSE FALSE
+          END AS ref,
+          cn.id AS call_number
+      FROM staging_items l
+          JOIN actor.org_unit ou
+              ON (l.owning_lib = ou.shortname)
+          JOIN asset.copy_location cl
+              ON (ou.id = cl.owning_lib AND l.location = cl.name)
+          JOIN asset.call_number cn
+              ON (ou.id = cn.owning_lib
+                  AND l.callnum = cn.label
+                  AND l.bibkey = cn.record); -- match the record as well as the label
+
+You should now have copies in your Evergreen database and should be able to search and find the bibliographic records with attached
+copies.
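+To confirm the load, a quick per-library count of the volumes and copies just created can be compared against the holdings counts from
+your source system. A sketch against the standard Evergreen tables used above:
+
+      -- count call numbers (volumes) and copies per owning library
+      SELECT ou.shortname,
+          COUNT(DISTINCT cn.id) AS volumes,
+          COUNT(ac.id) AS copies
+      FROM asset.call_number cn
+          JOIN asset.copy ac ON (ac.call_number = cn.id)
+          JOIN actor.org_unit ou ON (cn.owning_lib = ou.id)
+      GROUP BY ou.shortname;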
 Migrating Patron Data
-- 
2.43.2