From 4118424b8d80dfb34f3e32c12e1f538a20f8aa92 Mon Sep 17 00:00:00 2001 From: Jason Stephenson Date: Sat, 2 Jun 2018 20:33:23 -0400 Subject: [PATCH] LP 1768715: pingest.pl Release Notes and Installation Add pingest.pl to Open-ILS/src/Makefile.am so it will be installed by default. Add release notes for pingest.pl. Signed-off-by: Jason Stephenson Signed-off-by: Bill Erickson --- Open-ILS/src/Makefile.am | 3 +- .../Cataloging/pingest.adoc | 70 +++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 docs/RELEASE_NOTES_NEXT/Cataloging/pingest.adoc diff --git a/Open-ILS/src/Makefile.am b/Open-ILS/src/Makefile.am index 21508e898e..444846b914 100644 --- a/Open-ILS/src/Makefile.am +++ b/Open-ILS/src/Makefile.am @@ -129,7 +129,8 @@ gen_scripts = \ $(supportscr)/authority_authority_linker.pl \ $(supportscr)/eg_db_config \ $(supportscr)/marc_export \ - $(supportscr)/offline-blocked-list.pl + $(supportscr)/offline-blocked-list.pl \ + $(supportscr)/pingest.pl gen_docs = \ $(examples)/apache/eg.conf \ diff --git a/docs/RELEASE_NOTES_NEXT/Cataloging/pingest.adoc b/docs/RELEASE_NOTES_NEXT/Cataloging/pingest.adoc new file mode 100644 index 0000000000..5f381b9d03 --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/Cataloging/pingest.adoc @@ -0,0 +1,70 @@ +Parallel Ingest with pingest.pl +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A program named pingest.pl is now installed to allow faster record +ingest. It performs ingest in parallel so that multiple batches can +be done simultaneously. It operates by splitting the records to be +ingested up into batches and running all of the ingest methods on each +batch. You may pass in options to control how many batches are run at +the same time, how many records there are per batch, and which ingest +operations to skip. + +NOTE: The browse ingest is presently done in a single process over all +of the input records as it cannot run in parallel with itself. It +does, however, run in parallel with the other ingests. 
+ +Command Line Options +++++++++++++++++++++ +pingest.pl accepts the following command line options: + +--host:: + The server where PostgreSQL runs (either host name or IP address). + The default is read from the PGHOST environment variable or + "localhost". + +--port:: + The port on the host where PostgreSQL listens. The default is read + from the PGPORT environment variable or 5432. + +--db:: + The database to connect to on the host. The default is read from + the PGDATABASE environment variable or "evergreen". + +--user:: + The username for database connections. The default is read from + the PGUSER environment variable or "evergreen". + +--password:: + The password for database connections. The default is read from + the PGPASSWORD environment variable or "evergreen". + +--batch-size:: + Number of records to process per batch. The default is 10,000. + +--max-child:: + Max number of worker processes (i.e. the number of batches to + process simultaneously). The default is 8. + +--skip-browse:: +--skip-attrs:: +--skip-search:: +--skip-facets:: +--skip-display:: + Skip the selected reingest component. + +--start-id:: + Start processing at this record ID. + +--end-id:: + Stop processing when this record ID is reached. + +--pipe:: + Read record IDs to reingest from standard input. This option + conflicts with --start-id and/or --end-id. + +--max-duration:: + Stop processing after this many total seconds have passed. The + default is to run until all records have been processed. + +--help:: + Show the help text. + -- 2.43.2