From 8af7e4263cd2e97e0c54dde61e78c473bcb8b64e Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Thu, 19 Jun 2014 15:52:42 -0400 Subject: [PATCH] LP#1330784 Add a sitemap generator for Evergreen Following the requirements at sitemaps.org, generate a set of sitemaps that reflect the bib record's last edit date, with 50,000 records per sitemap file. Users can run this script targeting different libraries and generating different output filenames using the documented options in the script. Signed-off-by: Dan Scott Signed-off-by: Ben Shum --- Open-ILS/src/Makefile.am | 1 + .../src/support-scripts/sitemap_generator | 232 ++++++++++++++++++ 2 files changed, 233 insertions(+) create mode 100755 Open-ILS/src/support-scripts/sitemap_generator diff --git a/Open-ILS/src/Makefile.am b/Open-ILS/src/Makefile.am index 5d159de43d..330f5abeaf 100644 --- a/Open-ILS/src/Makefile.am +++ b/Open-ILS/src/Makefile.am @@ -72,6 +72,7 @@ core_scripts = $(examples)/oils_ctl.sh \ $(supportscr)/long-overdue-status-update.pl \ $(supportscr)/purge_holds.srfsh \ $(supportscr)/purge_circulations.srfsh \ + $(supportscr)/sitemap_generator \ $(srcdir)/extras/eg_config \ $(srcdir)/extras/openurl_map.pl \ $(srcdir)/extras/import/marc_add_ids diff --git a/Open-ILS/src/support-scripts/sitemap_generator b/Open-ILS/src/support-scripts/sitemap_generator new file mode 100755 index 0000000000..2758971861 --- /dev/null +++ b/Open-ILS/src/support-scripts/sitemap_generator @@ -0,0 +1,232 @@ +#!/usr/bin/perl +# Copyright (C) 2014 Laurentian University +# Author: Dan Scott +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the GNU General Public License for more details.

use strict;
use warnings;
use XML::LibXML;
use File::Copy;
use Getopt::Long;
use File::Spec;
use File::Basename;
use DBI qw(:sql_types);

my ($dbhost, $dbport, $dbname, $dbuser, $dbpw, $help);
my $config_file = '';
my $sysconfdir = '';

# Escape the five XML special characters so that arbitrary text (for example
# a hostname or file name supplied on the command line) can be embedded in
# element content. Undefined input is treated as the empty string.
sub _xml_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

=item create_sitemaps() - Write the sitemap files

With a maximum of 50,000 URLs per sitemap, this method
automatically increments the sitemap file numbers and
generates a corresponding sitemap index that lists all
of the individual sitemap files.

Arguments:

  $settings - hash reference; the 'prefix' value is prepended to every
              output file name and the 'lib-hostname' value is prepended
              to every URL
  $bibs     - array reference of [record ID, edit date] pairs
  $aou_id   - optional org unit ID; when true, "?locg=<id>" is appended
              to every record URL

Writes <prefix>sitemap1.xml, <prefix>sitemap2.xml, ... and
<prefix>sitemapindex.xml. Dies if a file cannot be written. Returns 1
on success, or nothing (after printing a message to STDERR) when $bibs
is undefined.

See http://www.sitemaps.org/ for the specification

=cut

sub create_sitemaps {
    my ($settings, $bibs, $aou_id) = @_;

    # get_record_ids() returns nothing on failure; in that case leave any
    # existing sitemap files alone instead of replacing them with empty ones.
    unless (defined $bibs) {
        print STDERR "No record list available; sitemaps were not written\n";
        return;
    }

    my $max_urls   = 50000;
    my $sitemap_ns = 'http://www.sitemaps.org/schemas/sitemap/0.9';
    my $xml_decl   = '<?xml version="1.0" encoding="UTF-8"?>';
    my $base_url   = _xml_escape($settings->{'lib-hostname'});

    # Work out the number of files up front so that a record count that is
    # an exact multiple of $max_urls does not leave a trailing empty sitemap.
    # An empty record list still produces a single sitemap file.
    my $total = scalar @$bibs;
    my $file_total = $total ? int(($total + $max_urls - 1) / $max_urls) : 1;

    my @sitemaps;
    foreach my $f_cnt (1 .. $file_total) {
        my $fn = $settings->{'prefix'} . "sitemap$f_cnt.xml";
        push(@sitemaps, $fn);

        # Index range of the records that belong in this file; the range is
        # empty (0 .. -1) when there are no records at all.
        my $first = ($f_cnt - 1) * $max_urls;
        my $last  = $first + $max_urls - 1;
        $last = $total - 1 if $last > $total - 1;

        open(my $fh, '>', $fn) or die "Could not write sitemap $f_cnt\n";
        print $fh $xml_decl . "\n";
        print $fh qq{<urlset xmlns="$sitemap_ns">\n};
        foreach my $bib (@{$bibs}[$first .. $last]) {
            my $url = $base_url . "/eg/opac/record/" . $bib->[0];
            $url .= "?locg=$aou_id" if $aou_id;
            print $fh "<url><loc>$url</loc><lastmod>" . $bib->[1] . "</lastmod></url>\n";
        }
        print $fh "</urlset>\n";
        # Buffered write errors only surface when the handle is closed.
        close($fh) or die "Could not finish writing sitemap $f_cnt\n";
    }

    open(my $index_fh, '>', $settings->{'prefix'} . "sitemapindex.xml")
        or die "Could not write sitemap index\n";
    print $index_fh $xml_decl . "\n";
    print $index_fh qq{<sitemapindex xmlns="$sitemap_ns">\n};
    foreach my $fn (@sitemaps) {
        print $index_fh "<sitemap><loc>$base_url/" . _xml_escape($fn) . "</loc></sitemap>\n";
    }
    print $index_fh "</sitemapindex>\n";
    close($index_fh) or die "Could not finish writing sitemap index\n";

    return 1;
}

=item get_settings() - Extracts database settings from opensrf.xml

Takes the settings hash reference and fills in the 'host', 'port', 'db',
'user' and 'pw' entries from the open-ils.reporter-store database section
of the file named by $config_file. Entries that already hold a true value
(for example from the command line) are left untouched.

=cut

sub get_settings {
    my $settings = shift;

    my $db_path = "/opensrf/default/apps/open-ils.reporter-store/app_settings/database";

    my $parser = XML::LibXML->new();
    my $opensrf_config = $parser->parse_file($config_file);

    # If the user passed in settings at the command line,
    # we don't want to override them. The settings key and the XML element
    # name are the same for every entry. findvalue() yields a plain string
    # rather than a node list object.
    foreach my $key (qw(host port db user pw)) {
        next if $settings->{$key};
        $settings->{$key} = $opensrf_config->findvalue("$db_path/$key/text()");
    }

    return;
}

=item get_record_ids() - Gets a list of record IDs

Connects to the database described by the settings hash reference and
selects the distinct record IDs and edit dates of records joined to
asset.opac_visible_copies, most recently edited first. When the
'lib-shortname' setting is given, the list is limited to copies whose
circ_lib is that org unit or one of its direct children.

Returns ($bibs, $aou_id), where $bibs is an array reference of
[record ID, edit date] pairs and $aou_id is the org unit ID (undefined
when no short name was given). Returns nothing, after printing a message
to STDERR, if the connection, the short name lookup or the query fails.

=cut

sub get_record_ids {
    my $settings = shift;
    my $aou_id;

    my $dsn = 'dbi:Pg:dbname=' . $settings->{db} .
        ';host=' . $settings->{host} . ';port=' . $settings->{port} . ';';
    my $dbh = DBI->connect(
        $dsn, $settings->{user} . "", $settings->{pw} . "", {AutoCommit => 1}
    );

    # connect() returns undef on failure, so there is no handle to ask for
    # the error; the package variable carries the message instead.
    unless ($dbh) {
        my $error = defined $DBI::errstr ? $DBI::errstr : 'unknown error';
        print STDERR "Could not connect to database. ";
        print STDERR "Error was " . $error . "\n";
        return;
    }

    if ($settings->{'lib-shortname'}) {
        ($aou_id) = $dbh->selectrow_array(
            "SELECT id FROM actor.org_unit WHERE shortname = ?",
            undef, $settings->{'lib-shortname'}
        );
        # Without this check an unknown short name would silently produce
        # a sitemap that is not scoped to any library.
        unless ($aou_id) {
            print STDERR "Could not find a library with the short name '" .
                $settings->{'lib-shortname'} . "'\n";
            $dbh->disconnect;
            return;
        }
    }

    my $q = "
        SELECT DISTINCT bre.id, edit_date::date AS edit_date
        FROM biblio.record_entry bre
            INNER JOIN asset.opac_visible_copies aovc ON bre.id = aovc.record
    ";
    if ($aou_id) {
        $q .= " WHERE circ_lib IN (SELECT id FROM actor.org_unit WHERE id = ? OR parent_ou = ?)";
    }
    $q .= " ORDER BY edit_date DESC";

    my ($bibs, $error);
    my $stmt = $dbh->prepare($q);
    if ($stmt) {
        if ($aou_id) {
            $stmt->bind_param(1, $aou_id, { TYPE => SQL_INTEGER });
            $stmt->bind_param(2, $aou_id, { TYPE => SQL_INTEGER });
        }
        $bibs = $stmt->fetchall_arrayref([0, 1]) if $stmt->execute();
        $error = $stmt->errstr if $stmt->err;
    } else {
        $error = $dbh->errstr || 'unknown error';
    }
    $dbh->disconnect;

    if (defined $error) {
        print STDERR "Error was " . $error . "\n";
        return;
    }
    return ($bibs, $aou_id);
}

my $hostname;
my $aou_shortname;
my %settings = (
    prefix => ''
);

GetOptions(
    "lib-hostname=s" => \$settings{'lib-hostname'},
    "lib-shortname=s" => \$settings{'lib-shortname'},
    "prefix=s" => \$settings{'prefix'},
    "config-file=s" => \$config_file,
    "user=s" => \$settings{'user'},
    "password=s" => \$settings{'pw'},
    "database=s" => \$settings{'db'},
    "hostname=s" => \$settings{'host'},
    "port=i" => \$settings{'port'},
    "help" => \$help
);

if (!$config_file) {
    my @temp = `eg_config --sysconfdir`;
    chomp $temp[0];
    $sysconfdir = $temp[0];
    $config_file = File::Spec->catfile($sysconfdir, "opensrf.xml");
}

unless (-e $config_file) { die "Error: $config_file does not exist. \n"; }

if ($settings{'lib-hostname'}) {
    # Get additional settings from the config file
    get_settings(\%settings);

    my ($bibs, $aou_id) = get_record_ids(\%settings);
    create_sitemaps(\%settings, $bibs, $aou_id);
} else {
    $help = 1;
}

if ($help) {
    print <