#!/usr/bin/perl
# Copyright (C) 2014 Laurentian University
# Author: Dan Scott
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

use strict;
use warnings;
use XML::LibXML;
use File::Copy;
use Getopt::Long;
use File::Spec;
use File::Basename;
use DBI qw(:sql_types);
use DBD::Pg qw(:pg_types);

my ($dbhost, $dbport, $dbname, $dbuser, $dbpw, $help);
my $config_file = '';
my $sysconfdir = '';

# Limit imposed by the sitemap protocol on the number of URLs in one file.
my $MAX_URLS_PER_SITEMAP = 50000;

# XML namespace shared by sitemap files and the sitemap index.
my $SITEMAP_NS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

# Opens sitemap file number $number for writing, emits the XML prologue and
# the opening <urlset> tag, and appends the file name to @$sitemaps.
# Returns the open file handle; dies if the file cannot be opened.
sub _open_sitemap {
    my ($settings, $number, $sitemaps) = @_;

    my $fn = $settings->{'prefix'} . "sitemap$number.xml";
    open(my $fh, '>', $fn) or die "Could not write sitemap $number: $!\n";
    push(@$sitemaps, $fn);

    print $fh '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    print $fh '<urlset xmlns="' . $SITEMAP_NS . '">' . "\n";

    return $fh;
}

# Writes the closing </urlset> tag and closes the handle. The close is
# checked because buffered write errors only surface at that point.
sub _close_sitemap {
    my ($fh) = @_;

    print $fh "</urlset>\n";
    close($fh) or die "Could not finish writing sitemap: $!\n";

    return;
}

=item create_sitemaps() - Write the sitemap files

With a maximum of 50,000 URLs per sitemap, this method automatically
increments the sitemap file numbers and generates a corresponding
sitemap index that lists all of the individual sitemap files.

Arguments: the settings hash reference (uses the C<prefix> and
C<lib-hostname> keys), a reference to an array of C<[record id, date]>
pairs, and an optional org unit ID that is appended to each record URL
as C<?locg=ID>.

Dies if any of the output files cannot be written.

See http://www.sitemaps.org/ for the specification

=cut

sub create_sitemaps {
    my ($settings, $bibs, $aou_id) = @_;

    my @sitemaps;
    my $urls_in_file = 0;

    # The first sitemap is always created, even when there are no records.
    my $fh = _open_sitemap($settings, 1, \@sitemaps);

    foreach my $bib (@{ $bibs || [] }) {
        # Roll over lazily, just before writing the next record, so that a
        # record count that is an exact multiple of the limit does not leave
        # a trailing empty sitemap listed in the index.
        if ($urls_in_file == $MAX_URLS_PER_SITEMAP) {
            _close_sitemap($fh);
            $fh = _open_sitemap($settings, scalar(@sitemaps) + 1, \@sitemaps);
            $urls_in_file = 0;
        }

        my $loc = $settings->{'lib-hostname'} . "/eg/opac/record/" . $bib->[0];
        if ($aou_id) {
            $loc .= "?locg=$aou_id";
        }

        print $fh "<url><loc>" . $loc . "</loc>";
        # <lastmod> is optional in the protocol; omit it rather than emit
        # an empty element when no date came back for the record.
        if (defined $bib->[1]) {
            print $fh "<lastmod>" . $bib->[1] . "</lastmod>";
        }
        print $fh "</url>\n";

        $urls_in_file++;
    }
    _close_sitemap($fh);

    my $index_fn = $settings->{'prefix'} . "sitemapindex.xml";
    open(my $index_fh, '>', $index_fn)
        or die "Could not write sitemap index: $!\n";
    print $index_fh '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    print $index_fh '<sitemapindex xmlns="' . $SITEMAP_NS . '">' . "\n";
    foreach my $fn (@sitemaps) {
        print $index_fh "<sitemap><loc>" . $settings->{'lib-hostname'}
            . "/$fn</loc></sitemap>\n";
    }
    print $index_fh "</sitemapindex>\n";
    close($index_fh) or die "Could not finish writing sitemap index: $!\n";

    return;
}

=item get_settings() - Extracts database settings from opensrf.xml

Fills in the C<host>, C<port>, C<db>, C<user> and C<pw> keys of the
settings hash reference from the open-ils.reporter-store database
section of the configuration file. Values that are already set (for
example from the command line) are left untouched.

=cut

sub get_settings {
    my $settings = shift;

    my $db_xpath =
        "/opensrf/default/apps/open-ils.reporter-store/app_settings/database";

    my $parser = XML::LibXML->new();
    my $opensrf_config = $parser->parse_file($config_file);

    # Each settings key has the same name as its element in the config file.
    foreach my $key (qw(host port db user pw)) {
        # If the user passed in settings at the command line,
        # we don't want to override them
        next if $settings->{$key};

        # findvalue() yields a plain string rather than a node list object.
        $settings->{$key} = $opensrf_config->findvalue("$db_xpath/$key/text()");
    }

    return;
}

=item get_record_ids() - Gets a list of record IDs

Connects to the database described by the settings hash reference and
returns a two-element list: a reference to an array of
C<[record id, edit date]> rows ordered by date and ID descending, and
the org unit ID matching the C<lib-shortname> setting (undef when no
short name was given or none matched, in which case the query is not
scoped to a library).

Edit dates older than the C<date> setting are reported as that date.

On connection or query failure an error is printed to STDERR and an
empty list is returned.

=cut

sub get_record_ids {
    my $settings = shift;
    my $aou_id;

    my $dsn = 'dbi:Pg:dbname=' . $settings->{db}
        . ';host=' . $settings->{host}
        . ';port=' . $settings->{port} . ';';

    my $dbh = DBI->connect(
        $dsn,
        $settings->{user} . "",
        $settings->{pw} . "",
        {AutoCommit => 1}
    );

    # DBI->connect returns undef on failure, so there is no handle to ask
    # for the error; the package-level $DBI::errstr carries the message.
    if (!$dbh) {
        print STDERR "Could not connect to database. ";
        print STDERR "Error was "
            . (defined $DBI::errstr ? $DBI::errstr : 'unknown') . "\n";
        return;
    }

    if ($settings->{'lib-shortname'}) {
        # $aou_id stays undef when the short name matches no org unit.
        ($aou_id) = $dbh->selectrow_array(
            "SELECT id FROM actor.org_unit WHERE shortname = ?",
            undef,
            $settings->{'lib-shortname'}
        );
    }

    # Clamp each record's edit date to the date floor.
    my $edit_date = "
            CASE WHEN bre.edit_date::date < (SELECT val FROM date_floor LIMIT 1)
                 THEN (SELECT val FROM date_floor LIMIT 1)
                 ELSE bre.edit_date::date
            END AS edit_date";

    my $q = "
        WITH date_floor AS (
            SELECT ?::date AS val
        ) ";
    if ($aou_id) {
        $q .= "
        , copy_orgs AS (
            SELECT id FROM actor.org_unit
            WHERE id IN (SELECT id FROM actor.org_unit_descendants(?))
        ), uri_orgs AS (
            SELECT id FROM actor.org_unit
            WHERE id IN (SELECT id FROM actor.org_unit_ancestors(?))
              AND id NOT IN (SELECT id FROM org_top())
        ) ";
    }
    $q .= "
        SELECT DISTINCT id, edit_date FROM (
            SELECT bre.id, $edit_date
            FROM biblio.record_entry bre
                INNER JOIN asset.opac_visible_copies aovc ON bre.id = aovc.record
    ";
    if ($aou_id) {
        $q .= " WHERE circ_lib IN (SELECT id FROM copy_orgs)";
    }
    $q .= "
            UNION
            SELECT bre.id, $edit_date
            FROM biblio.record_entry bre
                INNER JOIN asset.call_number acn ON bre.id = acn.record
            WHERE bre.deleted IS FALSE
              AND acn.deleted IS FALSE
    ";
    if ($aou_id) {
        $q .= " AND owning_lib IN (SELECT id FROM uri_orgs) AND label = '##URI##' ";
    }
    $q .= "
        ) x
        ORDER BY edit_date DESC, id DESC
    ";

    my $bibs;
    my $stmt = $dbh->prepare($q);
    if ($stmt) {
        $stmt->bind_param(1, $settings->{'date'}, { pg_type => PG_DATE });
        if ($aou_id) {
            # Placeholders 2 and 3 exist only in the org-scoped CTEs.
            $stmt->bind_param(2, $aou_id, SQL_INTEGER);
            $stmt->bind_param(3, $aou_id, SQL_INTEGER);
        }
        if ($stmt->execute()) {
            $bibs = $stmt->fetchall_arrayref([0, 1]);
        }
    }

    # Capture the error state before disconnecting, then release the
    # connection on both the success and the failure path.
    my $failed = (!$bibs || $dbh->err) ? 1 : 0;
    my $errstr = $DBI::errstr;
    $dbh->disconnect;

    if ($failed) {
        print STDERR "Error was "
            . (defined $errstr ? $errstr : 'unknown') . "\n";
        return;
    }

    return ($bibs, $aou_id);
}

my $hostname;
my $aou_shortname;

my %settings = (
    prefix => '',
    date => '2010-01-01'
);

GetOptions(
    "lib-hostname=s" => \$settings{'lib-hostname'},
    "lib-shortname=s" => \$settings{'lib-shortname'},
    "prefix=s" => \$settings{'prefix'},
    "date-floor=s" => \$settings{'date'},
    "config-file=s" => \$config_file,
    "user=s" => \$settings{'user'},
    "password=s" => \$settings{'pw'},
    "database=s" => \$settings{'db'},
    "hostname=s" => \$settings{'host'},
    "port=i" => \$settings{'port'},
    "help" => \$help
);

if (!$config_file) {
    # Ask eg_config where the configuration directory is.
    my @temp = `eg_config --sysconfdir`;
    if (!@temp || !defined $temp[0]) {
        die "Error: could not determine the configuration directory "
            . "from eg_config; use --config-file instead.\n";
    }
    chomp $temp[0];
    $sysconfdir = $temp[0];
    $config_file = File::Spec->catfile($sysconfdir, "opensrf.xml");
}

unless (-e $config_file) {
    die "Error: $config_file does not exist. \n";
}

if ($settings{'lib-hostname'}) {
    # Get additional settings from the config file
    get_settings(\%settings);

    my ($bibs, $aou_id) = get_record_ids(\%settings);

    # Stop before touching any files so that a database failure does not
    # overwrite existing sitemaps with empty ones.
    if (!$bibs) {
        die "Error: could not retrieve record IDs; no sitemaps were written.\n";
    }

    create_sitemaps(\%settings, $bibs, $aou_id);
} else {
    $help = 1;
}

if ($help) {
    print <