From 23101c610ccd606764e4f531ee2a9994c935f81f Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Mon, 6 Aug 2012 15:51:04 -0400 Subject: [PATCH] Link checker: DB layer and similar changes for URL Verification Schema and IDL changes for URL Verification functionality Start building URL-Validation related funcitons Add Rose::URI to the CPAN_MODULES list Add ON INSERT trigger to parse the URL as it is added Signed-off-by: Mike Rylander --- Open-ILS/examples/fm_IDL.xml | 291 ++++++++++++++++++ Open-ILS/src/extras/Makefile.install | 3 +- Open-ILS/src/sql/Pg/075.schema.url_verify.sql | 102 ++++++ .../src/sql/Pg/076.functions.url_verify.sql | 64 ++++ .../sql/Pg/upgrade/XXXX.schema.url_verify.sql | 82 +++++ .../Pg/upgrade/YYYY.functions.url_verify.sql | 22 ++ 6 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 Open-ILS/src/sql/Pg/075.schema.url_verify.sql create mode 100644 Open-ILS/src/sql/Pg/076.functions.url_verify.sql create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql create mode 100644 Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index a7a765dbfb..c7741224ae 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -19,6 +19,56 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +############################ + TEMPLATE +############################ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + --> @@ -9287,6 +9337,247 @@ SELECT usr, + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Open-ILS/src/extras/Makefile.install b/Open-ILS/src/extras/Makefile.install index 60c8d66862..344d417579 100644 --- a/Open-ILS/src/extras/Makefile.install +++ b/Open-ILS/src/extras/Makefile.install @@ -219,7 +219,8 @@ CPAN_MODULES = \ Library::CallNumber::LC \ Net::Z3950::Simple2ZOOM \ Template::Plugin::POSIX \ - SRU + SRU \ + Rose::URI # More chronically unpackaged CPAN modules (available in Squeeze though) CPAN_MODULES_MORE = \ diff --git a/Open-ILS/src/sql/Pg/075.schema.url_verify.sql b/Open-ILS/src/sql/Pg/075.schema.url_verify.sql new file mode 100644 index 0000000000..753c769c94 --- /dev/null +++ b/Open-ILS/src/sql/Pg/075.schema.url_verify.sql @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2012 Equinox Software, Inc. + * Mike Rylander + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +BEGIN; + +DROP SCHEMA IF EXISTS url_verify CASCADE; + +CREATE SCHEMA url_verify; + +CREATE TABLE url_verify.session ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + owning_lib INT NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED, + creator INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + container INT NOT NULL REFERENCES container.biblio_record_entry_bucket (id) DEFERRABLE INITIALLY DEFERRED, + create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + search TEXT NOT NULL, + CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib) +); + +CREATE TABLE url_verify.url_selector ( + id SERIAL PRIMARY KEY, + xpath TEXT NOT NULL, + session INT NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED, + CONSTRAINT tag_once_per_sess UNIQUE (xpath, session) +); + +CREATE TABLE url_verify.url ( + id SERIAL PRIMARY KEY, + redirect_from INT REFERENCES url_verify.url(id) DEFERRABLE INITIALLY DEFERRED, + item INT NOT NULL REFERENCES container.biblio_record_entry_bucket_item (id) DEFERRABLE INITIALLY DEFERRED, + url_selector INT NOT NULL REFERENCES url_verify.url_selector (id) DEFERRABLE INITIALLY DEFERRED, + tag TEXT NOT NULL, + subfield TEXT NOT NULL, + ord INT NOT NULL, -- ordinal position of this url within the record as found by url_selector, for later update + full_url TEXT NOT NULL, + scheme TEXT, + username TEXT, + password TEXT, + host TEXT, + domain TEXT, + tld TEXT, + port TEXT, + path TEXT, + page TEXT, + query TEXT, + fragment TEXT, + CONSTRAINT redirect_or_from_item CHECK ( + redirect_from IS NOT NULL OR ( + item IS NOT NULL AND + url_selector IS NOT NULL AND + tag IS NOT NULL AND + subfield IS NOT NULL AND + ord IS NOT NULL + ) + ) +); + +CREATE TABLE url_verify.verification_attempt ( + id SERIAL PRIMARY KEY, + usr INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + session INT NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED, + start_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + finish_time TIMESTAMP WITH TIME ZONE +); + +CREATE TABLE url_verify.url_verification ( + id SERIAL PRIMARY KEY, + url INT NOT NULL REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED, + attempt INT NOT NULL REFERENCES url_verify.verification_attempt (id) DEFERRABLE INITIALLY DEFERRED, + req_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + res_time TIMESTAMP WITH TIME ZONE, + res_code INT CHECK (res_code BETWEEN 100 AND 599), + res_text TEXT, + redirect_to INT REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED -- if redirected +); + +CREATE TABLE url_verify.filter_set ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + owning_lib INT NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED, + creator INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + filter TEXT NOT NULL, + CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib) +); + +COMMIT; + diff --git a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql new file mode 100644 index 0000000000..23dedab0ab --- /dev/null +++ b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2012 Equinox Software, Inc. + * Mike Rylander + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +BEGIN; + +CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$ + +use Rose::URI; + +my $url_in = shift; +my $url = Rose::URI->new($url_in); + +my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/; + +$parts{full_url} = $url_in; +($parts{domain} = $parts{host}) =~ s/^[^.]+\.//; +($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//; +($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##; + +return \%parts; + +$$ LANGUAGE PLPERLU; + +CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$ +DECLARE + tmp_row url_verify.url%ROWTYPE; +BEGIN + SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url); + + NEW.scheme := tmp_row.scheme; + NEW.username := tmp_row.username; + NEW.password := tmp_row.password; + NEW.host := tmp_row.host; + NEW.domain := tmp_row.domain; + NEW.tld := tmp_row.tld; + NEW.port := tmp_row.port; + NEW.path := tmp_row.path; + NEW.page := tmp_row.page; + NEW.query := tmp_row.query; + NEW.fragment := tmp_row.fragment; + + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; + +CREATE TRIGGER ingest_url_tgr + BEFORE INSERT ON url_verify.url + FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url(); + +COMMIT; + diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql new file mode 100644 index 0000000000..7850b495ec --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql @@ -0,0 +1,82 @@ +DROP SCHEMA IF EXISTS url_verify CASCADE; + +CREATE SCHEMA url_verify; + +CREATE TABLE url_verify.session ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + owning_lib INT NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED, + creator INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + container INT NOT NULL REFERENCES container.biblio_record_entry_bucket (id) DEFERRABLE INITIALLY DEFERRED, + create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + search TEXT NOT NULL, + CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib) +); + +CREATE TABLE url_verify.url_selector ( + id SERIAL PRIMARY KEY, + xpath TEXT NOT NULL, + session INT NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED, + CONSTRAINT tag_once_per_sess UNIQUE (xpath, session) +); + +CREATE TABLE url_verify.url ( + id SERIAL PRIMARY KEY, + redirect_from INT REFERENCES url_verify.url(id) DEFERRABLE INITIALLY DEFERRED, + item INT NOT NULL REFERENCES container.biblio_record_entry_bucket_item (id) DEFERRABLE INITIALLY DEFERRED, + url_selector INT NOT NULL REFERENCES url_verify.url_selector (id) DEFERRABLE INITIALLY DEFERRED, + tag TEXT NOT NULL, + subfield TEXT NOT NULL, + ord INT NOT NULL, -- ordinal position of this url within the record as found by url_selector, for later update + full_url TEXT NOT NULL, + scheme TEXT, + username TEXT, + password TEXT, + host TEXT, + domain TEXT, + tld TEXT, + port TEXT, + path TEXT, + page TEXT, + query TEXT, + fragment TEXT, + CONSTRAINT redirect_or_from_item CHECK ( + redirect_from IS NOT NULL OR ( + item IS NOT NULL AND + url_selector IS NOT NULL AND + tag IS NOT NULL AND + subfield IS NOT NULL AND + ord IS NOT NULL + ) + ) +); + +CREATE TABLE url_verify.verification_attempt ( + id SERIAL PRIMARY KEY, + usr INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + session INT NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED, + start_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + finish_time TIMESTAMP WITH TIME ZONE +); + +CREATE TABLE url_verify.url_verification ( + id SERIAL PRIMARY KEY, + url INT NOT NULL REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED, + attempt INT NOT NULL REFERENCES url_verify.verification_attempt (id) DEFERRABLE INITIALLY DEFERRED, + req_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + res_time TIMESTAMP WITH TIME ZONE, + res_code INT CHECK (res_code BETWEEN 100 AND 599), + res_text TEXT, + redirect_to INT REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED -- if redirected +); + +CREATE TABLE url_verify.filter_set ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + owning_lib INT NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED, + creator INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED, + create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + filter TEXT NOT NULL, + CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib) +); + diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql new file mode 100644 index 0000000000..2087794714 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql @@ -0,0 +1,22 @@ +BEGIN; + +CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$ + +use Rose::URI; + +my $url_in = shift; +my $url = Rose::URI->new($url_in); + +my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/; + +$parts{full_url} = $url_in; +($parts{domain} = $parts{host}) =~ s/^[^.]+\.//; +($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//; +($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##; + +return \%parts; + +$$ LANGUAGE PLPERLU; + +COMMIT; + -- 2.43.2