2 * Copyright (C) 2012 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
-- url_verify.parse_url(url_in TEXT) RETURNS url_verify.url
--
-- Breaks a URL string into its components using a Rose::URI object built
-- from url_in.  The visible body fills a %parts hash as follows:
--   * scheme, username, password, host, port, path, query, fragment:
--     each is read from the Rose::URI accessor of the same name.
--   * full_url: the input string, stored unmodified.
--   * domain: a copy of host with its first dot-terminated label removed
--     (s/^[^.]+\.//).  A host containing no dot is left unchanged.
--   * tld: a copy of domain with the first run of "label." sequences
--     removed, which leaves the final label for ordinary dotted names.
--   * page: a copy of path with the first run of "/"-terminated segments
--     removed, i.e. for ordinary paths whatever follows the last "/".
--
-- NOTE(review): because domain simply drops the first label, a two-label
-- host such as "example.com" produces domain "com" (and tld "com").
-- Confirm that this is the intended meaning of "domain".
-- NOTE(review): the statements that load Rose::URI, assign $url_in, return
-- the result, and declare the function language are not visible in this
-- excerpt; presumably %parts is what gets returned -- confirm against the
-- complete file.
19 CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
24 my $url = Rose::URI->new($url_in);
26 my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
28 $parts{full_url} = $url_in;
29 ($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
30 ($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
31 ($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
-- url_verify.ingest_url() RETURNS TRIGGER
--
-- Trigger function that fills in the component columns of a row from its
-- full_url.  It parses NEW.full_url with url_verify.parse_url() and copies
-- every parsed component onto NEW; NEW.full_url itself is not reassigned.
--
-- NOTE(review): the DECLARE/BEGIN/END keywords and the RETURN statement are
-- not visible in this excerpt; presumably the function returns NEW so the
-- modified row is what gets stored -- confirm against the complete file.
37 CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
-- Scratch row that receives the parsed components.
39 tmp_row url_verify.url%ROWTYPE;
-- Parse the incoming row's full_url into tmp_row.
41 SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url);
-- Copy each parsed component onto the row being written.
43 NEW.scheme := tmp_row.scheme;
44 NEW.username := tmp_row.username;
45 NEW.password := tmp_row.password;
46 NEW.host := tmp_row.host;
47 NEW.domain := tmp_row.domain;
48 NEW.tld := tmp_row.tld;
49 NEW.port := tmp_row.port;
50 NEW.path := tmp_row.path;
51 NEW.page := tmp_row.page;
52 NEW.query := tmp_row.query;
53 NEW.fragment := tmp_row.fragment;
-- Run url_verify.ingest_url() once per row before each INSERT into
-- url_verify.url.  The trigger is declared for INSERT only, so it does not
-- fire when an existing row is updated.
59 CREATE TRIGGER ingest_url_tgr
60 BEFORE INSERT ON url_verify.url
61 FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url();
-- url_verify.extract_urls(session_id INT, item_id INT) RETURNS INT
--
-- For every url_verify.url_selector row belonging to session_id, evaluates
-- the selector's XPath against the MARC of biblio.record_entry rows joined
-- to container.biblio_record_entry_bucket_item, and inserts one
-- url_verify.url row per URL found, recording the tag, subfield code and a
-- running ordinal alongside it.  Returns current_ord - 1.
--
-- NOTE(review): the variable declarations and initial values, the
-- BEGIN/LOOP/END delimiters, and any WHERE clause on the three SELECTs are
-- not visible in this excerpt.  Presumably the SELECTs are restricted to the
-- bucket item identified by item_id, and current_ord starts at 1 so that the
-- return value is the number of rows inserted -- confirm against the
-- complete file.
63 CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
-- Row variable holding the selector currently being processed.
71 current_selector url_verify.url_selector%ROWTYPE;
-- Visit every selector defined for this session.
75 FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
-- Fetch the text of the current_url_pos-th node matched by the selector.
78 SELECT (oils_xpath(current_selector.xpath || '/text()', b.marc))[current_url_pos] INTO current_url
79 FROM biblio.record_entry b
80 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- No node at this position: stop scanning positions for this selector.
83 EXIT WHEN current_url IS NULL;
-- Fetch the tag attribute of the parent of the matched node.
85 SELECT (oils_xpath(current_selector.xpath || '/../@tag', b.marc))[current_url_pos] INTO current_tag
86 FROM biblio.record_entry b
87 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- When no tag is found at this position, reuse the most recently seen tag;
-- otherwise (presumably in an ELSE branch not visible here) remember this
-- tag for later positions.
90 IF current_tag IS NULL THEN
91 current_tag := last_seen_tag;
93 last_seen_tag := current_tag;
-- Fetch the code attribute of the matched node.
96 SELECT (oils_xpath(current_selector.xpath || '/@code', b.marc))[current_url_pos] INTO current_sf
97 FROM biblio.record_entry b
98 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- Record the URL; the ingest_url_tgr trigger on url_verify.url runs on this insert.
101 INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
102 VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
-- Advance to the next match position and the next ordinal.
104 current_url_pos := current_url_pos + 1;
105 current_ord := current_ord + 1;
-- current_ord has been advanced once past the last inserted row's ordinal.
109 RETURN current_ord - 1;