2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2008 Equinox Software, Inc.
4 * Copyright (C) 2010 Laurentian University
5 * Mike Rylander <miker@esilibrary.com>
6 * Dan Scott <dscott@laurentian.ca>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
20 DROP SCHEMA IF EXISTS authority CASCADE;
23 CREATE SCHEMA authority;
25 CREATE TABLE authority.control_set (
26 id SERIAL PRIMARY KEY,
27 name TEXT NOT NULL UNIQUE, -- i18n
28 description TEXT -- i18n
31 CREATE TABLE authority.control_set_authority_field (
32 id SERIAL PRIMARY KEY,
33 main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
34 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
36 sf_list TEXT NOT NULL,
37 name TEXT NOT NULL, -- i18n
38 description TEXT -- i18n
41 CREATE TABLE authority.control_set_bib_field (
42 id SERIAL PRIMARY KEY,
43 authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
47 CREATE TABLE authority.thesaurus (
48 code TEXT PRIMARY KEY, -- MARC21 thesaurus code
49 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
50 name TEXT NOT NULL UNIQUE, -- i18n
51 description TEXT -- i18n
54 CREATE TABLE authority.browse_axis (
55 code TEXT PRIMARY KEY,
56 name TEXT UNIQUE NOT NULL, -- i18n
60 CREATE TABLE authority.browse_axis_authority_field_map (
61 id SERIAL PRIMARY KEY,
62 axis TEXT NOT NULL REFERENCES authority.browse_axis (code) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
63 field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED
66 CREATE TABLE authority.record_entry (
67 id BIGSERIAL PRIMARY KEY,
68 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
69 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
70 creator INT NOT NULL DEFAULT 1,
71 editor INT NOT NULL DEFAULT 1,
72 active BOOL NOT NULL DEFAULT TRUE,
73 deleted BOOL NOT NULL DEFAULT FALSE,
75 control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
77 last_xact_id TEXT NOT NULL,
80 CREATE INDEX authority_record_entry_creator_idx ON authority.record_entry ( creator );
81 CREATE INDEX authority_record_entry_editor_idx ON authority.record_entry ( editor );
82 CREATE INDEX authority_record_deleted_idx ON authority.record_entry(deleted) WHERE deleted IS FALSE OR deleted = false;
83 CREATE TRIGGER a_marcxml_is_well_formed BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE biblio.check_marcxml_well_formed();
84 CREATE TRIGGER b_maintain_901 BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE evergreen.maintain_901();
85 CREATE TRIGGER c_maintain_control_numbers BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE maintain_control_numbers();
87 CREATE TABLE authority.bib_linking (
88 id BIGSERIAL PRIMARY KEY,
89 bib BIGINT NOT NULL REFERENCES biblio.record_entry (id),
90 authority BIGINT NOT NULL REFERENCES authority.record_entry (id)
92 CREATE INDEX authority_bl_bib_idx ON authority.bib_linking ( bib );
93 CREATE UNIQUE INDEX authority_bl_bib_authority_once_idx ON authority.bib_linking ( authority, bib );
95 CREATE TABLE authority.record_note (
96 id BIGSERIAL PRIMARY KEY,
97 record BIGINT NOT NULL REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED,
99 creator INT NOT NULL DEFAULT 1,
100 editor INT NOT NULL DEFAULT 1,
101 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
102 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now()
104 CREATE INDEX authority_record_note_record_idx ON authority.record_note ( record );
105 CREATE INDEX authority_record_note_creator_idx ON authority.record_note ( creator );
106 CREATE INDEX authority_record_note_editor_idx ON authority.record_note ( editor );
108 CREATE TABLE authority.rec_descriptor (
109 id BIGSERIAL PRIMARY KEY,
115 CREATE INDEX authority_rec_descriptor_record_idx ON authority.rec_descriptor (record);
117 CREATE TABLE authority.full_rec (
118 id BIGSERIAL PRIMARY KEY,
119 record BIGINT NOT NULL,
120 tag CHAR(3) NOT NULL,
125 index_vector tsvector NOT NULL
127 CREATE INDEX authority_full_rec_record_idx ON authority.full_rec (record);
128 CREATE INDEX authority_full_rec_tag_subfield_idx ON authority.full_rec (tag, subfield);
129 CREATE INDEX authority_full_rec_tag_part_idx ON authority.full_rec (SUBSTRING(tag FROM 2));
130 CREATE INDEX authority_full_rec_subfield_a_idx ON authority.full_rec (value) WHERE subfield = 'a';
131 CREATE TRIGGER authority_full_rec_fti_trigger
132 BEFORE UPDATE OR INSERT ON authority.full_rec
133 FOR EACH ROW EXECUTE PROCEDURE tsearch2(index_vector, value);
135 CREATE INDEX authority_full_rec_index_vector_idx ON authority.full_rec USING GIST (index_vector);
136 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
137 CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (value text_pattern_ops);
138 /* But we still need this (boooo) for paging using >, <, etc */
139 CREATE INDEX authority_full_rec_value_index ON authority.full_rec (value);
141 CREATE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id; DELETE FROM authority.full_rec WHERE record = OLD.id);
143 -- Intended to be used in a unique index on authority.record_entry like so:
144 -- CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
145 -- ON authority.record_entry (authority.normalize_heading(marc))
146 -- WHERE deleted IS FALSE or deleted = FALSE;
147 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$
149 acsaf authority.control_set_authority_field%ROWTYPE;
157 thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj');
158 IF thes_code IS NULL THEN
162 SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code;
168 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP
169 tag_used := acsaf.tag;
170 FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP
171 tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml);
172 IF tmp_text IS NOT NULL AND tmp_text <> '' THEN
173 heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text;
176 EXIT WHEN heading_text <> '';
179 IF heading_text <> '' THEN
180 IF no_thesaurus IS TRUE THEN
181 heading_text := tag_used || ' ' || public.naco_normalize(heading_text);
183 heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text);
186 heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml);
191 $func$ LANGUAGE PLPGSQL IMMUTABLE;
193 CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
194 SELECT authority.normalize_heading($1, TRUE);
195 $func$ LANGUAGE SQL IMMUTABLE;
197 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
198 SELECT authority.normalize_heading($1, FALSE);
199 $func$ LANGUAGE SQL IMMUTABLE;
201 COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
202 Extract the authority heading, thesaurus, and NACO-normalized values
203 from an authority record. The primary purpose is to build a unique
204 index to defend against duplicated authority records from the same
208 -- Adding indexes using oils_xpath_string() for the main entry tags described in
209 -- authority.control_set_authority_field would speed this up, if we ever want to use it, though
210 -- the existing index on authority.normalize_heading() helps already with a record in hand
211 CREATE OR REPLACE VIEW authority.tracing_links AS
212 SELECT main.record AS record,
214 main.tag AS main_tag,
215 oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value,
216 authority.normalize_heading(are.marc) AS normalized_main_value,
217 substr(link.value,1,1) AS relationship,
218 substr(link.value,2,1) AS use_restriction,
219 substr(link.value,3,1) AS deprecation,
220 substr(link.value,4,1) AS display_restriction,
222 link.tag AS link_tag,
223 oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value
224 FROM authority.full_rec main
225 JOIN authority.record_entry are ON (main.record = are.id)
226 JOIN authority.control_set_authority_field main_entry
227 ON (main_entry.tag = main.tag
228 AND main_entry.main_entry IS NULL
229 AND main.subfield = 'a' )
230 JOIN authority.control_set_authority_field sub_entry
231 ON (main_entry.id = sub_entry.main_entry)
232 JOIN authority.full_rec link
233 ON (link.record = main.record
234 AND link.tag = sub_entry.tag
235 AND link.subfield = 'w' );
237 -- Function to generate an ephemeral overlay template from an authority record
238 CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$
241 main_entry authority.control_set_authority_field%ROWTYPE;
242 bib_field authority.control_set_bib_field%ROWTYPE;
243 auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT;
244 replace_data XML[] DEFAULT '{}'::XML[];
245 replace_rules TEXT[] DEFAULT '{}'::TEXT[];
248 IF auth_id IS NULL THEN
252 -- Default to the LoC controll set
253 SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id;
255 FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
256 auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml::XML);
257 IF ARRAY_LENGTH(auth_field,1) > 0 THEN
258 FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP
259 replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]);
260 replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' );
268 XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns),
269 XMLELEMENT( name leader, '00881nam a2200193 4500'),
273 XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2),
276 XMLATTRIBUTES('r' AS code),
277 ARRAY_TO_STRING(replace_rules,',')
282 $f$ STABLE LANGUAGE PLPGSQL;
284 CREATE OR REPLACE FUNCTION authority.merge_records ( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
286 moved_objects INT := 0;
288 bib_rec biblio.record_entry%ROWTYPE;
289 auth_link authority.bib_linking%ROWTYPE;
293 -- Defining our terms:
294 -- "target record" = the record that will survive the merge
295 -- "source record" = the record that is sacrifing its existence and being
296 -- replaced by the target record
298 -- 1. Update all bib records with the ID from target_record in their $0
301 FROM biblio.record_entry bre
302 JOIN authority.bib_linking abl ON abl.bib = bre.id
303 WHERE abl.authority = source_record
306 UPDATE biblio.record_entry
307 SET marc = REGEXP_REPLACE(
309 E'(<subfield\\s+code="0"\\s*>[^<]*?\\))' || source_record || '<',
310 E'\\1' || target_record || '<',
313 WHERE id = bib_rec.id;
315 moved_objects := moved_objects + 1;
318 -- 2. Grab the current value of reingest on same MARC flag
319 SELECT enabled INTO ingest_same
320 FROM config.internal_flag
321 WHERE name = 'ingest.reingest.force_on_same_marc'
324 -- 3. Temporarily set reingest on same to TRUE
325 UPDATE config.internal_flag
327 WHERE name = 'ingest.reingest.force_on_same_marc'
330 -- 4. Make a harmless update to target_record to trigger auto-update
331 -- in linked bibliographic records
332 UPDATE authority.record_entry
334 WHERE id = target_record;
336 -- 5. "Delete" source_record
337 DELETE FROM authority.record_entry WHERE id = source_record;
339 -- 6. Set "reingest on same MARC" flag back to initial value
340 UPDATE config.internal_flag
341 SET enabled = ingest_same
342 WHERE name = 'ingest.reingest.force_on_same_marc'
345 RETURN moved_objects;
347 $func$ LANGUAGE plpgsql;