2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2008 Equinox Software, Inc.
4 * Copyright (C) 2010 Laurentian University
5 * Mike Rylander <miker@esilibrary.com>
6 * Dan Scott <dscott@laurentian.ca>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
-- Rebuild the authority schema from scratch. CASCADE also drops every object
-- contained in the schema and anything that depends on those objects.
20 DROP SCHEMA IF EXISTS authority CASCADE;
-- Recreate the (now empty) schema that all objects below are created in.
23 CREATE SCHEMA authority;
-- A control set: the parent row that authority field definitions, thesauri and
-- authority records reference through their control_set columns.
25 CREATE TABLE authority.control_set (
26 id SERIAL PRIMARY KEY,
-- Human-readable label; must be present and unique across control sets.
27 name TEXT NOT NULL UNIQUE, -- i18n
-- Optional free-text description.
28 description TEXT -- i18n
-- Authority-side field definitions that belong to a control set.
-- NOTE(review): other code in this file reads a "tag" column from this table,
-- but its definition is not among the lines visible here.
31 CREATE TABLE authority.control_set_authority_field (
32 id SERIAL PRIMARY KEY,
-- Self-reference. Rows with a NULL main_entry are the ones selected as main
-- entries by authority.normalize_heading and authority.tracing_links; other rows
-- point at the main-entry row they relate to. Deleting the parent cascades.
33 main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
-- Owning control set; removing the control set removes its field definitions.
34 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
-- Subfield codes stored as one string; authority.normalize_heading splits it
-- into single characters and treats each character as a subfield code.
36 sf_list TEXT NOT NULL,
37 name TEXT NOT NULL, -- i18n
38 description TEXT -- i18n
-- Bibliographic-side fields tied to an authority field definition.
-- authority.generate_overlay_template looks rows up by authority_field and reads
-- a "tag" column from them (that column is not among the lines visible here).
41 CREATE TABLE authority.control_set_bib_field (
42 id SERIAL PRIMARY KEY,
-- The authority field this bib field is controlled by; cascades on delete.
43 authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
-- Thesauri, keyed by code. authority.normalize_heading uses this table to map a
-- record's thesaurus code to the control set it should be processed with.
47 CREATE TABLE authority.thesaurus (
48 code TEXT PRIMARY KEY, -- MARC21 thesaurus code
-- Control set this thesaurus belongs to; cascades on delete of the control set.
49 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
50 name TEXT NOT NULL UNIQUE, -- i18n
51 description TEXT -- i18n
-- Browse axes, identified by a text code and a unique display name.
-- Rows are referenced by authority.browse_axis_authority_field_map.axis.
54 CREATE TABLE authority.browse_axis (
55 code TEXT PRIMARY KEY,
56 name TEXT UNIQUE NOT NULL, -- i18n
-- Mapping table that associates browse axes with authority field definitions.
-- Both foreign keys cascade, so a mapping row disappears with either side.
60 CREATE TABLE authority.browse_axis_authority_field_map (
61 id SERIAL PRIMARY KEY,
-- Browse axis, referenced by its text code.
62 axis TEXT NOT NULL REFERENCES authority.browse_axis (code) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
-- Authority field definition mapped onto that axis.
63 field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED
-- One row per authority record.
-- NOTE(review): a "marc" column is read from this table elsewhere in the file,
-- but its definition is not among the lines visible here.
66 CREATE TABLE authority.record_entry (
67 id BIGSERIAL PRIMARY KEY,
-- Both timestamps default to the time of the inserting transaction.
68 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
69 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
-- creator/editor default to 1 and carry no foreign key in this definition.
70 creator INT NOT NULL DEFAULT 1,
71 editor INT NOT NULL DEFAULT 1,
72 active BOOL NOT NULL DEFAULT TRUE,
-- Soft-delete flag; the protect_authority_rec_delete rule sets it instead of
-- removing the row.
73 deleted BOOL NOT NULL DEFAULT FALSE,
-- Optional control set. No ON DELETE action is given, so the default
-- (NO ACTION) applies when a referenced control set is deleted.
75 control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
-- Required and without a default: every insert must provide a value.
77 last_xact_id TEXT NOT NULL,
-- Lookup indexes on the user columns of authority.record_entry.
CREATE INDEX authority_record_entry_creator_idx
    ON authority.record_entry ( creator );

CREATE INDEX authority_record_entry_editor_idx
    ON authority.record_entry ( editor );

-- Partial index covering only rows that are not flagged as deleted. The
-- predicate deliberately keeps both spellings of the test, as originally written.
CREATE INDEX authority_record_deleted_idx
    ON authority.record_entry (deleted)
    WHERE deleted IS FALSE OR deleted = false;

-- BEFORE row triggers for inserts and updates. PostgreSQL fires triggers for the
-- same event in alphabetical order of their names, so the a_/b_/c_ prefixes pin
-- the sequence: well-formedness check, then 901 maintenance, then control numbers.
CREATE TRIGGER a_marcxml_is_well_formed
    BEFORE INSERT OR UPDATE ON authority.record_entry
    FOR EACH ROW
    EXECUTE PROCEDURE biblio.check_marcxml_well_formed();

CREATE TRIGGER b_maintain_901
    BEFORE INSERT OR UPDATE ON authority.record_entry
    FOR EACH ROW
    EXECUTE PROCEDURE evergreen.maintain_901();

CREATE TRIGGER c_maintain_control_numbers
    BEFORE INSERT OR UPDATE ON authority.record_entry
    FOR EACH ROW
    EXECUTE PROCEDURE maintain_control_numbers();

-- Soft delete: a DELETE against the table is rewritten into an UPDATE that
-- marks the targeted row as deleted instead of removing it.
CREATE RULE protect_authority_rec_delete AS
    ON DELETE TO authority.record_entry
    DO INSTEAD (
        UPDATE authority.record_entry
           SET deleted = TRUE
         WHERE OLD.id = authority.record_entry.id
    );
-- Link rows connecting a bibliographic record to an authority record.
-- authority.merge_records joins through this table to find the bibs attached to
-- the record being merged away.
88 CREATE TABLE authority.bib_linking (
89 id BIGSERIAL PRIMARY KEY,
-- Neither foreign key declares ON UPDATE/ON DELETE actions or deferrability,
-- so PostgreSQL's defaults apply.
90 bib BIGINT NOT NULL REFERENCES biblio.record_entry (id),
91 authority BIGINT NOT NULL REFERENCES authority.record_entry (id)
-- Find all links for a given bibliographic record.
CREATE INDEX authority_bl_bib_idx
    ON authority.bib_linking ( bib );

-- A given (authority, bib) pair may be linked only once.
CREATE UNIQUE INDEX authority_bl_bib_authority_once_idx
    ON authority.bib_linking ( authority, bib );
-- Notes attached to authority records, with the same creator/editor/timestamp
-- bookkeeping columns used on authority.record_entry.
96 CREATE TABLE authority.record_note (
97 id BIGSERIAL PRIMARY KEY,
-- The authority record the note belongs to (deferred foreign key check).
98 record BIGINT NOT NULL REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED,
100 creator INT NOT NULL DEFAULT 1,
101 editor INT NOT NULL DEFAULT 1,
-- Both timestamps default to the time of the inserting transaction.
102 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
103 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now()
-- Lookup indexes for authority.record_note: by owning record and by user columns.
CREATE INDEX authority_record_note_record_idx
    ON authority.record_note ( record );

CREATE INDEX authority_record_note_creator_idx
    ON authority.record_note ( creator );

CREATE INDEX authority_record_note_editor_idx
    ON authority.record_note ( editor );
-- Per-record descriptor rows.
-- NOTE(review): only the surrogate key is visible in this view; the index created
-- after this table shows that a "record" column exists as well.
109 CREATE TABLE authority.rec_descriptor (
110 id BIGSERIAL PRIMARY KEY,
-- Find descriptor rows by the record they describe.
CREATE INDEX authority_rec_descriptor_record_idx
    ON authority.rec_descriptor (record);
-- Flattened, searchable rows derived from authority records.
-- NOTE(review): "subfield" and "value" columns are used by the indexes, trigger
-- and view defined on this table, but their definitions are not visible here.
118 CREATE TABLE authority.full_rec (
119 id BIGSERIAL PRIMARY KEY,
-- Id of the owning record; no foreign key is declared on this column here.
120 record BIGINT NOT NULL,
-- Three-character field tag.
121 tag CHAR(3) NOT NULL,
-- Full-text search vector; the authority_full_rec_fti_trigger trigger is
-- declared with (index_vector, value) as its arguments.
126 index_vector tsvector NOT NULL
-- Rows belonging to a given record.
CREATE INDEX authority_full_rec_record_idx
    ON authority.full_rec (record);

-- Lookups by tag and subfield code together.
CREATE INDEX authority_full_rec_tag_subfield_idx
    ON authority.full_rec (tag, subfield);

-- Expression index on the tag with its first character removed.
CREATE INDEX authority_full_rec_tag_part_idx
    ON authority.full_rec (SUBSTRING(tag FROM 2));

-- Partial index over values, restricted to subfield "a" rows.
CREATE INDEX authority_full_rec_subfield_a_idx
    ON authority.full_rec (value)
    WHERE subfield = 'a';

-- Row trigger that runs tsearch2 with (index_vector, value) on every insert
-- and update.
CREATE TRIGGER authority_full_rec_fti_trigger
    BEFORE UPDATE OR INSERT ON authority.full_rec
    FOR EACH ROW
    EXECUTE PROCEDURE tsearch2(index_vector, value);

-- GiST index backing full-text searches against index_vector.
CREATE INDEX authority_full_rec_index_vector_idx
    ON authority.full_rec
    USING GIST (index_vector);

-- Lets LIKE use an index in database clusters whose locale is not C or POSIX.
CREATE INDEX authority_full_rec_value_tpo_index
    ON authority.full_rec (value text_pattern_ops);
140 -- Intended to be used in a unique index on authority.record_entry like so:
141 -- CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
142 -- ON authority.record_entry (authority.normalize_heading(marc))
143 -- WHERE deleted IS FALSE or deleted = FALSE;
-- authority.normalize_heading(marcxml, no_thesaurus)
-- Builds a normalized heading key from the MARCXML of an authority record.
--   marcxml      : MARCXML of the record, as text
--   no_thesaurus : when TRUE, the thesaurus code is left out of the key prefix
-- The visible assignments produce one of these shapes:
--   <tag> <naco-normalized heading>                 (no_thesaurus IS TRUE)
--   <tag>_<thes_code> <naco-normalized heading>
--   NOHEADING_<thes_code> <md5 of marcxml>          (presumably when no heading text was found)
-- NOTE(review): several lines of this body (declarations, BEGIN/END, branch and
-- loop terminators, the RETURN) are not visible in this view.
144 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$
-- Row variable used to iterate over field definitions of the control set.
146 acsaf authority.control_set_authority_field%ROWTYPE;
-- The thesaurus code is taken from the fixed field named Subj.
154 thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj');
155 IF thes_code IS NULL THEN
-- Map the thesaurus code to its control set.
159 SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code;
-- Walk the main-entry field definitions (main_entry IS NULL) of that control set.
165 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP
166 tag_used := acsaf.tag;
-- Splitting on the empty pattern yields one row per character of sf_list,
-- i.e. one subfield code per iteration.
167 FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP
-- Text of the subfield with this code inside the field with this tag.
168 tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml);
169 IF tmp_text IS NOT NULL AND tmp_text <> '' THEN
-- Append U+2021 (double dagger), the subfield code, a space and the text.
170 heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text;
-- Stop at the first field definition that produced any heading text.
173 EXIT WHEN heading_text <> '';
176 IF heading_text <> '' THEN
177 IF no_thesaurus IS TRUE THEN
178 heading_text := tag_used || ' ' || public.naco_normalize(heading_text);
180 heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text);
-- Fallback key: hashing the whole record keeps it specific to this exact MARCXML.
183 heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml);
-- NOTE(review): declared IMMUTABLE although the body reads authority.thesaurus and
-- authority.control_set_authority_field. Presumably this is so the function can
-- back the unique index described in the comment above it (index expressions must
-- be IMMUTABLE); existing index entries will not follow later changes to those
-- tables -- confirm this is the intended trade-off.
188 $func$ LANGUAGE PLPGSQL IMMUTABLE;
-- authority.simple_normalize_heading(marcxml)
-- Thin wrapper around the two-argument authority.normalize_heading that always
-- passes no_thesaurus = TRUE, so the returned key carries no thesaurus code.
CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT )
RETURNS TEXT
LANGUAGE SQL
IMMUTABLE
AS $func$
    SELECT authority.normalize_heading($1, TRUE);
$func$;
-- authority.normalize_heading(marcxml)
-- One-argument overload: delegates to the two-argument form with
-- no_thesaurus = FALSE, so the thesaurus code is included in the returned key.
CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT )
RETURNS TEXT
LANGUAGE SQL
IMMUTABLE
AS $func$
    SELECT authority.normalize_heading($1, FALSE);
$func$;
-- Stores a catalog description for the one-argument authority.normalize_heading.
-- NOTE(review): the end of the description text and its closing delimiter are not
-- visible in this view.
198 COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
199 Extract the authority heading, thesaurus, and NACO-normalized values
200 from an authority record. The primary purpose is to build a unique
201 index to defend against duplicated authority records from the same
205 -- Adding indexes using oils_xpath_string() for the main entry tags described in
206 -- authority.control_set_authority_field would speed this up, if we ever want to use it, though
207 -- the existing index on authority.normalize_heading() helps already with a record in hand
-- authority.tracing_links
-- One row per (main-entry subfield "a" row, related-field subfield "w" row) pair
-- within the same authority record, exposing the main heading, its normalized
-- form, and the individual characters of the linking subfield's value.
208 CREATE OR REPLACE VIEW authority.tracing_links AS
209 SELECT main.record AS record,
211 main.tag AS main_tag,
-- Text of the subfields of the field carrying the main tag, read from the record's MARC.
212 oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value,
213 authority.normalize_heading(are.marc) AS normalized_main_value,
-- Characters 1 through 4 of the subfield "w" value, one output column each.
214 substr(link.value,1,1) AS relationship,
215 substr(link.value,2,1) AS use_restriction,
216 substr(link.value,3,1) AS deprecation,
217 substr(link.value,4,1) AS display_restriction,
219 link.tag AS link_tag,
-- Text of the subfields of the field carrying the linking tag.
220 oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value
221 FROM authority.full_rec main
222 JOIN authority.record_entry are ON (main.record = are.id)
-- Keep only full_rec rows whose tag is a main-entry definition (main_entry IS NULL)
-- and whose subfield code is "a".
223 JOIN authority.control_set_authority_field main_entry
224 ON (main_entry.tag = main.tag
225 AND main_entry.main_entry IS NULL
226 AND main.subfield = 'a' )
-- Field definitions that point back at that main entry.
227 JOIN authority.control_set_authority_field sub_entry
228 ON (main_entry.id = sub_entry.main_entry)
-- Subfield "w" rows of those related fields within the same record.
229 JOIN authority.full_rec link
230 ON (link.record = main.record
231 AND link.tag = sub_entry.tag
232 AND link.subfield = 'w' );
234 -- Function to generate an ephemeral overlay template from an authority record
-- authority.generate_overlay_template(source_xml)
--   source_xml : MARCXML of an authority record, as text
-- Reads the authority record id from field 901 subfield c, then, for field
-- definitions of the record's control set, collects replacement data fields and
-- replace rules for the associated bibliographic fields; the visible tail wraps
-- them in MARC21 slim XML elements with the rules in field 905 subfield r.
-- NOTE(review): several lines of this body (DECLARE/BEGIN, some declarations,
-- loop/branch terminators, the RETURN) are not visible in this view.
235 CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$
238 main_entry authority.control_set_authority_field%ROWTYPE;
239 bib_field authority.control_set_bib_field%ROWTYPE;
-- Authority record id, taken from 901$c of the supplied record and cast to INT.
240 auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT;
-- Accumulators, both starting as empty arrays.
241 replace_data XML[] DEFAULT '{}'::XML[];
242 replace_rules TEXT[] DEFAULT '{}'::TEXT[];
245 IF auth_id IS NULL THEN
249 -- Default to the LoC control set
250 SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id;
252 FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
-- First element in the record that carries this field's tag.
-- NOTE(review): the built-in xpath() takes an xml second argument while
-- source_xml is TEXT -- confirm the call resolves on the target PostgreSQL version.
253 auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml);
-- NOTE(review): the built-in array_length takes (anyarray, int); this
-- one-argument call needs a compatible function defined elsewhere, otherwise it
-- fails when the function runs -- confirm.
254 IF ARRAY_LENGTH(auth_field) > 0 THEN
255 FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP
-- New datafield tagged with the bib field's tag, filled with the subfields of
-- the matched authority field.
256 replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]);
-- Rule text: <bib tag><sf_list>[0~\)<auth_id>$]
257 replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' );
265 XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns),
266 XMLELEMENT( name leader, '00881nam a2200193 4500'),
270 XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2),
273 XMLATTRIBUTES('r' AS code),
-- All collected rules, comma-separated, form the content of 905$r.
274 ARRAY_TO_STRING(replace_rules,',')
279 $f$ STABLE LANGUAGE PLPGSQL;
-- authority.merge_records(target_record, source_record)
--   target_record : id of the authority record that survives the merge
--   source_record : id of the authority record that is merged away
-- Returns the moved_objects counter, which is incremented after each update of a
-- linked bibliographic record.
-- NOTE(review): several lines of this body (DECLARE/BEGIN, the loop header, SET
-- clauses, statement terminators, END) are not visible in this view.
281 CREATE OR REPLACE FUNCTION authority.merge_records ( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
283 moved_objects INT := 0;
285 bib_rec biblio.record_entry%ROWTYPE;
286 auth_link authority.bib_linking%ROWTYPE;
290 -- Defining our terms:
291 -- "target record" = the record that will survive the merge
292 -- "source record" = the record that is sacrificing its existence and being
293 -- replaced by the target record
295 -- 1. Update all bib records with the ID from target_record in their $0
-- Bib records are found through authority.bib_linking rows that point at
-- source_record.
298 FROM biblio.record_entry bre
299 JOIN authority.bib_linking abl ON abl.bib = bre.id
300 WHERE abl.authority = source_record
303 UPDATE biblio.record_entry
304 SET marc = REGEXP_REPLACE(
-- Pattern: a subfield code="0" element whose text runs up to a closing
-- parenthesis (captured as group 1), followed by source_record and "<".
-- Replacement: group 1, then target_record, then "<".
306 E'(<subfield\\s+code="0"\\s*>[^<]*?\\))' || source_record || '<',
307 E'\\1' || target_record || '<',
310 WHERE id = bib_rec.id;
312 moved_objects := moved_objects + 1;
315 -- 2. Grab the current value of reingest on same MARC flag
316 SELECT enabled INTO ingest_same
317 FROM config.internal_flag
318 WHERE name = 'ingest.reingest.force_on_same_marc'
321 -- 3. Temporarily set reingest on same to TRUE
322 UPDATE config.internal_flag
324 WHERE name = 'ingest.reingest.force_on_same_marc'
327 -- 4. Make a harmless update to target_record to trigger auto-update
328 -- in linked bibliographic records
329 UPDATE authority.record_entry
331 WHERE id = target_record;
333 -- 5. "Delete" source_record
-- The protect_authority_rec_delete rule on authority.record_entry rewrites this
-- DELETE into an UPDATE that sets deleted = TRUE, hence the quotes above.
334 DELETE FROM authority.record_entry WHERE id = source_record;
336 -- 6. Set "reingest on same MARC" flag back to initial value
337 UPDATE config.internal_flag
338 SET enabled = ingest_same
339 WHERE name = 'ingest.reingest.force_on_same_marc'
342 RETURN moved_objects;
344 $func$ LANGUAGE plpgsql;