2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2008 Equinox Software, Inc.
4 * Copyright (C) 2010 Laurentian University
5 * Mike Rylander <miker@esilibrary.com>
6 * Dan Scott <dscott@laurentian.ca>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
20 DROP SCHEMA IF EXISTS authority CASCADE;
23 CREATE SCHEMA authority;
25 CREATE TABLE authority.control_set (
26 id SERIAL PRIMARY KEY,
27 name TEXT NOT NULL UNIQUE, -- i18n
28 description TEXT -- i18n
31 CREATE TABLE authority.control_set_authority_field (
32 id SERIAL PRIMARY KEY,
33 main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
34 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
36 sf_list TEXT NOT NULL,
37 name TEXT NOT NULL, -- i18n
38 description TEXT -- i18n
41 CREATE TABLE authority.control_set_bib_field (
42 id SERIAL PRIMARY KEY,
43 authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
47 CREATE TABLE authority.thesaurus (
48 code TEXT PRIMARY KEY, -- MARC21 thesaurus code
49 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
50 name TEXT NOT NULL UNIQUE, -- i18n
51 description TEXT -- i18n
54 CREATE TABLE authority.browse_axis (
55 code TEXT PRIMARY KEY,
56 name TEXT UNIQUE NOT NULL, -- i18n
60 CREATE TABLE authority.browse_axis_authority_field_map (
61 id SERIAL PRIMARY KEY,
62 axis TEXT NOT NULL REFERENCES authority.browse_axis (code) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
63 field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED
66 CREATE TABLE authority.record_entry (
67 id BIGSERIAL PRIMARY KEY,
68 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
69 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
70 creator INT NOT NULL DEFAULT 1,
71 editor INT NOT NULL DEFAULT 1,
72 active BOOL NOT NULL DEFAULT TRUE,
73 deleted BOOL NOT NULL DEFAULT FALSE,
75 control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
77 last_xact_id TEXT NOT NULL,
80 CREATE INDEX authority_record_entry_creator_idx ON authority.record_entry ( creator );
81 CREATE INDEX authority_record_entry_editor_idx ON authority.record_entry ( editor );
82 CREATE INDEX authority_record_deleted_idx ON authority.record_entry(deleted) WHERE deleted IS FALSE OR deleted = false;
83 CREATE TRIGGER a_marcxml_is_well_formed BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE biblio.check_marcxml_well_formed();
84 CREATE TRIGGER b_maintain_901 BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE evergreen.maintain_901();
85 CREATE TRIGGER c_maintain_control_numbers BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE maintain_control_numbers();
87 CREATE TABLE authority.bib_linking (
88 id BIGSERIAL PRIMARY KEY,
89 bib BIGINT NOT NULL REFERENCES biblio.record_entry (id),
90 authority BIGINT NOT NULL REFERENCES authority.record_entry (id)
92 CREATE INDEX authority_bl_bib_idx ON authority.bib_linking ( bib );
93 CREATE UNIQUE INDEX authority_bl_bib_authority_once_idx ON authority.bib_linking ( authority, bib );
95 CREATE TABLE authority.record_note (
96 id BIGSERIAL PRIMARY KEY,
97 record BIGINT NOT NULL REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED,
99 creator INT NOT NULL DEFAULT 1,
100 editor INT NOT NULL DEFAULT 1,
101 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
102 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now()
104 CREATE INDEX authority_record_note_record_idx ON authority.record_note ( record );
105 CREATE INDEX authority_record_note_creator_idx ON authority.record_note ( creator );
106 CREATE INDEX authority_record_note_editor_idx ON authority.record_note ( editor );
108 CREATE TABLE authority.rec_descriptor (
109 id BIGSERIAL PRIMARY KEY,
115 CREATE INDEX authority_rec_descriptor_record_idx ON authority.rec_descriptor (record);
117 CREATE TABLE authority.full_rec (
118 id BIGSERIAL PRIMARY KEY,
119 record BIGINT NOT NULL,
120 tag CHAR(3) NOT NULL,
125 index_vector tsvector NOT NULL
127 CREATE INDEX authority_full_rec_record_idx ON authority.full_rec (record);
128 CREATE INDEX authority_full_rec_tag_subfield_idx ON authority.full_rec (tag, subfield);
129 CREATE INDEX authority_full_rec_tag_part_idx ON authority.full_rec (SUBSTRING(tag FROM 2));
130 CREATE INDEX authority_full_rec_subfield_a_idx ON authority.full_rec (value) WHERE subfield = 'a';
131 CREATE TRIGGER authority_full_rec_fti_trigger
132 BEFORE UPDATE OR INSERT ON authority.full_rec
133 FOR EACH ROW EXECUTE PROCEDURE tsearch2(index_vector, value);
135 CREATE INDEX authority_full_rec_index_vector_idx ON authority.full_rec USING GIST (index_vector);
136 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
137 CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (value text_pattern_ops);
138 /* But we still need this (boooo) for paging using >, <, etc */
139 CREATE INDEX authority_full_rec_value_index ON authority.full_rec (value);
141 CREATE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id; DELETE FROM authority.full_rec WHERE record = OLD.id);
143 -- Intended to be used in a unique index on authority.record_entry like so:
144 -- CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
145 -- ON authority.record_entry (authority.normalize_heading(marc))
146 -- WHERE deleted IS FALSE or deleted = FALSE;
147 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$
149 acsaf authority.control_set_authority_field%ROWTYPE;
157 thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj');
158 IF thes_code IS NULL THEN
162 SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code;
168 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP
169 tag_used := acsaf.tag;
170 FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP
171 tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml);
172 IF tmp_text IS NOT NULL AND tmp_text <> '' THEN
173 heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text;
176 EXIT WHEN heading_text <> '';
179 IF thes_code = 'z' THEN
180 thes_code := oils_xpath_string('//*[@tag="040"]/*[@code="f"][1]', marcxml);
183 IF heading_text <> '' THEN
184 IF no_thesaurus IS TRUE THEN
185 heading_text := tag_used || ' ' || public.naco_normalize(heading_text);
187 heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text);
190 heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml);
195 $func$ LANGUAGE PLPGSQL IMMUTABLE;
197 CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
198 SELECT authority.normalize_heading($1, TRUE);
199 $func$ LANGUAGE SQL IMMUTABLE;
201 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
202 SELECT authority.normalize_heading($1, FALSE);
203 $func$ LANGUAGE SQL IMMUTABLE;
205 COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
206 Extract the authority heading, thesaurus, and NACO-normalized values
207 from an authority record. The primary purpose is to build a unique
208 index to defend against duplicated authority records from the same
212 -- Adding indexes using oils_xpath_string() for the main entry tags described in
213 -- authority.control_set_authority_field would speed this up, if we ever want to use it, though
214 -- the existing index on authority.normalize_heading() helps already with a record in hand
215 CREATE OR REPLACE VIEW authority.tracing_links AS
216 SELECT main.record AS record,
218 main.tag AS main_tag,
219 oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value,
220 authority.normalize_heading(are.marc) AS normalized_main_value,
221 substr(link.value,1,1) AS relationship,
222 substr(link.value,2,1) AS use_restriction,
223 substr(link.value,3,1) AS deprecation,
224 substr(link.value,4,1) AS display_restriction,
226 link.tag AS link_tag,
227 oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value
228 FROM authority.full_rec main
229 JOIN authority.record_entry are ON (main.record = are.id)
230 JOIN authority.control_set_authority_field main_entry
231 ON (main_entry.tag = main.tag
232 AND main_entry.main_entry IS NULL
233 AND main.subfield = 'a' )
234 JOIN authority.control_set_authority_field sub_entry
235 ON (main_entry.id = sub_entry.main_entry)
236 JOIN authority.full_rec link
237 ON (link.record = main.record
238 AND link.tag = sub_entry.tag
239 AND link.subfield = 'w' );
241 -- Function to generate an ephemeral overlay template from an authority record
242 CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$
245 main_entry authority.control_set_authority_field%ROWTYPE;
246 bib_field authority.control_set_bib_field%ROWTYPE;
247 auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT;
248 replace_data XML[] DEFAULT '{}'::XML[];
249 replace_rules TEXT[] DEFAULT '{}'::TEXT[];
252 IF auth_id IS NULL THEN
256 -- Default to the LoC controll set
257 SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id;
259 FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
260 auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml::XML);
261 IF ARRAY_LENGTH(auth_field,1) > 0 THEN
262 FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP
263 replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]);
264 replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' );
272 XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns),
273 XMLELEMENT( name leader, '00881nam a2200193 4500'),
277 XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2),
280 XMLATTRIBUTES('r' AS code),
281 ARRAY_TO_STRING(replace_rules,',')
286 $f$ STABLE LANGUAGE PLPGSQL;
288 CREATE OR REPLACE FUNCTION authority.merge_records ( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
290 moved_objects INT := 0;
292 bib_rec biblio.record_entry%ROWTYPE;
293 auth_link authority.bib_linking%ROWTYPE;
297 -- Defining our terms:
298 -- "target record" = the record that will survive the merge
299 -- "source record" = the record that is sacrifing its existence and being
300 -- replaced by the target record
302 -- 1. Update all bib records with the ID from target_record in their $0
305 FROM biblio.record_entry bre
306 JOIN authority.bib_linking abl ON abl.bib = bre.id
307 WHERE abl.authority = source_record
310 UPDATE biblio.record_entry
311 SET marc = REGEXP_REPLACE(
313 E'(<subfield\\s+code="0"\\s*>[^<]*?\\))' || source_record || '<',
314 E'\\1' || target_record || '<',
317 WHERE id = bib_rec.id;
319 moved_objects := moved_objects + 1;
322 -- 2. Grab the current value of reingest on same MARC flag
323 SELECT enabled INTO ingest_same
324 FROM config.internal_flag
325 WHERE name = 'ingest.reingest.force_on_same_marc'
328 -- 3. Temporarily set reingest on same to TRUE
329 UPDATE config.internal_flag
331 WHERE name = 'ingest.reingest.force_on_same_marc'
334 -- 4. Make a harmless update to target_record to trigger auto-update
335 -- in linked bibliographic records
336 UPDATE authority.record_entry
338 WHERE id = target_record;
340 -- 5. "Delete" source_record
341 DELETE FROM authority.record_entry WHERE id = source_record;
343 -- 6. Set "reingest on same MARC" flag back to initial value
344 UPDATE config.internal_flag
345 SET enabled = ingest_same
346 WHERE name = 'ingest.reingest.force_on_same_marc'
349 RETURN moved_objects;
351 $func$ LANGUAGE plpgsql;