2 * Copyright (C) 2004-2008 Georgia Public Library Service
3 * Copyright (C) 2008 Equinox Software, Inc.
4 * Copyright (C) 2010 Laurentian University
5 * Mike Rylander <miker@esilibrary.com>
6 * Dan Scott <dscott@laurentian.ca>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
20 DROP SCHEMA IF EXISTS authority CASCADE;
23 CREATE SCHEMA authority;
25 CREATE TABLE authority.control_set (
26 id SERIAL PRIMARY KEY,
27 name TEXT NOT NULL UNIQUE, -- i18n
28 description TEXT -- i18n
31 CREATE TABLE authority.control_set_authority_field (
32 id SERIAL PRIMARY KEY,
33 main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
34 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
36 sf_list TEXT NOT NULL,
37 name TEXT NOT NULL, -- i18n
38 description TEXT -- i18n
41 CREATE TABLE authority.control_set_bib_field (
42 id SERIAL PRIMARY KEY,
43 authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
47 CREATE TABLE authority.thesaurus (
48 code TEXT PRIMARY KEY, -- MARC21 thesaurus code
49 control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
50 name TEXT NOT NULL UNIQUE, -- i18n
51 description TEXT -- i18n
54 CREATE TABLE authority.record_entry (
55 id BIGSERIAL PRIMARY KEY,
56 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
57 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
58 creator INT NOT NULL DEFAULT 1,
59 editor INT NOT NULL DEFAULT 1,
60 active BOOL NOT NULL DEFAULT TRUE,
61 deleted BOOL NOT NULL DEFAULT FALSE,
63 control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
65 last_xact_id TEXT NOT NULL,
68 CREATE INDEX authority_record_entry_creator_idx ON authority.record_entry ( creator );
69 CREATE INDEX authority_record_entry_editor_idx ON authority.record_entry ( editor );
70 CREATE INDEX authority_record_deleted_idx ON authority.record_entry(deleted) WHERE deleted IS FALSE OR deleted = false;
71 CREATE TRIGGER a_marcxml_is_well_formed BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE biblio.check_marcxml_well_formed();
72 CREATE TRIGGER b_maintain_901 BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE evergreen.maintain_901();
73 CREATE TRIGGER c_maintain_control_numbers BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE maintain_control_numbers();
74 CREATE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id);
76 CREATE TABLE authority.bib_linking (
77 id BIGSERIAL PRIMARY KEY,
78 bib BIGINT NOT NULL REFERENCES biblio.record_entry (id),
79 authority BIGINT NOT NULL REFERENCES authority.record_entry (id)
81 CREATE INDEX authority_bl_bib_idx ON authority.bib_linking ( bib );
82 CREATE UNIQUE INDEX authority_bl_bib_authority_once_idx ON authority.bib_linking ( authority, bib );
84 CREATE TABLE authority.record_note (
85 id BIGSERIAL PRIMARY KEY,
86 record BIGINT NOT NULL REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED,
88 creator INT NOT NULL DEFAULT 1,
89 editor INT NOT NULL DEFAULT 1,
90 create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
91 edit_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now()
93 CREATE INDEX authority_record_note_record_idx ON authority.record_note ( record );
94 CREATE INDEX authority_record_note_creator_idx ON authority.record_note ( creator );
95 CREATE INDEX authority_record_note_editor_idx ON authority.record_note ( editor );
97 CREATE TABLE authority.rec_descriptor (
98 id BIGSERIAL PRIMARY KEY,
104 CREATE INDEX authority_rec_descriptor_record_idx ON authority.rec_descriptor (record);
106 CREATE TABLE authority.full_rec (
107 id BIGSERIAL PRIMARY KEY,
108 record BIGINT NOT NULL,
109 tag CHAR(3) NOT NULL,
114 index_vector tsvector NOT NULL
116 CREATE INDEX authority_full_rec_record_idx ON authority.full_rec (record);
117 CREATE INDEX authority_full_rec_tag_subfield_idx ON authority.full_rec (tag, subfield);
118 CREATE INDEX authority_full_rec_tag_part_idx ON authority.full_rec (SUBSTRING(tag FROM 2));
119 CREATE INDEX authority_full_rec_subfield_a_idx ON authority.full_rec (value) WHERE subfield = 'a';
120 CREATE TRIGGER authority_full_rec_fti_trigger
121 BEFORE UPDATE OR INSERT ON authority.full_rec
122 FOR EACH ROW EXECUTE PROCEDURE tsearch2(index_vector, value);
124 CREATE INDEX authority_full_rec_index_vector_idx ON authority.full_rec USING GIST (index_vector);
125 /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */
126 CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (value text_pattern_ops);
128 -- Intended to be used in a unique index on authority.record_entry like so:
129 -- CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
130 -- ON authority.record_entry (authority.normalize_heading(marc))
131 -- WHERE deleted IS FALSE or deleted = FALSE;
132 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$
134 acsaf authority.control_set_authority_field%ROWTYPE;
142 thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj');
143 IF thes_code IS NULL THEN
147 SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code;
153 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP
154 tag_use := acsaf.tag;
155 FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP
156 tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml);
157 IF tmp_text IS NOT NULL THEN
158 heading_text := heading_text || E'\U2021' || sf || ' ' || tmp_text;
161 EXIT WHEN heading_text <> '';
164 IF heading_text <> '' THEN
165 IF no_thesaurus IS TRUE THEN
166 heading_text := tag_used || ' ' || public.naco_normalize(heading_text);
168 heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text);
171 heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml);
176 $func$ LANGUAGE PLPGSQL IMMUTABLE;
178 CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
179 SELECT authority.normalize_heading($1, TRUE);
180 $func$ LANGUAGE SQL IMMUTABLE;
182 CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$
183 SELECT authority.normalize_heading($1, FALSE);
184 $func$ LANGUAGE SQL IMMUTABLE;
186 COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
187 Extract the authority heading, thesaurus, and NACO-normalized values
188 from an authority record. The primary purpose is to build a unique
189 index to defend against duplicated authority records from the same
193 -- Adding indexes using oils_xpath_string() for the main entry tags described in
194 -- authority.control_set_authority_field would speed this up, if we ever want to use it, though
195 -- the existing index on authority.normalize_heading() helps already with a record in hand
196 CREATE OR REPLACE VIEW authority.tracing_links AS
197 SELECT main.record AS record,
199 main.tag AS main_tag,
200 oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value,
201 authority.normalize_heading(are.marc) AS normalized_main_value,
202 substr(link.value,1,1) AS relationship,
203 substr(link.value,2,1) AS use_restriction,
204 substr(link.value,3,1) AS deprecation,
205 substr(link.value,4,1) AS display_restriction,
207 link.tag AS link_tag,
208 oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value
209 FROM authority.full_rec main
210 JOIN authority.record_entry are ON (main.record = are.id)
211 JOIN authority.control_set_authority_field main_entry
212 ON (main_entry.tag = main.tag
213 AND main_entry.main_entry IS NULL
214 AND main.subfield = 'a' )
215 JOIN authority.control_set_authority_field sub_entry
216 ON (main_entry.id = sub_entry.main_entry)
217 JOIN authority.full_rec link
218 ON (link.record = main.record
219 AND link.tag = sub_entry.tag
220 AND link.subfield = 'w' );
222 -- Function to generate an ephemeral overlay template from an authority record
223 CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$
226 main_entry authority.control_set_authority_field%ROWTYPE;
227 bib_field authority.control_set_bib_field%ROWTYPE;
228 auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT;
229 replace_data XML[] DEFAULT '{}'::XML[];
230 replace_rules TEXT[] DEFAULT '{}'::TEXT[];
233 IF auth_id IS NULL THEN
237 -- Default to the LoC controll set
238 SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id;
240 FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
241 auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml);
242 IF ARRAY_LENGTH(auth_field) > 0 THEN
243 FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP
244 replace_data := replace_data || XMLELEMENT( name datafield, bib_field.tag AS tag, XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]);
245 replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' );
253 XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns)
254 XMLELEMENT( name leader, '00881nam a2200193 4500'),
264 ARRAY_TO_STRING(replace_rules,',')
269 $f$ STABLE LANGUAGE PLPGSQL;
271 CREATE OR REPLACE FUNCTION authority.merge_records ( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
273 moved_objects INT := 0;
275 bib_rec biblio.record_entry%ROWTYPE;
276 auth_link authority.bib_linking%ROWTYPE;
280 -- Defining our terms:
281 -- "target record" = the record that will survive the merge
282 -- "source record" = the record that is sacrifing its existence and being
283 -- replaced by the target record
285 -- 1. Update all bib records with the ID from target_record in their $0
288 FROM biblio.record_entry bre
289 JOIN authority.bib_linking abl ON abl.bib = bre.id
290 WHERE abl.authority = source_record
293 UPDATE biblio.record_entry
294 SET marc = REGEXP_REPLACE(
296 E'(<subfield\\s+code="0"\\s*>[^<]*?\\))' || source_record || '<',
297 E'\\1' || target_record || '<',
300 WHERE id = bib_rec.id;
302 moved_objects := moved_objects + 1;
305 -- 2. Grab the current value of reingest on same MARC flag
306 SELECT enabled INTO ingest_same
307 FROM config.internal_flag
308 WHERE name = 'ingest.reingest.force_on_same_marc'
311 -- 3. Temporarily set reingest on same to TRUE
312 UPDATE config.internal_flag
314 WHERE name = 'ingest.reingest.force_on_same_marc'
317 -- 4. Make a harmless update to target_record to trigger auto-update
318 -- in linked bibliographic records
319 UPDATE authority.record_entry
321 WHERE id = target_record;
323 -- 5. "Delete" source_record
324 DELETE FROM authority.record_entry WHERE id = source_record;
326 -- 6. Set "reingest on same MARC" flag back to initial value
327 UPDATE config.internal_flag
328 SET enabled = ingest_same
329 WHERE name = 'ingest.reingest.force_on_same_marc'
332 RETURN moved_objects;
334 $func$ LANGUAGE plpgsql;