From 53b34b33c98ad5a195f4180b70803cc2f2a25aa1 Mon Sep 17 00:00:00 2001 From: Jason Stephenson Date: Tue, 6 Apr 2021 07:43:52 -0400 Subject: [PATCH] Lp 1922567: Limit authority.full_rec.value indexes Limit the authority_full_rec_value_index and authority_full_rec_value_tpo_index indexes to the first 1024 characters of a field or subfield in order to avoid database errors when inserting or updating authorities with long fields. Include release note and regression test. Signed-off-by: Jason Stephenson Signed-off-by: Jennifer Weston Signed-off-by: Galen Charlton --- Open-ILS/src/sql/Pg/011.schema.authority.sql | 4 ++-- .../lp1922567-test-asset-full-rec-indexes.pg | 21 +++++++++++++++++++ ....schema.authority-full-rec-value-index.sql | 12 +++++++++++ .../authority-full-rec-value-index.adoc | 17 +++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/t/regress/lp1922567-test-asset-full-rec-indexes.pg create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-full-rec-value-index.sql create mode 100644 docs/RELEASE_NOTES_NEXT/Cataloging/authority-full-rec-value-index.adoc diff --git a/Open-ILS/src/sql/Pg/011.schema.authority.sql b/Open-ILS/src/sql/Pg/011.schema.authority.sql index d6e22c5031..6186b18dc9 100644 --- a/Open-ILS/src/sql/Pg/011.schema.authority.sql +++ b/Open-ILS/src/sql/Pg/011.schema.authority.sql @@ -281,9 +281,9 @@ CREATE TRIGGER authority_full_rec_fti_trigger CREATE INDEX authority_full_rec_index_vector_idx ON authority.full_rec USING GIN (index_vector); /* Enable LIKE to use an index for database clusters with locales other than C or POSIX */ -CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (value text_pattern_ops); +CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (SUBSTRING(value FOR 1024) text_pattern_ops); /* But we still need this (boooo) for paging using >, <, etc */ -CREATE INDEX authority_full_rec_value_index ON authority.full_rec (value); +CREATE 
INDEX authority_full_rec_value_index ON authority.full_rec (SUBSTRING(value FOR 1024)); CREATE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id; DELETE FROM authority.full_rec WHERE record = OLD.id); diff --git a/Open-ILS/src/sql/Pg/t/regress/lp1922567-test-asset-full-rec-indexes.pg b/Open-ILS/src/sql/Pg/t/regress/lp1922567-test-asset-full-rec-indexes.pg new file mode 100644 index 0000000000..099b3aff03 --- /dev/null +++ b/Open-ILS/src/sql/Pg/t/regress/lp1922567-test-asset-full-rec-indexes.pg @@ -0,0 +1,21 @@ +BEGIN; + +SELECT plan(1); + +-- Attempt to insert an authority record that triggers the bug: +SELECT lives_ok( +$$INSERT INTO authority.record_entry +(last_xact_id, heading, simple_heading, marc) +VALUES +('IMPORT-1337697309.74328', +'100_-_lcsh a arenburg ile1ii ai d 1891 1967', +'100 a arenburg ile1ii ai d 1891 1967', +$_$09719cz a2200781n 4500n 50030508 DLC20210213070057.0800613n| azannaabn |b aaa n 50030508 000000010813807Xisnihttps://isni.org/isni/000000010813807X40169930viafhttp://www.viaf.org/viaf/40169930Q348497wikidatahttp://www.wikidata.org/entity/Q348497(OCoLC)oca00065876DLCengrdaDLCDLCNjPDLCOCoLCInUCLUWUCLU1891-01-271967-08-31edtfPG3476.E5Ärenburg, Ilʹi︠a︡,1891-1967Soviet Unionnafhttp://id.loc.gov/authorities/names/n80126312http://id.loc.gov/rwo/agents/n80126312Kyïv (Ukraine)nafhttp://id.loc.gov/authorities/names/n81022031http://id.loc.gov/rwo/agents/n81022031Ukrainenafhttp://id.loc.gov/authorities/names/n81017756http://id.loc.gov/rwo/agents/n81017756Journalism--Soviet Unionlcshhttp://id.loc.gov/authorities/subjects/sh2008106181Russian literaturelcshhttp://id.loc.gov/authorities/subjects/sh85115986Soviet literaturelcshhttp://id.loc.gov/authorities/subjects/sh85125831Authors, 
Russianlcshhttp://id.loc.gov/authorities/subjects/sh85009984Journalistslcshhttp://id.loc.gov/authorities/subjects/sh85070788Maleslcdgtrushttps://id.loc.gov/vocabulary/languages/rusAi-lun-pao,1891-1967Ailunbao,1891-1967Ehrenbourg, Ilya,1891-1967nnaaEhrenburg, Ilʹi︠a︡ Grigorʹevich,1891-1967Ehrenburg, Ilja,1891-1967Ehrenburg, Ilya,1891-1967Ehrenburg, Ilya Grigorievich,1891-1967Ehrenburg, Ilya Grigoryevich,1891-1967Ärenburg, I.(Ilʹi︠a︡),1891-1967Ärenburg, Ilʹi︠a︡ Grigorʹevich,1891-1967Erenburg, Ilya,1891-1967Erenburg, Ilyah,1891-1967Erenmpourgk, Älia,1891-1967ЭÑенбÑÑг, Ð.(ÐлÑÑ),1891-1967ЭÑенбÑÑг, ÐлÑÑ,1891-1967ЭÑенбÑÑг, ÐлÑÑ ÐÑигоÑÑевиÑ,1891-1967×רנ××ר×, ×.,1891-1967×רנ××ר×, ×××××,1891-1967×רנ××ר×, ×××××,1891-1967×רנ××ר×, ×××××,1891-1967. â¬×רנ××ר×, ×××××,1981Ö¾7691××רנ××ר×, ×××××,1891-1967ערנ××ר×, ×××××,1891-1967ערענ××ר×, ××,1891-1967ערענ××ר×, ×××××,1891-1967URIs added to 3XX and/or 5XX fields in this record for the PCC URI MARC Pilot. Please do not remove or edit these URIsNon-Latin script references not evaluated.Black book, c1981:t.p. (Ilya Ehrenburg)Encyc. Americana:(Ehrenburg, Ilya Grigorievich)Encyc. Brit. 15th:(Ehrenburg, Ilya (Grigoryevich))Kan, Y.T. Ai-lun-pao, 1891-1967, 1984:t.p. (Ai-lun-pao) colophon (Ailunbao)Chernai︠a︡ kniga. Yiddish. Dos shṿartse bukh, 1984:t.p. (Ilya Erenburg) t.p. verso (Ilyah Erenburg)nuc90-29696: His Lik voÄ­ny (vo frant︠s︡īi) [MI] 1920(usage on CU rept.: I. Ärenburg)Ob Armenii i armi︠a︡nskoÄ­ kulʹture, 1988:t.p. (Ilʹi︠a︡ Ärenburg) added t.p. (Ilya Ärenburg)Literatur nach Stalins Tod, c1995:t.p. (Ilja Ehrenburg)Kuneva, V. Ili︠a︡ Erenburg i Li︠u︡dmil Stoi︠a︡nov, 1991.To dentro, 1971:t.p. (Älia Erenmpourgk)MoÄ­ Parizh, 1933:t.p. (Ilʹi Ärenburga) p. 5 (ÐлÑÑ Ð­ÑенбÑÑг = Ilʹi︠a︡ Ärenburg)Wikipedia, Nov. 26, 2014(Ilʹi︠a︡ Ärenburg; January 27 [O.S. 
January 15] 1891-31 August 1967; Soviet writer, journalist, translator, and cultural figure; among the most prolific and notable authors of the Soviet Union; he published around one hundred titles. He became known first and foremost as a novelist and a journalist--in particular, as a reporter in three wars (First World War, Spanish Civil War and the Second World War))Wikidata, February 12, 2021(location of burial: Novodevichy Cemetery; award received: Knight of the Legion of Honour, Order of Lenin, Order of the Red Banner of Labour, Order of the Red Star, Medal \, Medal \, Medal \, Stalin Prize, International Stalin Prize for Peace, Order of Lenin, Lenin Peace Prize; Commons category: Ilya Ehrenburg; place of birth: Kyiv; place of death: Moscow; date of birth: +1891-01-14T00:00:00Z, +1891-01-26T00:00:00Z; instance of: human; date of death: +1967-08-31T00:00:00Z; member of: Jewish Anti-Fascist Committee, USSR Union of Writers, World Peace Council, Supreme Soviet of the Soviet Union; occupation: journalist, writer, poet, novelist, screenwriter, translator, politician, children's writer; country of citizenship: Russian Empire, Soviet Union; cause of death: myocardial infarction; given name: Ilya; notable work: The Extraordinary Adventures of Julio Jurenito, The Thaw, Black Book, People, Years and Life; described by source: Great Soviet Encyclopedia (1969-1978), Bio-bibliographic dictionary of Russian writers of the XX century, Obalky knih.cz; languages spoken or published: French, Russian; manner of death: natural causes; child: Irina Erenburg; spouse: Lyubov Kozintseva; position held: member of the Supreme Council of the Soviet Union; native language: Russian; archives at: Central State Archives of Bulgaria; significant event: Nazi book burnings; educated at: First Moscow gymnasium; topic's main category: Category:Ilya Ehrenburg; conflict: Eastern Front; genre: novel, short story, short novel, sketch story, essay, poetry; owner of: аÑÑив на ÐÐ»Ñ ÐÑенбÑÑг; member of 
political party: Russian Social Democratic Labour Party; writing language: Russian; sex or gender: male; alias: Ehrenburg, Erenburg, Ilia Ehrenburg, Ilia Erenburg, Ilia Grigórievich Ehrenburg, Ilya Grigoryevich Ehrenburg, Iliáa Erenburg, Ilja Ehrenburg, Il'ja Erenburg, Ilja G. Ehrenburg, Ilja Grigor Ehrenburg, Il'ja Grigor'eviÄ Erenburg, Iľja GrigorieviÄ Ehrenburg, Iľja GrigorjeviÄ Erenburg, Ilja Grigorjevitsj Ehrenburg, Ilja Grigorjewitsch Erenburg, Ilya Ehrenberg, Ilya Ehrenburg, Il'ya Erenburg, Ilya Gregoryevich Ehrenburg, Ilya Grigorievich Ehrenburg, Ä°lya Grigoryeviç Ehrenburg, Ilya Grigoryevich Ehrenburg, Ilya Grigoryevich Erenburg, Lya Grigoryevich Ehrenburg, ЭÑенбÑÑг, ЭÑенбÑÑг Ð., ЭÑенбÑÑг Ð. Ð.; abART person ID: 15652; Amazon author ID: B006VYQGIO; ArhivX LOD: 79626; Babelio author ID: 18045; BAnQ author ID: 0000048970; BAV (Vatican Library: identifier) ADV12641572; BHCL ID: 91305; BHCL UUID: 76818205-3126-47ed-a204-df5ae670f73f; BIBSYS identifier: 90064171; BNE identifier: XX950248; BnF identifier: 11901820x; Brockhaus Enzyklopädie online ID: ehrenburg-ilja-grigorjewitsch; BVMC person ID: 43521; CANTIC: a10078010; CiNii author identifier: DA00837084; CONOR.BG ID: 23544677; CONOR identifier: 102645603, 15712611; CONOR.SR ID: 17956711; DBNL ID: ehre001; Deutsche Biographie ID: 118529269; Dialnet author ID: 1892237; EGAXA identifier: vtls000890085; Encyclopædia Britannica Online ID: biography/Ilya-Grigoryevich-Ehrenburg; Encyclopædia Universalis ID: ilia-grigorievitch-ehrenbourg; Encyclopedia of Modern Ukraine ID: 17978; FantLab author ID: 4598; FAST-ID: 10153; Filmportal ID: 18512cefc40d4d93aa76c6e482889369; Find a Grave ID: 21425; Freebase identifier: /m/027_qc; GND identifier: 118529269; Goodreads author ID: 155894; Gran Enciclopèdia Catalana ID: 0024311; Great Russian Encyclopedia Online ID: 4937559; GTAA id: 96302; Hrvatska enciklopedija ID: 18234; IDU person ID: 24804; IMDb identifier: nm0251280; Interlingual Index ID: i95016; iTunes artist ID: 
1271947273; Jewish Encyclopedia ID (Russian: ) 15105; LAC identifier: 0104H8549; LCAuth identifier: n50030508; LibraryThing author ID: ehrenburgilya; Libris-URI: xv8bb9lg4vbjrrj; Librivox author ID: 12019; LIMIS person ID: 5383497; LNB identifier: 000029527; Munzinger IBA: 00000000396; National Library of Korea ID: KAC201728656; National Portrait Gallery (London: person identifier) mp95108; NDL identifier: 00438691; NE.se ID: ilja-ehrenburg; NKC identifier: jn19990210194; NLA identifier: 36555544; NLI (Israel: identifier) 000044049; NLP identifier: A11787867; NLR (Romania: identifier) 000075913; NSK identifier: 000059527; NTA identifier (Netherlands: ) 070136238; NUKAT authorities: n94204187; Open Library identifier: OL170489A; Open Library subject ID: person:ilya_grigorevich_ehrenburg_; People Australia identifier: 1289938; Perlentaucher ID: ilja-ehrenburg; PLWABN ID: 9810646542105606; PM20 folder ID: pe/004780; PTBNP identifier: 26150; Rijksmuseum Research Library authority ID: 32301; RKDartists: 271549; Runeberg author ID: ehrenilj; SELIBR: 184634; SHARE Catalogue author ID: 158169; SNAC Ark ID: w6jd7k9n; Store norske leksikon ID: Ilja_Erenburg; SUDOC authorities: 02685211X; The Paris Review interviewee ID: 4636; Union Catalog of Armenian Libraries authority ID: 20366; Unione Romana Biblioteche Scientifiche ID: 298725; University of Barcelona authority ID: a1343179; Vatican Library VcBA ID: 495/336226; WorldCat Identities ID: lccn-n50030508)https://www.wikidata.org/wiki/Q348497$_$)$$, +'Able to insert authority record with long subfield value' +); + +-- If the bug is not patched, the above will fail with an error.
+
+SELECT * FROM finish();
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-full-rec-value-index.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-full-rec-value-index.sql
new file mode 100644
index 0000000000..5fa36f222c
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-full-rec-value-index.sql
@@ -0,0 +1,12 @@
+BEGIN;
+
+--SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+-- Index only the first 1024 characters of value so that long fields or subfields cannot produce over-long index entries.
+DROP INDEX IF EXISTS authority.authority_full_rec_value_index;
+CREATE INDEX authority_full_rec_value_index ON authority.full_rec (SUBSTRING(value FOR 1024));
+
+DROP INDEX IF EXISTS authority.authority_full_rec_value_tpo_index;
+CREATE INDEX authority_full_rec_value_tpo_index ON authority.full_rec (SUBSTRING(value FOR 1024) text_pattern_ops);
+
+COMMIT;
diff --git a/docs/RELEASE_NOTES_NEXT/Cataloging/authority-full-rec-value-index.adoc b/docs/RELEASE_NOTES_NEXT/Cataloging/authority-full-rec-value-index.adoc
new file mode 100644
index 0000000000..861f8bd92c
--- /dev/null
+++ b/docs/RELEASE_NOTES_NEXT/Cataloging/authority-full-rec-value-index.adoc
@@ -0,0 +1,17 @@
+Fix for Authority Records with Long Subfields
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Importing or updating authority records with long subfields, i.e. in
+the vicinity of 5,000 characters or more in length, can cause database
+errors that will prevent the update or import from happening. The
+error occurs because non-full text indexes in PostgreSQL have a
+limited length, and long fields sometimes lead to index entries that
+exceed this maximum value.
+
+In order to rectify this issue, two database indexes on the
+`authority.full_rec` table's `value` column have been redefined to
+match their counterparts in the `metabib.real_full_rec` table. After
+this update, only the first 1024 characters of an authority field or
+subfield will be considered by these indexes.
+
+NOTE: These indexes are not used for authority record search, though
+they are used for sorting and paging.
-- 2.43.2