From fd7f90415ab5302c32b0a70550d7e5d16b03e73b Mon Sep 17 00:00:00 2001 From: Dan Pearl Date: Thu, 2 Jun 2016 15:02:25 -0400 Subject: [PATCH] LP#1308090 Relator fields and facets need normalization. This incorporates two changes: facets were generated with trailing punctuation, which resulted in more than one entry for the same item, differing only in punctuation. In addition, relator codes were suppressed in the record detail unnecessarily. Signed-off-by: Dan Pearl Signed-off-by: Kate Butler Signed-off-by: Kathy Lussier --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 30 +++++++++++ Open-ILS/src/sql/Pg/950.data.seed-values.sql | 17 +++++++ .../src/sql/Pg/t/lp1308090-facet_punct.pg | 23 +++++++++ ...XXX.function.trim_trailing_punctuation.sql | 50 +++++++++++++++++++ .../templates/opac/parts/record/authors.tt2 | 11 +++- docs/RELEASE_NOTES_NEXT/OPAC/relator_list | 5 ++ 6 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punctuation.sql create mode 100644 docs/RELEASE_NOTES_NEXT/OPAC/relator_list diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index d732f4a893..71bab563bf 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -2458,5 +2458,35 @@ BEGIN END; $p$ LANGUAGE PLPGSQL; +-- This function is used to help clean up facet labels. Due to quirks in +-- MARC parsing, some facet labels may be generated with periods or commas +-- at the end. This always strips a trailing comma, and strips a trailing +-- period unless it looks like part of an initial. +-- Smith, John => no change +-- Smith, John, => Smith, John +-- Smith, John. => Smith, John +-- Public, John Q. 
=> no change
+CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
+DECLARE
+    result    TEXT;
+    last_char TEXT;
+BEGIN
+    result := $1;
+    last_char := substring(result from '.$'); -- NULL when the input is empty or NULL
+
+    IF last_char = ',' THEN
+        result := substring(result from '^(.*),$'); -- drop exactly one trailing comma
+
+    ELSIF last_char = '.' THEN
+        IF substring(result from ' \w\.$') IS NULL THEN -- not "<space><single word char>.", i.e. not an initial
+            result := substring(result from '^(.*)\.$');
+        END IF;
+    END IF;
+
+    RETURN result;
+
+END;
+$$ language 'plpgsql';
+
 COMMIT;
 
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
index e029263de2..5e76959a72 100644
--- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
+++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
@@ -10046,6 +10046,13 @@ INSERT INTO config.index_normalizer (name, description, func, param_count) VALUE
     0
 );
 
+INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
+    'Trim Trailing Punctuation',
+    'Eliminate extraneous trailing commas and periods in text',
+    'metabib.trim_trailing_punctuation',
+    0
+);
+
 -- make use of the index normalizers
 
 INSERT INTO config.metabib_field_index_norm_map (field,norm)
@@ -10103,6 +10110,16 @@ INSERT INTO config.metabib_field_index_norm_map (field,norm,pos)
     WHERE i.func = 'remove_paren_substring'
         AND m.id IN (28);
 
+INSERT INTO config.metabib_field_index_norm_map (field,norm,pos)
+    SELECT m.id,
+        i.id,
+        -1
+    FROM config.metabib_field m,
+        config.index_normalizer i
+    WHERE i.func = 'metabib.trim_trailing_punctuation'
+        AND m.id IN (7,8,9,10);
+
+
 INSERT INTO config.record_attr_index_norm_map (attr,norm,pos)
     SELECT m.name, i.id, 0
     FROM config.record_attr_definition m,
diff --git a/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg b/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg
new file mode 100644
index 0000000000..acf6846cff
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg
@@ -0,0 +1,23 @@
+BEGIN;
+
+SELECT plan(12);
+
+SELECT can('metabib', ARRAY['trim_trailing_punctuation'], 'metabib.trim_trailing_punctuation function exists');
+
+SELECT is( metabib.trim_trailing_punctuation(''), '', 'Empty string');
+
+SELECT is( metabib.trim_trailing_punctuation('X,'), 'X', 'Eliminate comma A');
+SELECT is( metabib.trim_trailing_punctuation('Smith, John,'), 'Smith, John', 'Eliminate comma B');
+
+SELECT is( metabib.trim_trailing_punctuation('X.'), 'X', 'Lone letter w/o preceding space (period chopped)'); -- only "<space><letter>." counts as an initial
+SELECT is( metabib.trim_trailing_punctuation('X@'), 'X@', 'Initial w/o preceding space (other)');
+SELECT is( metabib.trim_trailing_punctuation('Smith, John'), 'Smith, John', 'Name no trailing punct A');
+SELECT is( metabib.trim_trailing_punctuation('Saki'), 'Saki', 'Name no trailing punct B');
+SELECT is( metabib.trim_trailing_punctuation('Smith, John.'), 'Smith, John', 'Chop trailing period');
+SELECT is( metabib.trim_trailing_punctuation('Public, John Q.'), 'Public, John Q.', 'Retain trailing period');
+SELECT is( metabib.trim_trailing_punctuation('Public, John Q,'), 'Public, John Q', 'Eliminate comma C');
+SELECT is( metabib.trim_trailing_punctuation('(FTC).'), '(FTC)', 'Trailing period');
+
+SELECT * FROM finish();
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punctuation.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punctuation.sql
new file mode 100644
index 0000000000..b13f66f129
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punctuation.sql
@@ -0,0 +1,50 @@
+BEGIN;
+
+-- This function is used to help clean up facet labels. Due to quirks in
+-- MARC parsing, some facet labels may be generated with periods or commas
+-- at the end. This always strips a trailing comma, and strips a trailing
+-- period unless it looks like part of an initial.
+-- Smith, John => no change
+-- Smith, John, => Smith, John
+-- Smith, John. => Smith, John
+-- Public, John Q. 
=> no change
+CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$
+DECLARE
+    result    TEXT;
+    last_char TEXT;
+BEGIN
+    result := $1;
+    last_char := substring(result from '.$'); -- NULL when the input is empty or NULL
+
+    IF last_char = ',' THEN
+        result := substring(result from '^(.*),$'); -- drop exactly one trailing comma
+
+    ELSIF last_char = '.' THEN
+        IF substring(result from ' \w\.$') IS NULL THEN -- not "<space><single word char>.", i.e. not an initial
+            result := substring(result from '^(.*)\.$');
+        END IF;
+    END IF;
+
+    RETURN result;
+
+END;
+$$ language 'plpgsql';
+
+INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
+    'Trim Trailing Punctuation',
+    'Eliminate extraneous trailing commas and periods in text',
+    'metabib.trim_trailing_punctuation',
+    0
+);
+
+INSERT INTO config.metabib_field_index_norm_map (field,norm,pos)
+    SELECT m.id,
+        i.id,
+        -1
+    FROM config.metabib_field m,
+        config.index_normalizer i
+    WHERE i.func = 'metabib.trim_trailing_punctuation'
+        AND m.id IN (7,8,9,10);
+
+COMMIT;
+
diff --git a/Open-ILS/src/templates/opac/parts/record/authors.tt2 b/Open-ILS/src/templates/opac/parts/record/authors.tt2
index 735b3ef1df..2208cf0687 100644
--- a/Open-ILS/src/templates/opac/parts/record/authors.tt2
+++ b/Open-ILS/src/templates/opac/parts/record/authors.tt2
@@ -25,6 +25,6 @@ authors = [
 
 BLOCK normalize_qterm;
-    subfield.textContent.replace('[#"^$\+\-,\.:;&|\[\]()]', ' ');
+    subfield.textContent.replace('[#"^$\+\-,\.:;&|\[\]()]', ' ').replace('\s{2,}', ' '); # one chained expression: a second bare expression would emit the text twice
 END;
 
 BLOCK normalize_authors;
@@ -42,7 +42,7 @@ BLOCK build_author_links;
     link_term = ''; # Linked term (e.g. 
Personal name + Fuller form of name) supp_term = ''; # Supplementary terms qterm = ''; # Search query - tlabel = ''; + tlabels = []; birthdate = ''; deathdate = ''; graphics = []; @@ -53,7 +54,11 @@ BLOCK build_author_links; code = subfield.getAttribute('code'); IF code == '4'; relcode = subfield.textContent.substr(0,3); - tlabel = relators.$relcode || label; + tlabels.push( relators.$relcode || label ); + END; + IF code == 'e'; + tlabels.push( subfield.textContent() ); + indexed_term = 1; END; IF code == '6'; target_field = tag; @@ -92,6 +97,8 @@ BLOCK build_author_links; END; END; url = mkurl(ctx.opac_root _ '/results', {query => qterm.replace('^\s*(.*?)\s*$', '$1'), qtype => 'author'}, stop_parms.merge(expert_search_parms, general_search_parms, browse_search_parms, facet_search_parms)); + tlabel = tlabels.join(', '); + tlabels = []; author_type = (tlabel || label) | html; # schema.org changes diff --git a/docs/RELEASE_NOTES_NEXT/OPAC/relator_list b/docs/RELEASE_NOTES_NEXT/OPAC/relator_list new file mode 100644 index 0000000000..bfe02aa311 --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/OPAC/relator_list @@ -0,0 +1,5 @@ +Author Roles +^^^^^^^^^^^^ +All author/contributor roles will now be displayed in the record detail. Previously, some +of the roles were omitted. + -- 2.43.2