From 8a6f2471866fc79562ff48126342d91a20c13b5b Mon Sep 17 00:00:00 2001
From: gmc
Date: Mon, 29 Nov 2010 22:54:30 +0000
Subject: [PATCH] test cases for the naco_normalize stored function

Note that running the tests would require sticking in the connection
parameters for an Evergreen database; check comments for some musings
on how to do this better.

Signed-off-by: Galen Charlton

git-svn-id: svn://svn.open-ils.org/ILS/trunk@18865 dcc99617-32d9-48b4-a31d-7c20da2025e4
---
 Open-ILS/tests/naco_normalize.t | 117 ++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 Open-ILS/tests/naco_normalize.t

diff --git a/Open-ILS/tests/naco_normalize.t b/Open-ILS/tests/naco_normalize.t
new file mode 100644
index 0000000000..b43f3447f9
--- /dev/null
+++ b/Open-ILS/tests/naco_normalize.t
@@ -0,0 +1,117 @@
+use strict;
+use warnings;
+use utf8;
+
+use Test::More;
+use Unicode::Normalize;
+use DBI;
+
+# Exercises the public.naco_normalize() stored function against a live
+# Evergreen database; every test is skipped if the connection fails.
+#
+# This could be made better in at least one of two ways (or both);
+# 1. put PL/Perl code that doesn't require a database into external
+#    modules so that test frameworks can get at it more easily
+# 2. Build a test harness that knows how to find an Evergreen
+#    database to use for non-destructive testing.  Of course, there
+#    can be a chicken-and-egg problem here; also, a complete test
+#    suite would need to be able to do *destructive* testing, from
+#    which we'd presumably want to protect production databases.
+
+# Database connection parameters
+my $db_driver = 'Pg';
+my $db_host = 'evergreen';
+my $db_port = '5432';
+my $db_name = 'evergreen';
+my $db_user = 'evergreen';
+my $db_pw = 'evergreen';
+my $dsn = "dbi:" . $db_driver . ":dbname=" . $db_name .';host=' . $db_host . ';port=' . $db_port;
+
+# PrintError is off so that a failed connection yields only the skip_all
+# message below; query failures are reported by naco_normalize_wrapper().
+my $dbh = DBI->connect($dsn, $db_user, $db_pw, {AutoCommit => 1, pg_enable_utf8 => 1, PrintError => 0});
+if (!defined($dbh)) {
+    plan skip_all => "Failed to connect to database: $DBI::errstr";
+}
+
+# Test::Builder duplicates STDOUT and STDERR when it is loaded, so binmode
+# on the originals alone does not reach the handles that test output and
+# failure diagnostics are written to.  Set the layer on the builder's own
+# handles as well to avoid "Wide character in print" warnings.
+binmode STDOUT, ':utf8';
+binmode STDERR, ':utf8';
+my $builder = Test::More->builder;
+binmode $builder->output, ':utf8';
+binmode $builder->failure_output, ':utf8';
+binmode $builder->todo_output, ':utf8';
+
+# Each case is [ input, expected output, test description ].
+my @test_cases = (
+    [ 'abc', 'abc', 'regular text' ],
+    [ 'ABC', 'abc', 'regular text' ],
+    [ 'åbçdéñœöîøæÇıÂÅÍÎÏÔÔÒÚÆŒè', 'abcdenoeoioaeciaaiiiooouaeoee', 'European diacritics' ],
+    [ '“‘„«quotes»’”', 'quotes', 'special quotes' ],
+    # U+0098 (START OF STRING) and U+009C (STRING TERMINATOR) are invisible
+    # C1 control characters; they are written as escapes so that editors and
+    # text filters cannot silently replace them.
+    [ "\N{U+0098}abc\N{U+009C} def", 'def', 'special non-filing characters designation' ],
+    [ "\N{U+009C}abcdef", 'abcdef', 'unpaired start of string' ],
+    [ 'ß', 'ss', 'sharp S (eszett)' ],
+    # Compatibility ligatures are escaped for the same reason.
+    [ "\N{U+FB02}\N{U+FB01}\N{U+FB00}", 'flfiff', 'ligatures' ],
+    [ "ƠơƯư²\N{U+0132}\N{U+0133}", 'oouu2ijij', 'NFKD applied correctly' ],
+    [ 'ÆØÞæðøþĐđıŁłŒœʻʼℓ', 'aeothaedothddilloeoel', 'part 3.6' ],
+    [ 'Ð', 'd', 'uppercase eth (missing from 3.6?)' ],
+    [ 'ıİ', 'ii', 'Turkish I' ],
+    [ '[book\'s cover]', 'books cover', 'square brackets and apostrophe' ],
+    [ ' grue food ', 'grue food', 'trim spaces' ],
+    # note addition of NFKD() to transform expected output
+    [ '한국어 조선말', NFKD('한국어 조선말'), 'Korean text' ],
+    [ '普通話 / 普通话', '普通話 普通话', 'Chinese text' ],
+    [ 'العربية', 'العربية', 'Arabic text' ],
+    [ 'ქართული ენა', 'ქართული ენა', 'Georgian text' ],
+    [ 'русский язык', 'русскии язык', 'Russian text' ],
+    [ "\r\npa\tper\f", 'paper', 'other whitespace' ],
+    [ '#1: ∃ C++, @ home & abroad', '#1 c++ @ home & abroad', 'other punctuation' ],
+    [ '٠١٢٣٤٥', '012345', 'other decimal digits' ],
+    [ '²³¹', '231', 'superscript numbers' ],
+    [ '♭©®♯', '♭ ♯', 'other symbols' ],
+);
+
+my $sth1 = $dbh->prepare_cached('SELECT public.naco_normalize(?)');
+my $sth2 = $dbh->prepare_cached('SELECT public.naco_normalize(?, ?)');
+
+# Runs naco_normalize() in the database and returns its result.
+#   $str - string to normalize
+#   $sf  - optional second argument; when defined, the two-argument form
+#          of the function is called
+# Always returns exactly one value (undef when the query fails or yields no
+# row) so that a failure cannot shift the remaining arguments of is().
+sub naco_normalize_wrapper {
+    my ($str, $sf) = @_;
+    my $sth  = defined $sf ? $sth2 : $sth1;
+    my @bind = defined $sf ? ($str, $sf) : ($str);
+
+    my $normalized;
+    if ($sth->execute(@bind)) {
+        ($normalized) = $sth->fetchrow_array;
+        # Only one row is wanted; release the cached handle.
+        $sth->finish;
+    } else {
+        # PrintError is off, so surface the database error ourselves.
+        diag('naco_normalize query failed: ' . $sth->errstr);
+    }
+    return $normalized;
+}
+
+foreach my $case (@test_cases) {
+    my ($input, $expected, $description) = @$case;
+    is(naco_normalize_wrapper($input), $expected, $description);
+}
+
+is(naco_normalize_wrapper('Smith, Jane. Poet, painter, and author', 'a'), 'smith, jane poet painter and author',
+    'retain first comma');
+
+$dbh->disconnect;
+
+done_testing;
-- 
2.43.2