6 use Unicode::Normalize;
# This could be made better in at least one of two ways (or both):
# 1. Put PL/Perl code that doesn't require a database into external
#    modules so that test frameworks can get at it more easily.
# 2. Build a test harness that knows how to find an Evergreen
#    database to use for non-destructive testing.  Of course, there
#    can be a chicken-and-egg problem here; also, a complete test
#    suite would need to be able to do *destructive* testing, from
#    which we'd presumably want to protect production databases.
# Database connection parameters.  All values are hard-coded; edit them to
# match the database this test should run against.
my $db_driver = 'Pg';          # the connect call passes the Pg-specific pg_enable_utf8 attribute
my $db_host   = 'evergreen';
my $db_port   = 5432;          # PostgreSQL's default port -- adjust if the server listens elsewhere
my $db_name   = 'evergreen';
my $db_user   = 'evergreen';
my $db_pw     = 'evergreen';
my $dsn       = sprintf 'dbi:%s:dbname=%s;host=%s;port=%s',
    $db_driver, $db_name, $db_host, $db_port;

# PrintError is off so that a failed connection is reported once, through
# the skip message below, instead of also being printed as a DBI warning.
my $dbh = DBI->connect(
    $dsn, $db_user, $db_pw,
    { AutoCommit => 1, pg_enable_utf8 => 1, PrintError => 0 },
);

# Without a database there is nothing to test, so skip the whole file
# rather than fail.  The guard matters: skip_all ends the script, so it must
# only run when the connection attempt actually failed.
if (!$dbh) {
    plan skip_all => "Failed to connect to database: $DBI::errstr";
}

# The test data contains non-ASCII text; let it pass through the standard
# handles without "Wide character" warnings.
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';
# Test-case table.  Each entry is
#   [ input string, expected normalized string, test description ]
# which is the order in which the loop further down hands the three
# elements to naco_normalize_wrapper() and is().
36 [ 'abc', 'abc', 'regular text' ],
37 [ 'ABC', 'abc', 'regular text' ],
38 [ 'åbçdéñœöîøæÇıÂÅÍÎÏÔÔÒÚÆŒè', 'abcdenoeoioaeciaaiiiooouaeoee', 'European diacritics' ],
39 [ '“‘„«quotes»’”', 'quotes', 'special quotes' ],
# NOTE(review): the next two entries look damaged in this copy -- each input
# literal is broken across lines and contains the text \98 / \9c, presumably
# where the control characters U+0098 and U+009C once stood.  Confirm
# against a clean copy of the file before relying on them.
40 [ '
\98abc
\9c def', 'def', 'special non-filing characters designation' ],
41 [ '
\9cabcdef', 'abcdef', 'unpaired start of string' ],
42 [ 'ß', 'ss', 'sharp S (eszett)' ],
# NOTE(review): the input below is plain ASCII and identical to the expected
# value, so as written it does not exercise ligature handling -- presumably
# the ligature characters were lost in transit; verify.
43 [ 'flfiff', 'flfiff', 'ligatures' ],
44 [ 'ƠơƯư²IJij', 'oouu2ijij', 'NFKD applied correctly' ],
45 [ 'ÆØÞæðøþĐđıŁłŒœʻʼℓ', 'aeothaedothddilloeoel', 'part 3.6' ],
46 [ 'Ð', 'd', 'uppercase eth (missing from 3.6?)' ],
47 [ 'ıİ', 'ii', 'Turkish I' ],
48 [ '[book\'s cover]', 'books cover', 'square brackets and apostrophe' ],
49 [ ' grue food ', 'grue food', 'trim spaces' ],
50 # note addition of NFKD() to transform expected output
51 [ '한국어 조선말', NFKD('한국어 조선말'), 'Korean text' ],
52 [ '普通話 / 普通话', '普通話 普通话', 'Chinese text' ],
53 [ 'العربية', 'العربية', 'Arabic text' ],
54 [ 'ქართული ენა', 'ქართული ენა', 'Georgian text' ],
55 [ 'русский язык', 'русскии язык', 'Russian text' ],
56 [ "\r\npa\tper\f", 'paper', 'other whitespace' ],
57 [ '#1: ∃ C++, @ home & abroad', '#1 c++ @ home & abroad', 'other punctuation' ],
58 [ '٠١٢٣٤٥', '012345', 'other decimal digits' ],
59 [ '²³¹', '231', 'superscript numbers' ],
60 [ '♭©®♯', '♭ ♯', 'other symbols' ],
# Statement handles for the one-argument and two-argument forms of
# public.naco_normalize.  They are prepared once and reused by every call
# to naco_normalize_wrapper().
my $sth1 = $dbh->prepare_cached('SELECT public.naco_normalize(?)');
my $sth2 = $dbh->prepare_cached('SELECT public.naco_normalize(?, ?)');

# naco_normalize_wrapper($str [, $sf])
#
# Runs public.naco_normalize in the database and returns its result.
#
#   $str - the text to normalize
#   $sf  - optional second argument; when defined, the two-argument SQL
#          form is used and $sf is passed through unchanged
#
# Always returns exactly one scalar: the normalized string, or undef when
# the query fails or produces no row.  Returning a single value (never an
# empty list) keeps the call safe inside another function's argument list,
# e.g. is(naco_normalize_wrapper(...), $expected, $name).
sub naco_normalize_wrapper {
    my ($str, $sf) = @_;

    my ($sth, @bind) = defined $sf ? ($sth2, $str, $sf) : ($sth1, $str);

    my $normalized;
    if ($sth->execute(@bind)) {
        ($normalized) = $sth->fetchrow_array;

        # Only the first row is wanted; release the cursor so the cached
        # handle is idle again before its next use.
        $sth->finish;
    }
    else {
        # The connect call turns PrintError off and does not enable
        # RaiseError, so a failed execute would otherwise go unnoticed.
        diag('naco_normalize query failed: ' . ($sth->errstr || 'unknown error'));
    }

    return $normalized;
}
# Run every entry of the test-case table through the one-argument form.
foreach my $case (@test_cases) {
    my ($input, $expected, $description) = @{$case};

    # Capture the result in a scalar first: if the wrapper ever comes back
    # with nothing, is() still receives three arguments in the right order
    # instead of having $expected shifted into the "got" slot.
    my $got = naco_normalize_wrapper($input);
    is($got, $expected, $description);
}

# Two-argument form: with 'a' as the second argument the first comma of the
# input is expected to survive normalization.
my $got_with_sf = naco_normalize_wrapper('Smith, Jane. Poet, painter, and author', 'a');
is($got_with_sf, 'smith, jane poet painter and author', 'retain first comma');