#!/bin/sh
# Test GNU extension "\u" and "\U" (uppercase conversion)
# in "s///" command.
# This is an adaptation of the old utf8-1/2/3/4 tests.

# Copyright (C) 2017-2018 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
print_ver_ sed

require_ru_utf8_locale_

# The letters used in these tests are:
#          UTF8:Octal  UTF8:HEX   CodePoint Name
#   А      \320\220    \xD0\x90   U+0410    \N{CYRILLIC CAPITAL LETTER A}
#   Д      \320\224    \xD0\x94   U+0414    \N{CYRILLIC CAPITAL LETTER DE}
#   а      \320\260    \xD0\xB0   U+0430    \N{CYRILLIC SMALL LETTER A}
#   д      \320\264    \xD0\xB4   U+0434    \N{CYRILLIC SMALL LETTER DE}
#
# Using octal values, as these are the most portable across various printfs.


# Input: Same input for all tests (all lower case letters)
#   д    а      д
printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_


# Test 1: Convert "small DE" to upper case (with \U)
#   s/д/\U&/g
printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_

# Test 1: Expected output - two capital DE letters.
#   Д    а      Д
printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_


# Test 2: Convert "small DE" to upper case (with \u - next character only)
#   s/д/\u&/g
printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_

# The expected output of test 2 is identical to test 1.
# We create the file to make the test loop (below) simpler.
cp utf8-1-exp utf8-2-exp || framework_failure_


# Test 3: Capitalize only the next character (\u)
# Only the first "DE" should be capitalized.
#   s/д.*/\u&/g
printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_

# Test 3: Expected output - First DE capitalized, second DE not.
#   Д    а      д
printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_


# Test 4: Capitalize all matched characters
#   s/д.*/\U&/g
printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_

# Test 4: Expected output - All capital letters:
#   Д    А      Д
printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_


# Step 1: force Russian UTF8 locale.
# The case-conversion should either work, or not modify the input.
for i in 1 2 3 4; do
  LC_ALL=ru_RU.UTF-8 \
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-ru-out" || fail=1

  remove_cr_inplace "utf8-$i-ru-out"

  # If we have the expected output - continue to the next test.
  compare "utf8-$i-exp" "utf8-$i-ru-out" && continue

  # Otherwise, ensure the input wasn't modified
  # (i.e. sed did not modify partial octets resulting in
  #  invalid multibyte sequences).
  # All four tests share the single input file 'utf8-inp'.
  compare utf8-inp "utf8-$i-ru-out" || fail=1
done


# Step 2: If the current locale supports UTF8, repeat the above tests.
# Extract the LC_CTYPE value from 'locale' output, stripping the
# 'LC_CTYPE="' prefix and the trailing double quote.
l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')
case "$l" in
  *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
  *) utf8=no;;
esac

if test "$utf8" = yes ; then
  for i in 1 2 3 4; do
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-out" || fail=1

    remove_cr_inplace "utf8-$i-out"

    # If we have the expected output - continue to the next test.
    compare "utf8-$i-exp" "utf8-$i-out" && continue

    # Otherwise, ensure the input wasn't modified
    # (i.e. sed did not modify partial octets resulting in
    #  invalid multibyte sequences).
    compare utf8-inp "utf8-$i-out" || fail=1
  done
fi

# $fail is deliberately left unquoted: when no failure was recorded it is
# unset here, and Exit must then receive no argument rather than an empty one.
Exit $fail