/* Charset handling for GNU tar. Copyright 2004-2021 Free Software Foundation, Inc. This file is part of GNU tar. GNU tar is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU tar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include "common.h" #ifdef HAVE_ICONV_H # include #endif #ifndef ICONV_CONST # define ICONV_CONST #endif #ifndef HAVE_ICONV # undef iconv_open # define iconv_open(tocode, fromcode) ((iconv_t) -1) # undef iconv # define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) (errno = ENOSYS, (size_t) -1) # undef iconv_close # define iconv_close(cd) 0 # undef iconv_t # define iconv_t int #endif static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 }; static iconv_t utf8_init (bool to_utf) { if (conv_desc[(int) to_utf] == (iconv_t) -1) { if (to_utf) conv_desc[(int) to_utf] = iconv_open ("UTF-8", locale_charset ()); else conv_desc[(int) to_utf] = iconv_open (locale_charset (), "UTF-8"); } return conv_desc[(int) to_utf]; } bool utf8_convert (bool to_utf, char const *input, char **output) { char ICONV_CONST *ib; char *ob, *ret; size_t inlen; size_t outlen; iconv_t cd = utf8_init (to_utf); if (cd == 0) { *output = xstrdup (input); return true; } else if (cd == (iconv_t)-1) return false; inlen = strlen (input) + 1; outlen = inlen * MB_LEN_MAX + 1; ob = ret = xmalloc (outlen); ib = (char ICONV_CONST *) input; /* According to POSIX, "if iconv() encounters a character in the input buffer that is valid, but for which an identical character does not exist in the target codeset, iconv() shall perform an implementation-defined conversion on this character." It will "update the variables pointed to by the arguments to reflect the extent of the conversion and return the number of non-identical conversions performed". On error, it returns -1. In other words, non-zero return always indicates failure, either because the input was not fully converted, or because it was converted in a non-reversible way. */ if (iconv (cd, &ib, &inlen, &ob, &outlen) != 0) { free (ret); return false; } *ob = 0; *output = ret; return true; } bool string_ascii_p (char const *p) { for (; *p; p++) if (*p & ~0x7f) return false; return true; }