/* Conversion module for UTF-7.
Copyright (C) 2000-2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* UTF-7 is a legacy encoding used for transmitting Unicode within the
ASCII character set, used primarily by mail agents. New programs
are encouraged to use UTF-8 instead.
UTF-7 is specified in RFC 2152 (and old RFC 1641, RFC 1642). The
original Base64 encoding is defined in RFC 2045. */
#include <dlfcn.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* The flavours of UTF-7 handled by this module.  The numeric values are
   spelled out because the NAMES table below lists the charset names in
   exactly this order.  */
enum variant
{
  /* Named "UTF-7//"; its shift character is '+'.  */
  UTF7 = 0,
  /* Named "UTF-7-IMAP//"; its shift character is '&'.  */
  UTF_7_IMAP = 1
};
/* Charset names of the supported variants, packed back to back in one
   array.  Every name is NUL-terminated, and the trailing "\0" adds an empty
   string after the last name (presumably the end-of-list marker for the
   code that walks this table -- that code is not visible here).
   Must be in the same order as enum variant above.  */
static const char names[] =
"UTF-7//\0"
"UTF-7-IMAP//\0"
"\0";
/* Return the shift character of variant VAR: '+' for UTF7 and '&' for
   UTF_7_IMAP.  Any other value of VAR aborts the program.  */
static uint32_t
shift_character (enum variant const var)
{
  switch (var)
    {
    case UTF7:
      return '+';

    case UTF_7_IMAP:
      return '&';

    default:
      abort ();
    }
}
/* Return true if CH lies in the closed interval
   [LOWER_BOUND, UPPER_BOUND], false otherwise.  An interval whose lower
   bound exceeds its upper bound contains nothing.  */
static bool
between (uint32_t const ch,
         uint32_t const lower_bound, uint32_t const upper_bound)
{
  if (ch < lower_bound)
    return false;

  return ch <= upper_bound;
}
/* Return true if CH belongs to the set of "direct characters" of
   variant VAR.

   For UTF-7:
     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
   For UTF-7-IMAP (every character from space to '~' except '&'):
     A-Z a-z 0-9 ' ( ) , - . / : ? space
     ! " # $ % + * ; < = > @ [ \ ] ^ _ ` { | } ~

   Any other value of VAR aborts the program.  */
static bool
isdirect (uint32_t ch, enum variant var)
{
  switch (var)
    {
    case UTF7:
      /* Letters and digits.  */
      if (between (ch, 'A', 'Z')
          || between (ch, 'a', 'z')
          || between (ch, '0', '9'))
        return true;

      /* The contiguous run  , - . /  */
      if (between (ch, ',', '/'))
        return true;

      /* Remaining punctuation and whitespace.  */
      switch (ch)
        {
        case '\'':
        case '(':
        case ')':
        case ':':
        case '?':
        case ' ':
        case '\t':
        case '\n':
        case '\r':
          return true;

        default:
          return false;
        }

    case UTF_7_IMAP:
      return between (ch, ' ', '~') && ch != '&';

    default:
      abort ();
    }
}
/* Return true if CH belongs to the set of "direct and optional direct
   characters" of variant VAR.

   Every direct character of VAR qualifies.  For UTF-7 only, the following
   optional direct characters qualify as well:
     ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
   Other variants have no optional direct characters.  */
static bool
isxdirect (uint32_t ch, enum variant var)
{
  bool accepted = isdirect (ch, var);

  if (!accepted && var == UTF7)
    accepted = (between (ch, '!', '&')
                || ch == '*'
                || between (ch, ';', '@')
                /* [ ] ^ _ `  -- the backslash inside this range is excluded.  */
                || (ch != '\\' && between (ch, '[', '`'))
                || between (ch, '{', '}'));

  return accepted;
}
/* Characters which need to trigger an explicit shift back to US-ASCII (UTF-7
only): Modified base64 + '-' (shift back character)
A-Z a-z 0-9 + / -
*/
static