/// Maximum number of char8_t code units in a single UTF-8 byte sequence.
static constexpr size_t Max = 4;
/// Byte Order Mark (U+FEFF).
static constexpr char32_t BOM = 0xfeff;
/// End-of-stream sentinel: the istream traits' eof() value converted to char32_t.
static constexpr char32_t EoF = static_cast<char32_t>(std::istream::traits_type::eof());
/// U+0000 NULL.
static constexpr char32_t Null = 0;
/// One past the largest Unicode code point (0x10FFFF), so it never denotes a real character.
static constexpr char32_t Invalid = 0x110000;
27 if ((c &
char8_t(0b10000000)) ==
char8_t(0b00000000))
return 1;
28 if ((c &
char8_t(0b11100000)) ==
char8_t(0b11000000))
return 2;
29 if ((c &
char8_t(0b11110000)) ==
char8_t(0b11100000))
return 3;
30 if ((c &
char8_t(0b11111000)) ==
char8_t(0b11110000))
return 4;
35constexpr char32_t append(
char32_t c,
char8_t b)
noexcept {
return (c << 6) | (b & 0b00111111); }
/// Extract the payload bits from the first byte @p c of a multi-byte UTF-8 sequence.
///
/// The mask starts at five bits for a 2-byte sequence and loses one bit per
/// additional byte (0b11111, 0b1111, 0b111 for @p num = 2, 3, 4).
///
/// @param c    First byte of the sequence.
/// @param num  Total number of bytes in the sequence; must be at least 2
///             (the shift amount `num - 2` wraps around for smaller values).
/// @return     @p c masked down to its payload bits.
constexpr char32_t first(char32_t c, char32_t num) noexcept {
    const char32_t payload_mask = char32_t{0b00011111} >> (num - 2);
    return c & payload_mask;
}
43 case 1:
return 0x000000;
44 case 2:
return 0x000080;
45 case 3:
return 0x000800;
46 case 4:
return 0x010000;
47 default:
return 0x110000;
/// Is @p c a Unicode scalar value?
///
/// @param c  Value to check.
/// @return   true when @p c is at most 0x10FFFF and lies outside the
///           surrogate range [0xD800, 0xDFFF]; false otherwise.
constexpr bool is_scalar_value(char32_t c) noexcept {
    const bool in_code_space = c <= 0x10ffff;
    const bool is_surrogate  = 0xd800 <= c && c <= 0xdfff;
    return in_code_space && !is_surrogate;
}
57 return (c &
char8_t(0b11000000)) == char8_t(0b10000000) ? (c & char8_t(0b00111111)) : char8_t(-1);
64inline char32_t decode(std::istream& is) {
65 char32_t result = is.get();
66 if (result ==
EoF)
return result;
70 case 1:
return result;
74 for (
size_t i = 1; i != n; ++i)
75 if (
auto x =
is_valid234(is.get()); x !=
char8_t(-1))
88std::ostream& ao(std::ostream& os,
char32_t c32,
char32_t a = 0b00111111,
char32_t o = 0b10000000) {
89 return os << char((c32 & a) | o);
96inline bool encode(std::ostream& os,
char32_t c32) {
98 if (c32 <= 0x00007f) { ao(os, c32 , 0b11111111, 0b00000000);
return true; }
99 if (c32 <= 0x0007ff) { ao(ao(os, c32 >> 6, 0b00011111, 0b11000000), c32);
return true; }
100 if (c32 <= 0x00ffff) { ao(ao(ao(os, c32 >> 12, 0b00001111, 0b11100000), c32 >> 6), c32);
return true; }
101 if (c32 <= 0x10ffff) { ao(ao(ao(ao(os, c32 >> 18, 0b00000111, 0b11110000), c32 >> 12), c32 >> 6), c32);
return true; }
/// char32_t front end for std::isalnum.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isalnum accepts @p c.
inline bool isalnum(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isalnum(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isalpha.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isalpha accepts @p c.
inline bool isalpha(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isalpha(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isblank.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isblank accepts @p c.
inline bool isblank(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isblank(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::iscntrl.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::iscntrl accepts @p c.
inline bool iscntrl(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::iscntrl(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isdigit.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isdigit accepts @p c.
inline bool isdigit(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isdigit(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isgraph.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isgraph accepts @p c.
inline bool isgraph(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isgraph(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::islower.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::islower accepts @p c.
inline bool islower(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::islower(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isprint.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isprint accepts @p c.
inline bool isprint(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isprint(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::ispunct.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::ispunct accepts @p c.
inline bool ispunct(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::ispunct(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isspace.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isspace accepts @p c.
inline bool isspace(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isspace(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isupper.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isupper accepts @p c.
inline bool isupper(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isupper(static_cast<int>(c)) != 0;
}
/// char32_t front end for std::isxdigit.
///
/// @param c  Character to classify.
/// @return   false for every value above 0xFF; otherwise whether std::isxdigit accepts @p c.
inline bool isxdigit(char32_t c) noexcept {
    if (c > 0xFF) return false;  // only single-byte values are handed to the C classifier
    return std::isxdigit(static_cast<int>(c)) != 0;
}
/// Is @p c in the 7-bit ASCII range?
///
/// @param c  Character to check.
/// @return   true when @p c is at most 0x7F.
constexpr bool isascii(char32_t c) noexcept {
    return c <= 0x7F;
}
/// char32_t front end for std::tolower.
///
/// @param c  Character to convert.
/// @return   @p c unchanged when it is above 0xFF; otherwise the result of std::tolower.
inline char32_t tolower(char32_t c) noexcept {
    if (c > 0xFF) return c;  // values outside the single-byte range pass through untouched
    return static_cast<char32_t>(std::tolower(static_cast<int>(c)));
}
/// char32_t front end for std::toupper.
///
/// @param c  Character to convert.
/// @return   @p c unchanged when it is above 0xFF; otherwise the result of std::toupper.
inline char32_t toupper(char32_t c) noexcept {
    if (c > 0xFF) return c;  // values outside the single-byte range pass through untouched
    return static_cast<char32_t>(std::toupper(static_cast<int>(c)));
}
/// Is @p c within the closed interval [@p begin, @p finis]?
///
/// @param c      Character to test.
/// @param begin  Lower bound (inclusive).
/// @param finis  Upper bound (inclusive).
/// @return       true when begin <= c <= finis; always false when begin > finis.
constexpr bool isrange(char32_t c, char32_t begin, char32_t finis) noexcept {
    return !(c < begin || finis < c);
}
/// Build a predicate that tests whether a character lies in [@p begin, @p finis].
///
/// @param begin  Lower bound (inclusive).
/// @param finis  Upper bound (inclusive).
/// @return       A callable taking a char32_t and returning bool; it applies the
///               same closed-interval check as the three-argument isrange overload.
constexpr auto isrange(char32_t begin, char32_t finis) noexcept {
    // The bounds are captured by value, so the predicate is independent of this call's lifetime.
    return [begin, finis](char32_t c) { return begin <= c && c <= finis; };
}
/// Single-candidate case of the `_any` overload set.
///
/// @param c  Character under test.
/// @param d  The one candidate to compare against.
/// @return   true when @p c equals @p d.
constexpr bool _any(char32_t c, char32_t d) noexcept {
    return c == d;
}
155inline bool _any(
char32_t c,
char32_t d, T... args) {
156 return c == d ||
_any(c, args...);
159inline auto any(T... args) {
160 return [=](
char32_t c) {
return _any(c, args...); };
UTF-8 helpers for decoding byte streams, encoding char32_t values, and running ASCII-style character ...
bool isalnum(char32_t c) noexcept
bool isdigit(char32_t c) noexcept
static constexpr char32_t Invalid
Sentinel returned by decode for malformed UTF-8.
bool _any(char32_t c, char32_t d)
static constexpr char32_t BOM
Byte Order Mark.
char32_t tolower(char32_t c) noexcept
bool isascii(char32_t c) noexcept
constexpr size_t num_bytes(char8_t c) noexcept
Returns the expected number of bytes in a UTF-8 byte sequence by inspecting its first byte.
constexpr bool isbdigit(char32_t c) noexcept
Is binary digit?
constexpr char32_t first(char32_t c, char32_t num) noexcept
Get relevant bits of first UTF-8 byte c of a multi-byte sequence consisting of num bytes.
char32_t decode(std::istream &is)
Decodes the next UTF-8 sequence from is into a single char32_t.
bool isxdigit(char32_t c) noexcept
bool encode(std::ostream &os, char32_t c32)
Encodes c32 as UTF-8 and writes the resulting bytes to os.
constexpr char8_t is_valid234(char8_t c) noexcept
Is the 2nd, 3rd, or 4th byte of a UTF-8 byte sequence valid?
bool isprint(char32_t c) noexcept
constexpr bool isodigit(char32_t c) noexcept
Is octal digit?
constexpr char32_t min_code_point(size_t num) noexcept
Minimum Unicode scalar value representable in a UTF-8 sequence of num bytes.
bool isblank(char32_t c) noexcept
bool isalpha(char32_t c) noexcept
static constexpr size_t Max
Maximum number of char8_t code units in a UTF-8 byte sequence.
bool isupper(char32_t c) noexcept
char32_t toupper(char32_t c) noexcept
bool iscntrl(char32_t c) noexcept
bool ispunct(char32_t c) noexcept
static constexpr char32_t EoF
End of stream sentinel returned by decode.
constexpr char32_t append(char32_t c, char8_t b) noexcept
Append b to c for converting UTF-8 to UTF-32.
constexpr bool isrange(char32_t c, char32_t begin, char32_t finis) noexcept
Is c within [begin, finis]?
bool islower(char32_t c) noexcept
constexpr bool is_scalar_value(char32_t c) noexcept
Is c a valid Unicode scalar value?
bool isspace(char32_t c) noexcept
bool isgraph(char32_t c) noexcept
static constexpr char32_t Null
U+0000 NULL returned unchanged by decode.
friend std::ostream & operator<<(std::ostream &os, Char32 c)