/// Maximum number of bytes in a single UTF-8 byte sequence.
static constexpr size_t Max = 4;
/// Byte Order Mark (U+FEFF).
static constexpr char32_t BOM = 0xfeff;
/// End of file: the stream traits' `eof()` value converted to `char32_t`.
static constexpr char32_t EoF = static_cast<char32_t>(std::istream::traits_type::eof());
/// U+0000 NULL.
static constexpr char32_t Null = 0;
/// Marker for an invalid UTF-8 sequence; one past the largest code point U+10FFFF.
static constexpr char32_t Invalid = 0x110000;
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably this is the body of num_bytes(char8_t c) - confirm.
// Top bit clear (0xxxxxxx): report a length of 1.
21 if ((c &
char8_t(0b10000000)) ==
char8_t(0b00000000))
return 1;
// Top three bits are 110 (110xxxxx): report a length of 2.
22 if ((c &
char8_t(0b11100000)) ==
char8_t(0b11000000))
return 2;
// Top four bits are 1110 (1110xxxx): report a length of 3.
23 if ((c &
char8_t(0b11110000)) ==
char8_t(0b11100000))
return 3;
// Top five bits are 11110 (11110xxx): report a length of 4.
24 if ((c &
char8_t(0b11111000)) ==
char8_t(0b11110000))
return 4;
/// Appends the payload of a UTF-8 continuation byte to a partially decoded code point.
/// @param c Code point bits accumulated so far.
/// @param b Next byte of the sequence; only its low six bits are used.
/// @return @p c shifted left by six bits with the low six bits of @p b OR-ed in.
inline char32_t append(char32_t c, char8_t b) {
    // Widen the byte before masking so the whole expression stays in char32_t.
    const char32_t payload = static_cast<char32_t>(b) & 0b00111111u;
    return (c << 6) | payload;
}
/// Extracts the payload bits from the first byte of a multi-byte UTF-8 sequence.
/// @param c   First byte of the sequence.
/// @param num Total number of bytes in the sequence; must be at least 2,
///            because the mask is `0b00011111` shifted right by `num - 2`.
/// @return @p c with the length-marker bits masked away.
inline char32_t first(char32_t c, char32_t num) {
    // num == 2 keeps five bits, num == 3 keeps four, num == 4 keeps three.
    const char32_t mask = 0b00011111u >> (num - 2);
    return c & mask;
}
// NOTE(review): the switch head and function header are not visible in this listing;
// presumably this is the body of min_code_point(size_t num) - confirm.
// Length 1 maps to 0x000000.
37 case 1:
return 0x000000;
// Length 2 maps to 0x000080.
38 case 2:
return 0x000080;
// Length 3 maps to 0x000800.
39 case 3:
return 0x000800;
// Length 4 maps to 0x010000.
40 case 4:
return 0x010000;
// Any other length maps to 0x110000, the same value as the Invalid constant.
41 default:
return 0x110000;
/// Is @p c a valid Unicode scalar value?
/// @return `true` if @p c is at most U+10FFFF and lies outside the surrogate
///         range U+D800..U+DFFF, `false` otherwise.
inline bool is_scalar_value(char32_t c) {
    if (c > 0x10ffff) return false;
    const bool surrogate = c >= 0xd800 && c <= 0xdfff;
    return !surrogate;
}
// NOTE(review): the function header is not visible in this listing;
// presumably this is the body of is_valid234(char8_t c) - confirm.
// If the top two bits of c are 10 (10xxxxxx), yield its low six bits; otherwise yield char8_t(-1).
51 return (c &
char8_t(0b11000000)) == char8_t(0b10000000) ? (c & char8_t(0b00111111)) : char8_t(-1);
// Decodes bytes read from `is`; only part of the body is visible in this listing.
56inline char32_t decode(std::istream& is) {
// Fetch the first byte with get().
57 char32_t result = is.get();
// If that already equals EoF, hand it straight back to the caller.
58 if (result ==
EoF)
return result;
// case 1 (the switch head is not visible here): the value read is returned unchanged.
62 case 1:
return result;
// Loop n - 1 times; each further byte from get() goes through is_valid234,
// and the result is compared against char8_t(-1).
66 for (
size_t i = 1; i != n; ++i)
67 if (
auto x =
is_valid234(is.get()); x !=
char8_t(-1))
/// Writes the single byte `(c32 & a) | o`, converted to `char`, to @p os.
/// @param os  Output stream.
/// @param c32 Value supplying the bits.
/// @param a   AND-mask applied to @p c32; defaults to the low six bits.
/// @param o   Bits OR-ed in after masking; defaults to 0b10000000.
/// @return The result of the stream insertion, so calls can be nested.
80std::ostream& ao(std::ostream& os,
char32_t c32,
char32_t a = 0b00111111,
char32_t o = 0b10000000) {
81 return os << char((c32 & a) | o);
/// Writes @p c32 to @p os as a sequence of one to four bytes via nested ao() calls.
/// Each visible branch returns `true` after writing.
// NOTE(review): the branch for values up to 0x00ffff also accepts 0xd800..0xdfff,
// which is_scalar_value() rejects - confirm whether callers are expected to validate first.
87inline bool encode(std::ostream& os,
char32_t c32) {
// Up to 0x7f: one byte, all eight bits kept and nothing OR-ed in.
89 if (c32 <= 0x00007f) { ao(os, c32 , 0b11111111, 0b00000000);
return true; }
// Up to 0x7ff: lead byte 110xxxxx from c32 >> 6, then one byte with ao()'s default mask.
90 if (c32 <= 0x0007ff) { ao(ao(os, c32 >> 6, 0b00011111, 0b11000000), c32);
return true; }
// Up to 0xffff: lead byte 1110xxxx from c32 >> 12, then two bytes with ao()'s default mask.
91 if (c32 <= 0x00ffff) { ao(ao(ao(os, c32 >> 12, 0b00001111, 0b11100000), c32 >> 6), c32);
return true; }
// Up to 0x10ffff: lead byte 11110xxx from c32 >> 18, then three bytes with ao()'s default mask.
92 if (c32 <= 0x10ffff) { ao(ao(ao(ao(os, c32 >> 18, 0b00000111, 0b11110000), c32 >> 12), c32 >> 6), c32);
return true; }
/// Is @p c alphanumeric according to `std::isalnum`?
/// Values above 0xFF are never passed to `std::isalnum` and yield `false`.
inline bool isalnum(char32_t c) {
    return c <= 0xFF && std::isalnum(static_cast<int>(c)) != 0;
}
/// Is @p c alphabetic according to `std::isalpha`?
/// Values above 0xFF are never passed to `std::isalpha` and yield `false`.
inline bool isalpha(char32_t c) {
    return c <= 0xFF && std::isalpha(static_cast<int>(c)) != 0;
}
/// Is @p c a blank character according to `std::isblank`?
/// Values above 0xFF are never passed to `std::isblank` and yield `false`.
inline bool isblank(char32_t c) {
    return c <= 0xFF && std::isblank(static_cast<int>(c)) != 0;
}
/// Is @p c a control character according to `std::iscntrl`?
/// Values above 0xFF are never passed to `std::iscntrl` and yield `false`.
inline bool iscntrl(char32_t c) {
    return c <= 0xFF && std::iscntrl(static_cast<int>(c)) != 0;
}
/// Is @p c a decimal digit according to `std::isdigit`?
/// Values above 0xFF are never passed to `std::isdigit` and yield `false`.
inline bool isdigit(char32_t c) {
    return c <= 0xFF && std::isdigit(static_cast<int>(c)) != 0;
}
/// Does @p c have a graphical representation according to `std::isgraph`?
/// Values above 0xFF are never passed to `std::isgraph` and yield `false`.
inline bool isgraph(char32_t c) {
    return c <= 0xFF && std::isgraph(static_cast<int>(c)) != 0;
}
/// Is @p c a lowercase letter according to `std::islower`?
/// Values above 0xFF are never passed to `std::islower` and yield `false`.
inline bool islower(char32_t c) {
    return c <= 0xFF && std::islower(static_cast<int>(c)) != 0;
}
/// Is @p c printable according to `std::isprint`?
/// Values above 0xFF are never passed to `std::isprint` and yield `false`.
inline bool isprint(char32_t c) {
    return c <= 0xFF && std::isprint(static_cast<int>(c)) != 0;
}
/// Is @p c a punctuation character according to `std::ispunct`?
/// Values above 0xFF are never passed to `std::ispunct` and yield `false`.
inline bool ispunct(char32_t c) {
    return c <= 0xFF && std::ispunct(static_cast<int>(c)) != 0;
}
/// Is @p c a whitespace character according to `std::isspace`?
/// Values above 0xFF are never passed to `std::isspace` and yield `false`.
inline bool isspace(char32_t c) {
    return c <= 0xFF && std::isspace(static_cast<int>(c)) != 0;
}
/// Is @p c an uppercase letter according to `std::isupper`?
/// Values above 0xFF are never passed to `std::isupper` and yield `false`.
inline bool isupper(char32_t c) {
    return c <= 0xFF && std::isupper(static_cast<int>(c)) != 0;
}
/// Is @p c a hexadecimal digit according to `std::isxdigit`?
/// Values above 0xFF are never passed to `std::isxdigit` and yield `false`.
inline bool isxdigit(char32_t c) {
    return c <= 0xFF && std::isxdigit(static_cast<int>(c)) != 0;
}
/// Is @p c within the 7-bit range, i.e. at most 0x7F?
inline bool isascii(char32_t c) {
    return c < 0x80;
}
/// Converts @p c to lowercase using `std::tolower`.
/// Values above 0xFF are returned unchanged without consulting `std::tolower`.
inline char32_t tolower(char32_t c) {
    if (c > 0xFF) return c;
    return static_cast<char32_t>(std::tolower(static_cast<int>(c)));
}
/// Converts @p c to uppercase using `std::toupper`.
/// Values above 0xFF are returned unchanged without consulting `std::toupper`.
inline char32_t toupper(char32_t c) {
    if (c > 0xFF) return c;
    return static_cast<char32_t>(std::toupper(static_cast<int>(c)));
}
/// Is @p c within the closed interval [@p begin, @p finis]?
inline bool isrange(char32_t c, char32_t begin, char32_t finis) {
    return !(c < begin || finis < c);
}
/// Builds a predicate that tests whether a character lies in [@p begin, @p finis].
/// @return A closure holding copies of both bounds; calling it with a `char32_t`
///         yields `true` if that value is within the closed interval.
inline auto isrange(char32_t begin, char32_t finis) {
    return [begin, finis](char32_t c) { return begin <= c && c <= finis; };
}
/// Two-argument form of `_any`: is @p c equal to @p d?
inline bool _any(char32_t c, char32_t d) {
    return d == c;
}
// NOTE(review): the template header declaring the pack T and the closing brace
// are not visible in this listing.
// Variadic form: true if c equals d, otherwise recurses on the remaining arguments.
146inline bool _any(
char32_t c,
char32_t d, T... args) {
147 return c == d ||
_any(c, args...);
// NOTE(review): the template header declaring the pack T and the closing brace
// are not visible in this listing.
// Returns a closure that captures args by copy and forwards its char32_t argument to _any(c, args...).
150inline auto any(T... args) {
151 return [=](
char32_t c) {
return _any(c, args...); };
bool isbdigit(char32_t c)
Is binary digit?
char32_t min_code_point(size_t num)
Minimum Unicode scalar value representable in a UTF-8 sequence of num bytes.
static constexpr char32_t Invalid
Invalid UTF-8 sequence.
bool _any(char32_t c, char32_t d)
static constexpr char32_t BOM
Byte Order Mark.
bool isodigit(char32_t c)
Is octal digit?
char8_t is_valid234(char8_t c)
Is the 2nd, 3rd, or 4th byte of a UTF-8 byte sequence valid?
bool isrange(char32_t c, char32_t begin, char32_t finis)
Is c within [begin, finis]?
char32_t decode(std::istream &is)
Decodes the next sequence of bytes from is as UTF-32.
bool encode(std::ostream &os, char32_t c32)
Encodes the UTF-32 char c32 as UTF-8 and writes the sequence of bytes to os.
char32_t tolower(char32_t c)
size_t num_bytes(char8_t c)
Returns the expected number of bytes for a UTF-8 char sequence by inspecting the first byte.
char32_t toupper(char32_t c)
static constexpr size_t Max
Maximum number of char8_t code units in a UTF-8 byte sequence.
static constexpr char32_t EoF
End of File.
bool isxdigit(char32_t c)
char32_t first(char32_t c, char32_t num)
Get relevant bits of first UTF-8 byte c of a multi-byte sequence consisting of num bytes.
char32_t append(char32_t c, char8_t b)
Append b to c for converting UTF-8 to UTF-32.
static constexpr char32_t Null
U+0000 NULL.
bool is_scalar_value(char32_t c)
Is c a valid Unicode scalar value?
friend std::ostream & operator<<(std::ostream &os, Char32 c)