/// Maximal number of `char8_t`s in a UTF-8 byte sequence.
static constexpr size_t Max{4};
/// Byte Order Mark.
static constexpr char32_t BOM{0xFEFF};
/// End of File: the byte stream's eof marker converted to `char32_t`.
static constexpr char32_t EoF{static_cast<char32_t>(std::istream::traits_type::eof())};
/// The code point with value zero.
static constexpr char32_t Null{0};
20 if ((c &
char8_t(0b10000000)) ==
char8_t(0b00000000))
return 1;
21 if ((c &
char8_t(0b11100000)) ==
char8_t(0b11000000))
return 2;
22 if ((c &
char8_t(0b11110000)) ==
char8_t(0b11100000))
return 3;
23 if ((c &
char8_t(0b11111000)) ==
char8_t(0b11110000))
return 4;
/// Appends the low six bits of the byte @p b to the code point @p c
/// that is being assembled while converting UTF-8 to UTF-32.
inline char32_t append(char32_t c, char32_t b) {
    const char32_t payload = b & 0x3F; // keep only the six low bits of b
    return (c << 6) | payload;         // make room for them, then merge
}
/// Gets the relevant bits of the first UTF-8 byte @p c of a multi-byte sequence consisting of @p num bytes.
/// A five-bit mask is shortened by one bit for every byte beyond two;
/// `num - 2` is used as a shift count, so @p num is expected to be at least 2.
inline char32_t first(char32_t c, char32_t num) {
    const auto shortening = num - 2;
    const auto mask       = 0x1F >> shortening;
    return c & mask;
}
36 return (c &
char8_t(0b11000000)) == char8_t(0b10000000) ? (c & char8_t(0b00111111)) : char8_t(-1);
41inline char32_t decode(std::istream& is) {
42 char32_t result = is.get();
43 if (result ==
EoF)
return result;
47 case 1:
return result;
51 for (
size_t i = 1; i != n; ++i)
52 if (
auto x =
is_valid234(is.get()); x !=
char8_t(-1))
63std::ostream& ao(std::ostream& os,
char32_t c32,
char32_t a = 0b00111111,
char32_t o = 0b10000000) {
64 return os << char((c32 & a) | o);
70inline bool encode(std::ostream& os,
char32_t c32) {
72 if (c32 <= 0x00007f) { ao(os, c32 , 0b11111111, 0b00000000);
return true; }
73 if (c32 <= 0x0007ff) { ao(ao(os, c32 >> 6, 0b00011111, 0b11000000), c32);
return true; }
74 if (c32 <= 0x00ffff) { ao(ao(ao(os, c32 >> 12, 0b00001111, 0b11100000), c32 >> 6), c32);
return true; }
75 if (c32 <= 0x10ffff) { ao(ao(ao(ao(os, c32 >> 18, 0b00000111, 0b11110000), c32 >> 12), c32 >> 6), c32);
return true; }
/// Classifies @p c with `std::isalnum`; any value above 0xFF yields `false` without consulting it.
inline bool isalnum(char32_t c) {
    if (c > 0xFF) return false;
    return std::isalnum(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isalpha`; any value above 0xFF yields `false` without consulting it.
inline bool isalpha(char32_t c) {
    if (c > 0xFF) return false;
    return std::isalpha(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isblank`; any value above 0xFF yields `false` without consulting it.
inline bool isblank(char32_t c) {
    if (c > 0xFF) return false;
    return std::isblank(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::iscntrl`; any value above 0xFF yields `false` without consulting it.
inline bool iscntrl(char32_t c) {
    if (c > 0xFF) return false;
    return std::iscntrl(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isdigit`; any value above 0xFF yields `false` without consulting it.
inline bool isdigit(char32_t c) {
    if (c > 0xFF) return false;
    return std::isdigit(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isgraph`; any value above 0xFF yields `false` without consulting it.
inline bool isgraph(char32_t c) {
    if (c > 0xFF) return false;
    return std::isgraph(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::islower`; any value above 0xFF yields `false` without consulting it.
inline bool islower(char32_t c) {
    if (c > 0xFF) return false;
    return std::islower(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isprint`; any value above 0xFF yields `false` without consulting it.
inline bool isprint(char32_t c) {
    if (c > 0xFF) return false;
    return std::isprint(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::ispunct`; any value above 0xFF yields `false` without consulting it.
inline bool ispunct(char32_t c) {
    if (c > 0xFF) return false;
    return std::ispunct(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isspace`; any value above 0xFF yields `false` without consulting it.
inline bool isspace(char32_t c) {
    if (c > 0xFF) return false;
    return std::isspace(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isupper`; any value above 0xFF yields `false` without consulting it.
inline bool isupper(char32_t c) {
    if (c > 0xFF) return false;
    return std::isupper(static_cast<int>(c)) != 0;
}
/// Classifies @p c with `std::isxdigit`; any value above 0xFF yields `false` without consulting it.
inline bool isxdigit(char32_t c) {
    if (c > 0xFF) return false;
    return std::isxdigit(static_cast<int>(c)) != 0;
}
/// Is @p c at most 0x7F, i.e. representable in seven bits?
inline bool isascii(char32_t c) {
    return c < 0x80;
}
/// Maps @p c through `std::tolower` when it is at most 0xFF; larger values are returned unchanged.
inline char32_t tolower(char32_t c) {
    if (c > 0xFF) return c;
    return static_cast<char32_t>(std::tolower(static_cast<int>(c)));
}
/// Maps @p c through `std::toupper` when it is at most 0xFF; larger values are returned unchanged.
inline char32_t toupper(char32_t c) {
    if (c > 0xFF) return c;
    return static_cast<char32_t>(std::toupper(static_cast<int>(c)));
}
/// Is @p c within [@p begin, @p finis]? Both bounds are inclusive.
inline bool isrange(char32_t c, char32_t begin, char32_t finis) {
    const bool below = c < begin;
    const bool above = finis < c;
    return !(below || above);
}

/// Builds a predicate that tests whether its argument lies within [@p begin, @p finis].
/// The bounds are copied into the returned closure.
inline auto isrange(char32_t begin, char32_t finis) {
    return [lo = begin, hi = finis](char32_t c) { return isrange(c, lo, hi); };
}
/// Does @p c equal @p d?
inline bool _any(char32_t c, char32_t d) {
    return !(c != d);
}

/// Does @p c equal @p d or any of the further candidates in @p args?
/// Each candidate is converted to `char32_t` before it is compared, and
/// evaluation stops at the first match.
template<class... T>
inline bool _any(char32_t c, char32_t d, T... args) {
    return c == d || (... || (c == static_cast<char32_t>(args)));
}
129template<
class... T>
inline auto any(T... args) {
130 return [=](
char32_t c) {
return _any(c, args...); };
bool isbdigit(char32_t c)
Is binary digit?
bool _any(char32_t c, char32_t d)
static constexpr char32_t BOM
Byte Order Mark.
bool isodigit(char32_t c)
Is octal digit?
char8_t is_valid234(char8_t c)
Is the 2nd, 3rd, or 4th byte of a UTF-8 byte sequence valid?
bool isrange(char32_t c, char32_t begin, char32_t finis)
Is c within [begin, finis]?
char32_t decode(std::istream &is)
Decodes the next sequence of bytes from is as UTF-32.
bool encode(std::ostream &os, char32_t c32)
Encodes the UTF-32 char c32 as UTF-8 and writes the sequence of bytes to os.
char32_t tolower(char32_t c)
size_t num_bytes(char8_t c)
Returns the expected number of bytes for a UTF-8 char sequence by inspecting the first byte.
char32_t toupper(char32_t c)
static constexpr size_t Max
Maximal number of char8_ts of a UTF-8 byte sequence.
static constexpr char32_t EoF
End of File.
bool isxdigit(char32_t c)
char32_t append(char32_t c, char32_t b)
Append b to c for converting UTF-8 to UTF-32.
char32_t first(char32_t c, char32_t num)
Get relevant bits of first UTF-8 byte c of a multi-byte sequence consisting of num bytes.
static constexpr char32_t Null
Wrapper for char32_t which has a friend ostream operator.
friend std::ostream & operator<<(std::ostream &os, Char32 c)