FE 0.5.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
fe::utf8 Namespace Reference

Classes

struct  Char32
 Wrapper for char32_t which has a friend ostream operator. More...
 

Functions

size_t num_bytes (char8_t c)
 Returns the expected number of bytes for a UTF-8 char sequence by inspecting the first byte.
 
char32_t append (char32_t c, char32_t b)
 Append b to c for converting UTF-8 to UTF-32.
 
char32_t first (char32_t c, char32_t num)
 Get relevant bits of first UTF-8 byte c of a multi-byte sequence consisting of num bytes.
 
char8_t is_valid234 (char8_t c)
 Is the 2nd, 3rd, or 4th byte of a UTF-8 byte sequence valid?
 
char32_t decode (std::istream &is)
 Decodes the next sequence of bytes from is as UTF-32.
 
bool encode (std::ostream &os, char32_t c32)
 Encodes the UTF-32 char c32 as UTF-8 and writes the sequence of bytes to os.
 
Wrappers

Safe char32_t-style wrappers for <cctype> functions:

Like all other functions from <cctype>, the behavior of std::isalnum is undefined if the argument's value is neither representable as unsigned char nor equal to EOF.

bool isalnum (char32_t c)
 
bool isalpha (char32_t c)
 
bool isblank (char32_t c)
 
bool iscntrl (char32_t c)
 
bool isdigit (char32_t c)
 
bool isgraph (char32_t c)
 
bool islower (char32_t c)
 
bool isprint (char32_t c)
 
bool ispunct (char32_t c)
 
bool isspace (char32_t c)
 
bool isupper (char32_t c)
 
bool isxdigit (char32_t c)
 
bool isascii (char32_t c)
 
char32_t tolower (char32_t c)
 
char32_t toupper (char32_t c)
 
bool isrange (char32_t c, char32_t begin, char32_t finis)
 Is c within [begin, finis]?
 
auto isrange (char32_t begin, char32_t finis)
 
bool isodigit (char32_t c)
 Is octal digit?
 
bool isbdigit (char32_t c)
 Is binary digit?
 
any

Is c in any of the remaining arguments?

bool _any (char32_t c, char32_t d)
 
template<class... T>
bool _any (char32_t c, char32_t d, T... args)
 
template<class... T>
auto any (T... args)
 

Variables

static constexpr size_t Max = 4
 Maximal number of char8_ts of a UTF-8 byte sequence.
 
static constexpr char32_t BOM = 0xfeff
 Byte Order Mark.
 
static constexpr char32_t EoF = (char32_t)std::istream::traits_type::eof()
 End of File.
 
static constexpr char32_t Null = 0
 

Function Documentation

◆ _any() [1/2]

bool fe::utf8::_any ( char32_t  c,
char32_t  d 
)
inline

Definition at line 127 of file utf8.h.

Referenced by _any(), and any().

◆ _any() [2/2]

template<class... T>
bool fe::utf8::_any ( char32_t  c,
char32_t  d,
T...  args 
)
inline

Definition at line 128 of file utf8.h.

References _any().

◆ any()

template<class... T>
auto fe::utf8::any ( T...  args)
inline

Definition at line 129 of file utf8.h.

References _any().

◆ append()

char32_t fe::utf8::append ( char32_t  c,
char32_t  b 
)
inline

Append b to c for converting UTF-8 to UTF-32.

Definition at line 28 of file utf8.h.

Referenced by decode().

◆ decode()

char32_t fe::utf8::decode ( std::istream &  is)
inline

Decodes the next sequence of bytes from is as UTF-32.

Returns
Null on error.

Definition at line 41 of file utf8.h.

References append(), EoF, first(), is_valid234(), Null, and num_bytes().

Referenced by fe::Lexer< K, S >::next().

◆ encode()

bool fe::utf8::encode ( std::ostream &  os,
char32_t  c32 
)
inline

Encodes the UTF-32 char c32 as UTF-8 and writes the sequence of bytes to os.

Returns
false on error.

Definition at line 70 of file utf8.h.

◆ first()

char32_t fe::utf8::first ( char32_t  c,
char32_t  num 
)
inline

Get relevant bits of first UTF-8 byte c of a multi-byte sequence consisting of num bytes.

Definition at line 31 of file utf8.h.

Referenced by decode().

◆ is_valid234()

char8_t fe::utf8::is_valid234 ( char8_t  c)
inline

Is the 2nd, 3rd, or 4th byte of a UTF-8 byte sequence valid?

Returns
the extracted char8_t or char8_t(-1) if invalid.

Definition at line 35 of file utf8.h.

Referenced by decode().

◆ isalnum()

bool fe::utf8::isalnum ( char32_t  c)
inline

Definition at line 99 of file utf8.h.

◆ isalpha()

bool fe::utf8::isalpha ( char32_t  c)
inline

Definition at line 100 of file utf8.h.

◆ isascii()

bool fe::utf8::isascii ( char32_t  c)
inline

Definition at line 111 of file utf8.h.

◆ isbdigit()

bool fe::utf8::isbdigit ( char32_t  c)
inline

Is binary digit?

Definition at line 120 of file utf8.h.

References isrange().

◆ isblank()

bool fe::utf8::isblank ( char32_t  c)
inline

Definition at line 101 of file utf8.h.

◆ iscntrl()

bool fe::utf8::iscntrl ( char32_t  c)
inline

Definition at line 102 of file utf8.h.

◆ isdigit()

bool fe::utf8::isdigit ( char32_t  c)
inline

Definition at line 103 of file utf8.h.

◆ isgraph()

bool fe::utf8::isgraph ( char32_t  c)
inline

Definition at line 104 of file utf8.h.

◆ islower()

bool fe::utf8::islower ( char32_t  c)
inline

Definition at line 105 of file utf8.h.

◆ isodigit()

bool fe::utf8::isodigit ( char32_t  c)
inline

Is octal digit?

Definition at line 119 of file utf8.h.

References isrange().

◆ isprint()

bool fe::utf8::isprint ( char32_t  c)
inline

Definition at line 106 of file utf8.h.

◆ ispunct()

bool fe::utf8::ispunct ( char32_t  c)
inline

Definition at line 107 of file utf8.h.

◆ isrange() [1/2]

auto fe::utf8::isrange ( char32_t  begin,
char32_t  finis 
)
inline

Definition at line 117 of file utf8.h.

References isrange().

◆ isrange() [2/2]

bool fe::utf8::isrange ( char32_t  c,
char32_t  begin,
char32_t  finis 
)
inline

Is c within [begin, finis]?

Definition at line 116 of file utf8.h.

Referenced by isbdigit(), isodigit(), and isrange().

◆ isspace()

bool fe::utf8::isspace ( char32_t  c)
inline

Definition at line 108 of file utf8.h.

◆ isupper()

bool fe::utf8::isupper ( char32_t  c)
inline

Definition at line 109 of file utf8.h.

◆ isxdigit()

bool fe::utf8::isxdigit ( char32_t  c)
inline

Definition at line 110 of file utf8.h.

◆ num_bytes()

size_t fe::utf8::num_bytes ( char8_t  c)
inline

Returns the expected number of bytes for a UTF-8 char sequence by inspecting the first byte.

Returns 0 if invalid.

Definition at line 19 of file utf8.h.

Referenced by decode().

◆ tolower()

char32_t fe::utf8::tolower ( char32_t  c)
inline

Definition at line 112 of file utf8.h.

Referenced by fe::Lexer< K, S >::accept().

◆ toupper()

char32_t fe::utf8::toupper ( char32_t  c)
inline

Definition at line 113 of file utf8.h.

Referenced by fe::Lexer< K, S >::accept().

Variable Documentation

◆ BOM

constexpr char32_t fe::utf8::BOM = 0xfeff
static constexpr

Byte Order Mark.

Definition at line 13 of file utf8.h.

Referenced by fe::Lexer< K, S >::next().

◆ EoF

constexpr char32_t fe::utf8::EoF = (char32_t)std::istream::traits_type::eof()
static constexpr

End of File.

Definition at line 14 of file utf8.h.

Referenced by decode(), and fe::Lexer< K, S >::next().

◆ Max

constexpr size_t fe::utf8::Max = 4
static constexpr

Maximal number of char8_ts of a UTF-8 byte sequence.

Definition at line 12 of file utf8.h.

◆ Null

constexpr char32_t fe::utf8::Null = 0
static constexpr

Definition at line 15 of file utf8.h.

Referenced by decode().