FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
lexer.h
Go to the documentation of this file.
1#pragma once
2
3#include <filesystem>
4#include <istream>
5
6#include "fe/loc.h"
7#include "fe/ring.h"
8#include "fe/utf8.h"
9
10namespace fe {
11
12/// The blueprint for a lexer with a look-ahead buffer of @p K decoded characters (`char32_t`) to peek into the future (Lexer::ahead).
13/// You can "override" Lexer::next via CRTP (@p S is the child); Lexer::accept invokes it through the child.
14template<size_t K, class S> class Lexer {
15private:
16 S& self() { return *static_cast<S*>(this); } // downcast to the CRTP child S
17 const S& self() const { return *static_cast<const S*>(this); } // const overload of the downcast
18
19public:
20 Lexer(std::istream& istream, const std::filesystem::path* path = nullptr) // path is handed to loc_ and defaults to nullptr
21 : istream_(istream)
22 , loc_(path, {0, 0})
23 , peek_(1, 1) { // the peek position starts at (1, 1)
24 for (size_t i = 0; i != K; ++i) ahead_[i] = utf8::decode(istream_); // pre-fill all K look-ahead slots
25 accept(utf8::BOM); // eat UTF-8 BOM, if present; NOTE(review): default Append::On also appends it to str_ - presumably harmless once start() clears str_, confirm
26 assert(peek_.col == 1); // NOTE(review): <cassert> is not among the includes visible in this header - presumably transitive, confirm
27 }
28
29protected:
30 char32_t ahead(size_t i = 0) const { return ahead_[i]; } // i-th look-ahead character; i is passed to ahead_ as is
31
32 /// Invoke before assembling the next token.
33 void start() {
35 str_.clear(); // discard the text collected so far
36 }
37
38 /// Get next `char32_t` in Lexer::istream_ and increase Lexer::loc_.
39 /// @returns Null on an invalid UTF-8 sequence.
40 char32_t next() {
42 auto res = ahead_.put(utf8::decode(istream_)); // decode one more character into the look-ahead buffer; put()'s result is returned below
43 auto c = ahead_.front(); // char of the peek location
44
45 if (c == '\n') {
46 ++peek_.row; // a newline moves the peek position to the next row ...
47 peek_.col = 0; // ... and resets its column
48 } else if (c == utf8::EoF || c == utf8::BOM) {
49 /* do nothing: EoF and BOM leave the peek position untouched */
50 } else {
51 ++peek_.col; // any other character advances the column by one
52 }
53
54 return res;
55 }
56
57 /// @name Accept
58 /// Accept next character in Lexer::istream_, depending on some condition.
59 ///@{
60 /// What should happen to the accepted char?
61 /// Normalize identifiers via Append::Lower or Append::Upper for case-insensitive languages like FORTRAN or SQL.
62 enum class Append {
63 Off, ///< Do not append accepted char to Lexer::str_.
64 On, ///< Append accepted char as is to Lexer::str_.
65 Lower, ///< Append accepted char via fe::utf8::tolower to Lexer::str_.
66 Upper, ///< Append accepted char via fe::utf8::toupper to Lexer::str_.
67 };
68
69 /// @returns `true` if @p pred holds for the next character (Lexer::ahead with index 0).
70 /// In this case invoke the child's `next()` and append its result to Lexer::str_, unless @p append is Append::Off.
71 template<Append append = Append::On, class Pred> bool accept(Pred pred) {
72 if (pred(ahead())) {
73 auto c = self().next(); // dispatch through the CRTP child so that an "overridden" next() is used
74 if constexpr (append != Append::Off) {
75 if constexpr (append == Append::Lower) c = fe::utf8::tolower(c);
76 if constexpr (append == Append::Upper) c = fe::utf8::toupper(c);
77 str_ += c; // NOTE(review): str_ is a std::string, so the char32_t is converted to a single char here - presumably only safe for ASCII, confirm
78 }
79 return true;
80 }
81 return false;
82 }
83
84 // clang-format off
85 template<Append append = Append::On> bool accept(char32_t c) { return accept<append>([c](char32_t d) { return c == d; }); } // accept exactly the character c
86 template<Append append = Append::On> bool accept(char c) { return accept<append>((char32_t)c); } // convenience overload: converts c to char32_t
87 template<Append append = Append::On> bool accept(char8_t c) { return accept<append>((char32_t)c); } // convenience overload: converts c to char32_t
88 // clang-format on
89 ///@}
90
91 std::istream& istream_; ///< Input stream the characters are decoded from.
93 Loc loc_; ///< Loc%ation of the token we are currently constructing within Lexer::str_.
94 Pos peek_; ///< Pos%ition of the front character of the look-ahead buffer (`ahead_.front()`).
95 std::string str_; ///< Text of the token under construction; cleared by Lexer::start, extended by Lexer::accept.
96};
97
98} // namespace fe
The blueprint for a lexer with a look-ahead buffer of K decoded characters (char32_t) to peek into the future (Lexer::ahead).
Definition lexer.h:14
bool accept(char8_t c)
Definition lexer.h:87
char32_t next()
Get next char32_t in Lexer::istream_ and increase Lexer::loc_.
Definition lexer.h:40
void start()
Invoke before assembling the next token.
Definition lexer.h:33
Lexer(std::istream &istream, const std::filesystem::path *path=nullptr)
Definition lexer.h:20
Loc loc_
Location of the token we are currently constructing within Lexer::str_.
Definition lexer.h:93
bool accept(char c)
Definition lexer.h:86
Ring< char32_t, K > ahead_
Definition lexer.h:92
char32_t ahead(size_t i=0) const
Definition lexer.h:30
Pos peek_
Position of the front character of the look-ahead buffer ahead_.
Definition lexer.h:94
std::string str_
Definition lexer.h:95
@ Upper
Append accepted char via fe::utf8::toupper to Lexer::str_.
@ On
Append accepted char as is to Lexer::str_.
@ Lower
Append accepted char via fe::utf8::tolower to Lexer::str_.
@ Off
Do not append accepted char to Lexer::str_.
std::istream & istream_
Definition lexer.h:91
bool accept(Pred pred)
Definition lexer.h:71
bool accept(char32_t c)
Definition lexer.h:85
A ring buffer with N elements.
Definition ring.h:14
T put(T item)
Puts item into buffer.
Definition ring.h:48
T & front()
Definition ring.h:30
static constexpr char32_t BOM
Byte Order Mark.
Definition utf8.h:13
char32_t decode(std::istream &is)
Decodes the next sequence of bytes from is as UTF-32.
Definition utf8.h:41
char32_t tolower(char32_t c)
Definition utf8.h:112
char32_t toupper(char32_t c)
Definition utf8.h:113
static constexpr char32_t EoF
End of File.
Definition utf8.h:14
Definition arena.h:9
Location in a File.
Definition loc.h:33
Pos finis
It's called finis because it refers to the last character within this Location.
Definition loc.h:57
Pos begin
Definition loc.h:56
Position in a source file; pass around as value.
Definition loc.h:10
uint16_t col
Definition loc.h:23
uint16_t row
Definition loc.h:22