FE 0.9.2
Header-only C++ frontend library
Loading...
Searching...
No Matches
lexer.h
Go to the documentation of this file.
#pragma once

#include <cassert>
#include <cstddef>
#include <filesystem>
#include <istream>
#include <string>

#include "fe/loc.h"
#include "fe/ring.h"
#include "fe/utf8.h"
9
10namespace fe {
11
12/// The blueprint for a lexer with a buffer of @p K tokens to peek into the future (Lexer::ahead).
13/// You can "override" Lexer::next via CRTP (@p S is the child).
14template<size_t K, class S>
15class Lexer {
16private:
17 S& self() { return *static_cast<S*>(this); }
18 const S& self() const { return *static_cast<const S*>(this); }
19
20public:
21 Lexer(std::istream& istream, const std::filesystem::path* path = nullptr)
22 : istream_(istream)
23 , loc_(path, {0, 0})
24 , peek_(1, 1) {
25 for (size_t i = 0; i != K; ++i)
27 // Eat UTF-8 BOM, if present.
28 // Call the base Lexer::next directly (not self().next() via accept): the derived S is not yet
29 // constructed here, so dispatching into a CRTP override of next would be undefined behavior.
30 if (ahead() == utf8::BOM) Lexer::next();
31 assert(peek_.col == 1);
32 }
33
34protected:
35 char32_t ahead(size_t i = 0) const { return ahead_[i]; }
36
37 /// Invoke before assembling the next token.
38 void start() {
39 loc_.begin = peek_;
40 str_.clear();
41 }
42
43 /// Get next `char32_t` in Lexer::istream_ and increase Lexer::loc_.
44 /// @returns utf8::Invalid on an invalid UTF-8 sequence.
45 char32_t next() {
46 loc_.finis = peek_;
47 auto res = ahead_.put(utf8::decode(istream_));
48 auto c = ahead_.front(); // char of the peek location
49
50 if (c == '\n') {
51 ++peek_.row;
52 peek_.col = 0;
53 } else if (c == utf8::EoF || c == utf8::BOM) {
54 /* do nothing */
55 } else {
56 ++peek_.col;
57 }
58
59 return res;
60 }
61
62 /// @name Accept
63 /// Accept next character in Lexer::istream_, depending on some condition.
64 ///@{
65 /// What should happen to the accepted char?
66 /// Normalize identifiers via Append::Lower or Append::Upper for case-insensitive languages like FORTRAN or SQL.
67 enum class Append {
68 Off, ///< Do not append accepted char to Lexer::str_.
69 On, ///< Append accepted char as is to Lexer::str_.
70 Lower, ///< Append accepted char via fe::utf8::tolower` to Lexer::str_.
71 Upper, ///< Append accepted char via fe::utf8::toupper` to Lexer::str_.
72 };
73
74 /// @returns `true` if @p pred holds.
75 /// In this case invoke Lexer::next() and append to Lexer::str_, if @p append.
76 template<Append append = Append::On, class Pred>
77 bool accept(Pred pred) {
78 if (pred(ahead())) {
79 auto c = self().next();
80 if constexpr (append != Append::Off) {
81 if constexpr (append == Append::Lower) c = fe::utf8::tolower(c);
82 if constexpr (append == Append::Upper) c = fe::utf8::toupper(c);
83 str_ += c;
84 }
85 return true;
86 }
87 return false;
88 }
89
90 // clang-format off
91 template<Append append = Append::On> bool accept(char32_t c) { return accept<append>([c](char32_t d) { return c == d; }); }
92 template<Append append = Append::On> bool accept(char c) { return accept<append>((char32_t)c); }
93 template<Append append = Append::On> bool accept(char8_t c) { return accept<append>((char32_t)c); }
94 // clang-format on
95 ///@}
96
97 std::istream& istream_;
99 Loc loc_; ///< Loc%ation of the token we are currently constructing within Lexer::str_,
100 Pos peek_; ///< Pos%ition of ahead_::first;
101 std::string str_;
102};
103
104} // namespace fe
bool accept(char8_t c)
Definition lexer.h:93
char32_t next()
Get next char32_t in Lexer::istream_ and increase Lexer::loc_.
Definition lexer.h:45
void start()
Invoke before assembling the next token.
Definition lexer.h:38
Lexer(std::istream &istream, const std::filesystem::path *path=nullptr)
Definition lexer.h:21
Loc loc_
Location of the token we are currently constructing within Lexer::str_.
Definition lexer.h:99
bool accept(char c)
Definition lexer.h:92
Ring< char32_t, K > ahead_
Definition lexer.h:98
char32_t ahead(size_t i=0) const
Definition lexer.h:35
Pos peek_
Position of the first look-ahead character (ahead_.front()).
Definition lexer.h:100
std::string str_
Definition lexer.h:101
@ Upper
Append accepted char via fe::utf8::toupper to Lexer::str_.
Definition lexer.h:71
@ On
Append accepted char as is to Lexer::str_.
Definition lexer.h:69
@ Lower
Append accepted char via fe::utf8::tolower to Lexer::str_.
Definition lexer.h:70
@ Off
Do not append accepted char to Lexer::str_.
Definition lexer.h:68
std::istream & istream_
Definition lexer.h:97
bool accept(Pred pred)
Definition lexer.h:77
bool accept(char32_t c)
Definition lexer.h:91
A ring buffer with N elements.
Definition ring.h:15
static constexpr char32_t BOM
Byte Order Mark.
Definition utf8.h:18
char32_t tolower(char32_t c) noexcept
Definition utf8.h:138
char32_t decode(std::istream &is)
Decodes the next UTF-8 sequence from is into a single char32_t.
Definition utf8.h:64
char32_t toupper(char32_t c) noexcept
Definition utf8.h:139
static constexpr char32_t EoF
End of stream sentinel returned by decode.
Definition utf8.h:20
Definition arena.h:13
Location in a File.
Definition loc.h:35
Position in a source file; pass around as value.
Definition loc.h:10
uint16_t col
Definition loc.h:23