FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
lexer.h
Go to the documentation of this file.
1#pragma once
2
3#include <filesystem>
4#include <istream>
5
6#include "fe/loc.h"
7#include "fe/ring.h"
8#include "fe/utf8.h"
9
10namespace fe {
11
12/// The blueprint for a lexer with a buffer of @p K characters (`char32_t`) to peek into the future (Lexer::ahead).
13/// You can "override" Lexer::next via CRTP (@p S is the child).
14template<size_t K, class S>
15class Lexer {
16private:
17 S& self() { return *static_cast<S*>(this); } // CRTP downcast to the child lexer S
18 const S& self() const { return *static_cast<const S*>(this); } // const flavour of the downcast above
19
20public:
21 Lexer(std::istream& istream, const std::filesystem::path* path = nullptr)
22 : istream_(istream)
23 , loc_(path, {0, 0})
24 , peek_(1, 1) {
25 for (size_t i = 0; i != K; ++i) // NOTE(review): listing line 26 (the loop body) is missing from this rendering - confirm against lexer.h
27 accept(utf8::BOM); // eat UTF-8 BOM, if present
28 assert(peek_.col == 1); // NOTE(review): <cassert> is not among the includes visible here; presumably pulled in transitively - confirm
29 }
30
31protected:
32 char32_t ahead(size_t i = 0) const { return ahead_[i]; } // i-th buffered look-ahead char; no bounds check is visible here
33
34 /// Invoke before assembling the next token.
35 void start() { // NOTE(review): listing line 36 is missing from this rendering - confirm against lexer.h
37 str_.clear();
38 }
39
40 /// Get next `char32_t` in Lexer::istream_ and increase Lexer::loc_.
41 /// @returns Null on an invalid UTF-8 sequence.
42 char32_t next() { // NOTE(review): listing line 43 is missing from this rendering - confirm against lexer.h
44 auto res = ahead_.put(utf8::decode(istream_));
45 auto c = ahead_.front(); // char of the peek location
46
47 if (c == '\n') {
48 ++peek_.row;
49 peek_.col = 0;
50 } else if (c == utf8::EoF || c == utf8::BOM) {
51 /* do nothing */
52 } else {
53 ++peek_.col;
54 }
55
56 return res;
57 }
58
59 /// @name Accept
60 /// Accept next character in Lexer::istream_, depending on some condition.
61 ///@{
62 /// What should happen to the accepted char?
63 /// Normalize identifiers via Append::Lower or Append::Upper for case-insensitive languages like FORTRAN or SQL.
64 enum class Append {
65 Off, ///< Do not append accepted char to Lexer::str_.
66 On, ///< Append accepted char as is to Lexer::str_.
67 Lower, ///< Append accepted char via `fe::utf8::tolower` to Lexer::str_.
68 Upper, ///< Append accepted char via `fe::utf8::toupper` to Lexer::str_.
69 };
70
71 /// @returns `true` if @p pred holds.
72 /// In this case invoke Lexer::next() and append to Lexer::str_, if @p append.
73 template<Append append = Append::On, class Pred>
74 bool accept(Pred pred) {
75 if (pred(ahead())) {
76 auto c = self().next(); // goes through self() so that a next() provided by the child S is the one invoked
77 if constexpr (append != Append::Off) {
78 if constexpr (append == Append::Lower) c = fe::utf8::tolower(c);
79 if constexpr (append == Append::Upper) c = fe::utf8::toupper(c);
80 str_ += c;
81 }
82 return true;
83 }
84 return false;
85 }
86
87 // clang-format off
88 template<Append append = Append::On> bool accept(char32_t c) { return accept<append>([c](char32_t d) { return c == d; }); }
89 template<Append append = Append::On> bool accept(char c) { return accept<append>((char32_t)c); }
90 template<Append append = Append::On> bool accept(char8_t c) { return accept<append>((char32_t)c); }
91 // clang-format on
92 ///@}
93
94 std::istream& istream_; // NOTE(review): listing line 95 (the ahead_ declaration) is missing from this rendering - confirm against lexer.h
96 Loc loc_; ///< Loc%ation of the token we are currently constructing within Lexer::str_.
97 Pos peek_; ///< Pos%ition of the char at the front of Lexer::ahead_.
98 std::string str_;
99};
100
101} // namespace fe
The blueprint for a lexer with a buffer of K characters (char32_t) to peek into the future (Lexer::ahead).
Definition lexer.h:15
bool accept(char8_t c)
Definition lexer.h:90
char32_t next()
Get next char32_t in Lexer::istream_ and increase Lexer::loc_.
Definition lexer.h:42
void start()
Invoke before assembling the next token.
Definition lexer.h:35
Lexer(std::istream &istream, const std::filesystem::path *path=nullptr)
Definition lexer.h:21
Loc loc_
Location of the token we are currently constructing within Lexer::str_.
Definition lexer.h:96
bool accept(char c)
Definition lexer.h:89
Ring< char32_t, K > ahead_
Definition lexer.h:95
char32_t ahead(size_t i=0) const
Definition lexer.h:32
Pos peek_
Position of the char at the front of Lexer::ahead_.
Definition lexer.h:97
std::string str_
Definition lexer.h:98
@ Upper
Append accepted char via fe::utf8::toupper to Lexer::str_.
@ On
Append accepted char as is to Lexer::str_.
@ Lower
Append accepted char via fe::utf8::tolower to Lexer::str_.
@ Off
Do not append accepted char to Lexer::str_.
std::istream & istream_
Definition lexer.h:94
bool accept(Pred pred)
Definition lexer.h:74
bool accept(char32_t c)
Definition lexer.h:88
A ring buffer with N elements.
Definition ring.h:15
T put(T item)
Puts item into buffer.
Definition ring.h:49
T & front()
Definition ring.h:31
static constexpr char32_t BOM
Byte Order Mark.
Definition utf8.h:13
char32_t decode(std::istream &is)
Decodes the next sequence of bytes from is as UTF-32.
Definition utf8.h:41
char32_t tolower(char32_t c)
Definition utf8.h:112
char32_t toupper(char32_t c)
Definition utf8.h:113
static constexpr char32_t EoF
End of File.
Definition utf8.h:14
Definition arena.h:10
Location in a File.
Definition loc.h:33
Pos finis
It's called finis because it refers to the last character within this Location.
Definition loc.h:57
Pos begin
Definition loc.h:56
Position in a source file; pass around as value.
Definition loc.h:10
uint16_t col
Definition loc.h:23
uint16_t row
Definition loc.h:22