FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
sym.h
Go to the documentation of this file.
1#pragma once
2
3#include <cassert>
4#include <cstring>
5
6#include <bit>
7#include <iostream>
8#include <string>
9
10#ifdef FE_ABSL
11# include <absl/container/flat_hash_map.h>
12# include <absl/container/flat_hash_set.h>
13#else
14# include <unordered_map>
15# include <unordered_set>
16#endif
17
18#include "fe/arena.h"
19
20namespace fe {
21
22/// A Sym%bol just wraps a pointer to Sym::String, so pass Sym itself around as value.
23/// Sym is compatible with:
24/// * recommended: `std::string_view` (via Sym::view)
25/// * null-terminated C-strings (via Sym::c_str)
26///
27/// This means that retrieving a `std::string_view` or a null-terminated C-string is basically free.
28/// You can also obtain a `std::string` (via Sym::str), but this involves a copy.
29/// With the exception of the empty string, you should only create Sym%bols via SymPool::sym.
30/// This in turn will toss all Sym%bols into a big hash set.
31/// This makes Sym::operator== and Sym::operator!= an O(1) operation.
32/// The empty string is internally handled as `nullptr`.
33/// Thus, you can create a Sym%bol representing an empty string without having access to the SymPool.
34/// @note The empty `std::string`/`std::string_view`, `nullptr`, and `"\0"` are all identified as Sym::Sym().
35/// @warning Big endian version has not been tested.
36class Sym {
37public:
38 static constexpr size_t Short_String_Bytes = sizeof(uintptr_t);
39 static constexpr size_t Short_String_Mask = Short_String_Bytes - 1;
40
41 struct String {
42 constexpr String() noexcept = default;
43 constexpr String(size_t size) noexcept
44 : size(size) {}
45
46 size_t size = 0;
47 char chars[]; // This is actually a C-only feature, but all C++ compilers support that anyway.
48
49 struct Equal {
50 constexpr bool operator()(const String* s1, const String* s2) const noexcept {
51 bool res = s1->size == s2->size;
52 for (size_t i = 0, e = s1->size; res && i != e; ++i)
53 res &= s1->chars[i] == s2->chars[i];
54 return res;
55 }
56 };
57
58 struct Hash {
59 size_t operator()(const String* s) const noexcept {
60 return std::hash<std::string_view>()(std::string_view(s->chars, s->size));
61 }
62 };
63
64#ifdef FE_ABSL
65 template<class H>
66 friend constexpr H AbslHashValue(H h, const String* string) noexcept {
67 return H::combine(std::move(h), std::string_view(string->chars, string->size));
68 }
69#endif
70 };
71
72 static_assert(sizeof(String) == sizeof(size_t), "String.chars should be 0");
73
74private:
75 constexpr Sym(uintptr_t ptr) noexcept
76 : ptr_(ptr) {}
77
78public:
79 constexpr Sym() noexcept = default;
80
81 /// @name Getters
82 ///@{
83 [[nodiscard]] constexpr bool empty() const noexcept { return ptr_ == 0; }
84 [[nodiscard]] constexpr size_t size() const noexcept {
85 if (empty()) return 0;
86 if (auto size = ptr_ & Short_String_Mask) return size;
87 return ((const String*)ptr_)->size;
88 }
89 ///@}
90
91 /// @name Access
92 ///@{
93 constexpr char operator[](size_t i) const noexcept {
94 assert(i < size());
95 return c_str()[i];
96 }
97 constexpr char front() const noexcept { return (*this)[0]; }
98 constexpr char back() const noexcept { return (*this)[size() - 1]; }
99 ///@}
100
101 /// @name Iterators
102 ///@{
103 constexpr auto begin() const noexcept { return c_str(); }
104 constexpr auto end() const noexcept { return c_str() + size(); }
105 constexpr auto cbegin() const noexcept { return begin(); }
106 constexpr auto cend() const noexcept { return end(); }
107 constexpr auto rbegin() const noexcept { return std::reverse_iterator(end()); }
108 constexpr auto rend() const noexcept { return std::reverse_iterator(begin()); }
109 constexpr auto crbegin() const noexcept { return rbegin(); }
110 constexpr auto crend() const noexcept { return rend(); }
111 ///@}
112
113 /// @name Comparison: Sym w/ Sym
114 ///@{
115 friend constexpr auto operator<=>(Sym s1, Sym s2) noexcept { return s1.view() <=> s2.view(); }
116 friend constexpr bool operator==(Sym s1, Sym s2) noexcept { return s1.ptr_ == s2.ptr_; }
117 ///@}
118
119 /// @name Comparison: Sym w/ char
120 ///@{
121 friend constexpr std::strong_ordering operator<=>(Sym s, char c) noexcept { return cmp<false>(s, c); }
122 friend constexpr std::strong_ordering operator<=>(char c, Sym s) noexcept { return cmp<true>(s, c); }
123 friend constexpr bool operator==(Sym s, char c) noexcept { return (s.size() == 1) && (s[0] == c); }
124 friend constexpr bool operator==(char c, Sym s) noexcept { return (s.size() == 1) && (s[0] == c); }
125 ///@}
126
127 /// @name Comparison: Sym w/ convertible to std::string_view
128 ///@{
129 template<typename T>
130 requires std::is_convertible_v<T, std::string_view>
131 friend constexpr auto operator<=>(Sym lhs, const T& rhs) noexcept {
132 return lhs.view() <=> std::string_view(rhs);
133 }
134 template<typename T>
135 requires std::is_convertible_v<T, std::string_view>
136 friend constexpr auto operator<=>(const T& lhs, Sym rhs) noexcept {
137 return std::string_view(lhs) <=> rhs.view();
138 }
139
140 template<typename T>
141 requires std::is_convertible_v<T, std::string_view>
142 friend constexpr bool operator==(Sym lhs, const T& rhs) noexcept {
143 return lhs.view() == std::string_view(rhs);
144 }
145
146 template<typename T>
147 requires std::is_convertible_v<T, std::string_view>
148 friend constexpr bool operator==(const T& lhs, Sym rhs) noexcept {
149 return std::string_view(lhs) == rhs.view();
150 }
151 ///@}
152
153 /// @name Conversions
154 ///@{
155 [[nodiscard]] constexpr const char* c_str() const noexcept { return view().data(); }
156
157 [[nodiscard]] constexpr std::string_view view() const noexcept {
158 if (empty()) return {std::bit_cast<const char*>(&ptr_), 0};
159 // Little endian: 2 a b 0 register: 0ba2
160 // Big endian: a b 0 2 register: ab02
161 uintptr_t offset = std::endian::native == std::endian::little ? 1 : 0;
162 if (auto size = ptr_ & Short_String_Mask) return {std::bit_cast<const char*>(&ptr_) + offset, size};
163 auto S = std::bit_cast<const String*>(ptr_);
164 return std::string_view(S->chars, S->size);
165 }
166 constexpr operator std::string_view() const noexcept { return view(); }
167 constexpr std::string_view operator*() const noexcept { return view(); }
168 // Unfortunately, this doesn't work:
169 // std::string_view operator->() const { return view(); }
170
171 constexpr std::string str() const noexcept { return std::string(view()); } ///< This involves a copy.
172 constexpr explicit operator std::string() const noexcept { return str(); } ///< `explicit` as this involves a copy.
173 constexpr explicit operator bool() const noexcept { return ptr_; } ///< Is not empty?
174 ///@}
175
176#ifdef FE_ABSL
177 template<class H>
178 friend constexpr H AbslHashValue(H h, Sym sym) noexcept {
179 return H::combine(std::move(h), sym.ptr_);
180 }
181#endif
182 friend struct ::std::hash<fe::Sym>;
183 friend std::ostream& operator<<(std::ostream& o, Sym sym) { return o << sym.view(); }
184
185 /// @name Heterogeneous lookups for hash tables.
186 ///@{
187 struct Hash {
188 using is_transparent = void;
189 size_t operator()(Sym s) const noexcept { return std::hash<uintptr_t>()(s.ptr_); }
190 size_t operator()(std::string_view v) const noexcept { return std::hash<std::string_view>()(v); }
191 };
192
193 struct Eq {
194 using is_transparent = void;
195 bool operator()(Sym a, Sym b) const noexcept { return a.ptr_ == b.ptr_; }
196 bool operator()(Sym a, std::string_view b) const noexcept { return a.view() == b; }
197 bool operator()(std::string_view a, Sym b) const noexcept { return a == b.view(); }
198 };
199 ///@}
200
201private:
202 template<bool Rev>
203 static constexpr std::strong_ordering cmp(Sym s, char c) noexcept {
204 const auto n = s.size();
205 if (n == 0) return Rev ? std::strong_ordering::greater : std::strong_ordering::less;
206
207 auto cmp = s[0] <=> c;
208 if (cmp != 0) return cmp;
209
210 return (n == 1) ? std::strong_ordering::equal
211 : (Rev ? std::strong_ordering::less : std::strong_ordering::greater);
212 }
213
214 // Little endian: 2 a b 0 register: 0ba2
215 // Big endian: a b 0 2 register: ab02
216 uintptr_t ptr_ = 0;
217
218 friend class SymPool;
219};
220
221#ifndef DOXYGEN
222} // namespace fe
223
224template<>
225struct std::hash<fe::Sym> {
226 size_t operator()(fe::Sym sym) const noexcept { return std::hash<uintptr_t>()(sym.ptr_); }
227};
228
229namespace fe {
230#endif
231
232/// @name SymMap/SymSet
233/// Set/Map is keyed by pointer - which is hashed in SymPool.
234///@{
235///
236#ifdef FE_ABSL
237template<class V>
238using SymMap = absl::flat_hash_map<Sym, V, Sym::Hash, Sym::Eq>;
239using SymSet = absl::flat_hash_set<Sym, Sym::Hash, Sym::Eq>;
240#else
241template<class V>
242using SymMap = std::unordered_map<Sym, V, Sym::Hash, Sym::Eq>;
243using SymSet = std::unordered_set<Sym, Sym::Hash, Sym::Eq>;
244#endif
245///@}
246
247/// Hash set where all strings - wrapped in Sym%bol - live in.
248/// You can access the SymPool from Driver.
249class SymPool {
250public:
252
253 /// @name Constructor & Destruction
254 ///@{
255 SymPool(const SymPool&) = delete;
256#ifdef FE_ABSL
257 SymPool() noexcept {}
258#else
259 SymPool() noexcept
260 : pool_(container_.allocator<const String*>()) {}
261#endif
262 SymPool(SymPool&& other) noexcept
263 : SymPool() {
264 swap(*this, other);
265 }
267 ///@}
268
269 /// @name sym
270 ///@{
271 Sym sym(std::string_view s) {
272 if (s.empty()) return Sym();
273 auto size = s.size();
274
275 if (size <= Sym::Short_String_Bytes - 2) { // small string: need two more bytes for `\0' and size
276 uintptr_t ptr = size;
277 // Little endian: 2 a b 0 register: 0ba2
278 // Big endian: a b 0 2 register: ab02
279 if constexpr (std::endian::native == std::endian::little)
280 for (uintptr_t i = 0, shift = 8; i != size; ++i, shift += 8)
281 ptr |= (uintptr_t(s[i]) << shift);
282 else
283 for (uintptr_t i = 0, shift = (Sym::Short_String_Bytes - 1) * 8; i != size; ++i, shift -= 8)
284 ptr |= (uintptr_t(s[i]) << shift);
285 return Sym(ptr);
286 }
287
288 auto state = strings_.state();
289 auto ptr = (String*)strings_.allocate(sizeof(String) + s.size() + 1 /*'\0'*/, Sym::Short_String_Bytes);
290 new (ptr) String(s.size());
291 *std::copy(s.begin(), s.end(), ptr->chars) = '\0';
292 auto [i, ins] = pool_.emplace(ptr);
293 if (ins) return Sym(std::bit_cast<uintptr_t>(ptr));
294 strings_.deallocate(state);
295 return Sym(std::bit_cast<uintptr_t>(*i));
296 }
297 Sym sym(const std::string& s) { return sym((std::string_view)s); }
298 /// @p s is a null-terminated C-string.
299 constexpr Sym sym(const char* s) { return s == nullptr ? Sym() : sym(std::string_view(s)); }
300 // TODO we can try to fit s in current page and hence eliminate the explicit use of strlen
301 ///@}
302
303 friend void swap(SymPool& p1, SymPool& p2) noexcept {
304 using std::swap;
305 // clang-format off
306 swap(p1.strings_, p2.strings_ );
307#ifndef FE_ABSL
308 swap(p1.container_, p2.container_);
309#endif
310 swap(p1.pool_, p2.pool_ );
311 // clang-format on
312 }
313
314private:
315 Arena strings_;
316#ifdef FE_ABSL
317 absl::flat_hash_set<const String*, absl::Hash<const String*>, String::Equal> pool_;
318#else
319 Arena container_;
320 std::unordered_set<const String*, String::Hash, String::Equal, Arena::Allocator<const String*>> pool_;
321#endif
322};
323
324static_assert(std::is_trivially_copyable_v<Sym>);
325static_assert(sizeof(uintptr_t) == sizeof(void*), "uintptr_t must match pointer size");
326static_assert(std::has_unique_object_representations_v<uintptr_t>);
327static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big,
328 "mixed endianess not supported");
329
330} // namespace fe
An arena pre-allocates so-called pages of size Arena::page_size_.
Definition arena.h:18
constexpr void deallocate(size_t num_bytes) noexcept
Removes num_bytes again.
Definition arena.h:138
constexpr void * allocate(size_t num_bytes, size_t align)
Get n bytes of fresh memory.
Definition arena.h:105
State state() const noexcept
Definition arena.h:139
Hash set in which all strings - wrapped in Symbol - live.
Definition sym.h:249
Sym::String String
Definition sym.h:251
Sym sym(std::string_view s)
Definition sym.h:271
SymPool & operator=(SymPool)=delete
SymPool(const SymPool &)=delete
Sym sym(const std::string &s)
Definition sym.h:297
SymPool(SymPool &&other) noexcept
Definition sym.h:262
SymPool() noexcept
Definition sym.h:259
constexpr Sym sym(const char *s)
s is a null-terminated C-string.
Definition sym.h:299
friend void swap(SymPool &p1, SymPool &p2) noexcept
Definition sym.h:303
A Symbol just wraps a pointer to Sym::String, so pass Sym itself around by value.
Definition sym.h:36
friend constexpr std::strong_ordering operator<=>(char c, Sym s) noexcept
Definition sym.h:122
friend constexpr auto operator<=>(Sym s1, Sym s2) noexcept
Definition sym.h:115
constexpr auto rend() const noexcept
Definition sym.h:108
constexpr auto begin() const noexcept
Definition sym.h:103
constexpr char front() const noexcept
Definition sym.h:97
constexpr std::string_view operator*() const noexcept
Definition sym.h:167
constexpr bool empty() const noexcept
Definition sym.h:83
static constexpr size_t Short_String_Mask
Definition sym.h:39
static constexpr size_t Short_String_Bytes
Definition sym.h:38
constexpr auto cend() const noexcept
Definition sym.h:106
friend constexpr bool operator==(char c, Sym s) noexcept
Definition sym.h:124
constexpr size_t size() const noexcept
Definition sym.h:84
constexpr Sym() noexcept=default
constexpr char back() const noexcept
Definition sym.h:98
friend constexpr auto operator<=>(Sym lhs, const T &rhs) noexcept
Definition sym.h:131
friend std::ostream & operator<<(std::ostream &o, Sym sym)
Definition sym.h:183
friend constexpr bool operator==(Sym s1, Sym s2) noexcept
Definition sym.h:116
friend constexpr bool operator==(const T &lhs, Sym rhs) noexcept
Definition sym.h:148
constexpr auto crbegin() const noexcept
Definition sym.h:109
constexpr char operator[](size_t i) const noexcept
Definition sym.h:93
friend constexpr bool operator==(Sym s, char c) noexcept
Definition sym.h:123
constexpr auto crend() const noexcept
Definition sym.h:110
constexpr auto rbegin() const noexcept
Definition sym.h:107
constexpr std::string str() const noexcept
This involves a copy.
Definition sym.h:171
constexpr const char * c_str() const noexcept
Definition sym.h:155
friend constexpr auto operator<=>(const T &lhs, Sym rhs) noexcept
Definition sym.h:136
constexpr auto cbegin() const noexcept
Definition sym.h:105
constexpr auto end() const noexcept
Definition sym.h:104
friend constexpr bool operator==(Sym lhs, const T &rhs) noexcept
Definition sym.h:142
constexpr std::string_view view() const noexcept
Definition sym.h:157
friend constexpr std::strong_ordering operator<=>(Sym s, char c) noexcept
Definition sym.h:121
Definition arena.h:10
std::unordered_map< Sym, V, Sym::Hash, Sym::Eq > SymMap
Definition sym.h:242
std::unordered_set< Sym, Sym::Hash, Sym::Eq > SymSet
Definition sym.h:243
bool operator()(Sym a, Sym b) const noexcept
Definition sym.h:195
bool operator()(std::string_view a, Sym b) const noexcept
Definition sym.h:197
bool operator()(Sym a, std::string_view b) const noexcept
Definition sym.h:196
void is_transparent
Definition sym.h:194
size_t operator()(std::string_view v) const noexcept
Definition sym.h:190
void is_transparent
Definition sym.h:188
size_t operator()(Sym s) const noexcept
Definition sym.h:189
constexpr bool operator()(const String *s1, const String *s2) const noexcept
Definition sym.h:50
size_t operator()(const String *s) const noexcept
Definition sym.h:59
constexpr String() noexcept=default
char chars[]
Definition sym.h:47
size_t size
Definition sym.h:46