FE 0.9.2
Header-only C++ frontend library
Loading...
Searching...
No Matches
sym.h
Go to the documentation of this file.
1#pragma once
2
3#include <cassert>
4#include <cstring>
5
6#include <bit>
7#include <iostream>
8#include <string>
9
10#ifdef FE_ABSL
11# include <absl/container/flat_hash_map.h>
12# include <absl/container/flat_hash_set.h>
13#else
14# include <unordered_map>
15# include <unordered_set>
16#endif
17
18#include "fe/arena.h"
19
20namespace fe {
21
22/// A Sym%bol just wraps a pointer to Sym::String, so pass Sym itself around as value.
23/// Sym is compatible with:
24/// * recommended: `std::string_view` (via Sym::view)
25/// * null-terminated C-strings (via Sym::c_str)
26///
27/// This means that retrieving a `std::string_view` or a null-terminated C-string is basically free.
28/// You can also obtain a `std::string` (via Sym::str), but this involves a copy.
29/// With the exception of the empty string, you should only create Sym%bols via SymPool::sym.
30/// This in turn will toss all Sym%bols into a big hash set.
31/// This makes Sym::operator== and Sym::operator!= an O(1) operation.
32/// The empty string is internally handled as `nullptr`.
33/// Thus, you can create a Sym%bol representing an empty string without having access to the SymPool.
34/// @note The empty `std::string`/`std::string_view`, `nullptr`, and `"\0"` are all identified as Sym::Sym().
35/// @warning Big endian version has not been tested.
36class Sym {
37public:
38 static constexpr size_t Short_String_Bytes = sizeof(uintptr_t);
39 static constexpr size_t Short_String_Mask = Short_String_Bytes - 1;
40
41 struct String {
42 constexpr String() noexcept = default;
43 constexpr String(size_t size) noexcept
44 : size(size) {}
45
46 size_t size = 0;
47 char chars[]; // This is actually a C-only feature, but all C++ compilers support that anyway.
48
49 struct Equal {
50 constexpr bool operator()(const String* s1, const String* s2) const noexcept {
51 bool res = s1->size == s2->size;
52 for (size_t i = 0, e = s1->size; res && i != e; ++i)
53 res &= s1->chars[i] == s2->chars[i];
54 return res;
55 }
56 };
57
58 struct Hash {
59 size_t operator()(const String* s) const noexcept {
60 return std::hash<std::string_view>()(std::string_view(s->chars, s->size));
61 }
62 };
63
64#ifdef FE_ABSL
65 template<class H>
66 friend constexpr H AbslHashValue(H h, const String* string) noexcept {
67 return H::combine(std::move(h), std::string_view(string->chars, string->size));
68 }
69#endif
70 };
71
72 static_assert(sizeof(String) == sizeof(size_t), "String.chars should be 0");
73
74private:
75 constexpr Sym(uintptr_t ptr) noexcept
76 : ptr_(ptr) {}
77
78public:
79 constexpr Sym() noexcept = default;
80
81 /// @name Getters
82 ///@{
83 [[nodiscard]] constexpr bool empty() const noexcept { return ptr_ == 0; }
84 [[nodiscard]] constexpr size_t size() const noexcept {
85 if (empty()) return 0;
86 if (auto size = ptr_ & Short_String_Mask) return size;
87 return ((const String*)ptr_)->size;
88 }
89 ///@}
90
91 /// @name Access
92 ///@{
93 constexpr char operator[](size_t i) const noexcept {
94 assert(i < size());
95 return c_str()[i];
96 }
97 constexpr char front() const noexcept { return (*this)[0]; }
98 constexpr char back() const noexcept { return (*this)[size() - 1]; }
99 ///@}
100
101 /// @name Iterators
102 ///@{
103 constexpr auto begin() const noexcept { return c_str(); }
104 constexpr auto end() const noexcept { return c_str() + size(); }
105 constexpr auto cbegin() const noexcept { return begin(); }
106 constexpr auto cend() const noexcept { return end(); }
107 constexpr auto rbegin() const noexcept { return std::reverse_iterator(end()); }
108 constexpr auto rend() const noexcept { return std::reverse_iterator(begin()); }
109 constexpr auto crbegin() const noexcept { return rbegin(); }
110 constexpr auto crend() const noexcept { return rend(); }
111 ///@}
112
113 /// @name Comparison: Sym w/ Sym
114 ///@{
115 friend constexpr auto operator<=>(Sym s1, Sym s2) noexcept { return s1.view() <=> s2.view(); }
116 friend constexpr bool operator==(Sym s1, Sym s2) noexcept { return s1.ptr_ == s2.ptr_; }
117 ///@}
118
119 /// @name Comparison: Sym w/ char
120 ///@{
121 friend constexpr std::strong_ordering operator<=>(Sym s, char c) noexcept { return cmp<false>(s, c); }
122 friend constexpr std::strong_ordering operator<=>(char c, Sym s) noexcept { return cmp<true>(s, c); }
123 friend constexpr bool operator==(Sym s, char c) noexcept { return (s.size() == 1) && (s[0] == c); }
124 friend constexpr bool operator==(char c, Sym s) noexcept { return (s.size() == 1) && (s[0] == c); }
125 ///@}
126
127 /// @name Comparison: Sym w/ convertible to std::string_view
128 ///@{
129 template<typename T>
130 requires std::is_convertible_v<T, std::string_view>
131 friend constexpr auto operator<=>(Sym lhs, const T& rhs) noexcept {
132 return lhs.view() <=> std::string_view(rhs);
133 }
134 template<typename T>
135 requires std::is_convertible_v<T, std::string_view>
136 friend constexpr auto operator<=>(const T& lhs, Sym rhs) noexcept {
137 return std::string_view(lhs) <=> rhs.view();
138 }
139
140 template<typename T>
141 requires std::is_convertible_v<T, std::string_view>
142 friend constexpr bool operator==(Sym lhs, const T& rhs) noexcept {
143 return lhs.view() == std::string_view(rhs);
144 }
145
146 template<typename T>
147 requires std::is_convertible_v<T, std::string_view>
148 friend constexpr bool operator==(const T& lhs, Sym rhs) noexcept {
149 return std::string_view(lhs) == rhs.view();
150 }
151 ///@}
152
153 /// @name Conversions
154 ///@{
155 /// @warning For a *short* Sym%bol (small-string-optimized, see Sym::view) the returned pointer aliases this
156 /// Sym%bol's internal `ptr_`. It is therefore only valid as long as *this* Sym%bol object lives - it dangles
157 /// for a temporary (e.g. `pool.sym("ab").c_str()`). Use Sym::str if the string must outlive the Sym%bol.
158 [[nodiscard]] constexpr const char* c_str() const noexcept { return view().data(); }
159
160 /// @note For a short Sym%bol (size fits into Sym::Short_String_Bytes) the characters are stored inline within
161 /// this object's `ptr_`, so the returned view points into *this* Sym%bol and dangles once it dies; see Sym::c_str.
162 [[nodiscard]] constexpr std::string_view view() const noexcept {
163 if (empty()) return {std::bit_cast<const char*>(&ptr_), 0};
164 // Little endian: 2 a b 0 register: 0ba2
165 // Big endian: a b 0 2 register: ab02
166 uintptr_t offset = std::endian::native == std::endian::little ? 1 : 0;
167 if (auto size = ptr_ & Short_String_Mask) return {std::bit_cast<const char*>(&ptr_) + offset, size};
168 auto S = std::bit_cast<const String*>(ptr_);
169 return std::string_view(S->chars, S->size);
170 }
171 constexpr operator std::string_view() const noexcept { return view(); }
172 constexpr std::string_view operator*() const noexcept { return view(); }
173 // Unfortunately, this doesn't work:
174 // std::string_view operator->() const { return view(); }
175
176 constexpr std::string str() const noexcept { return std::string(view()); } ///< This involves a copy.
177 constexpr explicit operator std::string() const noexcept { return str(); } ///< `explicit` as this involves a copy.
178 constexpr explicit operator bool() const noexcept { return ptr_; } ///< Is not empty?
179 ///@}
180
181#ifdef FE_ABSL
182 template<class H>
183 friend constexpr H AbslHashValue(H h, Sym sym) noexcept {
184 return H::combine(std::move(h), sym.ptr_);
185 }
186#endif
187 friend struct ::std::hash<fe::Sym>;
188 friend std::ostream& operator<<(std::ostream& o, Sym sym) { return o << sym.view(); }
189
190 /// @name Heterogeneous lookups for hash tables.
191 ///@{
192 struct Hash {
193 using is_transparent = void;
194 size_t operator()(Sym s) const noexcept { return std::hash<uintptr_t>()(s.ptr_); }
195 size_t operator()(std::string_view v) const noexcept { return std::hash<std::string_view>()(v); }
196 };
197
198 struct Eq {
199 using is_transparent = void;
200 bool operator()(Sym a, Sym b) const noexcept { return a.ptr_ == b.ptr_; }
201 bool operator()(Sym a, std::string_view b) const noexcept { return a.view() == b; }
202 bool operator()(std::string_view a, Sym b) const noexcept { return a == b.view(); }
203 };
204 ///@}
205
206private:
207 template<bool Rev>
208 static constexpr std::strong_ordering cmp(Sym s, char c) noexcept {
209 const auto n = s.size();
210 if (n == 0) return Rev ? std::strong_ordering::greater : std::strong_ordering::less;
211
212 auto cmp = s[0] <=> c;
213 if (cmp != 0) return cmp;
214
215 return (n == 1) ? std::strong_ordering::equal
216 : (Rev ? std::strong_ordering::less : std::strong_ordering::greater);
217 }
218
219 // Little endian: 2 a b 0 register: 0ba2
220 // Big endian: a b 0 2 register: ab02
221 uintptr_t ptr_ = 0;
222
223 friend class SymPool;
224};
225
226#ifndef DOXYGEN
227} // namespace fe
228
229template<>
230struct std::hash<fe::Sym> {
231 size_t operator()(fe::Sym sym) const noexcept { return std::hash<uintptr_t>()(sym.ptr_); }
232};
233
234namespace fe {
235#endif
236
237/// @name SymMap/SymSet
238/// Set/Map is keyed by pointer - which is hashed in SymPool.
239///@{
240///
241#ifdef FE_ABSL
242template<class V>
243using SymMap = absl::flat_hash_map<Sym, V, Sym::Hash, Sym::Eq>;
244using SymSet = absl::flat_hash_set<Sym, Sym::Hash, Sym::Eq>;
245#else
246template<class V>
247using SymMap = std::unordered_map<Sym, V, Sym::Hash, Sym::Eq>;
248using SymSet = std::unordered_set<Sym, Sym::Hash, Sym::Eq>;
249#endif
250///@}
251
252/// Hash set where all strings - wrapped in Sym%bol - live in.
253/// You can access the SymPool from Driver.
254class SymPool {
255public:
257
258 /// @name Constructor & Destruction
259 ///@{
260 SymPool(const SymPool&) = delete;
261#ifdef FE_ABSL
262 SymPool() noexcept {}
263#else
264 SymPool() noexcept
265 : pool_(container_.allocator<const String*>()) {}
266#endif
267 SymPool(SymPool&& other) noexcept
268 : SymPool() {
269 swap(*this, other);
270 }
272 ///@}
273
274 /// @name sym
275 ///@{
276 Sym sym(std::string_view s) {
277 if (s.empty()) return Sym();
278 auto size = s.size();
279
280 if (size <= Sym::Short_String_Bytes - 2) { // small string: need two more bytes for `\0' and size
281 uintptr_t ptr = size;
282 // Little endian: 2 a b 0 register: 0ba2
283 // Big endian: a b 0 2 register: ab02
284 if constexpr (std::endian::native == std::endian::little)
285 for (uintptr_t i = 0, shift = 8; i != size; ++i, shift += 8)
286 ptr |= (uintptr_t(s[i]) << shift);
287 else
288 for (uintptr_t i = 0, shift = (Sym::Short_String_Bytes - 1) * 8; i != size; ++i, shift -= 8)
289 ptr |= (uintptr_t(s[i]) << shift);
290 return Sym(ptr);
291 }
292
293 auto state = strings_.state();
294 auto ptr = (String*)strings_.allocate(sizeof(String) + s.size() + 1 /*'\0'*/, Sym::Short_String_Bytes);
295 new (ptr) String(s.size());
296 *std::copy(s.begin(), s.end(), ptr->chars) = '\0';
297 auto [i, ins] = pool_.emplace(ptr);
298 if (ins) return Sym(std::bit_cast<uintptr_t>(ptr));
299 strings_.deallocate(state);
300 return Sym(std::bit_cast<uintptr_t>(*i));
301 }
302 Sym sym(const std::string& s) { return sym((std::string_view)s); }
303 /// @p s is a null-terminated C-string.
304 constexpr Sym sym(const char* s) { return s == nullptr ? Sym() : sym(std::string_view(s)); }
305 // TODO we can try to fit s in current page and hence eliminate the explicit use of strlen
306 ///@}
307
308 friend void swap(SymPool& p1, SymPool& p2) noexcept {
309 using std::swap;
310 // clang-format off
311 swap(p1.strings_, p2.strings_ );
312#ifndef FE_ABSL
313 swap(p1.container_, p2.container_);
314#endif
315 swap(p1.pool_, p2.pool_ );
316 // clang-format on
317 }
318
319private:
320 Arena strings_;
321#ifdef FE_ABSL
322 absl::flat_hash_set<const String*, absl::Hash<const String*>, String::Equal> pool_;
323#else
324 Arena container_;
325 std::unordered_set<const String*, String::Hash, String::Equal, Arena::Allocator<const String*>> pool_;
326#endif
327};
328
329static_assert(std::is_trivially_copyable_v<Sym>);
330static_assert(sizeof(uintptr_t) == sizeof(void*), "uintptr_t must match pointer size");
331static_assert(std::has_unique_object_representations_v<uintptr_t>);
332static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big,
333 "mixed endianness not supported");
334
335} // namespace fe
An arena pre-allocates so-called pages of size Arena::page_size_.
Definition arena.h:21
Sym::String String
Definition sym.h:256
Sym sym(std::string_view s)
Definition sym.h:276
SymPool & operator=(SymPool)=delete
SymPool(const SymPool &)=delete
Sym sym(const std::string &s)
Definition sym.h:302
SymPool(SymPool &&other) noexcept
Definition sym.h:267
SymPool() noexcept
Definition sym.h:264
constexpr Sym sym(const char *s)
s is a null-terminated C-string.
Definition sym.h:304
friend void swap(SymPool &p1, SymPool &p2) noexcept
Definition sym.h:308
A Symbol just wraps a pointer to Sym::String, so pass Sym itself around as value.
Definition sym.h:36
friend constexpr std::strong_ordering operator<=>(char c, Sym s) noexcept
Definition sym.h:122
friend constexpr auto operator<=>(Sym lhs, const T &rhs) noexcept
Definition sym.h:131
friend constexpr auto operator<=>(Sym s1, Sym s2) noexcept
Definition sym.h:115
constexpr auto rend() const noexcept
Definition sym.h:108
constexpr auto begin() const noexcept
Definition sym.h:103
constexpr char front() const noexcept
Definition sym.h:97
constexpr std::string_view operator*() const noexcept
Definition sym.h:172
constexpr bool empty() const noexcept
Definition sym.h:83
static constexpr size_t Short_String_Mask
Definition sym.h:39
static constexpr size_t Short_String_Bytes
Definition sym.h:38
constexpr auto cend() const noexcept
Definition sym.h:106
friend constexpr bool operator==(char c, Sym s) noexcept
Definition sym.h:124
constexpr size_t size() const noexcept
Definition sym.h:84
constexpr Sym() noexcept=default
constexpr char back() const noexcept
Definition sym.h:98
friend std::ostream & operator<<(std::ostream &o, Sym sym)
Definition sym.h:188
friend constexpr bool operator==(Sym s1, Sym s2) noexcept
Definition sym.h:116
friend class SymPool
Definition sym.h:223
constexpr auto crbegin() const noexcept
Definition sym.h:109
constexpr char operator[](size_t i) const noexcept
Definition sym.h:93
friend constexpr bool operator==(Sym s, char c) noexcept
Definition sym.h:123
friend constexpr bool operator==(Sym lhs, const T &rhs) noexcept
Definition sym.h:142
constexpr auto crend() const noexcept
Definition sym.h:110
constexpr auto rbegin() const noexcept
Definition sym.h:107
constexpr std::string str() const noexcept
This involves a copy.
Definition sym.h:176
friend constexpr bool operator==(const T &lhs, Sym rhs) noexcept
Definition sym.h:148
friend constexpr auto operator<=>(const T &lhs, Sym rhs) noexcept
Definition sym.h:136
constexpr const char * c_str() const noexcept
Definition sym.h:158
constexpr auto cbegin() const noexcept
Definition sym.h:105
constexpr operator std::string() const noexcept
explicit as this involves a copy.
Definition sym.h:177
constexpr auto end() const noexcept
Definition sym.h:104
constexpr std::string_view view() const noexcept
Definition sym.h:162
friend constexpr std::strong_ordering operator<=>(Sym s, char c) noexcept
Definition sym.h:121
Definition arena.h:13
std::unordered_map< Sym, V, Sym::Hash, Sym::Eq > SymMap
Definition sym.h:247
std::unordered_set< Sym, Sym::Hash, Sym::Eq > SymSet
Definition sym.h:248
bool operator()(Sym a, Sym b) const noexcept
Definition sym.h:200
bool operator()(std::string_view a, Sym b) const noexcept
Definition sym.h:202
bool operator()(Sym a, std::string_view b) const noexcept
Definition sym.h:201
void is_transparent
Definition sym.h:199
size_t operator()(std::string_view v) const noexcept
Definition sym.h:195
void is_transparent
Definition sym.h:193
size_t operator()(Sym s) const noexcept
Definition sym.h:194
constexpr bool operator()(const String *s1, const String *s2) const noexcept
Definition sym.h:50
size_t operator()(const String *s) const noexcept
Definition sym.h:59
constexpr String() noexcept=default
char chars[]
Definition sym.h:47
size_t size
Definition sym.h:46