FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
sym.h
Go to the documentation of this file.
1#pragma once
2
3#include <cassert>
4#include <cstring>
5
6#include <bit>
7#include <iostream>
8#include <string>
9
10#ifdef FE_ABSL
11# include <absl/container/flat_hash_map.h>
12# include <absl/container/flat_hash_set.h>
13#else
14# include <unordered_map>
15# include <unordered_set>
16#endif
17
18#include "fe/arena.h"
19
20static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big,
21 "mixed endianess not supported");
22
23namespace fe {
24
25/// A Sym%bol just wraps a pointer to Sym::String, so pass Sym itself around as value.
26/// Sym is compatible with:
27/// * recommended: `std::string_view` (via Sym::view)
28/// * null-terminated C-strings (via Sym::c_str)
29///
30/// This means that retrieving a `std::string_view` or a null-terminated C-string is basically free.
31/// You can also obtain a `std::string` (via Sym::str), but this involves a copy.
32/// With the exception of the empty string, you should only create Sym%bols via SymPool::sym.
33/// This in turn will toss all Sym%bols into a big hash set.
34/// This makes Sym::operator== and Sym::operator!= an O(1) operation.
35/// The empty string is internally handled as `nullptr`.
36/// Thus, you can create a Sym%bol representing an empty string without having access to the SymPool.
37/// @note The empty `std::string`/`std::string_view`, `nullptr`, and `"\0"` are all identified as Sym::Sym().
38/// @warning Big endian version has not been tested.
39class Sym {
40public:
41 static constexpr size_t Short_String_Bytes = sizeof(uintptr_t);
42 static constexpr size_t Short_String_Mask = Short_String_Bytes - 1;
43
44 struct String {
45 constexpr String() noexcept = default;
46 constexpr String(size_t size) noexcept
47 : size(size) {}
48
49 size_t size = 0;
50 char chars[]; // This is actually a C-only features, but all C++ compilers support that anyway.
51
52 struct Equal {
53 constexpr bool operator()(const String* s1, const String* s2) const noexcept {
54 bool res = s1->size == s2->size;
55 for (size_t i = 0, e = s1->size; res && i != e; ++i) res &= s1->chars[i] == s2->chars[i];
56 return res;
57 }
58 };
59
60 struct Hash {
61 size_t operator()(const String* s) const noexcept {
62 return std::hash<std::string_view>()(std::string_view(s->chars, s->size));
63 }
64 };
65
66#ifdef FE_ABSL
67 template<class H> friend constexpr H AbslHashValue(H h, const String* string) noexcept {
68 return H::combine(std::move(h), std::string_view(string->chars, string->size));
69 }
70#endif
71 };
72
73 static_assert(sizeof(String) == sizeof(size_t), "String.chars should be 0");
74
75private:
76 constexpr Sym(uintptr_t ptr) noexcept
77 : ptr_(ptr) {}
78
79public:
80 constexpr Sym() noexcept = default;
81
82 /// @name Getters
83 ///@{
84 constexpr bool empty() const noexcept { return ptr_ == 0; }
85 constexpr size_t size() const noexcept {
86 if (empty()) return 0;
87 if (auto size = ptr_ & Short_String_Mask) return size;
88 return ((const String*)ptr_)->size;
89 }
90 ///@}
91
92 /// @name Access
93 ///@{
94 constexpr char operator[](size_t i) const noexcept {
95 assert(i < size());
96 return c_str()[i];
97 }
98 constexpr char front() const noexcept { return (*this)[0]; }
99 constexpr char back() const noexcept { return (*this)[size() - 1]; }
100 ///@}
101
102 /// @name Iterators
103 ///@{
104 constexpr auto begin() const noexcept { return c_str(); }
105 constexpr auto end() const noexcept { return c_str() + size(); }
106 constexpr auto cbegin() const noexcept { return begin(); }
107 constexpr auto cend() const noexcept { return end(); }
108 constexpr auto rbegin() const noexcept { return std::reverse_iterator(end()); }
109 constexpr auto rend() const noexcept { return std::reverse_iterator(begin()); }
110 constexpr auto crbegin() const noexcept { return rbegin(); }
111 constexpr auto crend() const noexcept { return rend(); }
112 ///@}
113
114 /// @name Comparisons
115 ///@{
116 constexpr auto operator<=>(Sym other) const noexcept { return this->view() <=> other.view(); }
117 constexpr bool operator==(Sym other) const noexcept { return this->ptr_ == other.ptr_; }
118 constexpr bool operator!=(Sym other) const noexcept { return this->ptr_ != other.ptr_; }
119 constexpr auto operator<=>(char c) const noexcept {
120 auto s = size();
121 if (s == 0) return std::strong_ordering::less;
122 auto cmp = (*this)[0] <=> c;
123 if (s == 1) return cmp;
124 return cmp == 0 ? std::strong_ordering::greater : cmp;
125 }
126 constexpr auto operator==(char c) const noexcept { return (*this) <=> c == 0; }
127 constexpr auto operator!=(char c) const noexcept { return (*this) <=> c != 0; }
128 ///@}
129
130 /// @name Conversions
131 ///@{
132 constexpr const char* c_str() const noexcept { return view().data(); }
133 constexpr operator const char*() const noexcept { return c_str(); }
134
135 constexpr std::string_view view() const noexcept {
136 if (empty()) return {(const char*)&ptr_, 0};
137 // Little endian: 2 a b 0 register: 0ba2
138 // Big endian: a b 0 2 register: ab02
139 uintptr_t offset = std::endian::native == std::endian::little ? 1 : 0;
140 if (auto size = ptr_ & Short_String_Mask) return {(const char*)&ptr_ + offset, size};
141 return std::string_view(((const String*)ptr_)->chars, ((const String*)ptr_)->size);
142 }
143 constexpr operator std::string_view() const noexcept { return view(); }
144 constexpr std::string_view operator*() const noexcept { return view(); }
145 // Unfortunately, this doesn't work:
146 // std::string_view operator->() const { return view(); }
147
148 constexpr std::string str() const noexcept { return std::string(view()); } ///< This involves a copy.
149 constexpr explicit operator std::string() const noexcept { return str(); } ///< `explicit` as this involves a copy.
150 constexpr explicit operator bool() const noexcept { return ptr_; } ///< Is not empty?
151 ///@}
152
153#ifdef FE_ABSL
154 template<class H> friend constexpr H AbslHashValue(H h, Sym sym) noexcept {
155 return H::combine(std::move(h), sym.ptr_);
156 }
157#endif
158 friend struct ::std::hash<fe::Sym>;
159 friend std::ostream& operator<<(std::ostream& o, Sym sym) { return o << sym.view(); }
160
161private:
162 // Little endian: 2 a b 0 register: 0ba2
163 // Big endian: a b 0 2 register: ab02
164 uintptr_t ptr_ = 0;
165
166 friend class SymPool;
167};
168
169#ifndef DOXYGEN
170} // namespace fe
171
172template<> struct std::hash<fe::Sym> {
173 size_t operator()(fe::Sym sym) const noexcept { return std::hash<uintptr_t>()(sym.ptr_); }
174};
175
176namespace fe {
177#endif
178
179/// @name Sym
180/// Set/Map is keyed by pointer - which is hashed in SymPool.
181///@{
182#ifdef FE_ABSL
183template<class V> using SymMap = absl::flat_hash_map<Sym, V>;
184using SymSet = absl::flat_hash_set<Sym>;
185#else
186template<class V> using SymMap = std::unordered_map<Sym, V>;
187using SymSet = std::unordered_set<Sym>;
188#endif
189///@}
190
191/// Hash set where all strings - wrapped in Sym%bol - live in.
192/// You can access the SymPool from Driver.
193class SymPool {
194public:
196
197 /// @name Constructor & Destruction
198 ///@{
199 SymPool(const SymPool&) = delete;
200#ifdef FE_ABSL
201 SymPool() noexcept {}
202#else
203 SymPool() noexcept
204 : pool_(container_.allocator<const String*>()) {}
205#endif
206 SymPool(SymPool&& other) noexcept
207 : SymPool() {
208 swap(*this, other);
209 }
211 ///@}
212
213 /// @name sym
214 ///@{
215 Sym sym(std::string_view s) {
216 if (s.empty()) return Sym();
217 auto size = s.size();
218
219 if (size <= Sym::Short_String_Bytes - 2) { // small string: need two more bytes for `\0' and size
220 uintptr_t ptr = size;
221 // Little endian: 2 a b 0 register: 0ba2
222 // Big endian: a b 0 2 register: ab02
223 if constexpr (std::endian::native == std::endian::little)
224 for (uintptr_t i = 0, shift = 8; i != size; ++i, shift += 8) ptr |= (uintptr_t(s[i]) << shift);
225 else
226 for (uintptr_t i = 0, shift = (Sym::Short_String_Bytes - 1) * 8; i != size; ++i, shift -= 8)
227 ptr |= (uintptr_t(s[i]) << shift);
228 return Sym(ptr);
229 }
230
231 auto state = strings_.state();
232 auto ptr = (String*)strings_.allocate(sizeof(String) + s.size() + 1 /*'\0'*/, Sym::Short_String_Bytes);
233 new (ptr) String(s.size());
234 *std::copy(s.begin(), s.end(), ptr->chars) = '\0';
235 auto [i, ins] = pool_.emplace(ptr);
236 if (ins) return Sym((uintptr_t)ptr);
237 strings_.deallocate(state);
238 return Sym((uintptr_t)*i);
239 }
240 Sym sym(const std::string& s) { return sym((std::string_view)s); }
241 /// @p s is a null-terminated C-string.
242 constexpr Sym sym(const char* s) {
243 return s == nullptr || *s == '\0' ? Sym() : sym(std::string_view(s, strlen(s)));
244 }
245 // TODO we can try to fit s in current page and hence eliminate the explicit use of strlen
246 ///@}
247
248 friend void swap(SymPool& p1, SymPool& p2) noexcept {
249 using std::swap;
250 // clang-format off
251 swap(p1.strings_, p2.strings_ );
252#ifndef FE_ABSL
253 swap(p1.container_, p2.container_);
254#endif
255 swap(p1.pool_, p2.pool_ );
256 // clang-format on
257 }
258
259private:
260 Arena strings_;
261#ifdef FE_ABSL
262 absl::flat_hash_set<const String*, absl::Hash<const String*>, String::Equal> pool_;
263#else
264 Arena container_;
265 std::unordered_set<const String*, String::Hash, String::Equal, Arena::Allocator<const String*>> pool_;
266#endif
267};
268
269} // namespace fe
An arena pre-allocates so-called pages of size Arena::page_size_.
Definition arena.h:18
constexpr void deallocate(size_t num_bytes) noexcept
Removes num_bytes again.
Definition arena.h:124
constexpr void * allocate(size_t num_bytes, size_t align)
Get n bytes of fresh memory.
Definition arena.h:92
State state() const noexcept
Definition arena.h:125
Hash set where all strings - wrapped in Symbol - live in.
Definition sym.h:193
Sym::String String
Definition sym.h:195
Sym sym(std::string_view s)
Definition sym.h:215
SymPool & operator=(SymPool)=delete
SymPool(const SymPool &)=delete
Sym sym(const std::string &s)
Definition sym.h:240
SymPool(SymPool &&other) noexcept
Definition sym.h:206
SymPool() noexcept
Definition sym.h:203
constexpr Sym sym(const char *s)
s is a null-terminated C-string.
Definition sym.h:242
friend void swap(SymPool &p1, SymPool &p2) noexcept
Definition sym.h:248
A Symbol just wraps a pointer to Sym::String, so pass Sym itself around as value.
Definition sym.h:39
constexpr auto operator<=>(Sym other) const noexcept
Definition sym.h:116
constexpr auto rend() const noexcept
Definition sym.h:109
constexpr auto begin() const noexcept
Definition sym.h:104
constexpr char front() const noexcept
Definition sym.h:98
constexpr std::string_view operator*() const noexcept
Definition sym.h:144
constexpr bool empty() const noexcept
Definition sym.h:84
static constexpr size_t Short_String_Mask
Definition sym.h:42
static constexpr size_t Short_String_Bytes
Definition sym.h:41
constexpr auto cend() const noexcept
Definition sym.h:107
constexpr size_t size() const noexcept
Definition sym.h:85
constexpr Sym() noexcept=default
constexpr char back() const noexcept
Definition sym.h:99
friend std::ostream & operator<<(std::ostream &o, Sym sym)
Definition sym.h:159
constexpr bool operator!=(Sym other) const noexcept
Definition sym.h:118
constexpr auto crbegin() const noexcept
Definition sym.h:110
constexpr bool operator==(Sym other) const noexcept
Definition sym.h:117
constexpr char operator[](size_t i) const noexcept
Definition sym.h:94
constexpr auto crend() const noexcept
Definition sym.h:111
constexpr auto rbegin() const noexcept
Definition sym.h:108
constexpr std::string str() const noexcept
This involves a copy.
Definition sym.h:148
constexpr const char * c_str() const noexcept
Definition sym.h:132
constexpr auto operator<=>(char c) const noexcept
Definition sym.h:119
constexpr auto cbegin() const noexcept
Definition sym.h:106
constexpr auto operator==(char c) const noexcept
Definition sym.h:126
constexpr auto end() const noexcept
Definition sym.h:105
constexpr std::string_view view() const noexcept
Definition sym.h:135
constexpr auto operator!=(char c) const noexcept
Definition sym.h:127
Definition arena.h:10
std::unordered_map< Sym, V > SymMap
Definition sym.h:186
std::unordered_set< Sym > SymSet
Definition sym.h:187
constexpr bool operator()(const String *s1, const String *s2) const noexcept
Definition sym.h:53
size_t operator()(const String *s) const noexcept
Definition sym.h:61
constexpr String() noexcept=default
char chars[]
Definition sym.h:50
size_t size
Definition sym.h:49