FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
sym.h
Go to the documentation of this file.
1#pragma once
2
3#include <cassert>
4#include <cstring>
5
6#include <bit>
7#include <iostream>
8#include <string>
9
10#ifdef FE_ABSL
11# include <absl/container/flat_hash_map.h>
12# include <absl/container/flat_hash_set.h>
13#else
14# include <unordered_map>
15# include <unordered_set>
16#endif
17
18#include "fe/arena.h"
19
20static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big,
21 "mixed endianess not supported");
22
23namespace fe {
24
25/// A Sym%bol just wraps a pointer to Sym::String, so pass Sym itself around as value.
26/// Sym is compatible with:
27/// * recommended: `std::string_view` (via Sym::view)
28/// * null-terminated C-strings (via Sym::c_str)
29///
30/// This means that retrieving a `std::string_view` or a null-terminated C-string is basically free.
31/// You can also obtain a `std::string` (via Sym::str), but this involves a copy.
32/// With the exception of the empty string, you should only create Sym%bols via SymPool::sym.
33/// This in turn will toss all Sym%bols into a big hash set.
34/// This makes Sym::operator== and Sym::operator!= an O(1) operation.
35/// The empty string is internally handled as `nullptr`.
36/// Thus, you can create a Sym%bol representing an empty string without having access to the SymPool.
37/// @note The empty `std::string`/`std::string_view`, `nullptr`, and `"\0"` are all identified as Sym::Sym().
38/// @warning Big endian version has not been tested.
39class Sym {
40public:
41 static constexpr size_t Short_String_Bytes = sizeof(uintptr_t);
42 static constexpr size_t Short_String_Mask = Short_String_Bytes - 1;
43
44 struct String {
45 String() noexcept = default;
46 String(size_t size)
47 : size(size) {}
48
49 size_t size;
50 char chars[]; // This is actually a C-only features, but all C++ compilers support that anyway.
51
52 struct Equal {
53 bool operator()(const String* s1, const String* s2) const {
54 bool res = s1->size == s2->size;
55 for (size_t i = 0, e = s1->size; res && i != e; ++i) res &= s1->chars[i] == s2->chars[i];
56 return res;
57 }
58 };
59
60 struct Hash {
61 size_t operator()(const String* s) const {
62 return std::hash<std::string_view>()(std::string_view(s->chars, s->size));
63 }
64 };
65
66#ifdef FE_ABSL
67 template<class H> friend H AbslHashValue(H h, const String* string) {
68 return H::combine(std::move(h), std::string_view(string->chars, string->size));
69 }
70#endif
71 };
72
73 static_assert(sizeof(String) == sizeof(size_t), "String.chars should be 0");
74
75private:
76 Sym(uintptr_t ptr)
77 : ptr_(ptr) {}
78
79public:
80 Sym() noexcept = default;
81
82 /// @name Getters
83 ///@{
84 bool empty() const { return ptr_ == 0; }
85 size_t size() const {
86 if (empty()) return 0;
87 if (auto size = ptr_ & Short_String_Mask) return size;
88 return ((const String*)ptr_)->size;
89 }
90 ///@}
91
92 /// @name Access
93 ///@{
94 char operator[](size_t i) const {
95 assert(i < size());
96 return c_str()[i];
97 }
98 char front() const { return (*this)[0]; }
99 char back() const { return (*this)[size() - 1]; }
100 ///@}
101
102 /// @name Iterators
103 ///@{
104 auto begin() const { return c_str(); }
105 auto end() const { return c_str() + size(); }
106 auto cbegin() const { return begin(); }
107 auto cend() const { return end(); }
108 auto rbegin() const { return std::reverse_iterator(end()); }
109 auto rend() const { return std::reverse_iterator(begin()); }
110 auto crbegin() const { return rbegin(); }
111 auto crend() const { return rend(); }
112 ///@}
113
114 /// @name Comparisons
115 ///@{
116 auto operator<=>(Sym other) const { return this->view() <=> other.view(); }
117 bool operator==(Sym other) const { return this->ptr_ == other.ptr_; }
118 bool operator!=(Sym other) const { return this->ptr_ != other.ptr_; }
119 auto operator<=>(char c) const {
120 auto s = size();
121 if (s == 0) return std::strong_ordering::less;
122 auto cmp = (*this)[0] <=> c;
123 if (s == 1) return cmp;
124 return cmp == 0 ? std::strong_ordering::greater : cmp;
125 }
126 auto operator==(char c) const { return (*this) <=> c == 0; }
127 auto operator!=(char c) const { return (*this) <=> c != 0; }
128 ///@}
129
130 /// @name Conversions
131 ///@{
132 const char* c_str() const { return view().data(); }
133 operator const char*() const { return c_str(); }
134
135 std::string_view view() const {
136 if (empty()) return {(const char*)&ptr_, 0};
137 // Little endian: 2 a b 0 register: 0ba2
138 // Big endian: a b 0 2 register: ab02
139 uintptr_t offset = std::endian::native == std::endian::little ? 1 : 0;
140 if (auto size = ptr_ & Short_String_Mask) return {(const char*)&ptr_ + offset, size};
141 return std::string_view(((const String*)ptr_)->chars, ((const String*)ptr_)->size);
142 }
143 operator std::string_view() const { return view(); }
144 std::string_view operator*() const { return view(); }
145 // Unfortunately, this doesn't work:
146 // std::string_view operator->() const { return view(); }
147
148 std::string str() const { return std::string(view()); } ///< This involves a copy.
149 explicit operator std::string() const { return str(); } ///< `explicit` as this involves a copy.
150
151 explicit operator bool() const { return ptr_; } ///< Is not empty?
152 ///@}
153
154#ifdef FE_ABSL
155 template<class H> friend H AbslHashValue(H h, Sym sym) { return H::combine(std::move(h), sym.ptr_); }
156#endif
157 friend struct ::std::hash<fe::Sym>;
158 friend std::ostream& operator<<(std::ostream& o, Sym sym) { return o << sym.view(); }
159
160private:
161 // Little endian: 2 a b 0 register: 0ba2
162 // Big endian: a b 0 2 register: ab02
163 uintptr_t ptr_ = 0;
164
165 friend class SymPool;
166};
167
168#ifndef DOXYGEN
169} // namespace fe
170
171template<> struct std::hash<fe::Sym> {
172 size_t operator()(fe::Sym sym) const { return std::hash<uintptr_t>()(sym.ptr_); }
173};
174
175namespace fe {
176#endif
177
178/// @name Sym
179/// Set/Map is keyed by pointer - which is hashed in SymPool.
180///@{
181#ifdef FE_ABSL
182template<class V> using SymMap = absl::flat_hash_map<Sym, V>;
183using SymSet = absl::flat_hash_set<Sym>;
184#else
185template<class V> using SymMap = std::unordered_map<Sym, V>;
186using SymSet = std::unordered_set<Sym>;
187#endif
188///@}
189
190/// Hash set where all strings - wrapped in Sym%bol - live in.
191/// You can access the SymPool from Driver.
192class SymPool {
193public:
195
196 /// @name Constructor & Destruction
197 ///@{
198 SymPool(const SymPool&) = delete;
199#ifdef FE_ABSL
200 SymPool() noexcept {}
201#else
202 SymPool() noexcept
203 : pool_(container_.allocator<const String*>()) {}
204#endif
205 SymPool(SymPool&& other) noexcept
206 : SymPool() {
207 swap(*this, other);
208 }
210 ///@}
211
212 /// @name sym
213 ///@{
214 Sym sym(std::string_view s) {
215 if (s.empty()) return Sym();
216 auto size = s.size();
217
218 if (size <= Sym::Short_String_Bytes - 2) { // small string: need two more bytes for `\0' and size
219 uintptr_t ptr = size;
220 // Little endian: 2 a b 0 register: 0ba2
221 // Big endian: a b 0 2 register: ab02
222 if constexpr (std::endian::native == std::endian::little)
223 for (uintptr_t i = 0, shift = 8; i != size; ++i, shift += 8) ptr |= (uintptr_t(s[i]) << shift);
224 else
225 for (uintptr_t i = 0, shift = (Sym::Short_String_Bytes - 1) * 8; i != size; ++i, shift -= 8)
226 ptr |= (uintptr_t(s[i]) << shift);
227 return Sym(ptr);
228 }
229
230 auto state = strings_.state();
231 auto ptr = (String*)strings_.align(Sym::Short_String_Bytes).allocate(sizeof(String) + s.size() + 1 /*'\0'*/);
232 new (ptr) String(s.size());
233 *std::copy(s.begin(), s.end(), ptr->chars) = '\0';
234 auto [i, ins] = pool_.emplace(ptr);
235 if (ins) return Sym((uintptr_t)ptr);
236 strings_.deallocate(state);
237 return Sym((uintptr_t)*i);
238 }
239 Sym sym(const std::string& s) { return sym((std::string_view)s); }
240 /// @p s is a null-terminated C-string.
241 Sym sym(const char* s) { return s == nullptr || *s == '\0' ? Sym() : sym(std::string_view(s, strlen(s))); }
242 // TODO we can try to fit s in current page and hence eliminate the explicit use of strlen
243 ///@}
244
245 friend void swap(SymPool& p1, SymPool& p2) noexcept {
246 using std::swap;
247 // clang-format off
248 swap(p1.strings_, p2.strings_ );
249#ifndef FE_ABSL
250 swap(p1.container_, p2.container_);
251#endif
252 swap(p1.pool_, p2.pool_ );
253 // clang-format on
254 }
255
256private:
257 Arena strings_;
258#ifdef FE_ABSL
259 absl::flat_hash_set<const String*, absl::Hash<const String*>, String::Equal> pool_;
260#else
261 Arena container_;
262 std::unordered_set<const String*, String::Hash, String::Equal, Arena::Allocator<const String*>> pool_;
263#endif
264};
265
266} // namespace fe
An arena pre-allocates so-called pages of size Arena::page_size_.
Definition arena.h:17
Arena & align(size_t a)
Align next allocate(size_t) to a.
Definition arena.h:92
void deallocate(size_t num_bytes)
Definition arena.h:116
void * allocate(size_t num_bytes)
Get n bytes of fresh memory.
Definition arena.h:95
State state() const
Definition arena.h:127
Hash set where all strings - wrapped in Symbol - live in.
Definition sym.h:192
Sym sym(const char *s)
s is a null-terminated C-string.
Definition sym.h:241
Sym::String String
Definition sym.h:194
Sym sym(std::string_view s)
Definition sym.h:214
SymPool & operator=(SymPool)=delete
SymPool(const SymPool &)=delete
Sym sym(const std::string &s)
Definition sym.h:239
SymPool(SymPool &&other) noexcept
Definition sym.h:205
SymPool() noexcept
Definition sym.h:202
friend void swap(SymPool &p1, SymPool &p2) noexcept
Definition sym.h:245
A Symbol just wraps a pointer to Sym::String, so pass Sym itself around as value.
Definition sym.h:39
const char * c_str() const
Definition sym.h:132
Sym() noexcept=default
std::string str() const
This involves a copy.
Definition sym.h:148
auto crend() const
Definition sym.h:111
auto cend() const
Definition sym.h:107
bool empty() const
Definition sym.h:84
auto operator<=>(char c) const
Definition sym.h:119
static constexpr size_t Short_String_Mask
Definition sym.h:42
static constexpr size_t Short_String_Bytes
Definition sym.h:41
std::string_view operator*() const
Definition sym.h:144
size_t size() const
Definition sym.h:85
bool operator==(Sym other) const
Definition sym.h:117
friend std::ostream & operator<<(std::ostream &o, Sym sym)
Definition sym.h:158
auto operator!=(char c) const
Definition sym.h:127
auto begin() const
Definition sym.h:104
auto end() const
Definition sym.h:105
char front() const
Definition sym.h:98
auto operator==(char c) const
Definition sym.h:126
char operator[](size_t i) const
Definition sym.h:94
auto rbegin() const
Definition sym.h:108
auto rend() const
Definition sym.h:109
std::string_view view() const
Definition sym.h:135
auto operator<=>(Sym other) const
Definition sym.h:116
char back() const
Definition sym.h:99
bool operator!=(Sym other) const
Definition sym.h:118
auto cbegin() const
Definition sym.h:106
auto crbegin() const
Definition sym.h:110
Definition arena.h:9
std::unordered_map< Sym, V > SymMap
Definition sym.h:185
std::unordered_set< Sym > SymSet
Definition sym.h:186
bool operator()(const String *s1, const String *s2) const
Definition sym.h:53
size_t operator()(const String *s) const
Definition sym.h:61
String() noexcept=default
char chars[]
Definition sym.h:50
size_t size
Definition sym.h:49