FE 0.6.0
A header-only C++ library for writing frontends
Loading...
Searching...
No Matches
sym.h
Go to the documentation of this file.
1#pragma once
2
3#include <cassert>
4#include <cstring>
5
6#include <bit>
7#include <iostream>
8#include <string>
9
10#ifdef FE_ABSL
11# include <absl/container/flat_hash_map.h>
12# include <absl/container/flat_hash_set.h>
13#else
14# include <unordered_map>
15# include <unordered_set>
16#endif
17
18#include "fe/arena.h"
19
20static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big,
21 "mixed endianess not supported");
22
23namespace fe {
24
25/// A Sym%bol just wraps a pointer to Sym::String, so pass Sym itself around as value.
26/// Sym is compatible with:
27/// * recommended: `std::string_view` (via Sym::view)
28/// * null-terminated C-strings (via Sym::c_str)
29///
30/// This means that retrieving a `std::string_view` or a null-terminated C-string is basically free.
31/// You can also obtain a `std::string` (via Sym::str), but this involves a copy.
32/// With the exception of the empty string, you should only create Sym%bols via SymPool::sym.
33/// This in turn will toss all Sym%bols into a big hash set.
34/// This makes Sym::operator== and Sym::operator!= an O(1) operation.
35/// The empty string is internally handled as `nullptr`.
36/// Thus, you can create a Sym%bol representing an empty string without having access to the SymPool.
37/// @note The empty `std::string`/`std::string_view`, `nullptr`, and `"\0"` are all identified as Sym::Sym().
38/// @warning Big endian version has not been tested.
39class Sym {
40public:
41 static constexpr size_t Short_String_Bytes = sizeof(uintptr_t);
42 static constexpr size_t Short_String_Mask = Short_String_Bytes - 1;
43
44 struct String {
45 constexpr String() noexcept = default;
46 constexpr String(size_t size) noexcept
47 : size(size) {}
48
49 size_t size = 0;
50 char chars[]; // This is actually a C-only features, but all C++ compilers support that anyway.
51
52 struct Equal {
53 constexpr bool operator()(const String* s1, const String* s2) const noexcept {
54 bool res = s1->size == s2->size;
55 for (size_t i = 0, e = s1->size; res && i != e; ++i) res &= s1->chars[i] == s2->chars[i];
56 return res;
57 }
58 };
59
60 struct Hash {
61 size_t operator()(const String* s) const noexcept {
62 return std::hash<std::string_view>()(std::string_view(s->chars, s->size));
63 }
64 };
65
66#ifdef FE_ABSL
67 template<class H> friend constexpr H AbslHashValue(H h, const String* string) noexcept {
68 return H::combine(std::move(h), std::string_view(string->chars, string->size));
69 }
70#endif
71 };
72
73 static_assert(sizeof(String) == sizeof(size_t), "String.chars should be 0");
74
75private:
76 constexpr Sym(uintptr_t ptr) noexcept
77 : ptr_(ptr) {}
78
79public:
80 constexpr Sym() noexcept = default;
81
82 /// @name Getters
83 ///@{
84 constexpr bool empty() const noexcept { return ptr_ == 0; }
85 constexpr size_t size() const noexcept {
86 if (empty()) return 0;
87 if (auto size = ptr_ & Short_String_Mask) return size;
88 return ((const String*)ptr_)->size;
89 }
90 ///@}
91
92 /// @name Access
93 ///@{
94 constexpr char operator[](size_t i) const noexcept {
95 assert(i < size());
96 return c_str()[i];
97 }
98 constexpr char front() const noexcept { return (*this)[0]; }
99 constexpr char back() const noexcept { return (*this)[size() - 1]; }
100 ///@}
101
102 /// @name Iterators
103 ///@{
104 constexpr auto begin() const noexcept { return c_str(); }
105 constexpr auto end() const noexcept { return c_str() + size(); }
106 constexpr auto cbegin() const noexcept { return begin(); }
107 constexpr auto cend() const noexcept { return end(); }
108 constexpr auto rbegin() const noexcept { return std::reverse_iterator(end()); }
109 constexpr auto rend() const noexcept { return std::reverse_iterator(begin()); }
110 constexpr auto crbegin() const noexcept { return rbegin(); }
111 constexpr auto crend() const noexcept { return rend(); }
112 ///@}
113
114 /// @name Comparisons
115 ///@{
116 constexpr auto operator<=>(Sym other) const noexcept { return this->view() <=> other.view(); }
117 constexpr bool operator==(Sym other) const noexcept { return this->ptr_ == other.ptr_; }
118 constexpr bool operator!=(Sym other) const noexcept { return this->ptr_ != other.ptr_; }
119 constexpr auto operator<=>(char c) const noexcept {
120 auto s = size();
121 if (s == 0) return std::strong_ordering::less;
122 auto cmp = (*this)[0] <=> c;
123 if (s == 1) return cmp;
124 return cmp == 0 ? std::strong_ordering::greater : cmp;
125 }
126 constexpr auto operator==(char c) const noexcept { return (*this) <=> c == 0; }
127 constexpr auto operator!=(char c) const noexcept { return (*this) <=> c != 0; }
128 ///@}
129
130 /// @name Conversions
131 ///@{
132 constexpr const char* c_str() const noexcept { return view().data(); }
133 constexpr operator const char*() const noexcept { return c_str(); }
134
135 constexpr std::string_view view() const noexcept {
136 if (empty()) return {std::bit_cast<const char*>(&ptr_), 0};
137 // Little endian: 2 a b 0 register: 0ba2
138 // Big endian: a b 0 2 register: ab02
139 uintptr_t offset = std::endian::native == std::endian::little ? 1 : 0;
140 if (auto size = ptr_ & Short_String_Mask) return {std::bit_cast<const char*>(&ptr_) + offset, size};
141 auto S = std::bit_cast<const String*>(ptr_);
142 return std::string_view(S->chars, S->size);
143 }
144 constexpr operator std::string_view() const noexcept { return view(); }
145 constexpr std::string_view operator*() const noexcept { return view(); }
146 // Unfortunately, this doesn't work:
147 // std::string_view operator->() const { return view(); }
148
149 constexpr std::string str() const noexcept { return std::string(view()); } ///< This involves a copy.
150 constexpr explicit operator std::string() const noexcept { return str(); } ///< `explicit` as this involves a copy.
151 constexpr explicit operator bool() const noexcept { return ptr_; } ///< Is not empty?
152 ///@}
153
154#ifdef FE_ABSL
155 template<class H> friend constexpr H AbslHashValue(H h, Sym sym) noexcept {
156 return H::combine(std::move(h), sym.ptr_);
157 }
158#endif
159 friend struct ::std::hash<fe::Sym>;
160 friend std::ostream& operator<<(std::ostream& o, Sym sym) { return o << sym.view(); }
161
162private:
163 // Little endian: 2 a b 0 register: 0ba2
164 // Big endian: a b 0 2 register: ab02
165 uintptr_t ptr_ = 0;
166
167 friend class SymPool;
168};
169
170#ifndef DOXYGEN
171} // namespace fe
172
173template<> struct std::hash<fe::Sym> {
174 size_t operator()(fe::Sym sym) const noexcept { return std::hash<uintptr_t>()(sym.ptr_); }
175};
176
177namespace fe {
178#endif
179
180/// @name Sym
181/// Set/Map is keyed by pointer - which is hashed in SymPool.
182///@{
183#ifdef FE_ABSL
184template<class V> using SymMap = absl::flat_hash_map<Sym, V>;
185using SymSet = absl::flat_hash_set<Sym>;
186#else
187template<class V> using SymMap = std::unordered_map<Sym, V>;
188using SymSet = std::unordered_set<Sym>;
189#endif
190///@}
191
192/// Hash set where all strings - wrapped in Sym%bol - live in.
193/// You can access the SymPool from Driver.
194class SymPool {
195public:
197
198 /// @name Constructor & Destruction
199 ///@{
200 SymPool(const SymPool&) = delete;
201#ifdef FE_ABSL
202 SymPool() noexcept {}
203#else
204 SymPool() noexcept
205 : pool_(container_.allocator<const String*>()) {}
206#endif
207 SymPool(SymPool&& other) noexcept
208 : SymPool() {
209 swap(*this, other);
210 }
212 ///@}
213
214 /// @name sym
215 ///@{
216 Sym sym(std::string_view s) {
217 if (s.empty()) return Sym();
218 auto size = s.size();
219
220 if (size <= Sym::Short_String_Bytes - 2) { // small string: need two more bytes for `\0' and size
221 uintptr_t ptr = size;
222 // Little endian: 2 a b 0 register: 0ba2
223 // Big endian: a b 0 2 register: ab02
224 if constexpr (std::endian::native == std::endian::little)
225 for (uintptr_t i = 0, shift = 8; i != size; ++i, shift += 8) ptr |= (uintptr_t(s[i]) << shift);
226 else
227 for (uintptr_t i = 0, shift = (Sym::Short_String_Bytes - 1) * 8; i != size; ++i, shift -= 8)
228 ptr |= (uintptr_t(s[i]) << shift);
229 return Sym(ptr);
230 }
231
232 auto state = strings_.state();
233 auto ptr = (String*)strings_.allocate(sizeof(String) + s.size() + 1 /*'\0'*/, Sym::Short_String_Bytes);
234 new (ptr) String(s.size());
235 *std::copy(s.begin(), s.end(), ptr->chars) = '\0';
236 auto [i, ins] = pool_.emplace(ptr);
237 if (ins) return Sym(std::bit_cast<uintptr_t>(ptr));
238 strings_.deallocate(state);
239 return Sym(std::bit_cast<uintptr_t>(*i));
240 }
241 Sym sym(const std::string& s) { return sym((std::string_view)s); }
242 /// @p s is a null-terminated C-string.
243 constexpr Sym sym(const char* s) {
244 return s == nullptr || *s == '\0' ? Sym() : sym(std::string_view(s, strlen(s)));
245 }
246 // TODO we can try to fit s in current page and hence eliminate the explicit use of strlen
247 ///@}
248
249 friend void swap(SymPool& p1, SymPool& p2) noexcept {
250 using std::swap;
251 // clang-format off
252 swap(p1.strings_, p2.strings_ );
253#ifndef FE_ABSL
254 swap(p1.container_, p2.container_);
255#endif
256 swap(p1.pool_, p2.pool_ );
257 // clang-format on
258 }
259
260private:
261 Arena strings_;
262#ifdef FE_ABSL
263 absl::flat_hash_set<const String*, absl::Hash<const String*>, String::Equal> pool_;
264#else
265 Arena container_;
266 std::unordered_set<const String*, String::Hash, String::Equal, Arena::Allocator<const String*>> pool_;
267#endif
268};
269
270} // namespace fe
An arena pre-allocates so-called pages of size Arena::page_size_.
Definition arena.h:18
constexpr void deallocate(size_t num_bytes) noexcept
Removes num_bytes again.
Definition arena.h:124
constexpr void * allocate(size_t num_bytes, size_t align)
Get n bytes of fresh memory.
Definition arena.h:92
State state() const noexcept
Definition arena.h:125
Hash set where all strings - wrapped in Symbol - live in.
Definition sym.h:194
Sym::String String
Definition sym.h:196
Sym sym(std::string_view s)
Definition sym.h:216
SymPool & operator=(SymPool)=delete
SymPool(const SymPool &)=delete
Sym sym(const std::string &s)
Definition sym.h:241
SymPool(SymPool &&other) noexcept
Definition sym.h:207
SymPool() noexcept
Definition sym.h:204
constexpr Sym sym(const char *s)
s is a null-terminated C-string.
Definition sym.h:243
friend void swap(SymPool &p1, SymPool &p2) noexcept
Definition sym.h:249
A Symbol just wraps a pointer to Sym::String, so pass Sym itself around as value.
Definition sym.h:39
constexpr auto operator<=>(Sym other) const noexcept
Definition sym.h:116
constexpr auto rend() const noexcept
Definition sym.h:109
constexpr auto begin() const noexcept
Definition sym.h:104
constexpr char front() const noexcept
Definition sym.h:98
constexpr std::string_view operator*() const noexcept
Definition sym.h:145
constexpr bool empty() const noexcept
Definition sym.h:84
static constexpr size_t Short_String_Mask
Definition sym.h:42
static constexpr size_t Short_String_Bytes
Definition sym.h:41
constexpr auto cend() const noexcept
Definition sym.h:107
constexpr size_t size() const noexcept
Definition sym.h:85
constexpr Sym() noexcept=default
constexpr char back() const noexcept
Definition sym.h:99
friend std::ostream & operator<<(std::ostream &o, Sym sym)
Definition sym.h:160
constexpr bool operator!=(Sym other) const noexcept
Definition sym.h:118
constexpr auto crbegin() const noexcept
Definition sym.h:110
constexpr bool operator==(Sym other) const noexcept
Definition sym.h:117
constexpr char operator[](size_t i) const noexcept
Definition sym.h:94
constexpr auto crend() const noexcept
Definition sym.h:111
constexpr auto rbegin() const noexcept
Definition sym.h:108
constexpr std::string str() const noexcept
This involves a copy.
Definition sym.h:149
constexpr const char * c_str() const noexcept
Definition sym.h:132
constexpr auto operator<=>(char c) const noexcept
Definition sym.h:119
constexpr auto cbegin() const noexcept
Definition sym.h:106
constexpr auto operator==(char c) const noexcept
Definition sym.h:126
constexpr auto end() const noexcept
Definition sym.h:105
constexpr std::string_view view() const noexcept
Definition sym.h:135
constexpr auto operator!=(char c) const noexcept
Definition sym.h:127
Definition arena.h:10
std::unordered_map< Sym, V > SymMap
Definition sym.h:187
std::unordered_set< Sym > SymSet
Definition sym.h:188
constexpr bool operator()(const String *s1, const String *s2) const noexcept
Definition sym.h:53
size_t operator()(const String *s) const noexcept
Definition sym.h:61
constexpr String() noexcept=default
char chars[]
Definition sym.h:50
size_t size
Definition sym.h:49