SlHelpers
PCRE2.h
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #pragma once
4 
5 #include <pcre2.h>
6 #include <string_view>
7 
8 #include "../helpers/LastError.h"
9 
10 namespace SlPCRE2 {
11 
/**
 * @brief Iterator over matches.
 *
 * Holds an index into a PCRE2 offset vector (ovector) together with the
 * subject string the offsets refer to. Dereferencing yields the match at the
 * current index as a std::string_view into the subject; nothing is copied, so
 * the subject and the ovector must outlive the iterator.
 *
 * Two iterators compare equal when their indices are equal; the ovector and
 * the subject are not compared. This is what lets Matches::end() be a bare
 * index without an ovector.
 */
struct MatchIterator {
	MatchIterator() = delete;

	/**
	 * @brief Obtain the current match.
	 * @return View of the subject covered by the current match; empty if the
	 * iterator has no ovector or the group did not take part in the match.
	 */
	auto operator*() const noexcept {
		return matchByIdx(m_ovector, m_subject, m_idx);
	}

	/// @brief Move to the next match.
	MatchIterator &operator++() noexcept {
		++m_idx;
		return *this;
	}

	/// @brief Move to the previous match.
	MatchIterator &operator--() noexcept {
		--m_idx;
		return *this;
	}

	/// @brief Move to the next match, returning the iterator as it was before.
	MatchIterator operator++(int) noexcept {
		auto old = *this;
		++m_idx;
		return old;
	}

	/// @brief Move to the previous match, returning the iterator as it was before.
	MatchIterator operator--(int) noexcept {
		auto old = *this;
		--m_idx;
		return old;
	}

	/// @brief Compare two MatchIterators (by index only).
	auto operator==(const MatchIterator &other) const noexcept {
		return m_idx == other.m_idx;
	}

	/// @brief Compare two MatchIterators (by index only).
	auto operator!=(const MatchIterator &other) const noexcept {
		return !operator==(other);
	}

	/// @brief Get current index of the match.
	auto index() const noexcept { return m_idx; }

private:
	friend struct Matches;
	friend class PCRE2;

	/**
	 * @brief Extract one match from the subject.
	 *
	 * Reads the offset pair (ovector[2 * index], ovector[2 * index + 1]) as
	 * the start and the end of the match.
	 *
	 * @param ovector Offset vector; may be nullptr (yields an empty view).
	 * @param subject String the offsets point into.
	 * @param index Index of the offset pair to use.
	 * @return The matched part of @p subject, or an empty view if the pair
	 * does not describe a range inside @p subject.
	 */
	static std::string_view matchByIdx(size_t *ovector, std::string_view subject,
					   unsigned index) noexcept {
		if (!ovector)
			return {};

		const auto pair = static_cast<size_t>(index) * 2;
		const auto start = ovector[pair];
		const auto end = ovector[pair + 1];

		// A group that did not participate has both offsets set to
		// PCRE2_UNSET (all bits set), so start lies past the subject.
		// substr() would throw std::out_of_range for that, which is fatal
		// in the noexcept callers. An end before the start would make the
		// length wrap around. Neither is a usable range.
		if (start > subject.size() || end < start)
			return {};

		return subject.substr(start, end - start);
	}

	/**
	 * @brief Construct an iterator positioned at @p idx.
	 * @param idx Index of the match to point at.
	 * @param ovector Offset vector to read from (nullptr for an end iterator).
	 * @param subject String the offsets point into.
	 */
	explicit MatchIterator(unsigned int idx, size_t *ovector = nullptr,
			       std::string_view subject = "")
		: m_idx(idx), m_ovector(ovector), m_subject(subject) {}

	unsigned m_idx;			// index of the current offset pair
	size_t *m_ovector;		// not owned
	std::string_view m_subject;	// not owned
};
67 
71 struct Matches {
72  Matches() = delete;
73 
75  auto begin() const noexcept { return MatchIterator(0, m_ovector, m_subject); }
77  auto end() const noexcept { return MatchIterator(m_matches); }
78 
84  auto operator[](std::size_t idx) const noexcept {
85  return MatchIterator::matchByIdx(m_ovector, m_subject, idx);
86  }
87 
88 private:
89  friend class PCRE2;
90 
91  explicit Matches(std::string_view subject, size_t *ovector, unsigned matches)
92  : m_matches(matches), m_ovector(ovector), m_subject(std::move(subject)) {}
93 
94  unsigned m_matches;
95  size_t *m_ovector;
96  std::string_view m_subject;
97 };
98 
102 class PCRE2 {
103 public:
105  PCRE2() noexcept : m_code(nullptr), m_matchData(nullptr) {}
106  ~PCRE2() noexcept { free(); }
107 
108  PCRE2(const PCRE2 &) = delete;
109  PCRE2 &operator=(const PCRE2 &) = delete;
110 
112  PCRE2(PCRE2 &&other) noexcept : m_lastError(std::move(other.m_lastError)),
113  m_code(other.m_code), m_matchData(other.m_matchData) {
114  other.m_code = nullptr;
115  other.m_matchData = nullptr;
116  }
118  PCRE2 &operator=(PCRE2 &&other) noexcept {
119  if (this != &other) {
120  free();
121  m_lastError = std::move(other.m_lastError);
122  std::swap(m_code, other.m_code);
123  std::swap(m_matchData, other.m_matchData);
124  }
125  return *this;
126  }
127 
134  bool compile(std::string_view regex, uint32_t options = 0) noexcept {
135  free();
136 
137  int err;
138  PCRE2_SIZE lastOff;
139  m_code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(regex.data()), regex.length(),
140  options, &err, &lastOff, nullptr);
141  if (!m_code) {
142  m_lastError.reset().setError(errToStr(err));
143  m_lastError.set<0>(err);
144  m_lastError.set<1>(lastOff);
145  return false;
146  }
147 
148  m_matchData = pcre2_match_data_create_from_pattern(m_code, nullptr);
149  if (!m_matchData) {
150  m_lastError.reset().setError("failed to allocate match data");
151  m_lastError.set<0>(PCRE2_ERROR_NOMEMORY);
152  return false;
153  }
154 
155  return true;
156  }
157 
163  int match(std::string_view subject) noexcept {
164  return pcre2_match(m_code, reinterpret_cast<PCRE2_SPTR>(subject.data()),
165  subject.length(), 0, 0, m_matchData, nullptr);
166  }
167 
174  auto ovector() const { return pcre2_get_ovector_pointer(m_matchData); }
175 
181  static std::string errToStr(int err) {
182  std::string s(256, 0);
183  auto len = pcre2_get_error_message(err, reinterpret_cast<PCRE2_UCHAR *>(s.data()),
184  s.length());
185  if (len < 0)
186  return {};
187  s.resize(len);
188  return s;
189  }
190 
197  auto matches(std::string_view subject, unsigned matches) const noexcept {
198  return Matches(subject, ovector(), matches);
199  }
200 
207  auto matchByIdx(std::string_view subject, unsigned index) const noexcept {
208  return MatchIterator::matchByIdx(ovector(), subject, index);
209  }
210 
212  auto lastErrno() const noexcept { return m_lastError.get<0>(); }
214  auto lastError() const noexcept { return m_lastError.lastError(); }
216  auto lastOffset() const noexcept { return m_lastError.get<1>(); }
217 
219  bool valid() const noexcept { return m_code; }
221  operator bool() const noexcept { return valid(); }
223  bool operator!() const noexcept { return !valid(); }
224 private:
225  void free() {
226  pcre2_match_data_free(m_matchData);
227  m_matchData = nullptr;
228  pcre2_code_free(m_code);
229  m_code = nullptr;
230  }
232  pcre2_code *m_code;
233  pcre2_match_data *m_matchData;
234 };
235 
236 }
auto operator==(const MatchIterator &other) const noexcept
Compare two MatchIterators.
Definition: PCRE2.h:40
MatchIterator & operator++() noexcept
Move to the next match.
Definition: PCRE2.h:24
auto operator!=(const MatchIterator &other) const noexcept
Compare two MatchIterators.
Definition: PCRE2.h:42
bool operator!() const noexcept
! wrapper around valid()
Definition: PCRE2.h:223
PCRE2 & operator=(PCRE2 &&other) noexcept
Move assignment.
Definition: PCRE2.h:118
auto matches(std::string_view subject, unsigned matches) const noexcept
Returns Matches (a pseudo-vector of matches) in subject.
Definition: PCRE2.h:197
auto lastOffset() const noexcept
Get the offset of the last error (into the regex string).
Definition: PCRE2.h:216
auto lastErrno() const noexcept
Return the last error number.
Definition: PCRE2.h:212
std::tuple_element_t< idx, Tuple > & get() noexcept
Get n-th error member.
Definition: LastError.h:22
auto index() const noexcept
Get current index of the match.
Definition: PCRE2.h:48
Perl-compatible regex.
Definition: PCRE2.h:102
const std::string & lastError() const &noexcept
Obtain the stored string.
Definition: LastError.h:75
auto end() const noexcept
Get past last match (the end iterator)
Definition: PCRE2.h:77
int match(std::string_view subject) noexcept
Match this PCRE2 against subject.
Definition: PCRE2.h:163
bool compile(std::string_view regex, uint32_t options=0) noexcept
Compile PCRE2 regex with passed options.
Definition: PCRE2.h:134
auto ovector() const
Returns the vector of offsets (into the subject string) for all matches.
Definition: PCRE2.h:174
PCRE2() noexcept
Constructs an empty PCRE2.
Definition: PCRE2.h:105
static std::string errToStr(int err)
Converts PCRE2 error code err to string.
Definition: PCRE2.h:181
auto matchByIdx(std::string_view subject, unsigned index) const noexcept
Returns one match – a substring of subject.
Definition: PCRE2.h:207
Iterator over matches.
Definition: PCRE2.h:15
auto operator*() const noexcept
Obtain the current match.
Definition: PCRE2.h:19
auto lastError() const noexcept
Return the last error string if any.
Definition: PCRE2.h:214
auto operator[](std::size_t idx) const noexcept
Get n-th match.
Definition: PCRE2.h:84
bool valid() const noexcept
Test whether PCRE2 is valid.
Definition: PCRE2.h:219
void set(Arg &&val) noexcept
Set n-th error member.
Definition: LastError.h:34
MatchIterator operator++(int) noexcept
Move to the next match.
Definition: PCRE2.h:28
LastErrorStr & reset() noexcept
Wipe out everything.
Definition: LastError.h:55
MatchIterator & operator--() noexcept
Move to the previous match.
Definition: PCRE2.h:26
auto begin() const noexcept
Get first match (the begin iterator)
Definition: PCRE2.h:75
void setError(T &&str) requires(std
Store a string into this error.
Definition: LastError.h:66
MatchIterator operator--(int) noexcept
Move to the previous match.
Definition: PCRE2.h:34
Definition: PCRE2.h:10
Pseudo-vector of matches.
Definition: PCRE2.h:71
PCRE2(PCRE2 &&other) noexcept
Move constructor.
Definition: PCRE2.h:112