LLVM  8.0.1
GlobPattern.cpp
Go to the documentation of this file.
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a glob pattern matcher.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/Errc.h"
19 
20 using namespace llvm;
21 
22 static bool hasWildcard(StringRef S) {
23  return S.find_first_of("?*[") != StringRef::npos;
24 }
25 
26 // Expands character ranges and returns a bitmap.
27 // For example, "a-cf-hz" is expanded to "abcfghz".
29  BitVector BV(256, false);
30 
31  // Expand X-Y.
32  for (;;) {
33  if (S.size() < 3)
34  break;
35 
36  uint8_t Start = S[0];
37  uint8_t End = S[2];
38 
39  // If it doesn't start with something like X-Y,
40  // consume the first character and proceed.
41  if (S[1] != '-') {
42  BV[Start] = true;
43  S = S.substr(1);
44  continue;
45  }
46 
47  // It must be in the form of X-Y.
48  // Validate it and then interpret the range.
49  if (Start > End)
50  return make_error<StringError>("invalid glob pattern: " + Original,
52 
53  for (int C = Start; C <= End; ++C)
54  BV[(uint8_t)C] = true;
55  S = S.substr(3);
56  }
57 
58  for (char C : S)
59  BV[(uint8_t)C] = true;
60  return BV;
61 }
62 
63 // This is a scanner for the glob pattern.
64 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
65 // (which is a negative form of "[<chars>]"), or a non-meta character.
66 // This function returns the first token in S.
68  switch (S[0]) {
69  case '*':
70  S = S.substr(1);
71  // '*' is represented by an empty bitvector.
72  // All other bitvectors are 256-bit long.
73  return BitVector();
74  case '?':
75  S = S.substr(1);
76  return BitVector(256, true);
77  case '[': {
78  size_t End = S.find(']', 1);
79  if (End == StringRef::npos)
80  return make_error<StringError>("invalid glob pattern: " + Original,
82 
83  StringRef Chars = S.substr(1, End - 1);
84  S = S.substr(End + 1);
85  if (Chars.startswith("^")) {
86  Expected<BitVector> BV = expand(Chars.substr(1), Original);
87  if (!BV)
88  return BV.takeError();
89  return BV->flip();
90  }
91  return expand(Chars, Original);
92  }
93  default:
94  BitVector BV(256, false);
95  BV[(uint8_t)S[0]] = true;
96  S = S.substr(1);
97  return BV;
98  }
99 }
100 
102  GlobPattern Pat;
103 
104  // S doesn't contain any metacharacter,
105  // so the regular string comparison should work.
106  if (!hasWildcard(S)) {
107  Pat.Exact = S;
108  return Pat;
109  }
110 
111  // S is something like "foo*". We can use startswith().
112  if (S.endswith("*") && !hasWildcard(S.drop_back())) {
113  Pat.Prefix = S.drop_back();
114  return Pat;
115  }
116 
117  // S is something like "*foo". We can use endswith().
118  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
119  Pat.Suffix = S.drop_front();
120  return Pat;
121  }
122 
123  // Otherwise, we need to do real glob pattern matching.
124  // Parse the pattern now.
125  StringRef Original = S;
126  while (!S.empty()) {
127  Expected<BitVector> BV = scan(S, Original);
128  if (!BV)
129  return BV.takeError();
130  Pat.Tokens.push_back(*BV);
131  }
132  return Pat;
133 }
134 
136  if (Exact)
137  return S == *Exact;
138  if (Prefix)
139  return S.startswith(*Prefix);
140  if (Suffix)
141  return S.endswith(*Suffix);
142  return matchOne(Tokens, S);
143 }
144 
145 // Runs glob pattern Pats against string S.
146 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
147  for (;;) {
148  if (Pats.empty())
149  return S.empty();
150 
151  // If Pats[0] is '*', try to match Pats[1..] against all possible
152  // tail strings of S to see at least one pattern succeeds.
153  if (Pats[0].size() == 0) {
154  Pats = Pats.slice(1);
155  if (Pats.empty())
156  // Fast path. If a pattern is '*', it matches anything.
157  return true;
158  for (size_t I = 0, E = S.size(); I < E; ++I)
159  if (matchOne(Pats, S.substr(I)))
160  return true;
161  return false;
162  }
163 
164  // If Pats[0] is not '*', it must consume one character.
165  if (S.empty() || !Pats[0][(uint8_t)S[0]])
166  return false;
167  Pats = Pats.slice(1);
168  S = S.substr(1);
169  }
170 }
uint64_t CallInst * C
This class represents lattice values for constants.
Definition: AllocatorList.h:24
bool match(StringRef S) const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
Error takeError()
Take ownership of the stored error.
Definition: Error.h:553
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:279
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool hasWildcard(StringRef S)
Definition: GlobPattern.cpp:22
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:28
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to &#39;this&#39; but with the last N elements dropped.
Definition: StringRef.h:654
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_front(size_t N=1) const
Return a StringRef equal to &#39;this&#39; but with the first N elements dropped.
Definition: StringRef.h:645
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
static Expected< GlobPattern > create(StringRef Pat)
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:179
static const size_t npos
Definition: StringRef.h:51
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:395
#define I(x, y, z)
Definition: MD5.cpp:58
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:298
static Expected< BitVector > scan(StringRef &S, StringRef Original)
Definition: GlobPattern.cpp:67
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144