117 """Returns a sequence of Tokens.
118
119 Args:
120 source: string of C++ source code.
121
122 Yields:
123 Token that represents the next token in the source.
124 """
125
126 valid_identifier_chars = VALID_IDENTIFIER_CHARS
127 hex_digits = HEX_DIGITS
128 int_or_float_digits = INT_OR_FLOAT_DIGITS
129 int_or_float_digits2 = int_or_float_digits |
set(
'.')
130
131
132 ignore_errors = False
133 count_ifs = 0
134
135 i = 0
136 end = len(source)
137 while i < end:
138
139 while i < end and source[i].isspace():
140 i += 1
141 if i >= end:
142 return
143
144 token_type = UNKNOWN
145 start = i
146 c = source[i]
147 if c.isalpha() or c == '_':
148 token_type = NAME
149 while source[i] in valid_identifier_chars:
150 i += 1
151
152
153 if (source[i] == "'" and (i - start) == 1 and
154 source[start:i] in 'uUL'):
155
156 token_type = CONSTANT
157 i = _GetChar(source, start, i)
158 elif source[i] == "'" and source[start:i] in _STR_PREFIXES:
159 token_type = CONSTANT
160 i = _GetString(source, start, i)
161 elif c == '/' and source[i+1] == '/':
162 i = source.find('\n', i)
163 if i == -1:
164 i = end
165 continue
166 elif c == '/' and source[i+1] == '*':
167 i = source.find('*/', i) + 2
168 continue
169 elif c in ':+-<>&|*=':
170 token_type = SYNTAX
171 i += 1
172 new_ch = source[i]
173 if new_ch == c and c != '>':
174 i += 1
175 elif c == '-' and new_ch == '>':
176 i += 1
177 elif new_ch == '=':
178 i += 1
179 elif c in '()[]{}~!?^%;/.,':
180 token_type = SYNTAX
181 i += 1
182 if c == '.' and source[i].isdigit():
183 token_type = CONSTANT
184 i += 1
185 while source[i] in int_or_float_digits:
186 i += 1
187
188 for suffix in ('l', 'f'):
189 if suffix == source[i:i+1].lower():
190 i += 1
191 break
192 elif c.isdigit():
193 token_type = CONSTANT
194 if c == '0' and source[i+1] in 'xX':
195
196 i += 2
197 while source[i] in hex_digits:
198 i += 1
199 else:
200 while source[i] in int_or_float_digits2:
201 i += 1
202
203 for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):
204 size = len(suffix)
205 if suffix == source[i:i+size].lower():
206 i += size
207 break
208 elif c == '"':
209 token_type = CONSTANT
210 i = _GetString(source, start, i)
211 elif c == "'":
212 token_type = CONSTANT
213 i = _GetChar(source, start, i)
214 elif c == '#':
215 token_type = PREPROCESSOR
216 got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()
217 if got_if:
218 count_ifs += 1
219 elif source[i:i+6] == '#endif':
220 count_ifs -= 1
221 if count_ifs == 0:
222 ignore_errors = False
223
224
225 while 1:
226 i1 = source.find('\n', i)
227 i2 = source.find('//', i)
228 i3 = source.find('/*', i)
229 i4 = source.find('"', i)
230
231
232 i = min([x for x in (i1, i2, i3, i4, end) if x != -1])
233
234
235 if source[i] == '"':
236 i = source.find('"', i+1) + 1
237 assert i > 0
238 continue
239
240 if not (i == i1 and source[i-1] == '\\'):
241 if got_if:
242 condition = source[start+4:i].lstrip()
243 if (condition.startswith('0') or
244 condition.startswith('(0)')):
245 ignore_errors = True
246 break
247 i += 1
248 elif c == '\\':
249
250 i += 1
251 continue
252 elif ignore_errors:
253
254
255
256
257
258 i += 1
259 else:
260 sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %
261 ('?', i, c, source[i-10:i+10]))
262 raise RuntimeError('unexpected token')
263
264 if i <= 0:
265 print('Invalid index, exiting now.')
266 return
267 yield Token(token_type, source[start:i], start, i)
268
269