tesseract v5.3.3.20231005
ast.py
Go to the documentation of this file.
1#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20# FIXME:
21# * Tokens should never be exported, need to convert to Nodes
22# (return types, parameters, etc.)
23# * Handle static class data for templatized classes
24# * Handle casts (both C++ and C-style)
25# * Handle conditions and loops (if/else, switch, for, while/do)
26#
27# TODO much, much later:
28# * Handle #define
29# * exceptions
30
31
32try:
33 # Python 3.x
34 import builtins
35except ImportError:
36 # Python 2.x
37 import __builtin__ as builtins
38
39import collections
40import sys
41import traceback
42
43from cpp import keywords
44from cpp import tokenize
45from cpp import utils
46
47
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier: provide a generator equivalent of
    # the reversed() builtin introduced in Python 2.4.
    def reversed(seq):
        for i in range(len(seq) - 1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier: provide the next() builtin
    # introduced in Python 2.6 by delegating to the old .next() method.
    def next(obj):
        return obj.next()
58
59
# Access-specifier constants for class members.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing properties of a function/method declaration.
# These are OR-ed together into a `modifiers` bitmask.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100
72
73"""
74These are currently unused. Should really handle these properly at some point.
75
76TYPE_MODIFIER_INLINE = 0x010000
77TYPE_MODIFIER_EXTERN = 0x020000
78TYPE_MODIFIER_STATIC = 0x040000
79TYPE_MODIFIER_CONST = 0x080000
80TYPE_MODIFIER_REGISTER = 0x100000
81TYPE_MODIFIER_VOLATILE = 0x200000
82TYPE_MODIFIER_MUTABLE = 0x400000
83
84TYPE_MODIFIER_MAP = {
85 'inline': TYPE_MODIFIER_INLINE,
86 'extern': TYPE_MODIFIER_EXTERN,
87 'static': TYPE_MODIFIER_STATIC,
88 'const': TYPE_MODIFIER_CONST,
89 'register': TYPE_MODIFIER_REGISTER,
90 'volatile': TYPE_MODIFIER_VOLATILE,
91 'mutable': TYPE_MODIFIER_MUTABLE,
92 }
93"""
94
95_INTERNAL_TOKEN = 'internal'
96_NAMESPACE_POP = 'ns-pop'
97
98
99# TODO(nnorwitz): use this as a singleton for templated_types, etc
100# where we don't want to create a new empty dict each time. It is also const.
101class _NullDict(object):
102 __contains__ = lambda self: False
103 keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
104
105
# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    start/end are offsets into the original source text.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        # Deliberately not named __str__ so subclasses without their own
        # __str__ fall back to object's default representation.
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # With utils.DEBUG set, include the start/end offsets in the repr.
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)
140
141
class Define(Node):
    """A #define preprocessor directive (name plus its definition text)."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        value = '%s %s' % (self.name, self.definition)
        return self._StringHelper(self.__class__.__name__, value)
151
152
class Include(Node):
    """An #include directive; `system` is True for <...> includes."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        # System includes render as <file>, local includes as "file".
        fmt = '"%s"'
        if self.system:
            fmt = '<%s>'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)
164
165
class Goto(Node):
    """A goto statement targeting `label`."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))
173
174
class Expr(Node):
    """A generic expression, stored as the raw token sequence `expr`."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))
186
187
class Return(Expr):
    """A return statement; the returned expression is held by Expr."""
    pass
190
191
class Delete(Expr):
    """A delete expression; the deleted expression is held by Expr."""
    pass
194
195
class Friend(Expr):
    """A friend declaration, recorded with the namespace it appeared in."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy: the caller's namespace stack continues to mutate.
        self.namespace = namespace[:]
200
201
class Using(Node):
    """A using declaration/directive; `names` is the token sequence used."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))
209
210
class Parameter(Node):
    """A single function parameter: name, Type, and default-value tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            # default is a token sequence; join the token names.
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)
228
229
class _GenericDeclaration(Node):
    """Base for named declarations that live inside a namespace stack."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy: the caller's namespace stack continues to mutate.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the fully qualified (::-joined) name."""
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        # Like _StringHelper, but appends the enclosing namespace(s);
        # anonymous namespaces appear as '<anonymous>'.
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
247
248
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration: name, Type, and optional initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
269
270
class Typedef(_GenericDeclaration):
    """A typedef; `alias` holds the aliased type's token sequence."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        name = node.name
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)
293
294
class _NestedType(_GenericDeclaration):
    """Base for brace-enclosed types (union/enum) with a `fields` list."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)
309
310
class Union(_NestedType):
    """A union definition; fields are held by _NestedType."""
    pass
313
314
class Enum(_NestedType):
    """An enum definition; enumerators are held by _NestedType.fields."""
    pass
317
318
class Class(_GenericDeclaration):
    """A class declaration or definition."""

    def __init__(self, start, end, name, bases, templated_types, body,
                 namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # A forward declaration has neither bases nor a body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        # Bug fix: was `self.IsDeclarationIsDeclaration()`, which raised
        # AttributeError whenever this was called.
        return not self.IsDeclaration()

    def IsExportable(self):
        # Bug fix: same garbled call as in IsDefinition.
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparison.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)
352
353
class Struct(Class):
    """A struct; treated identically to Class."""
    pass
356
357
class Function(_GenericDeclaration):
    """A free function declaration or definition."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Convert the raw token sequences into AST types up front.
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers  # FUNCTION_* bitmask.
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Static functions are not visible outside their translation unit.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        # None in the namespace stack marks an anonymous namespace.
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for p in self.parameters:
                if p.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
395
396
class Method(Function):
    """A member function; `in_class` names the class it belongs to."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
405
406
408 """Type used for any variable (eg class, primitive, struct, etc)."""
409
410 def __init__(self, start, end, name, templated_types, modifiers,
411 reference, pointer, array):
412 """
413 Args:
414 name: str name of main type
415 templated_types: [Class (Type?)] template type info between <>
416 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
417 reference, pointer, array: bools
418 """
419 _GenericDeclaration.__init__(self, start, end, name, [])
420 self.templated_types = templated_types
421 if not name and modifiers:
422 self.namename = modifiers.pop()
423 self.modifiers = modifiers
424 self.reference = reference
425 self.pointer = pointer
426 self.array = array
427
428 def __str__(self):
429 prefix = ''
430 if self.modifiers:
431 prefix = ' '.join(self.modifiers) + ' '
432 name = str(self.namename)
433 if self.templated_types:
434 name += '<%s>' % self.templated_types
435 suffix = prefix + name
436 if self.reference:
437 suffix += '&'
438 if self.pointer:
439 suffix += '*'
440 if self.array:
441 suffix += '[]'
442 return self._TypeStringHelper(suffix)
443
444 # By definition, Is* are always False. A Type can only exist in
445 # some sort of variable declaration, parameter, or return value.
446 def IsDeclaration(self):
447 return False
448
449 def IsDefinition(self):
450 return False
451
452 def IsExportable(self):
453 return False
454
455
class TypeConverter(object):
    """Converts raw token sequences into AST nodes (Type, Parameter)."""

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' matching an already-consumed '<'.

        Returns (tokens strictly inside the template args, index just past
        the closing '>').
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end - 1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i + 1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                # NOTE(review): '[' sets pointer (not array) here —
                # presumably deliberate since T[] decays to T*; confirm.
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their components.

        Returns (name, type_name, templated_types, modifiers, default,
        other_tokens).
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i + 1:]
                    name = parts[i - 1].name
                    if name == ']' and parts[i - 2].name == '[':
                        # Array declarator: the name precedes the brackets.
                        name = parts[i - 3].name
                        i -= 1
                    parts = parts[:i - 1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i + 1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                        p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert the tokens of a parameter list into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        brace_count = 0
        for s in tokens:
            if not first_token:
                first_token = s

            # Check for braces before templates, as we can have unmatched '<>'
            # inside default arguments.
            if s.name == '{':
                brace_count += 1
            elif s.name == '}':
                brace_count -= 1
            if brace_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value. Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Convert return-type tokens into a Type (or None if empty)."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice indices of the '<...>' run in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end + 1
685
class AstBuilder(object):
    """Builds AST nodes from a C++ token stream."""

    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=None):
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        # Avoid a shared mutable default; copy whatever the caller passed.
        self.namespace_stack = namespace_stack[:] if namespace_stack else []
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)
706
708
709 def HandleError(self, msg, token):
710 printable_queue = list(reversed(self.token_queue[-20:]))
711 sys.stderr.write('Got %s in %s @ %s %s\n' %
712 (msg, self.filename, token, printable_queue))
713
714 def Generate(self):
715 while 1:
716 token = self._GetNextToken()
717 if not token:
718 break
719
720 # Get the next token.
721 self.current_token = token
722
723 # Dispatch on the next token type.
724 if token.token_type == _INTERNAL_TOKEN:
725 if token.name == _NAMESPACE_POP:
726 self.namespace_stack.pop()
727 continue
728
729 try:
730 result = self._GenerateOne(token)
731 if result is not None:
732 yield result
733 except:
734 self.HandleError('exception', token)
735 raise
736
737 def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
738 ref_pointer_name_seq, templated_types, value=None):
739 reference = '&' in ref_pointer_name_seq
740 pointer = '*' in ref_pointer_name_seq
741 array = '[' in ref_pointer_name_seq
742 var_type = Type(pos_token.start, pos_token.end, type_name,
743 templated_types, type_modifiers,
744 reference, pointer, array)
745 return VariableDeclaration(pos_token.start, pos_token.end,
746 name, var_type, value, self.namespace_stack)
747
748 def _GenerateOne(self, token):
749 if token.token_type == tokenize.NAME:
750 if (keywords.IsKeyword(token.name) and
751 not keywords.IsBuiltinType(token.name)):
752 if token.name == 'enum':
753 # Pop the next token and only put it back if it's not
754 # 'class'. This allows us to support the two-token
755 # 'enum class' keyword as if it were simply 'enum'.
756 next = self._GetNextToken()
757 if next.name != 'class':
758 self._AddBackToken(next)
759
760 method = getattr(self, 'handle_' + token.name)
761 return method()
762 elif token.name == self.in_class_name_only:
763 # The token name is the same as the class, must be a ctor if
764 # there is a paren. Otherwise, it's the return type.
765 # Peek ahead to get the next token to figure out which.
766 next = self._GetNextToken()
767 self._AddBackToken(next)
768 if next.token_type == tokenize.SYNTAX and next.name == '(':
769 return self._GetMethod([token], FUNCTION_CTOR, None, True)
770 # Fall through--handle like any other method.
771
772 # Handle data or function declaration/definition.
773 syntax = tokenize.SYNTAX
774 temp_tokens, last_token = \
776 '(', ';', '{', '[')
777 temp_tokens.insert(0, token)
778 if last_token.name == '(':
779 # If there is an assignment before the paren,
780 # this is an expression, not a method.
781 expr = bool([e for e in temp_tokens if e.name == '='])
782 if expr:
783 new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
784 temp_tokens.append(last_token)
785 temp_tokens.extend(new_temp)
786 last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
787
788 if last_token.name == '[':
789 # Handle array, this isn't a method, unless it's an operator.
790 # TODO(nnorwitz): keep the size somewhere.
791 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
792 temp_tokens.append(last_token)
793 if temp_tokens[-2].name == 'operator':
794 temp_tokens.append(self._GetNextToken())
795 else:
796 temp_tokens2, last_token = \
797 self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
798 temp_tokens.extend(temp_tokens2)
799
800 if last_token.name == ';':
801 # Handle data, this isn't a method.
802 parts = self.converter.DeclarationToParts(temp_tokens, True)
803 (name, type_name, templated_types, modifiers, default,
804 unused_other_tokens) = parts
805
806 t0 = temp_tokens[0]
807 names = [t.name for t in temp_tokens]
808 if templated_types:
809 start, end = self.converter.GetTemplateIndices(names)
810 names = names[:start] + names[end:]
811 default = ''.join([t.name for t in default])
812 return self._CreateVariable(t0, name, type_name, modifiers,
813 names, templated_types, default)
814 if last_token.name == '{':
815 self._AddBackTokens(temp_tokens[1:])
816 self._AddBackToken(last_token)
817 method_name = temp_tokens[0].name
818 method = getattr(self, 'handle_' + method_name, None)
819 if not method:
820 # Must be declaring a variable.
821 # TODO(nnorwitz): handle the declaration.
822 return None
823 return method()
824 return self._GetMethod(temp_tokens, 0, None, False)
825 elif token.token_type == tokenize.SYNTAX:
826 if token.name == '~' and self.in_class:
827 # Must be a dtor (probably not in method body).
828 token = self._GetNextToken()
829 # self.in_class can contain A::Name, but the dtor will only
830 # be Name. Make sure to compare against the right value.
831 if (token.token_type == tokenize.NAME and
832 token.name == self.in_class_name_only):
833 return self._GetMethod([token], FUNCTION_DTOR, None, True)
834 # TODO(nnorwitz): handle a lot more syntax.
835 elif token.token_type == tokenize.PREPROCESSOR:
836 # TODO(nnorwitz): handle more preprocessor directives.
837 # token starts with a #, so remove it and strip whitespace.
838 name = token.name[1:].lstrip()
839 if name.startswith('include'):
840 # Remove "include".
841 name = name[7:].strip()
842 assert name
843 # Handle #include <newline> "header-on-second-line.h".
844 if name.startswith('\\'):
845 name = name[1:].strip()
846 assert name[0] in '<"', token
847 assert name[-1] in '>"', token
848 system = name[0] == '<'
849 filename = name[1:-1]
850 return Include(token.start, token.end, filename, system)
851 if name.startswith('define'):
852 # Remove "define".
853 name = name[6:].strip()
854 assert name
855 value = ''
856 for i, c in enumerate(name):
857 if c.isspace():
858 value = name[i:].lstrip()
859 name = name[:i]
860 break
861 return Define(token.start, token.end, name, value)
862 if name.startswith('if') and name[2:3].isspace():
863 condition = name[3:].strip()
864 if condition.startswith('0') or condition.startswith('(0)'):
865 self._SkipIf0Blocks()
866 return None
867
868 def _GetTokensUpTo(self, expected_token_type, expected_token):
869 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
870
871 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
872 last_token = self._GetNextToken()
873 tokens = []
874 while (last_token.token_type != expected_token_type or
875 last_token.name not in expected_tokens):
876 tokens.append(last_token)
877 last_token = self._GetNextToken()
878 return tokens, last_token
879
880 # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
881 # expected token.
882 def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type,
883 *expected_tokens):
884 last_token = self._GetNextToken()
885 tokens = []
886 nesting = 0
887 while (nesting > 0 or
888 last_token.token_type != expected_token_type or
889 last_token.name not in expected_tokens):
890 tokens.append(last_token)
891 last_token = self._GetNextToken()
892 if last_token.name == '<':
893 nesting += 1
894 elif last_token.name == '>':
895 nesting -= 1
896 return tokens, last_token
897
898 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
899 def _IgnoreUpTo(self, token_type, token):
900 unused_tokens = self._GetTokensUpTo(token_type, token)
901
902 def _SkipIf0Blocks(self):
903 count = 1
904 while 1:
905 token = self._GetNextToken()
906 if token.token_type != tokenize.PREPROCESSOR:
907 continue
908
909 name = token.name[1:].lstrip()
910 if name.startswith('endif'):
911 count -= 1
912 if count == 0:
913 break
914 elif name.startswith('if'):
915 count += 1
916
917 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
918 if GetNextToken is None:
919 GetNextToken = self._GetNextToken
920 # Assumes the current token is open_paren and we will consume
921 # and return up to the close_paren.
922 count = 1
923 token = GetNextToken()
924 while 1:
925 if token.token_type == tokenize.SYNTAX:
926 if token.name == open_paren:
927 count += 1
928 elif token.name == close_paren:
929 count -= 1
930 if count == 0:
931 break
932 yield token
933 token = GetNextToken()
934 yield token
935
936 def _GetParameters(self):
937 return self._GetMatchingChar('(', ')')
938
939 def GetScope(self):
940 return self._GetMatchingChar('{', '}')
941
942 def _GetNextToken(self):
943 if self.token_queue:
944 return self.token_queue.pop()
945 try:
946 return next(self.tokens)
947 except StopIteration:
948 return
949
950 def _AddBackToken(self, token):
951 if token.whence == tokenize.WHENCE_STREAM:
952 token.whence = tokenize.WHENCE_QUEUE
953 self.token_queue.insert(0, token)
954 else:
955 assert token.whence == tokenize.WHENCE_QUEUE, token
956 self.token_queue.append(token)
957
958 def _AddBackTokens(self, tokens):
959 if tokens:
960 if tokens[-1].whence == tokenize.WHENCE_STREAM:
961 for token in tokens:
962 token.whence = tokenize.WHENCE_QUEUE
963 self.token_queue[:0] = reversed(tokens)
964 else:
965 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
966 self.token_queue.extend(reversed(tokens))
967
968 def GetName(self, seq=None):
969 """Returns ([tokens], next_token_info)."""
970 GetNextToken = self._GetNextToken
971 if seq is not None:
972 it = iter(seq)
973 GetNextToken = lambda: next(it)
974 next_token = GetNextToken()
975 tokens = []
976 last_token_was_name = False
977 while (next_token.token_type == tokenize.NAME or
978 (next_token.token_type == tokenize.SYNTAX and
979 next_token.name in ('::', '<'))):
980 # Two NAMEs in a row means the identifier should terminate.
981 # It's probably some sort of variable declaration.
982 if last_token_was_name and next_token.token_type == tokenize.NAME:
983 break
984 last_token_was_name = next_token.token_type == tokenize.NAME
985 tokens.append(next_token)
986 # Handle templated names.
987 if next_token.name == '<':
988 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
989 last_token_was_name = True
990 next_token = GetNextToken()
991 return tokens, next_token
992
993 def GetMethod(self, modifiers, templated_types):
994 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
995 assert len(return_type_and_name) >= 1
996 return self._GetMethod(return_type_and_name, modifiers, templated_types,
997 False)
998
999 def _GetMethod(self, return_type_and_name, modifiers, templated_types,
1000 get_paren):
1001 template_portion = None
1002 if get_paren:
1003 token = self._GetNextToken()
1004 assert token.token_type == tokenize.SYNTAX, token
1005 if token.name == '<':
1006 # Handle templatized dtors.
1007 template_portion = [token]
1008 template_portion.extend(self._GetMatchingChar('<', '>'))
1009 token = self._GetNextToken()
1010 assert token.token_type == tokenize.SYNTAX, token
1011 assert token.name == '(', token
1012
1013 name = return_type_and_name.pop()
1014 # Handle templatized ctors.
1015 if name.name == '>':
1016 index = 1
1017 while return_type_and_name[index].name != '<':
1018 index += 1
1019 template_portion = return_type_and_name[index:] + [name]
1020 del return_type_and_name[index:]
1021 name = return_type_and_name.pop()
1022 elif name.name == ']':
1023 rt = return_type_and_name
1024 assert rt[-1].name == '[', return_type_and_name
1025 assert rt[-2].name == 'operator', return_type_and_name
1026 name_seq = return_type_and_name[-2:]
1027 del return_type_and_name[-2:]
1028 name = tokenize.Token(tokenize.NAME, 'operator[]',
1029 name_seq[0].start, name.end)
1030 # Get the open paren so _GetParameters() below works.
1031 unused_open_paren = self._GetNextToken()
1032
1033 # TODO(nnorwitz): store template_portion.
1034 return_type = return_type_and_name
1035 indices = name
1036 if return_type:
1037 indices = return_type[0]
1038
1039 # Force ctor for templatized ctors.
1040 if name.name == self.in_class and not modifiers:
1041 modifiers |= FUNCTION_CTOR
1042 parameters = list(self._GetParameters())
1043 del parameters[-1] # Remove trailing ')'.
1044
1045 # Handling operator() is especially weird.
1046 if name.name == 'operator' and not parameters:
1047 token = self._GetNextToken()
1048 assert token.name == '(', token
1049 parameters = list(self._GetParameters())
1050 del parameters[-1] # Remove trailing ')'.
1051
1052 token = self._GetNextToken()
1053 while token.token_type == tokenize.NAME:
1054 modifier_token = token
1055 token = self._GetNextToken()
1056 if modifier_token.name == 'const':
1057 modifiers |= FUNCTION_CONST
1058 elif modifier_token.name == '__attribute__':
1059 # TODO(nnorwitz): handle more __attribute__ details.
1060 modifiers |= FUNCTION_ATTRIBUTE
1061 assert token.name == '(', token
1062 # Consume everything between the (parens).
1063 unused_tokens = list(self._GetMatchingChar('(', ')'))
1064 token = self._GetNextToken()
1065 elif modifier_token.name == 'throw':
1066 modifiers |= FUNCTION_THROW
1067 assert token.name == '(', token
1068 # Consume everything between the (parens).
1069 unused_tokens = list(self._GetMatchingChar('(', ')'))
1070 token = self._GetNextToken()
1071 elif modifier_token.name == 'override':
1072 modifiers |= FUNCTION_OVERRIDE
1073 elif modifier_token.name == modifier_token.name.upper():
1074 # HACK(nnorwitz): assume that all upper-case names
1075 # are some macro we aren't expanding.
1076 modifiers |= FUNCTION_UNKNOWN_ANNOTATION
1077 else:
1078 self.HandleError('unexpected token', modifier_token)
1079
1080 assert token.token_type == tokenize.SYNTAX, token
1081 # Handle ctor initializers.
1082 if token.name == ':':
1083 # TODO(nnorwitz): anything else to handle for initializer list?
1084 while token.name != ';' and token.name != '{':
1085 token = self._GetNextToken()
1086
1087 # Handle pointer to functions that are really data but look
1088 # like method declarations.
1089 if token.name == '(':
1090 if parameters[0].name == '*':
1091 # name contains the return type.
1092 name = parameters.pop()
1093 # parameters contains the name of the data.
1094 modifiers = [p.name for p in parameters]
1095 # Already at the ( to open the parameter list.
1096 function_parameters = list(self._GetMatchingChar('(', ')'))
1097 del function_parameters[-1] # Remove trailing ')'.
1098 # TODO(nnorwitz): store the function_parameters.
1099 token = self._GetNextToken()
1100 assert token.token_type == tokenize.SYNTAX, token
1101 assert token.name == ';', token
1102 return self._CreateVariable(indices, name.name, indices.name,
1103 modifiers, '', None)
1104 # At this point, we got something like:
1105 # return_type (type::*name_)(params);
1106 # This is a data member called name_ that is a function pointer.
1107 # With this code: void (sq_type::*field_)(string&);
1108 # We get: name=void return_type=[] parameters=sq_type ... field_
1109 # TODO(nnorwitz): is return_type always empty?
1110 # TODO(nnorwitz): this isn't even close to being correct.
1111 # Just put in something so we don't crash and can move on.
1112 real_name = parameters[-1]
1113 modifiers = [p.name for p in self._GetParameters()]
1114 del modifiers[-1] # Remove trailing ')'.
1115 return self._CreateVariable(indices, real_name.name, indices.name,
1116 modifiers, '', None)
1117
1118 if token.name == '{':
1119 body = list(self.GetScope())
1120 del body[-1] # Remove trailing '}'.
1121 else:
1122 body = None
1123 if token.name == '=':
1124 token = self._GetNextToken()
1125
1126 if token.name == 'default' or token.name == 'delete':
1127 # Ignore explicitly defaulted and deleted special members
1128 # in C++11.
1129 token = self._GetNextToken()
1130 else:
1131 # Handle pure-virtual declarations.
1132 assert token.token_type == tokenize.CONSTANT, token
1133 assert token.name == '0', token
1134 modifiers |= FUNCTION_PURE_VIRTUAL
1135 token = self._GetNextToken()
1136
1137 if token.name == '[':
1138 # TODO(nnorwitz): store tokens and improve parsing.
1139 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1140 tokens = list(self._GetMatchingChar('[', ']'))
1141 token = self._GetNextToken()
1142
1143 assert token.name == ';', (token, return_type_and_name, parameters)
1144
1145 # Looks like we got a method, not a function.
1146 if len(return_type) > 2 and return_type[-1].name == '::':
1147 return_type, in_class = \
1148 self._GetReturnTypeAndClassName(return_type)
1149 return Method(indices.start, indices.end, name.name, in_class,
1150 return_type, parameters, modifiers, templated_types,
1151 body, self.namespace_stack)
1152 return Function(indices.start, indices.end, name.name, return_type,
1153 parameters, modifiers, templated_types, body,
1154 self.namespace_stack)
1155
1156 def _GetReturnTypeAndClassName(self, token_seq):
1157 # Splitting the return type from the class name in a method
1158 # can be tricky. For example, Return::Type::Is::Hard::To::Find().
1159 # Where is the return type and where is the class name?
1160 # The heuristic used is to pull the last name as the class name.
1161 # This includes all the templated type info.
1162 # TODO(nnorwitz): if there is only One name like in the
1163 # example above, punt and assume the last bit is the class name.
1164
1165 # Ignore a :: prefix, if exists so we can find the first real name.
1166 i = 0
1167 if token_seq[0].name == '::':
1168 i = 1
1169 # Ignore a :: suffix, if exists.
1170 end = len(token_seq) - 1
1171 if token_seq[end-1].name == '::':
1172 end -= 1
1173
1174 # Make a copy of the sequence so we can append a sentinel
1175 # value. This is required for GetName will has to have some
1176 # terminating condition beyond the last name.
1177 seq_copy = token_seq[i:end]
1178 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1179 names = []
1180 while i < end:
1181 # Iterate through the sequence parsing out each name.
1182 new_name, next = self.GetName(seq_copy[i:])
1183 assert new_name, 'Got empty new_name, next=%s' % next
1184 # We got a pointer or ref. Add it to the name.
1185 if next and next.token_type == tokenize.SYNTAX:
1186 new_name.append(next)
1187 names.append(new_name)
1188 i += len(new_name)
1189
1190 # Now that we have the names, it's time to undo what we did.
1191
1192 # Remove the sentinel value.
1193 names[-1].pop()
1194 # Flatten the token sequence for the return type.
1195 return_type = [e for seq in names[:-1] for e in seq]
1196 # The class name is the last name.
1197 class_name = names[-1]
1198 return return_type, class_name
1199
1200 def handle_bool(self):
1201 pass
1202
1203 def handle_char(self):
1204 pass
1205
1206 def handle_int(self):
1207 pass
1208
1209 def handle_long(self):
1210 pass
1211
1212 def handle_short(self):
1213 pass
1214
1215 def handle_double(self):
1216 pass
1217
1218 def handle_float(self):
1219 pass
1220
1221 def handle_void(self):
1222 pass
1223
1225 pass
1226
1228 pass
1229
1230 def handle_signed(self):
1231 pass
1232
1233 def _GetNestedType(self, ctor):
1234 name = None
1235 name_tokens, token = self.GetName()
1236 if name_tokens:
1237 name = ''.join([t.name for t in name_tokens])
1238
1239 # Handle forward declarations.
1240 if token.token_type == tokenize.SYNTAX and token.name == ';':
1241 return ctor(token.start, token.end, name, None,
1242 self.namespace_stack)
1243
1244 if token.token_type == tokenize.NAME and self._handling_typedef:
1245 self._AddBackToken(token)
1246 return ctor(token.start, token.end, name, None,
1247 self.namespace_stack)
1248
1249 # Must be the type declaration.
1250 fields = list(self._GetMatchingChar('{', '}'))
1251 del fields[-1] # Remove trailing '}'.
1252 if token.token_type == tokenize.SYNTAX and token.name == '{':
1253 next = self._GetNextToken()
1254 new_type = ctor(token.start, token.end, name, fields,
1255 self.namespace_stack)
1256 # A name means this is an anonymous type and the name
1257 # is the variable declaration.
1258 if next.token_type != tokenize.NAME:
1259 return new_type
1260 name = new_type
1261 token = next
1262
1263 # Must be variable declaration using the type prefixed with keyword.
1264 assert token.token_type == tokenize.NAME, token
1265 return self._CreateVariable(token, token.name, name, [], '', None)
1266
1267 def handle_struct(self):
1268 # Special case the handling typedef/aliasing of structs here.
1269 # It would be a pain to handle in the class code.
1270 name_tokens, var_token = self.GetName()
1271 if name_tokens:
1272 next_token = self._GetNextToken()
1273 is_syntax = (var_token.token_type == tokenize.SYNTAX and
1274 var_token.name[0] in '*&')
1275 is_variable = (var_token.token_type == tokenize.NAME and
1276 next_token.name == ';')
1277 variable = var_token
1278 if is_syntax and not is_variable:
1279 variable = next_token
1280 temp = self._GetNextToken()
1281 if temp.token_type == tokenize.SYNTAX and temp.name == '(':
1282 # Handle methods declared to return a struct.
1283 t0 = name_tokens[0]
1284 struct = tokenize.Token(tokenize.NAME, 'struct',
1285 t0.start-7, t0.start-2)
1286 type_and_name = [struct]
1287 type_and_name.extend(name_tokens)
1288 type_and_name.extend((var_token, next_token))
1289 return self._GetMethod(type_and_name, 0, None, False)
1290 assert temp.name == ';', (temp, name_tokens, var_token)
1291 if is_syntax or (is_variable and not self._handling_typedef):
1292 modifiers = ['struct']
1293 type_name = ''.join([t.name for t in name_tokens])
1294 position = name_tokens[0]
1295 return self._CreateVariable(position, variable.name, type_name,
1296 modifiers, var_token.name, None)
1297 name_tokens.extend((var_token, next_token))
1298 self._AddBackTokens(name_tokens)
1299 else:
1300 self._AddBackToken(var_token)
1301 return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1302
1303 def handle_union(self):
1304 return self._GetNestedType(Union)
1305
1306 def handle_enum(self):
1307 return self._GetNestedType(Enum)
1308
1309 def handle_auto(self):
1310 # TODO(nnorwitz): warn about using auto? Probably not since it
1311 # will be reclaimed and useful for C++0x.
1312 pass
1313
1315 pass
1316
1317 def handle_const(self):
1318 pass
1319
1320 def handle_inline(self):
1321 pass
1322
1323 def handle_extern(self):
1324 pass
1325
1326 def handle_static(self):
1327 pass
1328
1330 # What follows must be a method.
1331 token = token2 = self._GetNextToken()
1332 if token.name == 'inline':
1333 # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
1334 token2 = self._GetNextToken()
1335 if token2.token_type == tokenize.SYNTAX and token2.name == '~':
1336 return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
1337 assert token.token_type == tokenize.NAME or token.name == '::', token
1338 return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
1339 tokenize.SYNTAX, '(') # )
1340 return_type_and_name.insert(0, token)
1341 if token2 is not token:
1342 return_type_and_name.insert(1, token2)
1343 return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
1344 None, False)
1345
1347 pass
1348
1350 pass
1351
1352 def handle_public(self):
1353 assert self.in_class
1354 self.visibility = VISIBILITY_PUBLIC
1355
1357 assert self.in_class
1358 self.visibility = VISIBILITY_PROTECTED
1359
1361 assert self.in_class
1362 self.visibility = VISIBILITY_PRIVATE
1363
1364 def handle_friend(self):
1365 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1366 assert tokens
1367 t0 = tokens[0]
1368 return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1369
1371 pass
1372
1374 pass
1375
1377 pass
1378
1380 pass
1381
1382 def handle_new(self):
1383 pass
1384
1385 def handle_delete(self):
1386 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1387 assert tokens
1388 return Delete(tokens[0].start, tokens[0].end, tokens)
1389
1391 token = self._GetNextToken()
1392 if (token.token_type == tokenize.NAME and
1393 keywords.IsKeyword(token.name)):
1394 # Token must be struct/enum/union/class.
1395 method = getattr(self, 'handle_' + token.name)
1396 self._handling_typedef = True
1397 tokens = [method()]
1398 self._handling_typedef = False
1399 else:
1400 tokens = [token]
1401
1402 # Get the remainder of the typedef up to the semi-colon.
1403 tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))
1404
1405 # TODO(nnorwitz): clean all this up.
1406 assert tokens
1407 name = tokens.pop()
1408 indices = name
1409 if tokens:
1410 indices = tokens[0]
1411 if not indices:
1412 indices = token
1413 if name.name == ')':
1414 # HACK(nnorwitz): Handle pointers to functions "properly".
1415 if (len(tokens) >= 4 and
1416 tokens[1].name == '(' and tokens[2].name == '*'):
1417 tokens.append(name)
1418 name = tokens[3]
1419 elif name.name == ']':
1420 # HACK(nnorwitz): Handle arrays properly.
1421 if len(tokens) >= 2:
1422 tokens.append(name)
1423 name = tokens[1]
1424 new_type = tokens
1425 if tokens and isinstance(tokens[0], tokenize.Token):
1426 new_type = self.converter.ToType(tokens)[0]
1427 return Typedef(indices.start, indices.end, name.name,
1428 new_type, self.namespace_stack)
1429
1430 def handle_typeid(self):
1431 pass # Not needed yet.
1432
1434 pass # Not needed yet.
1435
1436 def _GetTemplatedTypes(self):
1437 result = collections.OrderedDict()
1438 tokens = list(self._GetMatchingChar('<', '>'))
1439 len_tokens = len(tokens) - 1 # Ignore trailing '>'.
1440 i = 0
1441 while i < len_tokens:
1442 key = tokens[i].name
1443 i += 1
1444 if keywords.IsKeyword(key) or key == ',':
1445 continue
1446 type_name = default = None
1447 if i < len_tokens:
1448 i += 1
1449 if tokens[i-1].name == '=':
1450 assert i < len_tokens, '%s %s' % (i, tokens)
1451 default, unused_next_token = self.GetName(tokens[i:])
1452 i += len(default)
1453 else:
1454 if tokens[i-1].name != ',':
1455 # We got something like: Type variable.
1456 # Re-adjust the key (variable) and type_name (Type).
1457 key = tokens[i-1].name
1458 type_name = tokens[i-2]
1459
1460 result[key] = (type_name, default)
1461 return result
1462
1464 token = self._GetNextToken()
1465 assert token.token_type == tokenize.SYNTAX, token
1466 assert token.name == '<', token
1467 templated_types = self._GetTemplatedTypes()
1468 # TODO(nnorwitz): for now, just ignore the template params.
1469 token = self._GetNextToken()
1470 if token.token_type == tokenize.NAME:
1471 if token.name == 'class':
1472 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
1473 elif token.name == 'struct':
1474 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
1475 elif token.name == 'friend':
1476 return self.handle_friend()
1477 self._AddBackToken(token)
1478 tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
1479 tokens.append(last)
1480 self._AddBackTokens(tokens)
1481 if last.name == '(':
1482 return self.GetMethod(FUNCTION_NONE, templated_types)
1483 # Must be a variable definition.
1484 return None
1485
1486 def handle_true(self):
1487 pass # Nothing to do.
1488
1489 def handle_false(self):
1490 pass # Nothing to do.
1491
1492 def handle_asm(self):
1493 pass # Not needed yet.
1494
1495 def handle_class(self):
1496 return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1497
1498 def _GetBases(self):
1499 # Get base classes.
1500 bases = []
1501 while 1:
1502 token = self._GetNextToken()
1503 assert token.token_type == tokenize.NAME, token
1504 # TODO(nnorwitz): store kind of inheritance...maybe.
1505 if token.name not in ('public', 'protected', 'private'):
1506 # If inheritance type is not specified, it is private.
1507 # Just put the token back so we can form a name.
1508 # TODO(nnorwitz): it would be good to warn about this.
1509 self._AddBackToken(token)
1510 else:
1511 # Check for virtual inheritance.
1512 token = self._GetNextToken()
1513 if token.name != 'virtual':
1514 self._AddBackToken(token)
1515 else:
1516 # TODO(nnorwitz): store that we got virtual for this base.
1517 pass
1518 base, next_token = self.GetName()
1519 bases_ast = self.converter.ToType(base)
1520 assert len(bases_ast) == 1, bases_ast
1521 bases.append(bases_ast[0])
1522 assert next_token.token_type == tokenize.SYNTAX, next_token
1523 if next_token.name == '{':
1524 token = next_token
1525 break
1526 # Support multiple inheritance.
1527 assert next_token.name == ',', next_token
1528 return bases, token
1529
1530 def _GetClass(self, class_type, visibility, templated_types):
1531 class_name = None
1532 class_token = self._GetNextToken()
1533 if class_token.token_type != tokenize.NAME:
1534 assert class_token.token_type == tokenize.SYNTAX, class_token
1535 token = class_token
1536 else:
1537 # Skip any macro (e.g. storage class specifiers) after the
1538 # 'class' keyword.
1539 next_token = self._GetNextToken()
1540 if next_token.token_type == tokenize.NAME:
1541 self._AddBackToken(next_token)
1542 else:
1543 self._AddBackTokens([class_token, next_token])
1544 name_tokens, token = self.GetName()
1545 class_name = ''.join([t.name for t in name_tokens])
1546 bases = None
1547 if token.token_type == tokenize.SYNTAX:
1548 if token.name == ';':
1549 # Forward declaration.
1550 return class_type(class_token.start, class_token.end,
1551 class_name, None, templated_types, None,
1552 self.namespace_stack)
1553 if token.name in '*&':
1554 # Inline forward declaration. Could be method or data.
1555 name_token = self._GetNextToken()
1556 next_token = self._GetNextToken()
1557 if next_token.name == ';':
1558 # Handle data
1559 modifiers = ['class']
1560 return self._CreateVariable(class_token, name_token.name,
1561 class_name,
1562 modifiers, token.name, None)
1563 else:
1564 # Assume this is a method.
1565 tokens = (class_token, token, name_token, next_token)
1566 self._AddBackTokens(tokens)
1567 return self.GetMethod(FUNCTION_NONE, None)
1568 if token.name == ':':
1569 bases, token = self._GetBases()
1570
1571 body = None
1572 if token.token_type == tokenize.SYNTAX and token.name == '{':
1573 assert token.token_type == tokenize.SYNTAX, token
1574 assert token.name == '{', token
1575
1576 ast = AstBuilder(self.GetScope(), self.filename, class_name,
1577 visibility, self.namespace_stack)
1578 body = list(ast.Generate())
1579
1580 if not self._handling_typedef:
1581 token = self._GetNextToken()
1582 if token.token_type != tokenize.NAME:
1583 assert token.token_type == tokenize.SYNTAX, token
1584 assert token.name == ';', token
1585 else:
1586 new_class = class_type(class_token.start, class_token.end,
1587 class_name, bases, None,
1588 body, self.namespace_stack)
1589
1590 modifiers = []
1591 return self._CreateVariable(class_token,
1592 token.name, new_class,
1593 modifiers, token.name, None)
1594 else:
1595 if not self._handling_typedef:
1596 self.HandleError('non-typedef token', token)
1597 self._AddBackToken(token)
1598
1599 return class_type(class_token.start, class_token.end, class_name,
1600 bases, templated_types, body, self.namespace_stack)
1601
1603 # Support anonymous namespaces.
1604 name = None
1605 name_tokens, token = self.GetName()
1606 if name_tokens:
1607 name = ''.join([t.name for t in name_tokens])
1608 self.namespace_stack.append(name)
1609 assert token.token_type == tokenize.SYNTAX, token
1610 # Create an internal token that denotes when the namespace is complete.
1611 internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
1612 None, None)
1613 internal_token.whence = token.whence
1614 if token.name == '=':
1615 # TODO(nnorwitz): handle aliasing namespaces.
1616 name, next_token = self.GetName()
1617 assert next_token.name == ';', next_token
1618 self._AddBackToken(internal_token)
1619 else:
1620 assert token.name == '{', token
1621 tokens = list(self.GetScope())
1622 # Replace the trailing } with the internal namespace pop token.
1623 tokens[-1] = internal_token
1624 # Handle namespace with nothing in it.
1625 self._AddBackTokens(tokens)
1626 return None
1627
1628 def handle_using(self):
1629 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1630 assert tokens
1631 return Using(tokens[0].start, tokens[0].end, tokens)
1632
1634 assert self.in_class
1635 # Nothing much to do.
1636 # TODO(nnorwitz): maybe verify the method name == class name.
1637 # This must be a ctor.
1638 return self.GetMethod(FUNCTION_CTOR, None)
1639
1640 def handle_this(self):
1641 pass # Nothing to do.
1642
1644 # Pull off the next token(s?) and make that part of the method name.
1645 pass
1646
1647 def handle_sizeof(self):
1648 pass
1649
1650 def handle_case(self):
1651 pass
1652
1653 def handle_switch(self):
1654 pass
1655
1657 token = self._GetNextToken()
1658 assert token.token_type == tokenize.SYNTAX
1659 assert token.name == ':'
1660
1661 def handle_if(self):
1662 pass
1663
1664 def handle_else(self):
1665 pass
1666
1667 def handle_return(self):
1668 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1669 if not tokens:
1670 return Return(self.current_token.start, self.current_token.end, None)
1671 return Return(tokens[0].start, tokens[0].end, tokens)
1672
1673 def handle_goto(self):
1674 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1675 assert len(tokens) == 1, str(tokens)
1676 return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1677
1678 def handle_try(self):
1679 pass # Not needed yet.
1680
1681 def handle_catch(self):
1682 pass # Not needed yet.
1683
1684 def handle_throw(self):
1685 pass # Not needed yet.
1686
1687 def handle_while(self):
1688 pass
1689
1690 def handle_do(self):
1691 pass
1692
1693 def handle_for(self):
1694 pass
1695
1696 def handle_break(self):
1697 self._IgnoreUpTo(tokenize.SYNTAX, ';')
1698
1700 self._IgnoreUpTo(tokenize.SYNTAX, ';')
1701
1702
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
1714
1715
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    (Name kept as-is, typo and all, for backward compatibility.)

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Best-effort: swallow parse errors so one bad file does not abort
        # identifier printing.  (Was a bare 'except:', which also hid
        # SystemExit.)
        pass
1738
1739
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for path in filenames:
        PrintIndentifiers(path, should_print)
1749
1750
def main(argv):
    """Parse each C++ file named on the command line and dump its AST.

    Args:
      argv: sys.argv-style list; argv[1:] are the filenames to process.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Materialize inside the try block: under Python 3, filter()
            # is lazy, so parse errors would otherwise escape this handler
            # and surface while iterating below.
            entire_ast = list(filter(None, builder.Generate()))
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            # (Was a bare 'except:', which also hid SystemExit.)
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1770
1771
if __name__ == '__main__':
    main(sys.argv)
LIST pop(LIST list)
Definition: oldlist.cpp:166
def reversed(seq)
Definition: ast.py:50
def next(obj)
Definition: ast.py:56
def BuilderFromSource(source, filename)
Definition: ast.py:1703
def PrintIndentifiers(filename, should_print)
Definition: ast.py:1716
def PrintAllIndentifiers(filenames, should_print)
Definition: ast.py:1740
def main(argv)
Definition: ast.py:1751
def __init__(self, start, end)
Definition: ast.py:110
def IsExportable(self)
Definition: ast.py:122
def Requires(self, node)
Definition: ast.py:126
def IsDefinition(self)
Definition: ast.py:118
def IsDeclaration(self)
Definition: ast.py:114
def XXX__str__(self)
Definition: ast.py:130
def __repr__(self)
Definition: ast.py:138
def _StringHelper(self, name, suffix)
Definition: ast.py:133
def __str__(self)
Definition: ast.py:148
def __init__(self, start, end, name, definition)
Definition: ast.py:143
def __init__(self, start, end, filename, system)
Definition: ast.py:154
def __str__(self)
Definition: ast.py:159
def __str__(self)
Definition: ast.py:171
def __init__(self, start, end, label)
Definition: ast.py:167
def __str__(self)
Definition: ast.py:184
def Requires(self, node)
Definition: ast.py:180
def __init__(self, start, end, expr)
Definition: ast.py:176
def __init__(self, start, end, expr, namespace)
Definition: ast.py:197
def __init__(self, start, end, names)
Definition: ast.py:203
def __str__(self)
Definition: ast.py:207
def Requires(self, node)
Definition: ast.py:218
def __init__(self, start, end, name, parameter_type, default)
Definition: ast.py:212
def __str__(self)
Definition: ast.py:222
def _TypeStringHelper(self, suffix)
Definition: ast.py:242
def __init__(self, start, end, name, namespace)
Definition: ast.py:231
def Requires(self, node)
Definition: ast.py:256
def __init__(self, start, end, name, var_type, initial_value, namespace)
Definition: ast.py:251
def IsDefinition(self)
Definition: ast.py:276
def __str__(self)
Definition: ast.py:290
def Requires(self, node)
Definition: ast.py:282
def IsExportable(self)
Definition: ast.py:279
def __init__(self, start, end, name, alias, namespace)
Definition: ast.py:272
def __str__(self)
Definition: ast.py:306
def __init__(self, start, end, name, fields, namespace)
Definition: ast.py:296
def IsExportable(self)
Definition: ast.py:303
def IsDefinition(self)
Definition: ast.py:300
def __str__(self)
Definition: ast.py:346
def IsExportable(self)
Definition: ast.py:332
def Requires(self, node)
Definition: ast.py:335
def IsDeclaration(self)
Definition: ast.py:326
templated_types
Definition: ast.py:324
def __init__(self, start, end, name, bases, templated_types, body, namespace)
Definition: ast.py:320
def IsDefinition(self)
Definition: ast.py:329
def IsExportable(self)
Definition: ast.py:375
def Requires(self, node)
Definition: ast.py:380
def __str__(self)
Definition: ast.py:389
def __init__(self, start, end, name, return_type, parameters, modifiers, templated_types, body, namespace)
Definition: ast.py:360
def IsDeclaration(self)
Definition: ast.py:369
def IsDefinition(self)
Definition: ast.py:372
def __init__(self, start, end, name, in_class, return_type, parameters, modifiers, templated_types, body, namespace)
Definition: ast.py:399
def __str__(self)
Definition: ast.py:428
def IsDeclaration(self)
Definition: ast.py:446
templated_types
Definition: ast.py:420
def IsExportable(self)
Definition: ast.py:452
def __init__(self, start, end, name, templated_types, modifiers, reference, pointer, array)
Definition: ast.py:411
def IsDefinition(self)
Definition: ast.py:449
def GetTemplateIndices(self, names)
Definition: ast.py:676
def DeclarationToParts(self, parts, needs_name_removed)
Definition: ast.py:534
def CreateReturnType(self, return_type_seq)
Definition: ast.py:662
def ToType(self, tokens)
Definition: ast.py:475
def _GetTemplateEnd(self, tokens, start)
Definition: ast.py:461
def __init__(self, namespace_stack)
Definition: ast.py:458
def ToParameters(self, tokens)
Definition: ast.py:589
def handle_for(self)
Definition: ast.py:1693
def handle_reinterpret_cast(self)
Definition: ast.py:1379
def _GetTemplatedTypes(self)
Definition: ast.py:1436
def handle_long(self)
Definition: ast.py:1209
def handle_auto(self)
Definition: ast.py:1309
def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type, *expected_tokens)
Definition: ast.py:883
def handle_template(self)
Definition: ast.py:1463
def handle_switch(self)
Definition: ast.py:1653
def handle_public(self)
Definition: ast.py:1352
def _AddBackToken(self, token)
Definition: ast.py:950
def handle_break(self)
Definition: ast.py:1696
def handle_volatile(self)
Definition: ast.py:1346
def _SkipIf0Blocks(self)
Definition: ast.py:902
def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None)
Definition: ast.py:917
def GetName(self, seq=None)
Definition: ast.py:968
def handle_enum(self)
Definition: ast.py:1306
def handle_if(self)
Definition: ast.py:1661
def HandleError(self, msg, token)
Definition: ast.py:709
def handle_virtual(self)
Definition: ast.py:1329
def handle_extern(self)
Definition: ast.py:1323
def handle_typename(self)
Definition: ast.py:1433
def handle_const_cast(self)
Definition: ast.py:1373
def handle_wchar_t(self)
Definition: ast.py:1224
def handle_continue(self)
Definition: ast.py:1699
def handle_do(self)
Definition: ast.py:1690
def _GetBases(self)
Definition: ast.py:1498
def handle_explicit(self)
Definition: ast.py:1633
def _GetNestedType(self, ctor)
Definition: ast.py:1233
def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens)
Definition: ast.py:871
def handle_signed(self)
Definition: ast.py:1230
def handle_dynamic_cast(self)
Definition: ast.py:1376
def handle_mutable(self)
Definition: ast.py:1349
def handle_default(self)
Definition: ast.py:1656
def handle_try(self)
Definition: ast.py:1678
def handle_int(self)
Definition: ast.py:1206
def handle_using(self)
Definition: ast.py:1628
def handle_unsigned(self)
Definition: ast.py:1227
def handle_void(self)
Definition: ast.py:1221
def handle_operator(self)
Definition: ast.py:1643
def handle_typedef(self)
Definition: ast.py:1390
def handle_return(self)
Definition: ast.py:1667
def handle_goto(self)
Definition: ast.py:1673
def handle_new(self)
Definition: ast.py:1382
def handle_register(self)
Definition: ast.py:1314
def _GetParameters(self)
Definition: ast.py:936
def handle_private(self)
Definition: ast.py:1360
def handle_class(self)
Definition: ast.py:1495
def _GetTokensUpTo(self, expected_token_type, expected_token)
Definition: ast.py:868
def handle_union(self)
Definition: ast.py:1303
def handle_const(self)
Definition: ast.py:1317
def _AddBackTokens(self, tokens)
Definition: ast.py:958
def handle_catch(self)
Definition: ast.py:1681
def handle_protected(self)
Definition: ast.py:1356
def handle_delete(self)
Definition: ast.py:1385
def handle_float(self)
Definition: ast.py:1218
def handle_while(self)
Definition: ast.py:1687
def handle_struct(self)
Definition: ast.py:1267
def __init__(self, token_stream, filename, in_class='', visibility=None, namespace_stack=[])
Definition: ast.py:688
def handle_inline(self)
Definition: ast.py:1320
def handle_static_cast(self)
Definition: ast.py:1370
def handle_friend(self)
Definition: ast.py:1364
def _GetReturnTypeAndClassName(self, token_seq)
Definition: ast.py:1156
def _GetNextToken(self)
Definition: ast.py:942
def handle_typeid(self)
Definition: ast.py:1430
def handle_case(self)
Definition: ast.py:1650
def handle_bool(self)
Definition: ast.py:1200
def handle_asm(self)
Definition: ast.py:1492
def handle_sizeof(self)
Definition: ast.py:1647
def handle_double(self)
Definition: ast.py:1215
def _GetMethod(self, return_type_and_name, modifiers, templated_types, get_paren)
Definition: ast.py:1000
def handle_true(self)
Definition: ast.py:1486
def handle_throw(self)
Definition: ast.py:1684
def handle_this(self)
Definition: ast.py:1640
def handle_static(self)
Definition: ast.py:1326
def GetScope(self)
Definition: ast.py:939
def handle_short(self)
Definition: ast.py:1212
def handle_char(self)
Definition: ast.py:1203
def _GenerateOne(self, token)
Definition: ast.py:748
def handle_namespace(self)
Definition: ast.py:1602
def handle_else(self)
Definition: ast.py:1664
def Generate(self)
Definition: ast.py:714
def _CreateVariable(self, pos_token, name, type_name, type_modifiers, ref_pointer_name_seq, templated_types, value=None)
Definition: ast.py:738
def GetMethod(self, modifiers, templated_types)
Definition: ast.py:993
def _GetClass(self, class_type, visibility, templated_types)
Definition: ast.py:1530
def handle_false(self)
Definition: ast.py:1489
def _IgnoreUpTo(self, token_type, token)
Definition: ast.py:899