1 #-----------------------------------------------------------------------------
4 # Author: David M. Beazley (beazley@cs.uchicago.edu)
5 # Department of Computer Science
6 # University of Chicago
9 # Copyright (C) 2001, David M. Beazley
11 # $Header: /home/davidb/src/cvs/python-rwhoisd/rwhoisd/yacc.py,v 1.1 2003/04/22 02:19:07 davidb Exp $
13 # This library is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU Lesser General Public
15 # License as published by the Free Software Foundation; either
16 # version 2.1 of the License, or (at your option) any later version.
18 # This library is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # Lesser General Public License for more details.
23 # You should have received a copy of the GNU Lesser General Public
24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # See the file COPYING for a complete copy of the LGPL.
30 # This implements an LR parser that is constructed from grammar rules defined
31 # as Python functions. Roughly speaking, this module is a cross between
32 # John Aycock's Spark system and the GNU bison utility.
34 # Disclaimer: This is a work in progress. SLR parsing seems to work fairly
35 # well and there is extensive error checking. LALR(1) is in progress. The
36 # rest of this file is a bit of a mess. Please pardon the dust.
38 # The current implementation is only somewhat object-oriented. The
39 # LR parser itself is defined in terms of an object (which allows multiple
40 # parsers to co-exist). However, most of the variables used during table
41 # construction are defined in terms of global variables. Users shouldn't
42 # notice unless they are trying to define multiple parsers at the same
43 # time using threads (in which case they should have their head examined).
44 #-----------------------------------------------------------------------------
48 #-----------------------------------------------------------------------------
49 # === User configurable parameters ===
51 # Change these to modify the default behavior of yacc (if you wish)
52 #-----------------------------------------------------------------------------
# --- User-tunable defaults for table generation and debug output. ---
# NOTE(review): this listing is a fragmentary numbered dump -- original source
# line numbers are baked into each line and some lines are missing.
54 yaccdebug = 1 # Debugging mode. If set, yacc generates a
55 # 'parser.out' file in the current directory
57 debug_file = 'parser.out' # Default name of the debugging file
58 tab_module = 'parsetab' # Default name of the table module
59 default_lr = 'SLR' # Default LR table generation method
# Classic yacc behaviour: this many tokens must shift cleanly before the
# parser leaves error-recovery mode.
61 error_count = 3 # Number of symbols that must be shifted to leave recovery mode
63 import re, types, sys, cStringIO, md5, os.path
65 # Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for all yacc-related errors (grammar construction
    problems and parser misuse).  The name and base class are part of the
    public interface -- callers catch this type."""
    pass
68 #-----------------------------------------------------------------------------
69 # === LR Parsing Engine ===
71 # The following classes are used for the LR parser itself. These are not
72 # used during table construction and are independent of the actual LR
73 # table generation algorithm
74 #-----------------------------------------------------------------------------
76 # This class is used to hold non-terminal grammar symbols during parsing.
77 # It normally has the following attributes set:
78 # .type = Grammar symbol type
79 # .value = Symbol value
80 # .lineno = Starting line number
81 # .endlineno = Ending line number (optional, set automatically)
def __str__(self):
    """A grammar symbol prints as its token/rule type name (.type)."""
    return self.type
def __repr__(self):
    """Debug representation mirrors str() -- the symbol's type name."""
    return str(self)
87 # This class is a wrapper around the objects actually passed to each
88 # grammar rule. Index lookup and assignment actually assign the
89 # .value attribute of the underlying YaccSymbol object.
90 # The lineno() method returns the line number of a given
91 # item (or 0 if not defined). The linespan() method returns
92 # a tuple of (startline,endline) representing the range of lines
def __getitem__(self, n):
    """Indexing the rule slice (p[n]) returns the semantic .value of the
    underlying symbol/token object at position n."""
    return self.slice[n].value
def __setitem__(self, n, v):
    """Assigning to the rule slice (p[n] = v) stores v as the .value of
    the underlying symbol object; this is how rules set their result
    via p[0] = ...."""
    self.slice[n].value = v
# NOTE(review): fragment -- the enclosing `def lineno(self,n):` header is
# missing from this listing.  Presumably the body of YaccSlice.lineno,
# returning the recorded line number of slice item n (0 if unset) --
# confirm against the complete source.
107 return getattr(self.slice[n],"lineno",0)
def linespan(self, n):
    """Return (startline, endline) for item n of the rule slice.

    Falls back to 0 when no line number was recorded, and to the start
    line when no separate end line is available.
    """
    startline = getattr(self.slice[n], "lineno", 0)
    endline = getattr(self.slice[n], "endlineno", startline)
    return startline, endline
114 def pushback(self,n):
# Push the last n right-hand-side symbols back onto the lookahead stack.
# NOTE(review): lines are missing from this listing (the condition of the
# positivity guard and the loop header binding `i` that precedes the
# append below) -- confirm against the complete source.
116 raise ValueError, "Expected a positive value"
117 if n > (len(self.slice)-1):
118 raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
120 self.pbstack.append(self.slice[-i-1])
122 # The LR Parsing engine. This is defined as a class so that multiple parsers
123 # can exist in the same process. A user never instantiates this directly.
124 # Instead, the global yacc() function should be used to create a suitable Parser
# NOTE(review): the `class Parser:` header and several statements are
# missing from this fragmentary listing.
128 def __init__(self,magic=None):
130 # This is a hack to keep users from trying to instantiate a Parser
# The magic-value check (missing here) rejects direct construction;
# presumably only yacc() passes the expected magic -- confirm upstream.
134 raise YaccError, "Can't instantiate Parser. Use yacc() instead."
136 # Reset internal state
137 self.productions = None # List of productions
138 self.errorfunc = None # Error handling function
139 self.action = { } # LR Action table
140 self.goto = { } # LR goto table
141 self.require = { } # Attribute require table
142 self.method = "Unknown LR" # Table construction method used
# NOTE(review): fragments of the error-recovery helpers (errok/restart)
# follow; their `def` headers are missing from this listing.
148 del self.statestack[:]
152 self.symstack.append(sym)
153 self.statestack.append(0)
# The main LR parsing loop: repeatedly consult the action table with the
# current (state, lookahead) pair and shift, reduce, accept, or enter
# error recovery.  NOTE(review): this listing is heavily fragmentary --
# many original lines are missing throughout; do not treat the statements
# below as a contiguous, runnable body.
155 def parse(self,input=None,lexer=None,debug=0):
156 lookahead = None # Current lookahead symbol
157 lookaheadstack = [ ] # Stack of lookahead symbols
158 actions = self.action # Local reference to action table
159 goto = self.goto # Local reference to goto table
160 prod = self.productions # Local reference to production list
161 pslice = YaccSlice(None) # Slice object passed to grammar rules
162 pslice.parser = self # Parser object
163 self.errorcount = 0 # Used during error recovery
165 # If no lexer was given, we will try to use the lex module
171 # If input was supplied, pass to lexer
176 get_token = lexer.token
178 statestack = [ ] # Stack of parsing states
179 self.statestack = statestack
180 symstack = [ ] # Stack of grammar symbols
181 self.symstack = symstack
183 errtoken = None # Err token
185 # The start state is assumed to be (0,$)
192 # Get the next symbol on the input. If a lookahead symbol
193 # is already set, we just use that. Otherwise, we'll pull
194 # the next token off of the lookaheadstack or from the lexer
196 if not lookaheadstack:
197 lookahead = get_token() # Get the next token
199 lookahead = lookaheadstack.pop()
201 lookahead = YaccSymbol()
204 print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])
206 # Check the action table
208 ltype = lookahead.type
209 t = actions.get((s,ltype),None)
# Positive action entry: shift to that state.
213 # shift a symbol on the stack
215 # Error, end of input
216 print "yacc: Parse error. EOF"
219 symstack.append(lookahead)
222 # Decrease error count on successful shift
223 if self.errorcount > 0:
# Negative action entry: reduce by production number -t.
229 # reduce a symbol on the stack, emit a production
234 # Get production function
236 sym.type = pname # Production name
240 targ = symstack[-plen-1:]
# Propagate line-number span from the reduced symbols to the new symbol.
243 sym.lineno = targ[1].lineno
244 sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
245 except AttributeError:
248 del statestack[-plen:]
254 # Call the grammar rule with our special slice object
257 # Validate attributes of the resulting value attribute
261 # r = Requires.get(t0.type,None)
266 # for fname in field:
270 # except StandardError:
273 # print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
274 # (p.file,p.line,p.name,".".join(field))
275 # except TypeError,LookupError:
276 # print "Bad requires directive " % r
280 # If there was a pushback, put that on the stack
282 lookaheadstack.append(lookahead)
283 for _t in pslice.pbstack:
284 lookaheadstack.append(_t)
288 statestack.append(goto[statestack[-1],pname])
# Accept: return the semantic value of the start symbol.
293 return getattr(n,"value",None)
296 # We have some kind of parsing error here. To handle this,
297 # we are going to push the current token onto the tokenstack
298 # and replace it with an 'error' token. If there are any synchronization
299 # rules, they may catch it.
301 # In addition to pushing the error token, we call the user defined p_error()
302 # function if this is the first syntax error. This function is only called
303 # if errorcount == 0.
305 if not self.errorcount:
306 self.errorcount = error_count
308 if errtoken.type == '$':
309 errtoken = None # End of file!
311 global errok,token,restart
312 errok = self.errok # Set some special functions available in error recovery
314 restart = self.restart
315 tok = self.errorfunc(errtoken)
316 del errok, token, restart # Delete special functions
318 if not self.errorcount:
319 # User must have done some kind of panic mode recovery on their own. The returned token
320 # is the next lookahead
326 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
329 print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
331 print "yacc: Syntax error, token=%s" % errtoken.type
333 print "yacc: Parse error in input. EOF"
337 self.errorcount = error_count
339 # case 1: the statestack only has 1 entry on it. If we're in this state, the
340 # entire parse has been rolled back and we're completely hosed. The token is
341 # discarded and we just keep going.
343 if len(statestack) <= 1 and lookahead.type != '$':
346 # Nuke the pushback stack
347 del lookaheadstack[:]
350 # case 2: the statestack has a couple of entries on it, but we're
351 # at the end of the file. nuke the top entry and generate an error token
353 # Start nuking entries on the stack
354 if lookahead.type == '$':
355 # Whoa. We're really hosed here. Bail out
358 if lookahead.type != 'error':
360 if sym.type == 'error':
361 # Hmmm. Error is on top of stack, we'll just nuke input
362 # symbol and continue
367 if hasattr(lookahead,"lineno"):
368 t.lineno = lookahead.lineno
370 lookaheadstack.append(lookahead)
378 # Call an error function here
# Reaching this point means the action table entry was malformed.
379 raise RuntimeError, "yacc: internal parser error!!!\n"
381 # -----------------------------------------------------------------------------
382 # === Parser Construction ===
384 # The following functions and variables are used to implement the yacc() function
385 # itself. This is pretty hairy stuff involving lots of error checking,
386 # construction of LR items, kernels, and so forth. Although a lot of
387 # this work is done using global variables, the resulting Parser object
388 # is completely self contained--meaning that it is safe to repeatedly
389 # call yacc() with different grammars in the same application.
390 # -----------------------------------------------------------------------------
392 # -----------------------------------------------------------------------------
395 # This function checks to see if there are duplicated p_rulename() functions
396 # in the parser module file. Without this function, it is really easy for
397 # users to make mistakes by cutting and pasting code fragments (and it's a real
398 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
399 # we just do a little regular expression pattern matching of def statements
400 # to try and detect duplicates.
401 # -----------------------------------------------------------------------------
# Scan the grammar module's source text for duplicated p_*() rule
# functions, which would silently shadow each other on import.
403 def validate_file(filename):
404 base,ext = os.path.splitext(filename)
405 if ext != '.py': return 1 # No idea. Assume it's okay.
# NOTE(review): the file open / error handling and the line-scanning loop
# are missing from this fragmentary listing.
409 lines = f.readlines()
414 # Match def p_funcname(
415 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
423 prev = counthash.get(name)
425 counthash[name] = linen
427 print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
432 # This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
433 def validate_dict(d):
434 for n,v in d.items():
435 if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
436 if n[0:2] == 't_': continue
# NOTE(review): lines are missing from this fragmentary listing between
# the checks below.
439 print "yacc: Warning. '%s' not defined as a function" % n
# Heuristic: a one-argument function whose docstring looks like a rule
# was probably meant to be a p_* grammar rule.
440 if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
442 doc = v.__doc__.split(" ")
444 print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n)
445 except StandardError:
448 # -----------------------------------------------------------------------------
449 # === GRAMMAR FUNCTIONS ===
451 # The following global variables and functions are used to store, manipulate,
452 # and verify the grammar rules specified by the user.
453 # -----------------------------------------------------------------------------
455 # Initialize all of the global variables used during grammar construction
456 def initialize_vars():
457 global Productions, Prodnames, Prodmap, Terminals
458 global Nonterminals, First, Follow, Precedence, LRitems
459 global Errorfunc, Signature, Requires
461 Productions = [None] # A list of all of the productions. The first
462 # entry is always reserved for the purpose of
463 # building an augmented grammar
465 Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
466 # productions of that nonterminal.
468 Prodmap = { } # A dictionary that is only used to detect duplicate
471 Terminals = { } # A dictionary mapping the names of terminal symbols to a
472 # list of the rules where they are used.
474 Nonterminals = { } # A dictionary mapping names of nonterminals to a list
475 # of rule numbers where they are used.
477 First = { } # A dictionary of precomputed FIRST(x) symbols
479 Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
481 Precedence = { } # Precedence rules for each terminal. Contains tuples of the
482 # form ('right',level) or ('nonassoc', level) or ('left',level)
484 LRitems = [ ] # A list of all LR items for the grammar. These are the
485 # productions with the "dot" like E -> E . PLUS E
487 Errorfunc = None # User defined error handler
489 Signature = md5.new() # Digital signature of the grammar rules, precedence
490 # and other information. Used to determine when a
491 # parsing table needs to be regenerated.
493 Requires = { } # Requires list
495 # File objects used when creating the parser.out debugging file
# NOTE(review): fragment -- presumably _vf/_vfc are also declared global
# in a missing line; confirm against the complete source.
497 _vf = cStringIO.StringIO()
498 _vfc = cStringIO.StringIO()
500 # -----------------------------------------------------------------------------
503 # This class stores the raw information about a single production or grammar rule.
504 # It has a few required attributes:
506 # name - Name of the production (nonterminal)
507 # prod - A list of symbols making up its production
508 # number - Production number.
510 # In addition, a few additional attributes are used to help with debugging or
511 # optimization of table generation.
513 # file - File where production action is defined.
514 # lineno - Line number where action is defined
515 # func - Action function
516 # prec - Precedence level
517 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
518 # then lr_next refers to 'E -> E PLUS . E'
519 # lr_index - LR item index (location of the ".") in the prod list.
520 # len - Length of the production (number of symbols on right hand side)
521 # -----------------------------------------------------------------------------
# NOTE(review): the `class Production:` header and many statements are
# missing from this fragmentary listing; the lines below are pieces of
# Production.__init__, __str__, and lr_item.
524 def __init__(self,**kw):
# All attributes (name, prod, number, func, file, line, prec, ...) are
# supplied as keyword arguments by the caller.
525 for k,v in kw.items():
528 self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure
# __str__ fragment: render as "name -> sym1 sym2" or "name -> <empty>".
533 s = "%s -> %s" % (self.name," ".join(self.prod))
535 s = "%s -> <empty>" % self.name
541 # Compute lr_items from the production
543 if n > len(self.prod): return None
546 p.prod = list(self.prod)
547 p.number = self.number
550 p.prod = tuple(p.prod)
554 # Precompute list of productions immediately following
556 p.lrafter = Prodnames[p.prod[n+1]]
557 except (IndexError,KeyError),e:
560 p.lrbefore = p.prod[n-1]
# MiniProduction body is missing from this listing.
566 class MiniProduction:
def is_identifier(s):
    """Return 1 if every character of s is alphanumeric or '_', else 0.

    Used to vet rule and symbol names in grammar docstrings.  Note that
    the empty string vacuously passes (returns 1), and 0/1 (not
    True/False) are kept for compatibility with existing callers.
    """
    for c in s:
        if not (c.isalnum() or c == '_'):
            return 0
    return 1
575 # -----------------------------------------------------------------------------
578 # Given an action function, this function assembles a production rule.
579 # The production rule is assumed to be found in the function's docstring.
580 # This rule has the general syntax:
582 # name1 ::= production1
587 # name2 ::= production1
590 # -----------------------------------------------------------------------------
# Validate one grammar rule (prodname -> syms) and register it in the
# global Productions / Prodnames / Terminals / Nonterminals tables.
# NOTE(review): this listing is fragmentary -- return statements, the
# Production construction, and several loop headers are missing.
592 def add_production(f,file,line,prodname,syms):
594 if Terminals.has_key(prodname):
595 print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname)
597 if prodname == 'error':
598 print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname)
601 if not is_identifier(prodname):
602 print "%s:%d: Illegal rule name '%s'" % (file,line,prodname)
606 if not is_identifier(s) and s != '%prec':
607 print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)
610 # See if the rule is already in the rulemap
611 map = "%s -> %s" % (prodname,syms)
612 if Prodmap.has_key(map):
614 print "%s:%d: Duplicate rule %s." % (file,line, m)
615 print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line)
624 p.number = len(Productions)
627 Productions.append(p)
629 if not Nonterminals.has_key(prodname):
630 Nonterminals[prodname] = [ ]
632 # Add all terminals to Terminals
634 while i < len(p.prod):
# %prec handling: the following symbol names a terminal whose precedence
# this production adopts.
638 precname = p.prod[i+1]
640 print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line)
643 prec = Precedence.get(precname,None)
645 print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname)
653 if Terminals.has_key(t):
654 Terminals[t].append(p.number)
655 # Is a terminal. We'll assign a precedence to p based on this
656 if not hasattr(p,"prec"):
657 p.prec = Precedence.get(t,('right',0))
659 if not Nonterminals.has_key(t):
660 Nonterminals[t] = [ ]
661 Nonterminals[t].append(p.number)
664 if not hasattr(p,"prec"):
667 # Set final length of productions
669 p.prod = tuple(p.prod)
671 # Calculate unique syms in the production
677 # Add to the global productions list
679 Prodnames[p.name].append(p)
681 Prodnames[p.name] = [ p ]
684 # Given a raw rule function, this function rips out its doc string
685 # and adds rules to the grammar
# NOTE(review): the `def` header of this function (presumably
# add_function(f) or similar) is missing from this fragmentary listing,
# along with the docstring-parsing loop body.
688 line = f.func_code.co_firstlineno
689 file = f.func_code.co_filename
692 if f.func_code.co_argcount > 1:
693 print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
696 if f.func_code.co_argcount < 1:
697 print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
701 # Split the doc string into lines
702 pstrings = f.__doc__.splitlines()
711 # This is a continuation of a previous rule
713 print "%s:%d: Misplaced '|'." % (file,dline)
728 if assign != ':' and assign != '::=':
729 print "%s:%d: Syntax error. Expected ':'" % (file,dline)
731 e = add_production(f,file,dline,prodname,syms)
733 except StandardError:
734 print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps)
737 print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__)
741 # Cycle checking code (Michael Dyck)
743 def compute_reachable():
745 Find each symbol that can be reached from the start symbol.
746 Print a warning for any nonterminals that can't be reached.
747 (Unused terminals have already had their warning.)
# NOTE(review): docstring delimiters and several statements are missing
# from this fragmentary listing.
750 for s in Terminals.keys() + Nonterminals.keys():
753 mark_reachable_from( Productions[0].prod[0], Reachable )
755 for s in Nonterminals.keys():
757 print "yacc: Symbol '%s' is unreachable." % s
# Recursive depth-first marking over Prodnames.
759 def mark_reachable_from(s, Reachable):
761 Mark all symbols that are reachable from symbol s.
764 # We've already reached symbol s.
767 for p in Prodnames.get(s,[]):
769 mark_reachable_from(r, Reachable)
771 # -----------------------------------------------------------------------------
772 # compute_terminates()
774 # This function looks at the various parsing rules and tries to detect
775 # infinite recursion cycles (grammar rules where there is no possible way
776 # to derive a string of only terminals).
777 # -----------------------------------------------------------------------------
# Fixed-point computation: a nonterminal "terminates" iff some production
# of it consists entirely of terminating symbols.  Symbols that never
# terminate indicate infinite recursion in the grammar.
# NOTE(review): docstring delimiters, the Terminates initialisation, and
# loop bodies are missing from this fragmentary listing.
778 def compute_terminates():
780 Raise an error for any symbols that don't terminate.
785 for t in Terminals.keys():
792 # Initialize to false:
793 for n in Nonterminals.keys():
796 # Then propagate termination until no change:
799 for (n,pl) in Prodnames.items():
800 # Nonterminal n terminates iff any of its productions terminates.
802 # Production p terminates iff all of its rhs symbols terminate.
804 if not Terminates[s]:
805 # The symbol s does not terminate,
806 # so production p does not terminate.
810 # didn't break from the loop,
811 # so every symbol s terminates
812 # so production p terminates.
816 # symbol n terminates!
817 if not Terminates[n]:
820 # Don't need to consider any more productions for this n.
827 for (s,terminates) in Terminates.items():
829 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
830 # s is used-but-not-defined, and we've already warned of that,
831 # so it would be overkill to say that it's also non-terminating.
834 print "yacc: Infinite recursion detected for symbol '%s'." % s
839 # -----------------------------------------------------------------------------
840 # verify_productions()
842 # This function examines all of the supplied rules to see if they seem valid.
843 # -----------------------------------------------------------------------------
# Sanity-check the assembled grammar: undefined symbols, unused tokens and
# rules, and (optionally) non-terminating cycles.  Also dumps the grammar
# to the _vf debug buffer when yaccdebug is on.
# NOTE(review): this listing is fragmentary -- counters (unused_tok,
# unused_prod, error), sort calls, and loop headers are missing.
844 def verify_productions(cycle_check=1):
846 for p in Productions:
850 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
851 print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s)
856 # Now verify all of the tokens
858 _vf.write("Unused terminals:\n\n")
859 for s,v in Terminals.items():
860 if s != 'error' and not v:
861 print "yacc: Warning. Token '%s' defined, but not used." % s
862 if yaccdebug: _vf.write(" %s\n"% s)
865 # Print out all of the productions
867 _vf.write("\nGrammar\n\n")
868 for i in range(1,len(Productions)):
869 _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
872 # Verify the use of all productions
873 for s,v in Nonterminals.items():
876 print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s)
881 print "yacc: Warning. There is 1 unused token."
883 print "yacc: Warning. There are %d unused tokens." % unused_tok
886 print "yacc: Warning. There is 1 unused rule."
888 print "yacc: Warning. There are %d unused rules." % unused_prod
891 _vf.write("\nTerminals, with rules where they appear\n\n")
892 ks = Terminals.keys()
895 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
896 _vf.write("\nNonterminals, with rules where they appear\n\n")
897 ks = Nonterminals.keys()
900 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))
904 error += compute_terminates()
905 # error += check_cycles()
908 # -----------------------------------------------------------------------------
911 # This function walks the list of productions and builds a complete set of the
912 # LR items. The LR items are stored in two ways: First, they are uniquely
913 # numbered and placed in the list _lritems. Second, a linked list of LR items
914 # is built for each production. For example:
920 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
921 # -----------------------------------------------------------------------------
# NOTE(review): fragment -- the `def` header of the LR-item construction
# function is missing from this listing.  It walks every production,
# links each LR item via lr_next, and numbers items into LRitems.
924 for p in Productions:
930 lastlri.lr_next = lri
932 lri.lr_num = len(LRitems)
937 # In order for the rest of the parser generator to work, we need to
938 # guarantee that no more lritems are generated. Therefore, we nuke
939 # the p.lr_item method. (Only used in debugging)
940 # Production.lr_item = None
942 # -----------------------------------------------------------------------------
945 # Given a list of precedence rules, add to the precedence table.
946 # -----------------------------------------------------------------------------
# Load the user-supplied precedence table: each entry names an
# associativity ('left'/'right'/'nonassoc') and the terminals at that
# level.  NOTE(review): the loop over plist and the level counter are
# missing from this fragmentary listing.
948 def add_precedence(plist):
956 if prec != 'left' and prec != 'right' and prec != 'nonassoc':
957 print "yacc: Invalid precedence '%s'" % prec
960 if Precedence.has_key(t):
961 print "yacc: Precedence already specified for terminal '%s'" % t
964 Precedence[t] = (prec,plevel)
966 print "yacc: Invalid precedence table."
971 # -----------------------------------------------------------------------------
974 # Compute the augmented grammar. This is just a rule S' -> start where start
975 # is the starting symbol.
976 # -----------------------------------------------------------------------------
def augment_grammar(start=None):
    """Augment the grammar with the rule S' -> start.

    Installs the augmented rule as Productions[0] (the slot reserved by
    initialize_vars) and records its use of the start symbol.  When no
    start symbol is given, the first user-defined rule is used.

    NOTE(review): the original guard line was missing from this garbled
    listing; reconstructed as the conventional `if not start:` fallback.
    """
    if not start:
        start = Productions[1].name
    Productions[0] = Production(name="S'", prod=[start], number=0,
                                len=1, prec=('right', 0), func=None)
    Productions[0].usyms = [start]
    Nonterminals[start].append(0)
986 # -------------------------------------------------------------------------
989 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
991 # During execution of compute_first1, the result may be incomplete.
992 # Afterward (e.g., when called from compute_follow()), it will be complete.
993 # -------------------------------------------------------------------------
# NOTE(review): fragment -- the `def` header of first(beta) and its loop
# structure are missing from this listing.  Computes FIRST of a symbol
# string, appending '<empty>' only when every symbol can derive empty.
996 # We are computing First(x1,x2,x3,...,xn)
1001 # Add all the non-<empty> symbols of First[x] to the result.
1004 x_produces_empty = 1
1006 if f not in result: result.append(f)
1008 if x_produces_empty:
1009 # We have to consider the next x in beta,
1010 # i.e. stay in the loop.
1013 # We don't have to consider any further symbols in beta.
1016 # There was no 'break' from the loop,
1017 # so x_produces_empty was true for all x in beta,
1018 # so beta produces empty as well.
1019 result.append('<empty>')
1025 # Given a non-terminal. This function computes the set of all symbols
1026 # that might follow it. Dragon book, p. 189.
# Compute FOLLOW sets for every nonterminal (Dragon book p.189):
# iterate to a fixed point, propagating FIRST of the suffix after each
# nonterminal occurrence, and FOLLOW of the left-hand side when the
# suffix can derive empty.  NOTE(review): fragmentary listing -- the
# fixed-point while loop, B binding, and didadd flag lines are missing.
1028 def compute_follow(start=None):
1029 # Add '$' to the follow list of the start symbol
1030 for k in Nonterminals.keys():
1034 start = Productions[1].name
1036 Follow[start] = [ '$' ]
1040 for p in Productions[1:]:
1041 # Here is the production set
1042 for i in range(len(p.prod)):
1044 if Nonterminals.has_key(B):
1045 # Okay. We got a non-terminal in a production
1046 fst = first(p.prod[i+1:])
1049 if f != '<empty>' and f not in Follow[B]:
1054 if hasempty or i == (len(p.prod)-1):
1055 # Add elements of follow(a) to follow(b)
1056 for f in Follow[p.name]:
1057 if f not in Follow[B]:
1060 if not didadd: break
1063 _vf.write('\nFollow:\n')
1064 for k in Nonterminals.keys():
1065 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
1067 # -------------------------------------------------------------------------
1070 # Compute the value of FIRST1(X) for all symbols
1071 # -------------------------------------------------------------------------
# Compute FIRST(X) for every grammar symbol by fixed-point iteration:
# terminals seed their own sets, then nonterminal sets grow until stable.
# NOTE(review): fragmentary listing -- terminal seeding, the while loop,
# and the some_change flag lines are missing.
1072 def compute_first1():
1075 for t in Terminals.keys():
1079 First['#'] = ['#'] # what's this for?
1083 # Initialize to the empty set:
1084 for n in Nonterminals.keys():
1087 # Then propagate symbols until no change:
1090 for n in Nonterminals.keys():
1091 for p in Prodnames[n]:
1092 for f in first(p.prod):
1093 if f not in First[n]:
1094 First[n].append( f )
1100 _vf.write('\nFirst:\n')
1101 for k in Nonterminals.keys():
1102 _vf.write("%-20s : %s\n" %
1103 (k, " ".join([str(s) for s in First[k]])))
1105 # -----------------------------------------------------------------------------
1106 # === SLR Generation ===
1108 # The following functions are used to construct SLR (Simple LR) parsing tables
1109 # as described on p.221-229 of the dragon book.
1110 # -----------------------------------------------------------------------------
1112 # Global variables for the LR parsing engine
# NOTE(review): fragment -- the `global` statements below suggest these
# lines sit inside an initialisation function whose `def` header is
# missing from this listing; confirm against the complete source.
1114 global _lr_action, _lr_goto, _lr_method
1115 global _lr_goto_cache
1117 _lr_action = { } # Action table
1118 _lr_goto = { } # Goto table
1119 _lr_method = "Unknown" # LR method used
1120 _lr_goto_cache = { }
1122 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1123 # prodlist is a list of productions.
1125 _add_count = 0 # Counter used to detect cycles
# NOTE(review): fragment -- the closure function's `def` header and main
# worklist loop are missing from this listing.  lr0_added stamps each
# production with the current _add_count so items are added only once
# per closure computation.
1131 prodlist = Productions
1133 # Add everything in I to J
1140 if x.lr0_added == _add_count: continue
1143 x.lr0_added = _add_count
1148 # Compute the LR(0) goto function goto(I,X) where I is a set
1149 # of LR(0) items and X is a grammar symbol. This function is written
1150 # in a way that guarantees uniqueness of the generated goto sets
1151 # (i.e. the same goto set will never be returned as two different Python
1152 # objects). With uniqueness, we can later do fast set comparisons using
1153 # id(obj) instead of element-wise comparison.
# NOTE(review): fragment -- the goto function's `def` header and much of
# its body are missing from this listing.
1156 # First we look for a previously cached entry
1157 g = _lr_goto_cache.get((id(I),x),None)
1160 # Now we generate the goto set in a way that guarantees uniqueness
1163 s = _lr_goto_cache.get(x,None)
1166 _lr_goto_cache[x] = s
# n is presumably the lr_next of an item in I; items whose dot precedes
# x advance over it -- confirm against the complete source.
1171 if n and n.lrbefore == x:
1172 s1 = s.get(id(n),None)
1185 _lr_goto_cache[(id(I),x)] = g
1188 # Compute the kernel of a set of LR(0) items
# NOTE(review): fragment -- only this filter condition survives; the
# kernel keeps augmented-start items, items with the dot past position 0,
# and empty productions.
1192 if p.name == "S'" or p.lr_index > 0 or p.len == 0:
1199 # Compute the LR(0) sets of item function
# NOTE(review): fragment -- the `def` header and worklist loop are
# missing.  Seeds the collection C with the closure of the augmented
# start item, then adds each new goto set, using _lr0_cidhash (keyed by
# object id) to assign state numbers and skip already-seen sets.
1202 C = [ lr0_closure([Productions[0].lr_next]) ]
1205 _lr0_cidhash[id(I)] = i
1208 # Loop over the items in C and each grammar symbols
1214 # Collect all of the symbols that could possibly be in the goto(I,X) sets
1220 for x in asyms.keys():
1223 if _lr0_cidhash.has_key(id(g)): continue
1224 _lr0_cidhash[id(g)] = len(C)
1229 # -----------------------------------------------------------------------------
1232 # This function constructs an SLR table.
1233 # -----------------------------------------------------------------------------
# Build the SLR parsing tables (_lr_action/_lr_goto) from the LR(0) item
# sets, resolving shift/reduce conflicts via the Precedence table and
# reduce/reduce conflicts in favour of the earlier rule.
# NOTE(review): this listing is heavily fragmentary -- state loops,
# conflict counters, and many branches are missing; do not treat the
# statements below as a contiguous, runnable body.
1234 def slr_parse_table():
1236 goto = _lr_goto # Goto array
1237 action = _lr_action # Action array
1238 actionp = { } # Action production array (temporary)
1245 print "yacc: Generating SLR parsing table..."
1247 _vf.write("\n\nParsing method: SLR\n\n")
1249 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1250 # This determines the number of states
1254 # Build the parser table, state by state
1257 # Loop over each production in I
1258 actlist = [ ] # List of actions
1261 _vf.write("\nstate %d\n\n" % st)
1263 _vf.write(" (%d) %s\n" % (p.number, str(p)))
# Dot at the end of the production: reduce (or accept for S').
1268 if p.prod[-1] == ".":
1270 # Start symbol. Accept!
1274 # We are at the end of a production. Reduce!
1275 for a in Follow[p.name]:
1276 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1277 r = action.get((st,a),None)
1279 # Whoa. Have a shift/reduce or reduce/reduce conflict
1281 # Need to decide on shift or reduce here
1282 # By default we favor shifting. Need to add
1283 # some precedence rules here.
1284 sprec,slevel = Productions[actionp[st,a].number].prec
1285 rprec,rlevel = Precedence.get(a,('right',0))
1286 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1287 # We really need to reduce here.
# Reduce actions are encoded as negative production numbers.
1288 action[st,a] = -p.number
1290 if not slevel and not rlevel:
1291 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1292 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1294 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1297 # Hmmm. Guess we'll keep the shift
1298 if not slevel and not rlevel:
1299 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1300 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1303 # Reduce/reduce conflict. In this case, we favor the rule
1304 # that was defined first in the grammar file
1305 oldp = Productions[-r]
1306 pp = Productions[p.number]
1307 if oldp.line > pp.line:
1308 action[st,a] = -p.number
1310 # print "Reduce/reduce conflict in state %d" % st
1312 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
1313 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
1315 print "Unknown conflict in state %d" % st
1317 action[st,a] = -p.number
# Dot not at the end: consider shifting the next symbol.
1321 a = p.prod[i+1] # Get symbol right after the "."
1322 if Terminals.has_key(a):
1324 j = _lr0_cidhash.get(id(g),-1)
1326 # We are in a shift state
1327 actlist.append((a,p,"shift and go to state %d" % j))
1328 r = action.get((st,a),None)
1330 # Whoa have a shift/reduce or shift/shift conflict
1333 print "Shift/shift conflict in state %d" % st
1335 # Do a precedence check.
1336 # - if precedence of reduce rule is higher, we reduce.
1337 # - if precedence of reduce is same and left assoc, we reduce.
1338 # - otherwise we shift
1339 rprec,rlevel = Productions[actionp[st,a].number].prec
1340 sprec,slevel = Precedence.get(a,('right',0))
1341 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1342 # We decide to shift here... highest precedence to shift
1345 if not slevel and not rlevel:
1347 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1348 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1349 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1352 # Hmmm. Guess we'll keep the reduce
1353 if not slevel and not rlevel:
1355 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1356 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1359 print "Unknown conflict in state %d" % st
1364 except StandardError,e:
1365 raise YaccError, "Hosed in slr_parse_table", e
1367 # Print the actions associated with each terminal
1369 for a,p,m in actlist:
1370 if action.has_key((st,a)):
1371 if p is actionp[st,a]:
1372 _vf.write(" %-15s %s\n" % (a,m))
1374 for a,p,m in actlist:
1375 if action.has_key((st,a)):
1376 if p is not actionp[st,a]:
1377 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
1379 # Construct the goto table for this state
1385 if Nonterminals.has_key(s):
1387 for n in nkeys.keys():
1389 j = _lr0_cidhash.get(id(g),-1)
1393 _vf.write(" %-15s shift and go to state %d\n" % (n,j))
# Conflict summary reporting (singular/plural message forms).
1397 if n_srconflict == 1:
1398 print "yacc: %d shift/reduce conflict" % n_srconflict
1399 if n_srconflict > 1:
1400 print "yacc: %d shift/reduce conflicts" % n_srconflict
1401 if n_rrconflict == 1:
1402 print "yacc: %d reduce/reduce conflict" % n_rrconflict
1403 if n_rrconflict > 1:
1404 print "yacc: %d reduce/reduce conflicts" % n_rrconflict
1407 # -----------------------------------------------------------------------------
1408 # ==== LALR(1) Parsing ====
1409 # **** UNFINISHED! 6/16/01
1410 # -----------------------------------------------------------------------------
1413 # Compute the lr1_closure of a set I. I is a list of tuples (p,a) where
1414 # p is a LR0 item and a is a terminal
1419 global _lr1_add_count
1425 # Loop over items (p,a) in I.
1429 # p = [ A -> alpha . B beta]
1431 # For each production B -> gamma
1432 for B in p.lr1_after:
1433 f = tuple(p.lr1_beta + (a,))
1435 # For each terminal b in first(Beta a)
1437 # Check if (B -> . gamma, b) is in J
1438 # Only way this can happen is if the add count mismatches
1440 if pn.lr_added.get(b,0) == _lr1_add_count: continue
1441 pn.lr_added[b] = _lr1_add_count
1447 def lalr_parse_table():
1449 # Compute some lr1 information about all of the productions
1452 after = p.prod[p.lr_index + 1]
1453 p.lr1_after = Prodnames[after]
1454 p.lr1_beta = p.prod[p.lr_index + 2:]
1460 # Compute the LR(0) items
1464 CK.append(lr0_kernel(I))
1468 # -----------------------------------------------------------------------------
1469 # ==== LR Utility functions ====
1470 # -----------------------------------------------------------------------------
1472 # -----------------------------------------------------------------------------
1473 # _lr_write_tables()
1475 # This function writes the LR parsing tables to a file
1476 # -----------------------------------------------------------------------------
1478 def lr_write_tables(modulename=tab_module):
1479 filename = modulename + ".py"
1481 f = open(filename,"w")
1485 # This file is automatically generated. Do not edit.
1490 """ % (filename, repr(_lr_method), repr(Signature.digest())))
1492 # Change smaller to 0 to go back to original tables
1495 # Factor out names to try and make smaller
1499 for k,v in _lr_action.items():
1507 f.write("\n_lr_action_items = {")
1508 for k,v in items.items():
1509 f.write("%r:([" % k)
1521 for _k, _v in _lr_action_items.items():
1522 for _x,_y in zip(_v[0],_v[1]):
1523 _lr_action[(_x,_k)] = _y
1524 del _lr_action_items
1528 f.write("\n_lr_action = { ");
1529 for k,v in _lr_action.items():
1530 f.write("(%r,%r):%r," % (k[0],k[1],v))
1534 # Factor out names to try and make smaller
1537 for k,v in _lr_goto.items():
1545 f.write("\n_lr_goto_items = {")
1546 for k,v in items.items():
1547 f.write("%r:([" % k)
1559 for _k, _v in _lr_goto_items.items():
1560 for _x,_y in zip(_v[0],_v[1]):
1561 _lr_goto[(_x,_k)] = _y
1565 f.write("\n_lr_goto = { ");
1566 for k,v in _lr_goto.items():
1567 f.write("(%r,%r):%r," % (k[0],k[1],v))
1570 # Write production table
1571 f.write("_lr_productions = [\n")
1572 for p in Productions:
1575 f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
1577 f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
1584 print "Unable to create '%s'" % filename
1588 def lr_read_tables(module=tab_module,optimize=0):
1589 global _lr_action, _lr_goto, _lr_productions, _lr_method
1591 exec "import %s as parsetab" % module
1593 if (optimize) or (Signature.digest() == parsetab._lr_signature):
1594 _lr_action = parsetab._lr_action
1595 _lr_goto = parsetab._lr_goto
1596 _lr_productions = parsetab._lr_productions
1597 _lr_method = parsetab._lr_method
1602 except (ImportError,AttributeError):
1605 # -----------------------------------------------------------------------------
1608 # Build the parser module
1609 # -----------------------------------------------------------------------------
1611 def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
1619 # Add starting symbol to signature
1621 Signature.update(start)
1623 # Try to figure out what module we are working with
1625 # User supplied a module object.
1626 if not isinstance(module, types.ModuleType):
1627 raise ValueError,"Expected a module"
1629 ldict = module.__dict__
1632 # No module given. We might be able to get information from the caller.
1633 # Throw an exception and unwind the traceback to get the globals
1637 except RuntimeError:
1638 e,b,t = sys.exc_info()
1640 f = f.f_back # Walk out to our calling function
1641 ldict = f.f_globals # Grab its globals dictionary
1643 # If running in optimized mode. We're going to
1645 if (optimize and lr_read_tables(tabmodule,1)):
1648 for p in _lr_productions:
1650 Productions.append(None)
1652 m = MiniProduction()
1658 m.func = ldict[p[2]]
1659 Productions.append(m)
1662 # Get the tokens map
1663 tokens = ldict.get("tokens",None)
1666 raise YaccError,"module does not define a list 'tokens'"
1667 if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
1668 raise YaccError,"tokens must be a list or tuple."
1670 # Check to see if a requires dictionary is defined.
1671 requires = ldict.get("require",None)
1673 if not (isinstance(requires,types.DictType)):
1674 raise YaccError,"require must be a dictionary."
1676 for r,v in requires.items():
1678 if not (isinstance(v,types.ListType)):
1680 v1 = [x.split(".") for x in v]
1682 except StandardError:
1683 print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
1686 # Build the dictionary of terminals. We record a 0 in the
1687 # dictionary to track whether or not a terminal is actually
1688 # used in the grammar
1690 if 'error' in tokens:
1691 print "yacc: Illegal token 'error'. Is a reserved word."
1692 raise YaccError,"Illegal token name"
1695 if Terminals.has_key(n):
1696 print "yacc: Warning. Token '%s' multiply defined." % n
1699 Terminals['error'] = [ ]
1701 # Get the precedence map (if any)
1702 prec = ldict.get("precedence",None)
1704 if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
1705 raise YaccError,"precedence must be a list or tuple."
1706 add_precedence(prec)
1707 Signature.update(repr(prec))
1710 if not Precedence.has_key(n):
1711 Precedence[n] = ('right',0) # Default, right associative, 0 precedence
1713 # Look for error handler
1714 ef = ldict.get('p_error',None)
1716 if not isinstance(ef,types.FunctionType):
1717 raise YaccError,"'p_error' defined, but is not a function."
1718 eline = ef.func_code.co_firstlineno
1719 efile = ef.func_code.co_filename
1722 if (ef.func_code.co_argcount != 1):
1723 raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
1727 print "yacc: Warning. no p_error() function is defined."
1729 # Get the list of built-in functions with p_ prefix
1730 symbols = [ldict[f] for f in ldict.keys()
1731 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
1732 and ldict[f].__name__ != 'p_error')]
1734 # Check for non-empty symbols
1735 if len(symbols) == 0:
1736 raise YaccError,"no rules of the form p_rulename are defined."
1738 # Sort the symbols by line number
1739 symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
1741 # Add all of the symbols to the grammar
1743 if (add_function(f)) < 0:
1746 files[f.func_code.co_filename] = None
1748 # Make a signature of the docstrings
1751 Signature.update(f.__doc__)
1756 raise YaccError,"Unable to construct parser."
1758 if not lr_read_tables(tabmodule):
1761 for filename in files.keys():
1762 if not validate_file(filename):
1765 # Validate dictionary
1766 validate_dict(ldict)
1768 if start and not Prodnames.has_key(start):
1769 raise YaccError,"Bad starting symbol '%s'" % start
1771 augment_grammar(start)
1772 error = verify_productions(cycle_check=check_recursion)
1773 otherfunc = [ldict[f] for f in ldict.keys()
1774 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')]
1777 raise YaccError,"Unable to construct parser."
1781 compute_follow(start)
1785 elif method == 'LALR1':
1789 raise YaccError, "Unknown parsing method '%s'" % method
1791 lr_write_tables(tabmodule)
1795 f = open(debug_file,"w")
1796 f.write(_vfc.getvalue())
1798 f.write(_vf.getvalue())
1801 print "yacc: can't create '%s'" % debug_file,e
1803 # Made it here. Create a parser object and set up its internal state.
1804 # Set global parse() method to bound method of parser object.
1807 p.productions = Productions
1808 p.errorfunc = Errorfunc
1809 p.action = _lr_action
1811 p.method = _lr_method
1812 p.require = Requires
1817 # Clean up all of the globals we created
1822 # yacc_cleanup function. Delete all of the global variables
1823 # used during table construction
1826 global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
1827 del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
1829 global Productions, Prodnames, Prodmap, Terminals
1830 global Nonterminals, First, Follow, Precedence, LRitems
1831 global Errorfunc, Signature, Requires
1833 del Productions, Prodnames, Prodmap, Terminals
1834 del Nonterminals, First, Follow, Precedence, LRitems
1835 del Errorfunc, Signature, Requires
1841 # Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args,**kwargs):
    """Module-level parse() placeholder.

    yacc() rebinds this name to the constructed parser object's bound
    parse method; invoking it before a parser has been built is an error.
    """
    raise YaccError("yacc: No parser built with yacc()")