1 #-----------------------------------------------------------------------------
4 # Author: David M. Beazley (beazley@cs.uchicago.edu)
5 # Department of Computer Science
6 # University of Chicago
9 # Copyright (C) 2001, David M. Beazley
11 # $Header: /home/davidb/src/cvs/python-rwhoisd/rwhoisd/yacc.py,v 1.1 2003/04/22 02:19:07 davidb Exp $
13 # This library is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU Lesser General Public
15 # License as published by the Free Software Foundation; either
16 # version 2.1 of the License, or (at your option) any later version.
18 # This library is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # Lesser General Public License for more details.
23 # You should have received a copy of the GNU Lesser General Public
24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # See the file COPYING for a complete copy of the LGPL.
30 # This implements an LR parser that is constructed from grammar rules defined
31 # as Python functions. Roughly speaking, this module is a cross between
32 # John Aycock's Spark system and the GNU bison utility.
34 # Disclaimer: This is a work in progress. SLR parsing seems to work fairly
35 # well and there is extensive error checking. LALR(1) is in progress. The
36 # rest of this file is a bit of a mess. Please pardon the dust.
38 # The current implementation is only somewhat object-oriented. The
39 # LR parser itself is defined in terms of an object (which allows multiple
40 # parsers to co-exist). However, most of the variables used during table
41 # construction are defined in terms of global variables. Users shouldn't
42 # notice unless they are trying to define multiple parsers at the same
43 # time using threads (in which case they should have their head examined).
44 #-----------------------------------------------------------------------------
48 #-----------------------------------------------------------------------------
49 # === User configurable parameters ===
51 # Change these to modify the default behavior of yacc (if you wish)
52 #-----------------------------------------------------------------------------
# --- User-tunable defaults for table generation and debug output. ---
# NOTE(review): this listing is a fragmentary numbered dump -- original source
# line numbers are baked into each line and some lines are missing.
54 yaccdebug = 1 # Debugging mode. If set, yacc generates a
55 # 'parser.out' file in the current directory
57 debug_file = 'parser.out' # Default name of the debugging file
58 tab_module = 'parsetab' # Default name of the table module
59 default_lr = 'SLR' # Default LR table generation method
# Classic yacc behaviour: this many tokens must shift cleanly before the
# parser leaves error-recovery mode.
61 error_count = 3 # Number of symbols that must be shifted to leave recovery mode
63 import re, types, sys, cStringIO, md5, os.path
65 # Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for all yacc-related errors (grammar construction
    problems and parser misuse).  The name and base class are part of the
    public interface -- callers catch this type."""
    pass
68 #-----------------------------------------------------------------------------
69 # === LR Parsing Engine ===
71 # The following classes are used for the LR parser itself. These are not
72 # used during table construction and are independent of the actual LR
73 # table generation algorithm
74 #-----------------------------------------------------------------------------
76 # This class is used to hold non-terminal grammar symbols during parsing.
77 # It normally has the following attributes set:
78 # .type = Grammar symbol type
79 # .value = Symbol value
80 # .lineno = Starting line number
81 # .endlineno = Ending line number (optional, set automatically)
def __str__(self):
    """A grammar symbol prints as its token/rule type name (.type)."""
    return self.type
def __repr__(self):
    """Debug representation mirrors str() -- the symbol's type name."""
    return str(self)
87 # This class is a wrapper around the objects actually passed to each
88 # grammar rule. Index lookup and assignment actually assign the
89 # .value attribute of the underlying YaccSymbol object.
90 # The lineno() method returns the line number of a given
91 # item (or 0 if not defined). The linespan() method returns
92 # a tuple of (startline,endline) representing the range of lines
def __getitem__(self, n):
    """Indexing the rule slice (p[n]) returns the semantic .value of the
    underlying symbol/token object at position n."""
    return self.slice[n].value
def __setitem__(self, n, v):
    """Assigning to the rule slice (p[n] = v) stores v as the .value of
    the underlying symbol object; this is how rules set their result
    via p[0] = ...."""
    self.slice[n].value = v
# NOTE(review): fragment -- the enclosing `def lineno(self,n):` header is
# missing from this listing.  Presumably the body of YaccSlice.lineno,
# returning the recorded line number of slice item n (0 if unset) --
# confirm against the complete source.
107 return getattr(self.slice[n],"lineno",0)
def linespan(self, n):
    """Return (startline, endline) for item n of the rule slice.

    Falls back to 0 when no line number was recorded, and to the start
    line when no separate end line is available.
    """
    startline = getattr(self.slice[n], "lineno", 0)
    endline = getattr(self.slice[n], "endlineno", startline)
    return startline, endline
114 def pushback(self,n):
# Push the last n right-hand-side symbols back onto the lookahead stack.
# NOTE(review): lines are missing from this listing (the condition of the
# positivity guard and the loop header binding `i` that precedes the
# append below) -- confirm against the complete source.
116 raise ValueError, "Expected a positive value"
117 if n > (len(self.slice)-1):
118 raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
120 self.pbstack.append(self.slice[-i-1])
122 # The LR Parsing engine. This is defined as a class so that multiple parsers
123 # can exist in the same process. A user never instantiates this directly.
124 # Instead, the global yacc() function should be used to create a suitable Parser
# NOTE(review): the `class Parser:` header and several statements are
# missing from this fragmentary listing.
128 def __init__(self,magic=None):
130 # This is a hack to keep users from trying to instantiate a Parser
# The magic-value check (missing here) rejects direct construction;
# presumably only yacc() passes the expected magic -- confirm upstream.
134 raise YaccError, "Can't instantiate Parser. Use yacc() instead."
136 # Reset internal state
137 self.productions = None # List of productions
138 self.errorfunc = None # Error handling function
139 self.action = { } # LR Action table
140 self.goto = { } # LR goto table
141 self.require = { } # Attribute require table
142 self.method = "Unknown LR" # Table construction method used
# NOTE(review): fragments of the error-recovery helpers (errok/restart)
# follow; their `def` headers are missing from this listing.
148 del self.statestack[:]
152 self.symstack.append(sym)
153 self.statestack.append(0)
# The main LR parsing loop: repeatedly consult the action table with the
# current (state, lookahead) pair and shift, reduce, accept, or enter
# error recovery.  NOTE(review): this listing is heavily fragmentary --
# many original lines are missing throughout; do not treat the statements
# below as a contiguous, runnable body.
155 def parse(self,input=None,lexer=None,debug=0):
156 lookahead = None # Current lookahead symbol
157 lookaheadstack = [ ] # Stack of lookahead symbols
158 actions = self.action # Local reference to action table
159 goto = self.goto # Local reference to goto table
160 prod = self.productions # Local reference to production list
161 pslice = YaccSlice(None) # Slice object passed to grammar rules
162 pslice.parser = self # Parser object
163 self.errorcount = 0 # Used during error recovery
165 # If no lexer was given, we will try to use the lex module
171 # If input was supplied, pass to lexer
176 get_token = lexer.token
178 statestack = [ ] # Stack of parsing states
179 self.statestack = statestack
180 symstack = [ ] # Stack of grammar symbols
181 self.symstack = symstack
183 errtoken = None # Err token
185 # The start state is assumed to be (0,$)
192 # Get the next symbol on the input. If a lookahead symbol
193 # is already set, we just use that. Otherwise, we'll pull
194 # the next token off of the lookaheadstack or from the lexer
196 if not lookaheadstack:
197 lookahead = get_token() # Get the next token
199 lookahead = lookaheadstack.pop()
201 lookahead = YaccSymbol()
204 print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])
206 # Check the action table
208 ltype = lookahead.type
209 t = actions.get((s,ltype),None)
# Positive action entry: shift to that state.
213 # shift a symbol on the stack
215 # Error, end of input
216 print "yacc: Parse error. EOF"
219 symstack.append(lookahead)
222 # Decrease error count on successful shift
223 if self.errorcount > 0:
# Negative action entry: reduce by production number -t.
229 # reduce a symbol on the stack, emit a production
234 # Get production function
236 sym.type = pname # Production name
240 targ = symstack[-plen-1:]
# Propagate line-number span from the reduced symbols to the new symbol.
243 sym.lineno = targ[1].lineno
244 sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
245 except AttributeError:
248 del statestack[-plen:]
254 # Call the grammar rule with our special slice object
257 # Validate attributes of the resulting value attribute
261 # r = Requires.get(t0.type,None)
266 # for fname in field:
270 # except StandardError:
273 # print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
274 # (p.file,p.line,p.name,".".join(field))
275 # except TypeError,LookupError:
276 # print "Bad requires directive " % r
280 # If there was a pushback, put that on the stack
282 lookaheadstack.append(lookahead)
283 for _t in pslice.pbstack:
284 lookaheadstack.append(_t)
288 statestack.append(goto[statestack[-1],pname])
# Accept: return the semantic value of the start symbol.
293 return getattr(n,"value",None)
296 # We have some kind of parsing error here. To handle this,
297 # we are going to push the current token onto the tokenstack
298 # and replace it with an 'error' token. If there are any synchronization
299 # rules, they may catch it.
301 # In addition to pushing the error token, we call the user defined p_error()
302 # function if this is the first syntax error. This function is only called
303 # if errorcount == 0.
305 if not self.errorcount:
306 self.errorcount = error_count
308 if errtoken.type == '$':
309 errtoken = None # End of file!
311 global errok,token,restart
312 errok = self.errok # Set some special functions available in error recovery
314 restart = self.restart
315 tok = self.errorfunc(errtoken)
316 del errok, token, restart # Delete special functions
318 if not self.errorcount:
319 # User must have done some kind of panic mode recovery on their own. The returned token
320 # is the next lookahead
326 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
329 print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
331 print "yacc: Syntax error, token=%s" % errtoken.type
333 print "yacc: Parse error in input. EOF"
337 self.errorcount = error_count
339 # case 1: the statestack only has 1 entry on it. If we're in this state, the
340 # entire parse has been rolled back and we're completely hosed. The token is
341 # discarded and we just keep going.
343 if len(statestack) <= 1 and lookahead.type != '$':
346 # Nuke the pushback stack
347 del lookaheadstack[:]
350 # case 2: the statestack has a couple of entries on it, but we're
351 # at the end of the file. nuke the top entry and generate an error token
353 # Start nuking entries on the stack
354 if lookahead.type == '$':
355 # Whoa. We're really hosed here. Bail out
358 if lookahead.type != 'error':
360 if sym.type == 'error':
361 # Hmmm. Error is on top of stack, we'll just nuke input
362 # symbol and continue
367 if hasattr(lookahead,"lineno"):
368 t.lineno = lookahead.lineno
370 lookaheadstack.append(lookahead)
378 # Call an error function here
# Reaching this point means the action table entry was malformed.
379 raise RuntimeError, "yacc: internal parser error!!!\n"
381 # -----------------------------------------------------------------------------
382 # === Parser Construction ===
384 # The following functions and variables are used to implement the yacc() function
385 # itself. This is pretty hairy stuff involving lots of error checking,
386 # construction of LR items, kernels, and so forth. Although a lot of
387 # this work is done using global variables, the resulting Parser object
388 # is completely self contained--meaning that it is safe to repeatedly
389 # call yacc() with different grammars in the same application.
390 # -----------------------------------------------------------------------------
392 # -----------------------------------------------------------------------------
395 # This function checks to see if there are duplicated p_rulename() functions
396 # in the parser module file. Without this function, it is really easy for
397 # users to make mistakes by cutting and pasting code fragments (and it's a real
398 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
399 # we just do a little regular expression pattern matching of def statements
400 # to try and detect duplicates.
401 # -----------------------------------------------------------------------------
# Scan the grammar module's source text for duplicated p_*() rule
# functions, which would silently shadow each other on import.
403 def validate_file(filename):
404 base,ext = os.path.splitext(filename)
405 if ext != '.py': return 1 # No idea. Assume it's okay.
# NOTE(review): the file open / error handling and the line-scanning loop
# are missing from this fragmentary listing.
409 lines = f.readlines()
414 # Match def p_funcname(
415 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
423 prev = counthash.get(name)
425 counthash[name] = linen
427 print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
432 # This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
433 def validate_dict(d):
434 for n,v in d.items():
435 if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
436 if n[0:2] == 't_': continue
# NOTE(review): lines are missing from this fragmentary listing between
# the checks below.
439 print "yacc: Warning. '%s' not defined as a function" % n
# Heuristic: a one-argument function whose docstring looks like a rule
# was probably meant to be a p_* grammar rule.
440 if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
442 doc = v.__doc__.split(" ")
444 print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n)
445 except StandardError:
448 # -----------------------------------------------------------------------------
449 # === GRAMMAR FUNCTIONS ===
451 # The following global variables and functions are used to store, manipulate,
452 # and verify the grammar rules specified by the user.
453 # -----------------------------------------------------------------------------
455 # Initialize all of the global variables used during grammar construction
456 def initialize_vars():
457 global Productions, Prodnames, Prodmap, Terminals
458 global Nonterminals, First, Follow, Precedence, LRitems
459 global Errorfunc, Signature, Requires
461 Productions = [None] # A list of all of the productions. The first
462 # entry is always reserved for the purpose of
463 # building an augmented grammar
465 Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
466 # productions of that nonterminal.
468 Prodmap = { } # A dictionary that is only used to detect duplicate
471 Terminals = { } # A dictionary mapping the names of terminal symbols to a
472 # list of the rules where they are used.
474 Nonterminals = { } # A dictionary mapping names of nonterminals to a list
475 # of rule numbers where they are used.
477 First = { } # A dictionary of precomputed FIRST(x) symbols
479 Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
481 Precedence = { } # Precedence rules for each terminal. Contains tuples of the
482 # form ('right',level) or ('nonassoc', level) or ('left',level)
484 LRitems = [ ] # A list of all LR items for the grammar. These are the
485 # productions with the "dot" like E -> E . PLUS E
487 Errorfunc = None # User defined error handler
489 Signature = md5.new() # Digital signature of the grammar rules, precedence
490 # and other information. Used to determine when a
491 # parsing table needs to be regenerated.
493 Requires = { } # Requires list
495 # File objects used when creating the parser.out debugging file
# NOTE(review): fragment -- presumably _vf/_vfc are also declared global
# in a missing line; confirm against the complete source.
497 _vf = cStringIO.StringIO()
498 _vfc = cStringIO.StringIO()
500 # -----------------------------------------------------------------------------
503 # This class stores the raw information about a single production or grammar rule.
504 # It has a few required attributes:
506 # name - Name of the production (nonterminal)
507 # prod - A list of symbols making up its production
508 # number - Production number.
510 # In addition, a few additional attributes are used to help with debugging or
511 # optimization of table generation.
513 # file - File where production action is defined.
514 # lineno - Line number where action is defined
515 # func - Action function
516 # prec - Precedence level
517 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
518 # then lr_next refers to 'E -> E PLUS . E'
519 # lr_index - LR item index (location of the ".") in the prod list.
520 # len - Length of the production (number of symbols on right hand side)
521 # -----------------------------------------------------------------------------
# NOTE(review): the `class Production:` header and many statements are
# missing from this fragmentary listing; the lines below are pieces of
# Production.__init__, __str__, and lr_item.
524 def __init__(self,**kw):
# All attributes (name, prod, number, func, file, line, prec, ...) are
# supplied as keyword arguments by the caller.
525 for k,v in kw.items():
528 self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure
# __str__ fragment: render as "name -> sym1 sym2" or "name -> <empty>".
533 s = "%s -> %s" % (self.name," ".join(self.prod))
535 s = "%s -> <empty>" % self.name
541 # Compute lr_items from the production
543 if n > len(self.prod): return None
546 p.prod = list(self.prod)
547 p.number = self.number
550 p.prod = tuple(p.prod)
554 # Precompute list of productions immediately following
556 p.lrafter = Prodnames[p.prod[n+1]]
557 except (IndexError,KeyError),e:
560 p.lrbefore = p.prod[n-1]
# MiniProduction body is missing from this listing.
566 class MiniProduction:
def is_identifier(s):
    """Return 1 if every character of s is alphanumeric or '_', else 0.

    Used to vet rule and symbol names in grammar docstrings.  Note that
    the empty string vacuously passes (returns 1), and 0/1 (not
    True/False) are kept for compatibility with existing callers.
    """
    for c in s:
        if not (c.isalnum() or c == '_'):
            return 0
    return 1
575 # -----------------------------------------------------------------------------
578 # Given an action function, this function assembles a production rule.
579 # The production rule is assumed to be found in the function's docstring.
580 # This rule has the general syntax:
582 # name1 ::= production1
587 # name2 ::= production1
590 # -----------------------------------------------------------------------------
# Validate one grammar rule (prodname -> syms) and register it in the
# global Productions / Prodnames / Terminals / Nonterminals tables.
# NOTE(review): this listing is fragmentary -- return statements, the
# Production construction, and several loop headers are missing.
592 def add_production(f,file,line,prodname,syms):
594 if Terminals.has_key(prodname):
595 print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname)
597 if prodname == 'error':
598 print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname)
601 if not is_identifier(prodname):
602 print "%s:%d: Illegal rule name '%s'" % (file,line,prodname)
606 if not is_identifier(s) and s != '%prec':
607 print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)
610 # See if the rule is already in the rulemap
611 map = "%s -> %s" % (prodname,syms)
612 if Prodmap.has_key(map):
614 print "%s:%d: Duplicate rule %s." % (file,line, m)
615 print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line)
624 p.number = len(Productions)
627 Productions.append(p)
629 if not Nonterminals.has_key(prodname):
630 Nonterminals[prodname] = [ ]
632 # Add all terminals to Terminals
634 while i < len(p.prod):
# %prec handling: the following symbol names a terminal whose precedence
# this production adopts.
638 precname = p.prod[i+1]
640 print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line)
643 prec = Precedence.get(precname,None)
645 print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname)
653 if Terminals.has_key(t):
654 Terminals[t].append(p.number)
655 # Is a terminal. We'll assign a precedence to p based on this
656 if not hasattr(p,"prec"):
657 p.prec = Precedence.get(t,('right',0))
659 if not Nonterminals.has_key(t):
660 Nonterminals[t] = [ ]
661 Nonterminals[t].append(p.number)
664 if not hasattr(p,"prec"):
667 # Set final length of productions
669 p.prod = tuple(p.prod)
671 # Calculate unique syms in the production
677 # Add to the global productions list
679 Prodnames[p.name].append(p)
681 Prodnames[p.name] = [ p ]
684 # Given a raw rule function, this function rips out its doc string
685 # and adds rules to the grammar
# NOTE(review): the `def` header of this function (presumably
# add_function(f) or similar) is missing from this fragmentary listing,
# along with the docstring-parsing loop body.
688 line = f.func_code.co_firstlineno
689 file = f.func_code.co_filename
692 if f.func_code.co_argcount > 1:
693 print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
696 if f.func_code.co_argcount < 1:
697 print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
701 # Split the doc string into lines
702 pstrings = f.__doc__.splitlines()
711 # This is a continuation of a previous rule
713 print "%s:%d: Misplaced '|'." % (file,dline)
728 if assign != ':' and assign != '::=':
729 print "%s:%d: Syntax error. Expected ':'" % (file,dline)
731 e = add_production(f,file,dline,prodname,syms)
733 except StandardError:
734 print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps)
737 print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__)
741 # Cycle checking code (Michael Dyck)
743 def compute_reachable():
745 Find each symbol that can be reached from the start symbol.
746 Print a warning for any nonterminals that can't be reached.
747 (Unused terminals have already had their warning.)
# NOTE(review): docstring delimiters and several statements are missing
# from this fragmentary listing.
750 for s in Terminals.keys() + Nonterminals.keys():
753 mark_reachable_from( Productions[0].prod[0], Reachable )
755 for s in Nonterminals.keys():
757 print "yacc: Symbol '%s' is unreachable." % s
# Recursive depth-first marking over Prodnames.
759 def mark_reachable_from(s, Reachable):
761 Mark all symbols that are reachable from symbol s.
764 # We've already reached symbol s.
767 for p in Prodnames.get(s,[]):
769 mark_reachable_from(r, Reachable)
771 # -----------------------------------------------------------------------------
772 # compute_terminates()
774 # This function looks at the various parsing rules and tries to detect
775 # infinite recursion cycles (grammar rules where there is no possible way
776 # to derive a string of only terminals).
777 # -----------------------------------------------------------------------------
# Fixed-point computation: a nonterminal "terminates" iff some production
# of it consists entirely of terminating symbols.  Symbols that never
# terminate indicate infinite recursion in the grammar.
# NOTE(review): docstring delimiters, the Terminates initialisation, and
# loop bodies are missing from this fragmentary listing.
778 def compute_terminates():
780 Raise an error for any symbols that don't terminate.
785 for t in Terminals.keys():
792 # Initialize to false:
793 for n in Nonterminals.keys():
796 # Then propagate termination until no change:
799 for (n,pl) in Prodnames.items():
800 # Nonterminal n terminates iff any of its productions terminates.
802 # Production p terminates iff all of its rhs symbols terminate.
804 if not Terminates[s]:
805 # The symbol s does not terminate,
806 # so production p does not terminate.
810 # didn't break from the loop,
811 # so every symbol s terminates
812 # so production p terminates.
816 # symbol n terminates!
817 if not Terminates[n]:
820 # Don't need to consider any more productions for this n.
827 for (s,terminates) in Terminates.items():
829 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
830 # s is used-but-not-defined, and we've already warned of that,
831 # so it would be overkill to say that it's also non-terminating.
834 print "yacc: Infinite recursion detected for symbol '%s'." % s
839 # -----------------------------------------------------------------------------
840 # verify_productions()
842 # This function examines all of the supplied rules to see if they seem valid.
843 # -----------------------------------------------------------------------------
# Sanity-check the assembled grammar: undefined symbols, unused tokens and
# rules, and (optionally) non-terminating cycles.  Also dumps the grammar
# to the _vf debug buffer when yaccdebug is on.
# NOTE(review): this listing is fragmentary -- counters (unused_tok,
# unused_prod, error), sort calls, and loop headers are missing.
844 def verify_productions(cycle_check=1):
846 for p in Productions:
850 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
851 print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s)
856 # Now verify all of the tokens
858 _vf.write("Unused terminals:\n\n")
859 for s,v in Terminals.items():
860 if s != 'error' and not v:
861 print "yacc: Warning. Token '%s' defined, but not used." % s
862 if yaccdebug: _vf.write(" %s\n"% s)
865 # Print out all of the productions
867 _vf.write("\nGrammar\n\n")
868 for i in range(1,len(Productions)):
869 _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
872 # Verify the use of all productions
873 for s,v in Nonterminals.items():
876 print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s)
881 print "yacc: Warning. There is 1 unused token."
883 print "yacc: Warning. There are %d unused tokens." % unused_tok
886 print "yacc: Warning. There is 1 unused rule."
888 print "yacc: Warning. There are %d unused rules." % unused_prod
891 _vf.write("\nTerminals, with rules where they appear\n\n")
892 ks = Terminals.keys()
895 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
896 _vf.write("\nNonterminals, with rules where they appear\n\n")
897 ks = Nonterminals.keys()
900 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))
904 error += compute_terminates()
905 # error += check_cycles()
908 # -----------------------------------------------------------------------------
911 # This function walks the list of productions and builds a complete set of the
912 # LR items. The LR items are stored in two ways: First, they are uniquely
913 # numbered and placed in the list _lritems. Second, a linked list of LR items
914 # is built for each production. For example:
920 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
921 # -----------------------------------------------------------------------------
# NOTE(review): fragment -- the `def` header of the LR-item construction
# function is missing from this listing.  It walks every production,
# links each LR item via lr_next, and numbers items into LRitems.
924 for p in Productions:
930 lastlri.lr_next = lri
932 lri.lr_num = len(LRitems)
937 # In order for the rest of the parser generator to work, we need to
938 # guarantee that no more lritems are generated. Therefore, we nuke
939 # the p.lr_item method. (Only used in debugging)
940 # Production.lr_item = None
942 # -----------------------------------------------------------------------------
945 # Given a list of precedence rules, add to the precedence table.
946 # -----------------------------------------------------------------------------
# Load the user-supplied precedence table: each entry names an
# associativity ('left'/'right'/'nonassoc') and the terminals at that
# level.  NOTE(review): the loop over plist and the level counter are
# missing from this fragmentary listing.
948 def add_precedence(plist):
956 if prec != 'left' and prec != 'right' and prec != 'nonassoc':
957 print "yacc: Invalid precedence '%s'" % prec
960 if Precedence.has_key(t):
961 print "yacc: Precedence already specified for terminal '%s'" % t
964 Precedence[t] = (prec,plevel)
966 print "yacc: Invalid precedence table."
971 # -----------------------------------------------------------------------------
974 # Compute the augmented grammar. This is just a rule S' -> start where start
975 # is the starting symbol.
976 # -----------------------------------------------------------------------------
def augment_grammar(start=None):
    """Augment the grammar with the rule S' -> start.

    Installs the augmented rule as Productions[0] (the slot reserved by
    initialize_vars) and records its use of the start symbol.  When no
    start symbol is given, the first user-defined rule is used.

    NOTE(review): the original guard line was missing from this garbled
    listing; reconstructed as the conventional `if not start:` fallback.
    """
    if not start:
        start = Productions[1].name
    Productions[0] = Production(name="S'", prod=[start], number=0,
                                len=1, prec=('right', 0), func=None)
    Productions[0].usyms = [start]
    Nonterminals[start].append(0)
986 # -------------------------------------------------------------------------
989 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
991 # During execution of compute_first1, the result may be incomplete.
992 # Afterward (e.g., when called from compute_follow()), it will be complete.
993 # -------------------------------------------------------------------------
# NOTE(review): fragment -- the `def` header of first(beta) and its loop
# structure are missing from this listing.  Computes FIRST of a symbol
# string, appending '<empty>' only when every symbol can derive empty.
996 # We are computing First(x1,x2,x3,...,xn)
1001 # Add all the non-<empty> symbols of First[x] to the result.
1004 x_produces_empty = 1
1006 if f not in result: result.append(f)
1008 if x_produces_empty:
1009 # We have to consider the next x in beta,
1010 # i.e. stay in the loop.
1013 # We don't have to consider any further symbols in beta.
1016 # There was no 'break' from the loop,
1017 # so x_produces_empty was true for all x in beta,
1018 # so beta produces empty as well.
1019 result.append('<empty>')
1025 # Given a non-terminal. This function computes the set of all symbols
1026 # that might follow it. Dragon book, p. 189.
# Compute FOLLOW sets for every nonterminal (Dragon book p.189):
# iterate to a fixed point, propagating FIRST of the suffix after each
# nonterminal occurrence, and FOLLOW of the left-hand side when the
# suffix can derive empty.  NOTE(review): fragmentary listing -- the
# fixed-point while loop, B binding, and didadd flag lines are missing.
1028 def compute_follow(start=None):
1029 # Add '$' to the follow list of the start symbol
1030 for k in Nonterminals.keys():
1034 start = Productions[1].name
1036 Follow[start] = [ '$' ]
1040 for p in Productions[1:]:
1041 # Here is the production set
1042 for i in range(len(p.prod)):
1044 if Nonterminals.has_key(B):
1045 # Okay. We got a non-terminal in a production
1046 fst = first(p.prod[i+1:])
1049 if f != '<empty>' and f not in Follow[B]:
1054 if hasempty or i == (len(p.prod)-1):
1055 # Add elements of follow(a) to follow(b)
1056 for f in Follow[p.name]:
1057 if f not in Follow[B]:
1060 if not didadd: break
1063 _vf.write('\nFollow:\n')
1064 for k in Nonterminals.keys():
1065 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
1067 # -------------------------------------------------------------------------
1070 # Compute the value of FIRST1(X) for all symbols
1071 # -------------------------------------------------------------------------
# Compute FIRST(X) for every grammar symbol by fixed-point iteration:
# terminals seed their own sets, then nonterminal sets grow until stable.
# NOTE(review): fragmentary listing -- terminal seeding, the while loop,
# and the some_change flag lines are missing.
1072 def compute_first1():
1075 for t in Terminals.keys():
1079 First['#'] = ['#'] # what's this for?
1083 # Initialize to the empty set:
1084 for n in Nonterminals.keys():
1087 # Then propagate symbols until no change:
1090 for n in Nonterminals.keys():
1091 for p in Prodnames[n]:
1092 for f in first(p.prod):
1093 if f not in First[n]:
1094 First[n].append( f )
1100 _vf.write('\nFirst:\n')
1101 for k in Nonterminals.keys():
1102 _vf.write("%-20s : %s\n" %
1103 (k, " ".join([str(s) for s in First[k]])))
1105 # -----------------------------------------------------------------------------
1106 # === SLR Generation ===
1108 # The following functions are used to construct SLR (Simple LR) parsing tables
1109 # as described on p.221-229 of the dragon book.
1110 # -----------------------------------------------------------------------------
1112 # Global variables for the LR parsing engine
# NOTE(review): fragment -- the `global` statements below suggest these
# lines sit inside an initialisation function whose `def` header is
# missing from this listing; confirm against the complete source.
1114 global _lr_action, _lr_goto, _lr_method
1115 global _lr_goto_cache
1117 _lr_action = { } # Action table
1118 _lr_goto = { } # Goto table
1119 _lr_method = "Unknown" # LR method used
1120 _lr_goto_cache = { }
1122 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1123 # prodlist is a list of productions.
1125 _add_count = 0 # Counter used to detect cycles
# NOTE(review): fragment -- the closure function's `def` header and main
# worklist loop are missing from this listing.  lr0_added stamps each
# production with the current _add_count so items are added only once
# per closure computation.
1131 prodlist = Productions
1133 # Add everything in I to J
1140 if x.lr0_added == _add_count: continue
1143 x.lr0_added = _add_count
1148 # Compute the LR(0) goto function goto(I,X) where I is a set
1149 # of LR(0) items and X is a grammar symbol. This function is written
1150 # in a way that guarantees uniqueness of the generated goto sets
1151 # (i.e. the same goto set will never be returned as two different Python
1152 # objects). With uniqueness, we can later do fast set comparisons using
1153 # id(obj) instead of element-wise comparison.
# NOTE(review): fragment -- the goto function's `def` header and much of
# its body are missing from this listing.
1156 # First we look for a previously cached entry
1157 g = _lr_goto_cache.get((id(I),x),None)
1160 # Now we generate the goto set in a way that guarantees uniqueness
1163 s = _lr_goto_cache.get(x,None)
1166 _lr_goto_cache[x] = s
# n is presumably the lr_next of an item in I; items whose dot precedes
# x advance over it -- confirm against the complete source.
1171 if n and n.lrbefore == x:
1172 s1 = s.get(id(n),None)
1185 _lr_goto_cache[(id(I),x)] = g
1188 # Compute the kernel of a set of LR(0) items
# NOTE(review): fragment -- only this filter condition survives; the
# kernel keeps augmented-start items, items with the dot past position 0,
# and empty productions.
1192 if p.name == "S'" or p.lr_index > 0 or p.len == 0:
1199 # Compute the LR(0) sets of item function
# NOTE(review): fragment -- the `def` header and worklist loop are
# missing.  Seeds the collection C with the closure of the augmented
# start item, then adds each new goto set, using _lr0_cidhash (keyed by
# object id) to assign state numbers and skip already-seen sets.
1202 C = [ lr0_closure([Productions[0].lr_next]) ]
1205 _lr0_cidhash[id(I)] = i
1208 # Loop over the items in C and each grammar symbols
1214 # Collect all of the symbols that could possibly be in the goto(I,X) sets
1220 for x in asyms.keys():
1223 if _lr0_cidhash.has_key(id(g)): continue
1224 _lr0_cidhash[id(g)] = len(C)
1229 # -----------------------------------------------------------------------------
1232 # This function constructs an SLR table.
1233 # -----------------------------------------------------------------------------
# Build the SLR parsing tables (_lr_action/_lr_goto) from the LR(0) item
# sets, resolving shift/reduce conflicts via the Precedence table and
# reduce/reduce conflicts in favour of the earlier rule.
# NOTE(review): this listing is heavily fragmentary -- state loops,
# conflict counters, and many branches are missing; do not treat the
# statements below as a contiguous, runnable body.
1234 def slr_parse_table():
1236 goto = _lr_goto # Goto array
1237 action = _lr_action # Action array
1238 actionp = { } # Action production array (temporary)
1245 print "yacc: Generating SLR parsing table..."
1247 _vf.write("\n\nParsing method: SLR\n\n")
1249 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1250 # This determines the number of states
1254 # Build the parser table, state by state
1257 # Loop over each production in I
1258 actlist = [ ] # List of actions
1261 _vf.write("\nstate %d\n\n" % st)
1263 _vf.write(" (%d) %s\n" % (p.number, str(p)))
# Dot at the end of the production: reduce (or accept for S').
1268 if p.prod[-1] == ".":
1270 # Start symbol. Accept!
1274 # We are at the end of a production. Reduce!
1275 for a in Follow[p.name]:
1276 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1277 r = action.get((st,a),None)
1279 # Whoa. Have a shift/reduce or reduce/reduce conflict
1281 # Need to decide on shift or reduce here
1282 # By default we favor shifting. Need to add
1283 # some precedence rules here.
1284 sprec,slevel = Productions[actionp[st,a].number].prec
1285 rprec,rlevel = Precedence.get(a,('right',0))
1286 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1287 # We really need to reduce here.
# Reduce actions are encoded as negative production numbers.
1288 action[st,a] = -p.number
1290 if not slevel and not rlevel:
1291 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1292 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1294 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1297 # Hmmm. Guess we'll keep the shift
1298 if not slevel and not rlevel:
1299 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1300 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1303 # Reduce/reduce conflict. In this case, we favor the rule
1304 # that was defined first in the grammar file
1305 oldp = Productions[-r]
1306 pp = Productions[p.number]
1307 if oldp.line > pp.line:
1308 action[st,a] = -p.number
1310 # print "Reduce/reduce conflict in state %d" % st
1312 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
1313 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
1315 print "Unknown conflict in state %d" % st
1317 action[st,a] = -p.number
# Dot not at the end: consider shifting the next symbol.
1321 a = p.prod[i+1] # Get symbol right after the "."
1322 if Terminals.has_key(a):
1324 j = _lr0_cidhash.get(id(g),-1)
1326 # We are in a shift state
1327 actlist.append((a,p,"shift and go to state %d" % j))
1328 r = action.get((st,a),None)
1330 # Whoa have a shift/reduce or shift/shift conflict
1333 print "Shift/shift conflict in state %d" % st
1335 # Do a precedence check.
1336 # - if precedence of reduce rule is higher, we reduce.
1337 # - if precedence of reduce is same and left assoc, we reduce.
1338 # - otherwise we shift
1339 rprec,rlevel = Productions[actionp[st,a].number].prec
1340 sprec,slevel = Precedence.get(a,('right',0))
1341 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1342 # We decide to shift here... highest precedence to shift
1345 if not slevel and not rlevel:
1347 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1348 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1349 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1352 # Hmmm. Guess we'll keep the reduce
1353 if not slevel and not rlevel:
1355 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1356 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1359 print "Unknown conflict in state %d" % st
1364 except StandardError,e:
1365 raise YaccError, "Hosed in slr_parse_table", e
1367 # Print the actions associated with each terminal
1369 for a,p,m in actlist:
1370 if action.has_key((st,a)):
1371 if p is actionp[st,a]:
1372 _vf.write(" %-15s %s\n" % (a,m))
1374 for a,p,m in actlist:
1375 if action.has_key((st,a)):
1376 if p is not actionp[st,a]:
1377 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
1379 # Construct the goto table for this state
1385 if Nonterminals.has_key(s):
1387 for n in nkeys.keys():
1389 j = _lr0_cidhash.get(id(g),-1)
1393 _vf.write(" %-15s shift and go to state %d\n" % (n,j))
# Conflict summary reporting (singular/plural message forms).
1397 if n_srconflict == 1:
1398 print "yacc: %d shift/reduce conflict" % n_srconflict
1399 if n_srconflict > 1:
1400 print "yacc: %d shift/reduce conflicts" % n_srconflict
1401 if n_rrconflict == 1:
1402 print "yacc: %d reduce/reduce conflict" % n_rrconflict
1403 if n_rrconflict > 1:
1404 print "yacc: %d reduce/reduce conflicts" % n_rrconflict
1407 # -----------------------------------------------------------------------------
1408 # ==== LALR(1) Parsing ====
1409 # **** UNFINISHED! 6/16/01
1410 # -----------------------------------------------------------------------------
1413 # Compute the lr1_closure of a set I. I is a list of tuples (p,a) where
1414 # p is a LR0 item and a is a terminal
1419 global _lr1_add_count
1425 # Loop over items (p,a) in I.
1429 # p = [ A -> alpha . B beta]
1431 # For each production B -> gamma
1432 for B in p.lr1_after:
1433 f = tuple(p.lr1_beta + (a,))
1435 # For each terminal b in first(Beta a)
1437 # Check if (B -> . gamma, b) is in J
1438 # Only way this can happen is if the add count mismatches
1440 if pn.lr_added.get(b,0) == _lr1_add_count: continue
1441 pn.lr_added[b] = _lr1_add_count
1447 def lalr_parse_table():
1449 # Compute some lr1 information about all of the productions
1452 after = p.prod[p.lr_index + 1]
1453 p.lr1_after = Prodnames[after]
1454 p.lr1_beta = p.prod[p.lr_index + 2:]
1460 # Compute the LR(0) items
1464 CK.append(lr0_kernel(I))
1468 # -----------------------------------------------------------------------------
1469 # ==== LR Utility functions ====
1470 # -----------------------------------------------------------------------------
1472 # -----------------------------------------------------------------------------
1473 # _lr_write_tables()
1475 # This function writes the LR parsing tables to a file
1476 # -----------------------------------------------------------------------------
1478 def lr_write_tables(modulename=tab_module):
1479 filename = modulename + ".py"
1481 f = open(filename,"w")
1485 # This file is automatically generated. Do not edit.
1490 """ % (filename, repr(_lr_method), repr(Signature.digest())))
1492 # Change smaller to 0 to go back to original tables
1495 # Factor out names to try and make smaller
1499 for k,v in _lr_action.items():
1507 f.write("\n_lr_action_items = {")
1508 for k,v in items.items():
1509 f.write("%r:([" % k)
1521 for _k, _v in _lr_action_items.items():
1522 for _x,_y in zip(_v[0],_v[1]):
1523 _lr_action[(_x,_k)] = _y
1524 del _lr_action_items
1528 f.write("\n_lr_action = { ");
1529 for k,v in _lr_action.items():
1530 f.write("(%r,%r):%r," % (k[0],k[1],v))
1534 # Factor out names to try and make smaller
1537 for k,v in _lr_goto.items():
1545 f.write("\n_lr_goto_items = {")
1546 for k,v in items.items():
1547 f.write("%r:([" % k)
1559 for _k, _v in _lr_goto_items.items():
1560 for _x,_y in zip(_v[0],_v[1]):
1561 _lr_goto[(_x,_k)] = _y
1565 f.write("\n_lr_goto = { ");
1566 for k,v in _lr_goto.items():
1567 f.write("(%r,%r):%r," % (k[0],k[1],v))
1570 # Write production table
1571 f.write("_lr_productions = [\n")
1572 for p in Productions:
1575 f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
1577 f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
1584 print "Unable to create '%s'" % filename
1588 def lr_read_tables(module=tab_module,optimize=0):
1589 global _lr_action, _lr_goto, _lr_productions, _lr_method
1591 exec "import %s as parsetab" % module
1593 if (optimize) or (Signature.digest() == parsetab._lr_signature):
1594 _lr_action = parsetab._lr_action
1595 _lr_goto = parsetab._lr_goto
1596 _lr_productions = parsetab._lr_productions
1597 _lr_method = parsetab._lr_method
1602 except (ImportError,AttributeError):
1605 # -----------------------------------------------------------------------------
1608 # Build the parser module
1609 # -----------------------------------------------------------------------------
1611 def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
1619 # Add starting symbol to signature
1621 Signature.update(start)
1623 # Try to figure out what module we are working with
1625 # User supplied a module object.
1626 if not isinstance(module, types.ModuleType):
1627 raise ValueError,"Expected a module"
1629 ldict = module.__dict__
1632 # No module given. We might be able to get information from the caller.
1633 # Throw an exception and unwind the traceback to get the globals
1637 except RuntimeError:
1638 e,b,t = sys.exc_info()
1640 f = f.f_back # Walk out to our calling function
1641 ldict = f.f_globals # Grab its globals dictionary
1643 # If running in optimized mode. We're going to
1645 if (optimize and lr_read_tables(tabmodule,1)):
1648 for p in _lr_productions:
1650 Productions.append(None)
1652 m = MiniProduction()
1658 m.func = ldict[p[2]]
1659 Productions.append(m)
1662 # Get the tokens map
1663 tokens = ldict.get("tokens",None)
1666 raise YaccError,"module does not define a list 'tokens'"
1667 if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
1668 raise YaccError,"tokens must be a list or tuple."
1670 # Check to see if a requires dictionary is defined.
1671 requires = ldict.get("require",None)
1673 if not (isinstance(requires,types.DictType)):
1674 raise YaccError,"require must be a dictionary."
1676 for r,v in requires.items():
1678 if not (isinstance(v,types.ListType)):
1680 v1 = [x.split(".") for x in v]
1682 except StandardError:
1683 print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
1686 # Build the dictionary of terminals. We record a 0 in the
1687 # dictionary to track whether or not a terminal is actually
1688 # used in the grammar
1690 if 'error' in tokens:
1691 print "yacc: Illegal token 'error'. Is a reserved word."
1692 raise YaccError,"Illegal token name"
1695 if Terminals.has_key(n):
1696 print "yacc: Warning. Token '%s' multiply defined." % n
1699 Terminals['error'] = [ ]
1701 # Get the precedence map (if any)
1702 prec = ldict.get("precedence",None)
1704 if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
1705 raise YaccError,"precedence must be a list or tuple."
1706 add_precedence(prec)
1707 Signature.update(repr(prec))
1710 if not Precedence.has_key(n):
1711 Precedence[n] = ('right',0) # Default, right associative, 0 precedence
1713 # Look for error handler
1714 ef = ldict.get('p_error',None)
1716 if not isinstance(ef,types.FunctionType):
1717 raise YaccError,"'p_error' defined, but is not a function."
1718 eline = ef.func_code.co_firstlineno
1719 efile = ef.func_code.co_filename
1722 if (ef.func_code.co_argcount != 1):
1723 raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
1727 print "yacc: Warning. no p_error() function is defined."
1729 # Get the list of built-in functions with p_ prefix
1730 symbols = [ldict[f] for f in ldict.keys()
1731 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
1732 and ldict[f].__name__ != 'p_error')]
1734 # Check for non-empty symbols
1735 if len(symbols) == 0:
1736 raise YaccError,"no rules of the form p_rulename are defined."
1738 # Sort the symbols by line number
1739 symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
1741 # Add all of the symbols to the grammar
1743 if (add_function(f)) < 0:
1746 files[f.func_code.co_filename] = None
1748 # Make a signature of the docstrings
1751 Signature.update(f.__doc__)
1756 raise YaccError,"Unable to construct parser."
1758 if not lr_read_tables(tabmodule):
1761 for filename in files.keys():
1762 if not validate_file(filename):
1765 # Validate dictionary
1766 validate_dict(ldict)
1768 if start and not Prodnames.has_key(start):
1769 raise YaccError,"Bad starting symbol '%s'" % start
1771 augment_grammar(start)
1772 error = verify_productions(cycle_check=check_recursion)
1773 otherfunc = [ldict[f] for f in ldict.keys()
1774 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')]
1777 raise YaccError,"Unable to construct parser."
1781 compute_follow(start)
1785 elif method == 'LALR1':
1789 raise YaccError, "Unknown parsing method '%s'" % method
1791 lr_write_tables(tabmodule)
1795 f = open(debug_file,"w")
1796 f.write(_vfc.getvalue())
1798 f.write(_vf.getvalue())
1801 print "yacc: can't create '%s'" % debug_file,e
1803 # Made it here. Create a parser object and set up its internal state.
1804 # Set global parse() method to bound method of parser object.
1807 p.productions = Productions
1808 p.errorfunc = Errorfunc
1809 p.action = _lr_action
1811 p.method = _lr_method
1812 p.require = Requires
1817 # Clean up all of the globals we created
1822 # yacc_cleanup function. Delete all of the global variables
1823 # used during table construction
1826 global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
1827 del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
1829 global Productions, Prodnames, Prodmap, Terminals
1830 global Nonterminals, First, Follow, Precedence, LRitems
1831 global Errorfunc, Signature, Requires
1833 del Productions, Prodnames, Prodmap, Terminals
1834 del Nonterminals, First, Follow, Precedence, LRitems
1835 del Errorfunc, Signature, Requires
1841 # Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args,**kwargs):
    """Module-level parse() placeholder.

    yacc() rebinds this name to the constructed parser object's bound
    parse method; invoking it before a parser has been built is an error.
    """
    raise YaccError("yacc: No parser built with yacc()")