
Source Code for Module PyFoam.ThirdParty.ply.yacc

#-----------------------------------------------------------------------------
# ply: yacc.py
#
# Author(s): David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2006, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
#
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."   Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

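# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the module): a minimal grammar built with
# this module.  The token names, rule names, and the 'calclex' lexer module
# are hypothetical; this only shows the intended calling convention.
#
#     import yacc
#     from calclex import tokens          # a ply.lex tokenizer is assumed
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expression_term(p):
#         'expression : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print "Syntax error at", p
#
#     parser = yacc.yacc()                # build the parsing tables
#     result = parser.parse("1 + 2 + 3")  # run the LR engine below
# ----------------------------------------------------------------------------
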
__version__ = "2.2"

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates
                               # a 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

import re, types, sys, cStringIO, md5, os.path

# Exception raised for yacc-related errors
class YaccError(Exception): pass

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):  return self.type
    def __repr__(self): return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.pbstack = []
        self.stack = stack

    def __getitem__(self,n):
        if type(n) == types.IntType:
            if n >= 0: return self.slice[n].value
            else: return self.stack[n].value
        else:
            return [s.value for s in self.slice[n.start:n.stop:n.step]]

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def pushback(self,n):
        if n <= 0:
            raise ValueError, "Expected a positive value"
        if n > (len(self.slice)-1):
            raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
        for i in range(0,n):
            self.pbstack.append(self.slice[-i-1])
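
# Illustrative sketch (not part of the module): inside a hypothetical grammar
# rule, the YaccProduction instance 'p' is indexed like the rule itself,
# with p[0] the left-hand side and p[1]... the right-hand-side symbols.
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]            # assigns .value of the LHS symbol
#         line = p.lineno(2)            # line number of the PLUS token
#         start, end = p.lexspan(1)     # positional range of 'expression'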

# The LR Parsing engine.  This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.

class Parser:
    def __init__(self,magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.

        if magic != "xyzzy":
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc   = None          # Error handling function
        self.action      = { }           # LR Action table
        self.goto        = { }           # LR goto table
        self.require     = { }           # Attribute require table
        self.method      = "Unknown LR"  # Table construction method used

    def errok(self):
        self.errorcount = 0

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto    = self.goto              # Local reference to goto table
        prod    = self.productions       # Local reference to production list
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            import lex
            lexer = lex.lexer

        pslice.lexer = lexer

        # If input was supplied, pass to lexer
        if input:
            lexer.input(input)

        # Tokenize function
        get_token = lexer.token

        statestack = [ ]                 # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                 # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack          # Put in the production
        errtoken   = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)

        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if debug > 1:
                print 'state', statestack[-1]
            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'
            if debug:
                errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()

            # Check the action table
            s = statestack[-1]
            ltype = lookahead.type
            t = actions.get((s,ltype),None)

            if debug > 1:
                print 'action', t
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    if ltype == '$end':
                        # Error, end of input
                        sys.stderr.write("yacc: Parse error. EOF\n")
                        return
                    statestack.append(t)
                    if debug > 1:
                        sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if self.errorcount > 0:
                        self.errorcount -= 1

                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None
                    if debug > 1:
                        sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        try:
                            sym.lineno = targ[1].lineno
                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
                            sym.lexpos = targ[1].lexpos
                            sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos)
                        except AttributeError:
                            sym.lineno = 0
                        del symstack[-plen:]
                        del statestack[-plen:]
                    else:
                        sym.lineno = 0
                        targ = [ sym ]
                    pslice.slice = targ
                    pslice.pbstack = []
                    # Call the grammar rule with our special slice object
                    p.func(pslice)

                    # If there was a pushback, put that on the stack
                    if pslice.pbstack:
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)
                        lookahead = None

                    symstack.append(sym)
                    statestack.append(goto[statestack[-1],pname])
                    continue

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:
                if debug:
                    sys.stderr.write(errorlead + "\n")
                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if not self.errorcount:
                    self.errorcount = error_count
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None              # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if not self.errorcount:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    self.errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()

                continue

            # Call an error function here
            raise RuntimeError, "yacc: internal parser error!!!\n"
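
# Illustrative sketch (not part of the module): a hypothetical p_error()
# handler using the special errok()/token() functions that parse() installs
# in the global namespace during error recovery.  The 'SEMI' token name is
# made up.
#
#     def p_error(tok):
#         if not tok:
#             print "Unexpected end of input"
#             return
#         print "Syntax error at '%s', discarding until ';'" % tok.value
#         while 1:
#             tok = token()             # read ahead on the raw token stream
#             if not tok or tok.type == 'SEMI': break
#         errok()                       # tell the parser recovery succeeded
#         return tok                    # becomes the new lookahead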

# -----------------------------------------------------------------------------
#                          === Parser Construction ===
#
# The following functions and variables are used to implement the yacc() function
# itself.  This is pretty hairy stuff involving lots of error checking,
# construction of LR items, kernels, and so forth.  Although a lot of
# this work is done using global variables, the resulting Parser object
# is completely self contained--meaning that it is safe to repeatedly
# call yacc() with different grammars in the same application.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# validate_file()
#
# This function checks to see if there are duplicated p_rulename() functions
# in the parser module file.  Without this function, it is really easy for
# users to make mistakes by cutting and pasting code fragments (and it's a real
# bugger to try and figure out why the resulting parser doesn't work).  Therefore,
# we just do a little regular expression pattern matching of def statements
# to try and detect duplicates.
# -----------------------------------------------------------------------------

def validate_file(filename):
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea. Assume it's okay.

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1                       # Oh well

    # Match def p_funcname(
    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    counthash = { }
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev))
                noerror = 0
        linen += 1
    return noerror

# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
def validate_dict(d):
    for n,v in d.items():
        if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue
        if n[0:2] == 't_': continue

        if n[0:2] == 'p_':
            sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n)
        if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
            try:
                doc = v.__doc__.split(" ")
                if doc[1] == ':':
                    sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n))
            except StandardError:
                pass

# -----------------------------------------------------------------------------
#                           === GRAMMAR FUNCTIONS ===
#
# The following global variables and functions are used to store, manipulate,
# and verify the grammar rules specified by the user.
# -----------------------------------------------------------------------------

# Initialize all of the global variables used during grammar construction
def initialize_vars():
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    Productions  = [None]  # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar

    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap      = { }     # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First        = { }     # A dictionary of precomputed FIRST(x) symbols

    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level)

    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc    = None    # User defined error handler

    Signature    = md5.new()   # Digital signature of the grammar rules, precedence
                               # and other information.  Used to determine when a
                               # parsing table needs to be regenerated.

    Requires     = { }     # Requires list

    # File objects used when creating the parser.out debugging file
    global _vf, _vfc
    _vf  = cStringIO.StringIO()
    _vfc = cStringIO.StringIO()

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# It has a few required attributes:
#
#       name     - Name of the production (nonterminal)
#       prod     - A list of symbols making up its production
#       number   - Production number.
#
# In addition, a few additional attributes are used to help with debugging or
# optimization of table generation.
#
#       file       - File where production action is defined.
#       lineno     - Line number where action is defined
#       func       - Action function
#       prec       - Precedence level
#       lr_next    - Next LR item. Example, if we are ' E -> E . PLUS E'
#                    then lr_next refers to 'E -> E PLUS . E'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
# -----------------------------------------------------------------------------

class Production:
    def __init__(self,**kw):
        for k,v in kw.items():
            setattr(self,k,v)
        self.lr_index = -1
        self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
        self.lr1_added = 0    # Flag indicating whether or not added to LR1
        self.usyms = [ ]
        self.lookaheads = { }
        self.lk_added = { }
        self.setnumbers = [ ]

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return str(self)

    # Compute lr_items from the production
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = Production()
        p.name = self.name
        p.prod = list(self.prod)
        p.number = self.number
        p.lr_index = n
        p.lookaheads = { }
        p.setnumbers = self.setnumbers
        p.prod.insert(n,".")
        p.prod = tuple(p.prod)
        p.len = len(p.prod)
        p.usyms = self.usyms

        # Precompute list of productions immediately following
        try:
            p.lrafter = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError),e:
            p.lrafter = []
        try:
            p.lrbefore = p.prod[n-1]
        except IndexError:
            p.lrbefore = None

        return p

class MiniProduction:
    pass

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# add_production()
#
# Given an action function, this function assembles a production rule.
# The production rule is assumed to be found in the function's docstring.
# This rule has the general syntax:
#
#        name1 ::= production1
#                  | production2
#                  | production3
#                  ...
#                  | productionn
#        name2 ::= production1
#                  | production2
#                  ...
# -----------------------------------------------------------------------------

def add_production(f,file,line,prodname,syms):

    if Terminals.has_key(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname))
        return -1
    if prodname == 'error':
        sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname))
        return -1

    if not _is_identifier.match(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))
        return -1

    for x in range(len(syms)):
        s = syms[x]
        if s[0] in "'\"":
            try:
                c = eval(s)
                if (len(c) > 1):
                    sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname))
                    return -1
                if not Terminals.has_key(c):
                    Terminals[c] = []
                syms[x] = c
                continue
            except SyntaxError:
                pass
        if not _is_identifier.match(s) and s != '%prec':
            sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))
            return -1

    # See if the rule is already in the rulemap
    map = "%s -> %s" % (prodname,syms)
    if Prodmap.has_key(map):
        m = Prodmap[map]
        sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m))
        sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line))
        return -1

    p = Production()
    p.name = prodname
    p.prod = syms
    p.file = file
    p.line = line
    p.func = f
    p.number = len(Productions)


    Productions.append(p)
    Prodmap[map] = p
    if not Nonterminals.has_key(prodname):
        Nonterminals[prodname] = [ ]

    # Add all terminals to Terminals
    i = 0
    while i < len(p.prod):
        t = p.prod[i]
        if t == '%prec':
            try:
                precname = p.prod[i+1]
            except IndexError:
                sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line))
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname))
                return -1
            else:
                p.prec = prec
            del p.prod[i]
            del p.prod[i]
            continue

        if Terminals.has_key(t):
            Terminals[t].append(p.number)
            # Is a terminal.  We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
        else:
            if not Nonterminals.has_key(t):
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
        i += 1

    if not hasattr(p,"prec"):
        p.prec = ('right',0)

    # Set final length of productions
    p.len  = len(p.prod)
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production
    p.usyms = [ ]
    for s in p.prod:
        if s not in p.usyms:
            p.usyms.append(s)

    # Add to the global productions list
    try:
        Prodnames[p.name].append(p)
    except KeyError:
        Prodnames[p.name] = [ p ]
    return 0

# Given a raw rule function, this function rips out its doc string
# and adds rules to the grammar

def add_function(f):
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename
    error = 0

    if isinstance(f,types.MethodType):
        reqdargs = 2
    else:
        reqdargs = 1

    if f.func_code.co_argcount > reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__))
        return -1

    if f.func_code.co_argcount < reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__))
        return -1

    if f.__doc__:
        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()
        lastp = None
        dline = line
        for ps in pstrings:
            dline += 1
            p = ps.split()
            if not p: continue
            try:
                if p[0] == '|':
                    # This is a continuation of a previous rule
                    if not lastp:
                        sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline))
                        return -1
                    prodname = lastp
                    if len(p) > 1:
                        syms = p[1:]
                    else:
                        syms = [ ]
                else:
                    prodname = p[0]
                    lastp = prodname
                    assign = p[1]
                    if len(p) > 2:
                        syms = p[2:]
                    else:
                        syms = [ ]
                    if assign != ':' and assign != '::=':
                        sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))
                        return -1

                e = add_production(f,file,dline,prodname,syms)
                error += e

            except StandardError:
                sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
                error -= 1
    else:
        sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__))
    return error
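
# Illustrative sketch (not part of the module): the docstring format that
# add_function()/add_production() consume.  Token names are hypothetical;
# quoted single characters become literal tokens and %prec overrides the
# default precedence.
#
#     def p_expression(p):
#         '''expression : expression '+' expression
#                       | expression '-' expression
#                       | MINUS expression %prec UMINUS
#                       | NUMBER'''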


# Cycle checking code (Michael Dyck)

def compute_reachable():
    '''
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    '''
    Reachable = { }
    for s in Terminals.keys() + Nonterminals.keys():
        Reachable[s] = 0

    mark_reachable_from( Productions[0].prod[0], Reachable )

    for s in Nonterminals.keys():
        if not Reachable[s]:
            pass
            # sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)

def mark_reachable_from(s, Reachable):
    '''
    Mark all symbols that are reachable from symbol s.
    '''
    if Reachable[s]:
        # We've already reached symbol s.
        return
    Reachable[s] = 1
    for p in Prodnames.get(s,[]):
        for r in p.prod:
            mark_reachable_from(r, Reachable)

# -----------------------------------------------------------------------------
# compute_terminates()
#
# This function looks at the various parsing rules and tries to detect
# infinite recursion cycles (grammar rules where there is no possible way
# to derive a string of only terminals).
# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Raise an error for any symbols that don't terminate.
    '''
    Terminates = {}

    # Terminals:
    for t in Terminals.keys():
        Terminates[t] = 1

    Terminates['$end'] = 1

    # Nonterminals:

    # Initialize to false:
    for n in Nonterminals.keys():
        Terminates[n] = 0

    # Then propagate termination until no change:
    while 1:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
            for p in pl:
                # Production p terminates iff all of its rhs symbols terminate.
                for s in p.prod:
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.
                        p_terminates = 0
                        break
                else:
                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.
                    p_terminates = 1

                if p_terminates:
                    # symbol n terminates!
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # Don't need to consider any more productions for this n.
                    break

        if not some_change:
            break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if not terminates:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                pass
            else:
                sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s)
                some_error = 1

    return some_error
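
# Illustrative sketch (not part of the module): a grammar that
# compute_terminates() rejects, since 'a' can never derive a string of only
# terminals (rule and token names are hypothetical):
#
#     def p_a(p):
#         'a : a PLUS a'     # no base case -> "Infinite recursion detected"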

# -----------------------------------------------------------------------------
# verify_productions()
#
# This function examines all of the supplied rules to see if they seem valid.
# -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    error = 0
    for p in Productions:
        if not p: continue

        for s in p.prod:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s))
                error = 1
                continue

    unused_tok = 0
    # Now verify all of the tokens
    if yaccdebug:
        _vf.write("Unused terminals:\n\n")
    for s,v in Terminals.items():
        if s != 'error' and not v:
            # sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s)
            if yaccdebug: _vf.write("   %s\n" % s)
            unused_tok += 1

    # Print out all of the productions
    if yaccdebug:
        _vf.write("\nGrammar\n\n")
        for i in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))

    unused_prod = 0
    # Verify the use of all productions
    for s,v in Nonterminals.items():
        if not v:
            p = Prodnames[s][0]
            # sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s))
            unused_prod += 1


    if unused_tok == 1:
        sys.stderr.write("yacc: Warning. There is 1 unused token.\n")
    if unused_tok > 1:
        sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok)

    if unused_prod == 1:
        pass
        # sys.stderr.write("yacc: Warning. There is 1 unused rule.\n")
    if unused_prod > 1:
        pass
        # sys.stderr.write("yacc: Warning. There are %d unused rules.\n" % unused_prod)

    if yaccdebug:
        _vf.write("\nTerminals, with rules where they appear\n\n")
        ks = Terminals.keys()
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
        _vf.write("\nNonterminals, with rules where they appear\n\n")
        ks = Nonterminals.keys()
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))

    if (cycle_check):
        compute_reachable()
        error += compute_terminates()
        # error += check_cycles()
    return error

# -----------------------------------------------------------------------------
# build_lritems()
#
# This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways:  First, they are uniquely
# numbered and placed in the list _lritems.  Second, a linked list of LR items
# is built for each production.  For example:
#
#   E -> E PLUS E
#
# Creates the list
#
#  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
# -----------------------------------------------------------------------------

def build_lritems():
    for p in Productions:
        lastlri = p
        lri = p.lr_item(0)
        i = 0
        while 1:
            lri = p.lr_item(i)
            lastlri.lr_next = lri
            if not lri: break
            lri.lr_num = len(LRitems)
            LRitems.append(lri)
            lastlri = lri
            i += 1

# In order for the rest of the parser generator to work, we need to
# guarantee that no more lritems are generated.  Therefore, we nuke
# the p.lr_item method.  (Only used in debugging)
# Production.lr_item = None

# -----------------------------------------------------------------------------
# add_precedence()
#
# Given a list of precedence rules, add to the precedence table.
# -----------------------------------------------------------------------------

def add_precedence(plist):
    plevel = 0
    error = 0
    for p in plist:
        plevel += 1
        try:
            prec = p[0]
            terms = p[1:]
            if prec != 'left' and prec != 'right' and prec != 'nonassoc':
                sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec)
                return -1
            for t in terms:
                if Precedence.has_key(t):
                    sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t)
                    error += 1
                    continue
                Precedence[t] = (prec,plevel)
        except:
            sys.stderr.write("yacc: Invalid precedence table.\n")
            error += 1

    return error
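
# Illustrative sketch (not part of the module): the shape of the precedence
# list that add_precedence() consumes, lowest precedence first.  Token names
# are hypothetical.
#
#     precedence = (
#         ('left',  'PLUS', 'MINUS'),      # level 1
#         ('left',  'TIMES', 'DIVIDE'),    # level 2
#         ('right', 'UMINUS'),             # level 3, binds tightest
#     )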

# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar.  This is just a rule S' -> start where start
# is the starting symbol.
# -----------------------------------------------------------------------------

def augment_grammar(start=None):
    if not start:
        start = Productions[1].name
    Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
    Productions[0].usyms = [ start ]
    Nonterminals[start].append(0)


# -------------------------------------------------------------------------
# first()
#
# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
#
# During execution of compute_first1, the result may be incomplete.
# Afterward (e.g., when called from compute_follow()), it will be complete.
# -------------------------------------------------------------------------
def first(beta):

    # We are computing First(x1,x2,x3,...,xn)
    result = [ ]
    for x in beta:
        x_produces_empty = 0

        # Add all the non-<empty> symbols of First[x] to the result.
        for f in First[x]:
            if f == '<empty>':
                x_produces_empty = 1
            else:
                if f not in result: result.append(f)

        if x_produces_empty:
            # We have to consider the next x in beta,
            # i.e. stay in the loop.
            pass
        else:
            # We don't have to consider any further symbols in beta.
            break
    else:
        # There was no 'break' from the loop,
        # so x_produces_empty was true for all x in beta,
        # so beta produces empty as well.
        result.append('<empty>')

    return result
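
# Illustrative sketch (not part of the module): a worked FIRST computation
# for the hypothetical grammar
#
#     E -> T E'       E' -> PLUS T E' | <empty>       T -> NUMBER
#
# gives First[T] = ['NUMBER'], First[E'] = ['PLUS','<empty>'] and
# First[E] = ['NUMBER'], so first(('E',"E'")) returns ['NUMBER'] --
# E cannot derive <empty>, so the loop breaks before consulting E'.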


# FOLLOW(x)
# Given a non-terminal.  This function computes the set of all symbols
# that might follow it.  Dragon book, p. 189.

def compute_follow(start=None):
    # Add '$end' to the follow list of the start symbol
    for k in Nonterminals.keys():
        Follow[k] = [ ]

    if not start:
        start = Productions[1].name

    Follow[start] = [ '$end' ]

    while 1:
        didadd = 0
        for p in Productions[1:]:
            # Here is the production set
            for i in range(len(p.prod)):
                B = p.prod[i]
                if Nonterminals.has_key(B):
                    # Okay. We got a non-terminal in a production
                    fst = first(p.prod[i+1:])
                    hasempty = 0
                    for f in fst:
                        if f != '<empty>' and f not in Follow[B]:
                            Follow[B].append(f)
                            didadd = 1
                        if f == '<empty>':
                            hasempty = 1
                    if hasempty or i == (len(p.prod)-1):
                        # Add elements of follow(a) to follow(b)
                        for f in Follow[p.name]:
                            if f not in Follow[B]:
                                Follow[B].append(f)
                                didadd = 1
        if not didadd: break

    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
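
# Illustrative sketch (not part of the module): for the same hypothetical
# grammar as above, with start symbol E,
#
#     Follow[E]  = ['$end']            (E is the start symbol)
#     Follow[E'] = ['$end']            (E' only appears at the end of rules)
#     Follow[T]  = ['PLUS', '$end']    (T is followed by E', whose FIRST set
#                                       contains PLUS and which may derive
#                                       <empty>, pulling in Follow of the LHS)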

# -------------------------------------------------------------------------
# compute_first1()
#
# Compute the value of FIRST1(X) for all symbols
# -------------------------------------------------------------------------
def compute_first1():

    # Terminals:
    for t in Terminals.keys():
        First[t] = [t]

    First['$end'] = ['$end']
    First['#'] = ['#']       # what's this for?

    # Nonterminals:

    # Initialize to the empty set:
    for n in Nonterminals.keys():
        First[n] = []

    # Then propagate symbols until no change:
    while 1:
        some_change = 0
        for n in Nonterminals.keys():
            for p in Prodnames[n]:
                for f in first(p.prod):
                    if f not in First[n]:
                        First[n].append( f )
                        some_change = 1
        if not some_change:
            break

    if 0 and yaccdebug:
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                (k, " ".join([str(s) for s in First[k]])))

# -----------------------------------------------------------------------------
#                           === SLR Generation ===
#
# The following functions are used to construct SLR (Simple LR) parsing tables
# as described on p.221-229 of the dragon book.
# -----------------------------------------------------------------------------

# Global variables for the LR parsing engine
def lr_init_vars():
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache, _lr0_cidhash

    _lr_action     = { }         # Action table
    _lr_goto       = { }         # Goto table
    _lr_method     = "Unknown"   # LR method used
    _lr_goto_cache = { }
    _lr0_cidhash   = { }


# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
# prodlist is a list of productions.

_add_count = 0       # Counter used to detect cycles

def lr0_closure(I):
    global _add_count

    _add_count += 1
    prodlist = Productions

    # Add everything in I to J
    J = I[:]
    didadd = 1
    while didadd:
        didadd = 0
        for j in J:
            for x in j.lrafter:
                if x.lr0_added == _add_count: continue
                # Add B --> .G to J
                J.append(x.lr_next)
                x.lr0_added = _add_count
                didadd = 1

    return J

# Compute the LR(0) goto function goto(I,X) where I is a set
# of LR(0) items and X is a grammar symbol.  This function is written
# in a way that guarantees uniqueness of the generated goto sets
# (i.e. the same goto set will never be returned as two different Python
# objects).  With uniqueness, we can later do fast set comparisons using
# id(obj) instead of element-wise comparison.

def lr0_goto(I,x):
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result

    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    g = s.get('$end',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$end'] = g
        else:
            s['$end'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g

_lr0_cidhash = { }

# Compute the LR(0) sets of item function
def lr0_items():

    C = [ lr0_closure([Productions[0].lr_next]) ]
    i = 0
    for I in C:
        _lr0_cidhash[id(I)] = i
        i += 1

    # Loop over the items in C and each grammar symbol
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = { }
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms.keys():
            g = lr0_goto(I,x)
            if not g: continue
            if _lr0_cidhash.has_key(id(g)): continue
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C

# -----------------------------------------------------------------------------
#                            ==== LALR(1) Parsing ====
#
# LALR(1) parsing is almost exactly the same as SLR except that instead of
# relying upon Follow() sets when performing reductions, a more selective
# lookahead set that incorporates the state of the LR(0) machine is utilized.
# Thus, we mainly just have to focus on calculating the lookahead sets.
#
# The method used here is due to DeRemer and Pennello (1982).
#
# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
#     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
#     Vol. 4, No. 4, Oct. 1982, pp. 615-649
#
# Further details can also be found in:
#
#  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
#      McGraw-Hill Book Company, (1985).
#
# Note:  This implementation is a complete replacement of the LALR(1)
#        implementation in PLY-1.x releases.  That version was based on
#        a less efficient algorithm and it had bugs in its implementation.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# compute_nullable_nonterminals()
#
# Creates a dictionary containing all of the non-terminals that might produce
# an empty production.
# -----------------------------------------------------------------------------

def compute_nullable_nonterminals():
    nullable = {}
    num_nullable = 0
    while 1:
        for p in Productions[1:]:
            if p.len == 0:
                nullable[p.name] = 1
                continue
            for t in p.prod:
                if not nullable.has_key(t): break
            else:
                nullable[p.name] = 1
        if len(nullable) == num_nullable: break
        num_nullable = len(nullable)
    return nullable
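
# Illustrative sketch (not part of the module): for the hypothetical rules
#
#     opt_args : args
#              |                  (an empty production, p.len == 0)
#     args     : args COMMA arg
#              | arg
#
# compute_nullable_nonterminals() returns {'opt_args': 1}; 'args' is not
# nullable because each of its productions contains a terminal or another
# non-nullable symbol.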

# -----------------------------------------------------------------------------
# find_nonterminal_trans(C)
#
# Given a set of LR(0) items, this function finds all of the non-terminal
# transitions.  These are transitions in which a dot appears immediately before
# a non-terminal.  Returns a list of tuples of the form (state,N) where state
# is the state number and N is the nonterminal symbol.
#
# The input C is the set of LR(0) items.
# -----------------------------------------------------------------------------

def find_nonterminal_transitions(C):
    trans = []
    for state in range(len(C)):
        for p in C[state]:
            if p.lr_index < p.len - 1:
                t = (state,p.prod[p.lr_index+1])
                if Nonterminals.has_key(t[1]):
                    if t not in trans: trans.append(t)
        state = state + 1
    return trans

# -----------------------------------------------------------------------------
# dr_relation()
#
# Computes the DR(p,A) relationships for non-terminal transitions.  The input
# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
#
# Returns a list of terminals.
# -----------------------------------------------------------------------------

def dr_relation(C,trans,nullable):
    dr_set = { }
    state,N = trans
    terms = []

    g = lr0_goto(C[state],N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index+1]
            if Terminals.has_key(a):
                if a not in terms: terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == Productions[0].prod[0]:
        terms.append('$end')

    return terms

# -----------------------------------------------------------------------------
# reads_relation()
#
# Computes the READS() relation (p,A) READS (t,C).
# -----------------------------------------------------------------------------

def reads_relation(C, trans, empty):
    # Look for empty transitions
    rel = []
    state, N = trans

    g = lr0_goto(C[state],N)
    j = _lr0_cidhash.get(id(g),-1)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index + 1]
            if empty.has_key(a):
                rel.append((j,a))

    return rel

# -----------------------------------------------------------------------------
# compute_lookback_includes()
#
# Determines the lookback and includes relations
#
# LOOKBACK:
#
# This relation is determined by running the LR(0) state machine forward.
# For example, starting with a production "N : . A B C", we run it forward
# to obtain "N : A B C ."  We then build a relationship between this final
# state and the starting state.  These relationships are stored in a dictionary
# lookdict.
#
# INCLUDES:
#
# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
#
# This relation is used to determine non-terminal transitions that occur
# inside of other non-terminal transition states.  (p,A) INCLUDES (p', B)
# if the following holds:
#
#       B -> LAT, where T -> epsilon and p' -L-> p
#
# L is essentially a prefix (which may be empty), T is a suffix that must be
# able to derive an empty string.  State p' must lead to state p with the string L.
#
# -----------------------------------------------------------------------------

def compute_lookback_includes(C,trans,nullable):

    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions
    dtrans = {}
    for t in trans:
        dtrans[t] = 1

    # Loop over all transitions and compute lookbacks and includes
    for state,N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N: continue

            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                lr_index = lr_index + 1
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if dtrans.has_key((j,t)):
                    # Yes.  Okay, there is some chance that this is an includes relation
                    # the only way to know for certain is whether the rest of the
                    # production derives empty

                    li = lr_index + 1
                    while li < p.len:
                        if Terminals.has_key(p.prod[li]): break      # No forget it
                        if not nullable.has_key(p.prod[li]): break
                        li = li + 1
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        includes.append((j,t))

                g = lr0_goto(C[j],t)               # Go to next set
                j = _lr0_cidhash.get(id(g),-1)     # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                if r.name != p.name: continue
                if r.len != p.len: continue
                i = 0
                # This loop is comparing a production ". A B C" with "A B C ."
                while i < r.lr_index:
                    if r.prod[i] != p.prod[i+1]: break
                    i = i + 1
                else:
                    lookb.append((j,r))
        for i in includes:
            if not includedict.has_key(i): includedict[i] = []
            includedict[i].append((state,N))
        lookdict[(state,N)] = lookb

    return lookdict,includedict

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = sys.maxint
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = sys.maxint
            F[stack[-1]] = F[x]
            element = stack.pop()
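
# Illustrative sketch (not part of the module): digraph() evaluated on a toy
# relation.  With X = ['a','b'], R relating 'a' to 'b', and FP giving the
# initial sets {'a': [1], 'b': [2]}:
#
#     F = digraph(['a','b'],
#                 lambda x: {'a': ['b']}.get(x, []),
#                 lambda x: {'a': [1], 'b': [2]}[x])
#     # F == {'a': [1, 2], 'b': [2]}  -- 'a' absorbs F('b') because a R b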

# -----------------------------------------------------------------------------
# compute_read_sets()
#
# Given a set of LR(0) items, this function computes the read sets.
#
# Inputs:  C        = Set of LR(0) items
#          ntrans   = Set of nonterminal transitions
#          nullable = Set of empty transitions
#
# Returns a set containing the read sets
# -----------------------------------------------------------------------------

def compute_read_sets(C, ntrans, nullable):
    FP = lambda x: dr_relation(C,x,nullable)
    R  = lambda x: reads_relation(C,x,nullable)
    F = digraph(ntrans,R,FP)
    return F

# -----------------------------------------------------------------------------
# compute_follow_sets()
#
# Given a set of LR(0) items, a set of non-terminal transitions, a readset,
# and an include set, this function computes the follow sets
#
#     Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
#
# Inputs:
#            ntrans   = Set of nonterminal transitions
#            readsets = Readset (previously computed)
#            inclsets = Include sets (previously computed)
#
# Returns a set containing the follow sets
# -----------------------------------------------------------------------------

def compute_follow_sets(ntrans,readsets,inclsets):
    FP = lambda x: readsets[x]
    R  = lambda x: inclsets.get(x,[])
    F = digraph(ntrans,R,FP)
    return F

# -----------------------------------------------------------------------------
# add_lookaheads()
#
# Attaches the lookahead symbols to grammar rules.
#
# Inputs:    lookbacks   -  Set of lookback relations
#            followset   -  Computed follow set
#
# This function directly attaches the lookaheads to productions contained
# in the lookbacks set
# -----------------------------------------------------------------------------

def add_lookaheads(lookbacks,followset):
    for trans,lb in lookbacks.items():
        # Loop over productions in lookback
        for state,p in lb:
            if not p.lookaheads.has_key(state):
                p.lookaheads[state] = []
            f = followset.get(trans,[])
            for a in f:
                if a not in p.lookaheads[state]: p.lookaheads[state].append(a)

# -----------------------------------------------------------------------------
# add_lalr_lookaheads()
#
# This function does all of the work of adding lookahead information for use
# with LALR parsing
# -----------------------------------------------------------------------------

def add_lalr_lookaheads(C):
    # Determine all of the nullable nonterminals
    nullable = compute_nullable_nonterminals()

    # Find all non-terminal transitions
    trans = find_nonterminal_transitions(C)

    # Compute read sets
    readsets = compute_read_sets(C,trans,nullable)

    # Compute lookback/includes relations
    lookd, included = compute_lookback_includes(C,trans,nullable)

    # Compute LALR FOLLOW sets
    followsets = compute_follow_sets(trans,readsets,included)

    # Add all of the lookaheads
    add_lookaheads(lookd,followsets)

# -----------------------------------------------------------------------------
# lr_parse_table()
#
# This function constructs the parse tables for SLR or LALR
# -----------------------------------------------------------------------------
def lr_parse_table(method):
    global _lr_method
    goto = _lr_goto           # Goto array
    action = _lr_action       # Action array
    actionp = { }             # Action production array (temporary)

    _lr_method = method

    n_srconflict = 0
    n_rrconflict = 0

    if yaccdebug:
        sys.stderr.write("yacc: Generating %s parsing table...\n" % method)
        _vf.write("\n\nParsing method: %s\n\n" % method)

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = lr0_items()

    if method == 'LALR':
        add_lalr_lookaheads(C)

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]              # List of actions

        if yaccdebug:
            _vf.write("\nstate %d\n\n" % st)
            for p in I:
                _vf.write("    (%d) %s\n" % (p.number, str(p)))
            _vf.write("\n")

        for p in I:
            try:
                if p.prod[-1] == ".":
                    if p.name == "S'":
                        # Start symbol. Accept!
                        action[st,"$end"] = 0
                        actionp[st,"$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[actionp[st,a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
                                            n_srconflict += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                            n_srconflict += 1
                                elif r < 0:
                                    # Reduce/reduce conflict.  In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                    # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
                                    n_rrconflict += 1
                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
                                    _vf.write("  ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
                                else:
                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
                            else:
                                action[st,a] = -p.number
                                actionp[st,a] = p
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if Terminals.has_key(a):
                        g = lr0_goto(I,a)
                        j = _lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        sys.stderr.write("Shift/shift conflict in state %d\n" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[actionp[st,a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
                                        # We decide to shift here... highest precedence to shift
                                        action[st,a] = j
                                        actionp[st,a] = p
                                        if not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)

                                else:
                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
                            else:
                                action[st,a] = j
                                actionp[st,a] = p

            except StandardError,e:
                raise YaccError, "Hosed in lr_parse_table: %s" % e

        # Print the actions associated with each terminal
        if yaccdebug:
            _actprint = { }
            for a,p,m in actlist:
                if action.has_key((st,a)):
                    if p is actionp[st,a]:
                        _vf.write("    %-15s %s\n" % (a,m))
                        _actprint[(a,m)] = 1
            _vf.write("\n")
            for a,p,m in actlist:
                if action.has_key((st,a)):
                    if p is not actionp[st,a]:
                        if not _actprint.has_key((a,m)):
                            _vf.write("  ! %-15s [ %s ]\n" % (a,m))
                            _actprint[(a,m)] = 1

        # Construct the goto table for this state
        if yaccdebug:
            _vf.write("\n")
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if Nonterminals.has_key(s):
                    nkeys[s] = None
        for n in nkeys.keys():
            g = lr0_goto(I,n)
            j = _lr0_cidhash.get(id(g),-1)
            if j >= 0:
                goto[st,n] = j
                if yaccdebug:
                    _vf.write("    %-30s shift and go to state %d\n" % (n,j))

        st += 1

    if yaccdebug:
        if n_srconflict == 1:
            sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
        if n_srconflict > 1:
            sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
        if n_rrconflict == 1:
            sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
        if n_rrconflict > 1:
            sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)

# -----------------------------------------------------------------------------
#                        ==== LR Utility functions ====
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# lr_write_tables()
#
# This function writes the LR parsing tables to a file
# -----------------------------------------------------------------------------

def lr_write_tables(modulename=tab_module,outputdir=''):
    filename = os.path.join(outputdir,modulename) + ".py"
    try:
        f = open(filename,"w")

        f.write("""
# %s
# This file is automatically generated. Do not edit.

_lr_method = %s

_lr_signature = %s
""" % (filename, repr(_lr_method), repr(Signature.digest())))

        # Change smaller to 0 to go back to the original (unfactored) tables
        smaller = 1

        # Factor out names to try to make the file smaller
        if smaller:
            items = { }

            for k,v in _lr_action.items():
                i = items.get(k[1])
                if not i:
                    i = ([],[])
                    items[k[1]] = i
                i[0].append(k[0])
                i[1].append(v)

            f.write("\n_lr_action_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       _lr_action[(_x,_k)] = _y
del _lr_action_items
""")

        else:
            f.write("\n_lr_action = { ")
            for k,v in _lr_action.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n")

        if smaller:
            # Factor out names to try to make the file smaller
            items = { }

            for k,v in _lr_goto.items():
                i = items.get(k[1])
                if not i:
                    i = ([],[])
                    items[k[1]] = i
                i[0].append(k[0])
                i[1].append(v)

            f.write("\n_lr_goto_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       _lr_goto[(_x,_k)] = _y
del _lr_goto_items
""")
        else:
            f.write("\n_lr_goto = { ")
            for k,v in _lr_goto.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n")

        # Write production table
        f.write("_lr_productions = [\n")
        for p in Productions:
            if p:
                if (p.func):
                    f.write("  (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
                else:
                    f.write("  (%r,%d,None,None,None),\n" % (p.name, p.len))
            else:
                f.write("  None,\n")
        f.write("]\n")

        f.close()

    except IOError,e:
        print "Unable to create '%s'" % filename
        print e
        return
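
# -----------------------------------------------------------------------------
# A sketch (with made-up states and action codes) of the factored format that
# lr_write_tables emits.  For each grammar symbol the generated file stores
# parallel lists of states and actions, then rebuilds the (state, symbol)
# dictionary on import; factoring out the symbol names is purely a size
# optimization of the written file.
# -----------------------------------------------------------------------------

def _example_parsetab_format():
    # A one-symbol fragment as it would appear in the generated file.
    _lr_action_items = { 'NUMBER' : ([0,2],[3,3]) }
    # The rebuild loop that lr_write_tables writes into the generated file:
    _lr_action = { }
    for _k, _v in _lr_action_items.items():
        for _x, _y in zip(_v[0], _v[1]):
            _lr_action[(_x,_k)] = _y
    return _lr_action       # {(0,'NUMBER'): 3, (2,'NUMBER'): 3}
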
def lr_read_tables(module=tab_module,optimize=0):
    global _lr_action, _lr_goto, _lr_productions, _lr_method
    try:
        exec "import %s as parsetab" % module

        if (optimize) or (Signature.digest() == parsetab._lr_signature):
            _lr_action = parsetab._lr_action
            _lr_goto = parsetab._lr_goto
            _lr_productions = parsetab._lr_productions
            _lr_method = parsetab._lr_method
            return 1
        else:
            return 0

    except (ImportError,AttributeError):
        return 0
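
# A small sketch of the caching contract implemented above: previously written
# tables are reused only when optimize is set or the stored grammar signature
# matches the current digest.  The argument names here are assumptions for
# illustration, not part of the module's API.
def _example_table_cache_check(stored_signature, current_digest, optimize=0):
    if optimize or (current_digest == stored_signature):
        return 1        # reuse the previously written tables
    return 0            # signature mismatch: rebuild from the grammar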


# Available instance types.  This is used when parsers are defined by a class.
# It's a little funky because I want to preserve backwards compatibility
# with Python 2.0 where types.ObjectType is undefined.

try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build the parser module
# -----------------------------------------------------------------------------

def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir=''):
    global yaccdebug
    yaccdebug = debug

    initialize_vars()
    files = { }
    error = 0

    # Add parsing method to signature
    Signature.update(method)

    # If a "module" parameter was supplied, extract its dictionary.
    # Note: a module may in fact be an instance as well.

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            _items = [(k,getattr(module,k)) for k in dir(module)]
            ldict = { }
            for i in _items:
                ldict[i[0]] = i[1]
        else:
            raise ValueError,"Expected a module"

    else:
        # No module given.  We might be able to get information from the caller.
        # Throw an exception and unwind the traceback to get the globals

        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back           # Walk out to our calling function
            ldict = f.f_globals    # Grab its globals dictionary

    # Add starting symbol to signature
    if not start:
        start = ldict.get("start",None)
    if start:
        Signature.update(start)

    # If running in optimized mode, read the previously written tables
    # instead of regenerating them.
    if (optimize and lr_read_tables(tabmodule,1)):
        # Read parse table
        del Productions[:]
        for p in _lr_productions:
            if not p:
                Productions.append(None)
            else:
                m = MiniProduction()
                m.name = p[0]
                m.len = p[1]
                m.file = p[3]
                m.line = p[4]
                if p[2]:
                    m.func = ldict[p[2]]
                Productions.append(m)

    else:
        # Get the tokens map
        if (module and isinstance(module,_INSTANCETYPE)):
            tokens = getattr(module,"tokens",None)
        else:
            tokens = ldict.get("tokens",None)

        if not tokens:
            raise YaccError,"module does not define a list 'tokens'"
        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
            raise YaccError,"tokens must be a list or tuple."

        # Check to see if a requires dictionary is defined.
        requires = ldict.get("require",None)
        if requires:
            if not (isinstance(requires,types.DictType)):
                raise YaccError,"require must be a dictionary."

            for r,v in requires.items():
                try:
                    if not (isinstance(v,types.ListType)):
                        raise TypeError
                    v1 = [x.split(".") for x in v]
                    Requires[r] = v1
                except StandardError:
                    print "Invalid specification for rule '%s' in require. Expected a list of strings" % r

        # Build the dictionary of terminals.  We record an empty list in the
        # dictionary to track whether or not a terminal is actually used in
        # the grammar.

        if 'error' in tokens:
            print "yacc: Illegal token 'error'.  Is a reserved word."
            raise YaccError,"Illegal token name"

        for n in tokens:
            if Terminals.has_key(n):
                print "yacc: Warning. Token '%s' multiply defined." % n
            Terminals[n] = [ ]

        Terminals['error'] = [ ]

        # Get the precedence map (if any)
        prec = ldict.get("precedence",None)
        if prec:
            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
                raise YaccError,"precedence must be a list or tuple."
            add_precedence(prec)
            Signature.update(repr(prec))

        for n in tokens:
            if not Precedence.has_key(n):
                Precedence[n] = ('right',0)    # Default: right associative, 0 precedence

        # Look for error handler
        ef = ldict.get('p_error',None)
        if ef:
            if isinstance(ef,types.FunctionType):
                ismethod = 0
            elif isinstance(ef, types.MethodType):
                ismethod = 1
            else:
                raise YaccError,"'p_error' defined, but is not a function or method."
            eline = ef.func_code.co_firstlineno
            efile = ef.func_code.co_filename
            files[efile] = None

            if (ef.func_code.co_argcount != 1+ismethod):
                raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
            global Errorfunc
            Errorfunc = ef
        else:
            print "yacc: Warning. no p_error() function is defined."

        # Get the list of built-in functions with p_ prefix
        symbols = [ldict[f] for f in ldict.keys()
                   if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
                       and ldict[f].__name__ != 'p_error')]

        # Check for non-empty symbols
        if len(symbols) == 0:
            raise YaccError,"no rules of the form p_rulename are defined."

        # Sort the symbols by line number
        symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))

        # Add all of the symbols to the grammar
        for f in symbols:
            if (add_function(f)) < 0:
                error += 1
            else:
                files[f.func_code.co_filename] = None

        # Make a signature of the docstrings
        for f in symbols:
            if f.__doc__:
                Signature.update(f.__doc__)

        lr_init_vars()

        if error:
            raise YaccError,"Unable to construct parser."

        if not lr_read_tables(tabmodule):

            # Validate files
            for filename in files.keys():
                if not validate_file(filename):
                    error = 1

            # Validate dictionary
            validate_dict(ldict)

            if start and not Prodnames.has_key(start):
                raise YaccError,"Bad starting symbol '%s'" % start

            augment_grammar(start)
            error = verify_productions(cycle_check=check_recursion)
            otherfunc = [ldict[f] for f in ldict.keys()
                         if (type(ldict[f]) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]

            if error:
                raise YaccError,"Unable to construct parser."

            build_lritems()
            compute_first1()
            compute_follow(start)

            if method in ['SLR','LALR']:
                lr_parse_table(method)
            else:
                raise YaccError, "Unknown parsing method '%s'" % method

            if write_tables:
                lr_write_tables(tabmodule,outputdir)

            if yaccdebug:
                try:
                    f = open(os.path.join(outputdir,debugfile),"w")
                    f.write(_vfc.getvalue())
                    f.write("\n\n")
                    f.write(_vf.getvalue())
                    f.close()
                except IOError,e:
                    print "yacc: can't create '%s'" % debugfile,e

    # Made it here.  Create a parser object and set up its internal state.
    # Set global parse() method to bound method of parser object.

    p = Parser("xyzzy")
    p.productions = Productions
    p.errorfunc = Errorfunc
    p.action = _lr_action
    p.goto = _lr_goto
    p.method = _lr_method
    p.require = Requires

    global parse
    parse = p.parse

    global parser
    parser = p

    # Clean up all of the globals we created
    if (not optimize):
        yacc_cleanup()
    return p
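
# -----------------------------------------------------------------------------
# A minimal usage sketch, assuming a lexer built elsewhere with ply.lex that
# supplies NUMBER, PLUS and TIMES tokens.  The grammar rules live in the
# caller's globals, which yacc() recovers from the stack frame as shown above:
#
#     tokens = ('NUMBER', 'PLUS', 'TIMES')
#     precedence = (('left', 'PLUS'), ('left', 'TIMES'))
#
#     def p_expr_plus(p):
#         'expr : expr PLUS expr'
#         p[0] = p[1] + p[3]
#
#     def p_expr_times(p):
#         'expr : expr TIMES expr'
#         p[0] = p[1] * p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print "Syntax error at", p
#
#     parser = yacc()                    # builds the tables, writes parser.out
#     result = parser.parse("2+3*4")     # -> 14, given a matching lexer
# -----------------------------------------------------------------------------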

# yacc_cleanup function.  Delete all of the global variables
# used during table construction

def yacc_cleanup():
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache

    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    del Productions, Prodnames, Prodmap, Terminals
    del Nonterminals, First, Follow, Precedence, LRitems
    del Errorfunc, Signature, Requires

    global _vf, _vfc
    del _vf, _vfc


# Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args,**kwargs):
    raise YaccError, "yacc: No parser built with yacc()"