--- /dev/null
+import types, copy, re
+
+class Cidr:
+ """A class representing a CIDRized IPv4 network value.
+
+    Specifically, it represents contiguous IPv4 network blocks that
+    can be expressed as an ip-address/network-length pair."""
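+
+    # For example (assuming the normalization done in calc() below,
+    # which masks off the host bits):
+    #
+    #   Cidr("127.0.0.1/24")    -> <127.0.0.0/24>
+    #   Cidr("24.224.0.0", 11)  -> <24.224.0.0/11>
+    #   Cidr(("10.0.0.1", 8))   -> <10.0.0.0/8>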
+
+ # FIXME: we should probably actually make this class immutable and
+ # add methods that generate copies of this class with different
+ # netlens or whatever.
+
+    ip4addr_re = re.compile(r"^\d{1,3}(\.\d{1,3}){0,3}(/\d{1,2})?$")
+
+    def __init__(self, address, netlen = -1):
+        """This takes either a formatted string in CIDR notation
+        (e.g., "127.0.0.1/32"), a tuple consisting of a formatted
+        string IPv4 address and a numeric network length, or the
+        same as two arguments."""
+
+        # unpack an (address, netlen) tuple first, so the regex
+        # validation below only ever sees a string.
+        if type(address) == types.TupleType:
+            address, netlen = address
+
+        if type(address) != types.StringType:
+            raise TypeError, "address must be a string or a tuple"
+
+        if not Cidr.ip4addr_re.search(address):
+            raise ValueError, repr(address) + \
+                  " is not a valid CIDR representation"
+
+        if netlen < 0:
+            if "/" in address:
+                self.addr, self.netlen = address.split("/", 1)
+            else:
+                self.addr, self.netlen = address, 32
+        else:
+            self.addr = address
+            self.netlen = netlen
+
+ # convert string network lengths to integer
+ if type(self.netlen) == types.StringType:
+ self.netlen = int(self.netlen)
+
+ self.calc()
+
+ def __cmp__(self, other):
+ """One CIDR network block is less than another if the start
+ address is numerically less or if the block is larger. That
+ is, supernets will sort before subnets. This ordering allows
+        for an efficient search for subnets of a given network."""
+
+ # FIXME: have to convert to longs to overcome signedness problems.
+ # There is probably a better way to do this.
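+
+        # For example, this ordering yields:
+        #   10.0.0.0/8 < 10.0.0.0/16 < 10.64.0.0/16 < 11.0.0.0/8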
+ res = (self.numaddr & 0xFFFFFFFFL) - (other.numaddr & 0xFFFFFFFFL)
+ if (res < 0 ): return -1
+ if (res > 0): return 1
+ res = self.netlen - other.netlen
+ return res
+
+ def __str__(self):
+ return self.addr + "/" + str(self.netlen)
+
+ def __repr__(self):
+ return "<" + str(self) + ">"
+
+ def calc(self):
+ """This method should be called after any change to the main
+ internal state: netlen or numaddr."""
+
+ # make sure the network length is valid
+ if self.netlen > 32 or self.netlen < 0:
+ raise TypeError, "network length must be between 0 and 32"
+
+ # convert the string ipv4 address to a 32bit number
+ self.numaddr = self._convert_ip4str(self.addr)
+ # calculate our netmask
+ self.mask = self._mask(self.netlen)
+ # force the cidr address into correct masked notation
+ self.numaddr &= self.mask
+
+ # convert the number back to a string to normalize the string
+ self.addr = self._convert_ip4addr(self.numaddr)
+
+    def is_supernet(self, other):
+        """returns True if this Cidr object is a supernet (an
+        enclosing network block) of the other one.  A Cidr object is
+        a supernet of itself."""
+        return other.numaddr & self.mask == self.numaddr
+
+    def is_subnet(self, other):
+        """returns True if this Cidr object is a subnet (an enclosed
+        network block) of the other one.  A Cidr object is a subnet
+        of itself."""
+        return self.numaddr & other.mask == other.numaddr
+
+ def netmask(self):
+ """return the netmask of this Cidr network"""
+ return self._convert_ip4addr(self.mask)
+
+ def length(self):
+ """return the length (in number of addresses) of this network block"""
+        return 1 << (32 - self.netlen)
+
+ def end(self):
+ """return the last IP address in this network block"""
+ return self._convert_ip4addr(self.numaddr + self.length() - 1)
+
+ def _convert_ip4str(self, addr):
+ p = 3; a = 0
+ for octet in addr.split(".", 3):
+            o = int(octet)
+ if (o & 0xFF != o):
+ raise SyntaxWarning, "octet " + str(o) + " isn't in range"
+ a |= o << (p * 8)
+ p -= 1
+ return a
+
+ def _convert_ip4addr(self, numaddr):
+ res = str((numaddr & 0xFF000000) >> 24 & 0xFF) + "." + \
+ str((numaddr & 0x00FF0000) >> 16) + "." + \
+ str((numaddr & 0x0000FF00) >> 8) + "." + \
+ str(numaddr & 0x000000FF)
+ return res
+
+    def _mask(self, len):
+        # clip to 32 bits so the mask is a true IPv4 netmask value
+        return (0xFFFFFFFFL << (32 - len)) & 0xFFFFFFFFL
+
+ def clone(self):
+ # we can get away with a shallow copy (so far)
+ return copy.copy(self)
+
+
+def valid_cidr(address):
+ """Returns the converted Cidr object if 'address' is valid
+ CIDR notation, False if not. For the purposes of this module,
+ valid CIDR notation consists of 1 to 4 octets separated by '.'
+ characters, with an optional trailing "/netlen"."""
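+    # For example (per the semantics above):
+    #   valid_cidr("10.0.0.0/8")  -> <10.0.0.0/8>, a Cidr object
+    #   valid_cidr("not an ip")   -> False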
+
+ if isinstance(address, Cidr): return address
+ try:
+ c = Cidr(address)
+ return c
+ except:
+ return False
+
+
+# test driver
+if __name__ == "__main__":
+ a = Cidr("127.00.000.1/24")
+ b = Cidr("127.0.0.1", 32)
+ c = Cidr("24.232.119.192", 26)
+ d = Cidr("24.232.119.0", 24)
+ e = Cidr(("24.224.0.0", 11))
+    f = Cidr("216.168.111.0/27")
+    g = Cidr("127.0.0.2/31")
+ h = Cidr("127.0.0.16/32")
+ print f.addr
+
+ try:
+ bad = Cidr("24.261.119.0", 32)
+ except Warning, x:
+ print "warning:", x
+
+ print "cidr:", a, "num addresses:", a.length(), "ending address", \
+ a.end(), "netmask", a.netmask()
+
+ clist = [a, b, c, d, e, f, g, h]
+ print "unsorted list of cidr objects:\n ", clist
+
+ clist.sort()
+ print "sorted list of cidr object:\n ", clist
--- /dev/null
+import re
+import Rwhois, config
+
+class DirectiveProcessor:
+
+ rwhois_dir_exp = re.compile(r"V-(\d+\.\d+)", re.I)
+
+ def __init__(self, db):
+ self.db = db
+ self.directives = {
+ "rwhois" : self.rwhois_directive,
+ "limit" : self.limit_directive,
+ "holdconnect" : self.hold_directive,
+ "directive" : self.directive_directive,
+ "xfer" : self.xfer_directive
+ }
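+
+    # Directive lines arrive with a leading dash; for example (these
+    # mirror the test driver below):
+    #
+    #   -rwhois V-1.5
+    #   -limit 20
+    #   -xfer a.com class=domain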
+
+ def process_directive(self, session, line):
+ d_args = line.lstrip("-").split()
+
+        if not d_args or not self.directives.has_key(d_args[0]):
+ session.wfile.write(Rwhois.error_message(400))
+ return
+
+ self.directives[d_args[0]](session, d_args[1:])
+
+
+ def rwhois_directive(self, session, arglist):
+ if not arglist:
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ mo = DirectiveProcessor.rwhois_dir_exp.match(arglist[0])
+ if not mo:
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ # normally we would make sure that the version given was
+ # sufficiently great.
+
+ session.wfile.write(config.banner_string)
+ session.wfile.write("\r\n")
+
+ def limit_directive(self, session, arglist):
+ try:
+ limit = int(arglist[0])
+ except (IndexError, ValueError):
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ if limit > config.max_limit:
+ limit = config.max_limit
+ elif limit < config.min_limit:
+ limit = config.min_limit
+ session.limit = limit
+
+ session.wfile.write(Rwhois.ok())
+
+ def hold_directive(self, session, arglist):
+ if not arglist:
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ arg = arglist[0].lower()
+ if arg == "on":
+ session.holdconnect = True
+ elif arg == "off":
+ session.holdconnect = False
+ else:
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ session.wfile.write(Rwhois.ok())
+
+ def directive_directive(self, session, arglist):
+ if not arglist:
+ reslist = []
+ dirs = self.directives.keys()
+ dirs.sort()
+ for dir in dirs:
+ desc = dir.capitalize()
+ reslist.append("%%directive directive:%s" % dir)
+ reslist.append("%%directive description:%s directive" % desc)
+ session.wfile.write("\r\n".join(reslist))
+ session.wfile.write("\r\n")
+ session.wfile.write(Rwhois.ok())
+ return
+ if self.directives.has_key(arglist[0]):
+ dir = arglist[0]
+ desc = dir.capitalize()
+ session.wfile.write("%%directive directive:%s\r\n" % dir)
+ session.wfile.write("%%directive description:%s directive\r\n"
+ % desc)
+ else:
+ session.wfile.write(Rwhois.error_message(400))
+ return
+ session.wfile.write(Rwhois.ok())
+
+
+ def xfer_directive(self, session, arglist):
+ if not arglist:
+ session.wfile.write(Rwhois.error_message(338))
+ return
+
+ aa = arglist[0].lower()
+
+ oc = None
+ attr_list = []
+ for arg in arglist[1:]:
+ if arg.startswith("class="):
+ oc = arg[6:].lower()
+ elif arg.startswith("attribute="):
+ attr = arg[10:].lower()
+ if attr: attr_list.append(attr)
+
+ # check the constraints
+ if not self.db.is_autharea(aa):
+ session.wfile.write(Rwhois.error_message((340, aa)))
+ return
+ if oc and not self.db.is_objectclass(oc):
+ session.wfile.write(Rwhois.error_message((341, oc)))
+ return
+
+ for attr in attr_list:
+ if not self.db.is_attribute(attr):
+ session.wfile.write(Rwhois.error_message((342, attr)))
+ return
+
+ # now iterate over the entire dataset looking for objects that
+ # match our criteria.
+
+ objs = self.db.object_iterator()
+
+ for obj in objs:
+            # Note: in theory, we should leverage QueryProcessor's
+            # filtering code.
+            if obj.get_attr_value("auth-area", "").lower() != aa:
+                continue
+            if oc and obj.get_attr_value("class-name", "").lower() != oc:
+                continue
+
+ if attr_list:
+ session.wfile.write(obj.attrs_to_wire_str(attr_list, "%xfer "))
+ else:
+ session.wfile.write(obj.to_wire_str("%xfer "))
+            session.wfile.write("\r\n%xfer \r\n")
+
+ session.wfile.write(Rwhois.ok())
+
+
+if __name__ == '__main__':
+
+    import sys
+    import MemDB
+    import Session
+
+ session = Session.Context()
+ session.wfile = sys.stdout
+
+ db = MemDB.MemDB()
+
+ db.init_schema(sys.argv[1])
+ for data_file in sys.argv[2:]:
+ db.load_data(data_file)
+ db.index_data()
+
+ dp = DirectiveProcessor(db)
+
+ directives = [ "-rwhois",
+ "-rwhois foo bar baz",
+ "-rwhois V-1.6 noise blah",
+ "-limit",
+ "-limit a",
+ "-limit 20",
+ "-holdconnect",
+ "-holdconnect on",
+ "-holdconnect foo",
+ "-directive",
+ "-directive limit",
+ "-directive foo",
+ "-xfer",
+ "-xfer a.com",
+ "-xfer a.com class=contact",
+ "-xfer a.com class=domain attribute=domain-name",
+ "-xfer foo class=bar",
+ "-xfer foo class=bar attribute=baz attribute=boo",
+ "-foo baz bar" ]
+
+    for dir in directives:
+        print "trying %r:" % dir
+        dp.process_directive(session, dir)
+
--- /dev/null
+import MemIndex, Cidr
+from Rwhois import rwhoisobject
+
+class MemDB:
+
+ def __init__(self):
+
+        # a dictionary holding the various attribute indexes. The keys
+ # are lowercase attribute names, values are MemIndex or
+ # CidrMemIndex objects.
+ self.indexes = {}
+
+        # a dictionary holding the actual rwhoisobjects. keys are
+ # string IDs, values are rwhoisobject instances.
+ self.main_index = {}
+
+        # a dictionary holding all of the seen attributes. keys are
+        # lowercase attribute names, value is a character indicating
+        # the index type (if indexed), or None if not indexed. Index
+        # type characters are 'N' for normal string index, 'C' for
+        # CIDR index.
+ self.attrs = {}
+
+ # Lists containing attribute names that have indexes by type.
+ # This exists so unconstrained searches can just iterate over
+ # them.
+ self.normal_indexes = []
+ self.cidr_indexes = []
+
+        # a dictionary holding all of the seen class names. keys are
+ # lowercase classnames, value is always None.
+ self.classes = {}
+
+ # dictionary holding all of the seen auth-areas. keys are
+ # lowercase authority area names, value is always None.
+ self.authareas = {}
+
+    def init_schema(self, schema_file):
+        """Initialize the schema from a schema file. Currently the
+        schema file is a list of 'attribute_name = index_type' pairs,
+        one per line. index_type is one of N, A, R, or C: N is a
+        normal string index, A is a combined normal and CIDR index,
+        R is a referral index (a combined index that is only searched
+        explicitly by attribute), and C is a CIDR index.
+
+        It should be noted that this database implementation
+        implements a global namespace for attributes, which isn't
+        really correct according to RFC 2167. RFC 2167 dictates that
+        different authority areas are actually autonomous and thus
+        have separate schemas."""
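+
+        # For illustration, a schema file might look like this (the
+        # attribute names here are hypothetical):
+        #
+        #   domain-name = N
+        #   ip-network = C
+        #   tech-contact = A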
+
+ # initialize base schema
+
+ self.attrs['id'] = "N"
+ self.attrs['auth-area'] = None
+ self.attrs['class-name'] = None
+ self.attrs['updated'] = None
+ self.attrs['referred-auth-area'] = "R"
+
+ sf = open(schema_file, "r")
+
+ for line in sf.xreadlines():
+ line = line.strip()
+ if not line or line.startswith("#"): continue
+
+            attr, it = line.split("=", 1)
+ self.attrs[attr.strip().lower()] = it.strip()[0].upper()
+
+ for attr, index_type in self.attrs.items():
+ if index_type == "N":
+ # normal index
+ self.indexes[attr] = MemIndex.MemIndex()
+ self.normal_indexes.append(attr)
+ elif index_type == "A":
+ # "all" index -- both a normal and a cidr index
+ self.indexes[attr] = MemIndex.ComboMemIndex()
+ self.normal_indexes.append(attr)
+ self.cidr_indexes.append(attr)
+ elif index_type == "R":
+            # referral index, an all index that must be searched
+            # explicitly by attribute
+ self.indexes[attr] = MemIndex.ComboMemIndex()
+ elif index_type == "C":
+ # a cidr index
+ self.indexes[attr] = MemIndex.CidrMemIndex()
+ self.cidr_indexes.append(attr)
+ return
+
+ def add_object(self, obj):
+ """Add an rwhoisobject to the raw indexes, including the
+ master index."""
+
+ # add the object to the main index
+ id = obj.getid()
+ if not id: return
+ id = id.lower()
+
+ self.main_index[id] = obj
+
+ for a,v in obj.items():
+ # note the attribute.
+ index_type = self.attrs.setdefault(a, None)
+ v = v.lower()
+ # make sure that we note the auth-area and class
+ if a == 'auth-area':
+ self.authareas.setdefault(v, None)
+ elif a == 'class-name':
+ self.classes.setdefault(v, None)
+
+ if index_type:
+ index = self.indexes[a]
+ index.add(v, id)
+
+ def load_data(self, data_file):
+ """Load data from rwhoisd-style TXT files (i.e., attr:value,
+ records separated with a "---" bare line)."""
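+
+        # For illustration, one record in a data file might look like
+        # this (attribute names are hypothetical):
+        #
+        #   id: 001
+        #   auth-area: a.com
+        #   class-name: domain
+        #   domain-name: a.com
+        #   ---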
+
+ df = open(data_file, "r")
+ obj = rwhoisobject()
+
+ for line in df.xreadlines():
+ line = line.strip()
+ if line.startswith("#"): continue
+ if not line or line.startswith("---"):
+ # we've reached the end of an object, so index it.
+ self.add_object(obj)
+ # reset obj
+ obj = rwhoisobject()
+ continue
+
+ a, v = line.split(":", 1)
+ obj.add_attr(a, v.lstrip())
+
+ self.add_object(obj)
+ return
+
+    def index_data(self):
+        """Prepare the indexes for searching. Currently, this isn't
+        strictly necessary (the indexes will prepare themselves when
+        necessary), but it should eliminate a penalty on initial
+        searches."""
+
+ for i in self.indexes.values():
+ i.prepare()
+ return
+
+ def is_attribute(self, attr):
+ return self.attrs.has_key(attr.lower())
+
+ def is_indexed_attr(self, attr):
+ if self.is_attribute(attr):
+ return self.attrs[attr.lower()]
+ return False
+
+ def is_objectclass(self, objectclass):
+ return self.classes.has_key(objectclass.lower())
+
+ def is_autharea(self, aa):
+ return self.authareas.has_key(aa.lower())
+
+ def fetch_objects(self, id_list):
+ return [ self.main_index[x] for x in id_list
+ if self.main_index.has_key(x) ]
+
+    def search_attr(self, attr, value, max = 0):
+        """Search for a value in a particular attribute's index. If
+        the attribute is cidr indexed, an attempt to convert value
+        into a Cidr object will be made. Returns an IndexResult of
+        object ids (empty if nothing was found)."""
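+
+        # For example (attribute names depend on the loaded schema):
+        #   search_attr("domain-name", "foo*")        -> prefix match
+        #   search_attr("ip-network", "10.0.0.0/8**") -> subnets of 10/8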
+
+ attr = attr.lower()
+ index_type = self.attrs.get(attr)
+ index = self.indexes.get(attr)
+ if not index: return []
+
+ super_prefix_match = False
+ if value.endswith("**"):
+ super_prefix_match = True
+
+ prefix_match = False
+ if value.endswith("*"):
+ value = value.rstrip("*")
+ prefix_match = True
+
+        if index_type == 'C' and not isinstance(value, Cidr.Cidr):
+            value = Cidr.valid_cidr(value)
+            if not value:
+                # the value was not convertible to a CIDR block
+                return IndexResult()
+        else:
+            value = value.strip().lower()
+
+        if index_type == 'C' and super_prefix_match:
+            return IndexResult(index.find_subnets(value, max))
+
+        res = index.find(value, prefix_match, max)
+        return IndexResult(res)
+
+ def search_normal(self, value, max = 0):
+        """Search for a value in the 'normal' (string keyed) indexes.
+        Returns an IndexResult of object ids (empty if nothing was
+        found)."""
+
+ res = IndexResult()
+
+ for attr in self.normal_indexes:
+ res.extend(self.search_attr(attr, value, max))
+ if max:
+ if len(res) >= max:
+ res.truncate(max)
+ return res
+ return res
+
+ def search_cidr(self, value, max = 0):
+        """Search for a value in the cidr indexes. Returns an
+        IndexResult of object ids (empty if nothing was found)."""
+
+ res = IndexResult()
+ for attr in self.cidr_indexes:
+ res.extend(self.search_attr(attr, value, max))
+ if max:
+ if len(res) >= max:
+ res.truncate(max)
+ return res
+ return res
+
+ def search_referral(self, value, max = 0):
+        """Given a hierarchical value, search for referrals. Returns
+        an IndexResult of object ids (empty if none were found)."""
+
+ return self.search_attr("referred-auth-area", value, max)
+
+ def object_iterator(self):
+ return self.main_index.itervalues()
+
+class IndexResult:
+ def __init__(self, list=None):
+ if not list: list = []
+ self.data = list
+ self._dict = dict(zip(self.data, self.data))
+
+ def extend(self, list):
+        if isinstance(list, IndexResult):
+ list = list.list()
+ new_els = [ x for x in list if not self._dict.has_key(x) ]
+ self.data.extend(new_els)
+ self._dict.update(dict(zip(new_els, new_els)))
+
+ def list(self):
+ return self.data
+
+ def truncate(self, n=0):
+ to_del = self.data[n:]
+ for i in to_del: del self._dict[i]
+ self.data = self.data[:n]
+
+
+# test driver
+if __name__ == "__main__":
+ import sys
+ db = MemDB()
+
+ print "loading schema:", sys.argv[1]
+ db.init_schema(sys.argv[1])
+ for data_file in sys.argv[2:]:
+ print "loading data file:", data_file
+ db.load_data(data_file)
+ db.index_data()
+
+ print "Schema: authority areas"
+ for a in db.authareas.keys():
+ print " %s" % a
+ print "Schema: classes"
+ for c in db.classes.keys():
+ print " %s" % c
+ print "Schema: attributes"
+ for a in db.attrs.keys():
+ print " %s" % a
+
+ print "Is 'Network' a class?", db.is_objectclass("Network")
+
+# for k, v in db.main_index.items():
+# print "main_index[", k, "]:", v
+
+ print "searching for a.com"
+ res = db.search_attr("domain-name", "a.com")
+ print res.list()
+ print [ str(x) for x in db.fetch_objects(res.list()) ]
+
+ print "searching for doe"
+ res = db.search_normal("doe")
+ print res.list()
+ print [ str(x) for x in db.fetch_objects(res.list()) ]
+
+ print "searching for 10.0.0.2"
+ res = db.search_cidr("10.0.0.2")
+ print res.list()
+ print [ str(x) for x in db.fetch_objects(res.list()) ]
+
+ print "searching for fddi.a.com"
+ res = db.search_normal("fddi.a.com")
+ print res.list()
+
+ print "searching referral index for fddi.a.com"
+ res = db.search_attr("referred-auth-area", "fddi.a.com")
+ print res.list()
+ print [ str(x) for x in db.fetch_objects(res.list()) ]
+
+
--- /dev/null
+import bisect, types
+import Cidr
+
+class MemIndex:
+ """This class implements a simple in-memory key-value map. This
+ index supports efficient prefix matching (as well as pretty
+ efficient exact matching). Internally, it is implemented as a
+ sorted list supporting binary searches."""
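+
+    # For example, with keys "foo", "foobar" and "bar" in the index:
+    #   find("foo")                    -> values keyed by "foo" only
+    #   find("foo", prefix_match=True) -> values for "foo" and "foobar"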
+
+ # NOTE: This implementation is probably far from as efficient as
+ # it could be. Ideally, we should avoid the use of custom
+ # comparison functions, so that list.sort() will use built-in
+    # comparators. This would mean storing raw key tuples in the index
+    # as opposed to element objects. Also, it would mean avoiding the
+ # use of objects (like Cidr) as keys in favor of a primitive type.
+ # In the Cidr case, we would either have to use longs or strings,
+ # as Python doesn't seem to have an unsigned 32-bit integer type.
+
+ def __init__(self):
+ self.index = []
+ self.sorted = False
+
+ def add(self, key, value=None):
+ """Add a key-value pair to the map. If the map is already in
+ the prepared state, this operation will preserve it, so don't
+ use this if many elements are to be added at once. The 'key'
+ argument may be a 2 element tuple, in which case 'value' is
+ ignored."""
+
+ if isinstance(key, types.TupleType):
+ el = element(key[0], key[1])
+ else:
+ el = element(key, value)
+
+ if self.sorted:
+ i = bisect.bisect_left(self.index, el)
+ while i < len(self.index):
+ if self.index[i].total_equals(el):
+ break
+ if self.index[i] != el:
+ self.index.insert(i, el)
+ break
+ i += 1
+ else:
+ self.index.append(el)
+ else:
+ self.index.append(el)
+
+ def addlist(self, list):
+ """Add the entire list of elements to the map. The elements
+ of 'list' may be 2 element tuples or actual 'element' objects.
+ Use this method to add many elements at once."""
+
+ self.sorted = False
+ for i in list:
+ if isinstance(i, types.TupleType):
+ self.index.append(element(i[0], i[1]))
+ elif isinstance(i, element):
+ self.index.append(i)
+
+ def prepare(self):
+ """Put the map in a prepared state, if necessary."""
+
+ n = len(self.index)
+ if not n: return
+ if not self.sorted:
+ self.index.sort()
+ # unique the index
+ last = self.index[0]
+ lasti = i = 1
+ while i < n:
+ if not self.index[i].total_equals(last):
+ self.index[lasti] = last = self.index[i]
+ lasti += 1
+ i += 1
+            del self.index[lasti:]
+ self.sorted = True
+
+ def _find(self, key):
+ """Return the (search_element, index) tuple. Used internally
+ only."""
+
+ self.prepare()
+ search_el = element(key, None)
+ i = bisect.bisect_left(self.index, search_el)
+ if i > len(self.index) or i < 0:
+ print "warning: bisect.bisect_left returned something " + \
+ "unexpected:", i, len(self.index)
+ return (search_el, i)
+
+ def find(self, key, prefix_match=False, max=0):
+ """Return a list of values whose keys string match 'key'. If
+ prefix_match is True, then keys will match if 'key' is a
+ prefix of the element key."""
+
+ search_el, i = self._find(key)
+ res = []
+ while i < len(self.index):
+ if max and len(res) == max: break
+ if search_el.equals(self.index[i], prefix_match):
+ res.append(self.index[i].value)
+ i += 1
+ else:
+ break
+ return res
+
+class CidrMemIndex(MemIndex):
+ """This is an in-memory map that has been extended to support CIDR
+ searching semantics."""
+
+    # NOTE: this structure lends to fairly efficient exact searches
+    # (O[log2N]), efficient subnet searches (also O[log2N]), but not
+ # terribly efficient supernet searches (O[32log2N]), because we
+ # have to potentially do 32 exact matches. If we want efficient
+ # supernet searches, we will probably have to use some sort of
+ # general (i.e., not binary) search tree datastructure, as there
+ # is no sorted ordering that will efficiently give supernets that
+ # I can think of.
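+
+    # For example, with keys 10.0.0.0/8 and 10.1.0.0/16 in the index:
+    #   find_subnets("10.0.0.0/8")    -> values for both blocks
+    #   find_supernets("10.1.2.0/24") -> values for both blocks
+    #   find_exact("10.1.0.0/16")     -> values for 10.1.0.0/16 only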
+
+ def add(self, key, value=None):
+ if isinstance(key, types.TupleType):
+ MemIndex.add(self, (Cidr.valid_cidr(key[0]), key[1]), value)
+ else:
+ MemIndex.add(self, Cidr.valid_cidr(key), value)
+ return
+
+    def addlist(self, list):
+
+        # make sure the keys are Cidr objects
+        converted = []
+        for i in list:
+            if isinstance(i, types.TupleType):
+                converted.append((Cidr.valid_cidr(i[0]), i[1]))
+            elif isinstance(i, element):
+                i.key = Cidr.valid_cidr(i.key)
+                converted.append(i)
+
+        MemIndex.addlist(self, converted)
+ return
+
+ def find_exact(self, key, max = 0):
+
+ key = Cidr.valid_cidr(key)
+ search_el, i = self._find(key)
+ res = []
+ while i < len(self.index) and self.index[i].key == key:
+ res.append(self.index[i].value)
+ if max and len(res) == max: break
+ i += 1
+ return res
+
+ def find_subnets(self, key, max = 0):
+ """Return all values that are subnets of 'key', including any
+ that match 'key' itself."""
+
+ key = Cidr.valid_cidr(key)
+ search_el, i = self._find(key)
+
+ res = []
+ while i < len(self.index) and self.index[i].key.is_subnet(key):
+ if max and len(res) == max: break
+ res.append(self.index[i].value)
+ i += 1
+ return res
+
+ def find_supernets(self, key, max = 0):
+ """Return all values that are supernets of 'key', including
+ any that match 'key' itself."""
+
+ key = Cidr.valid_cidr(key)
+ k = key.clone()
+ res = []
+ while k.netlen >= 0:
+ k.calc()
+ res += self.find_exact(k, max)
+ if max and len(res) >= max:
+ return res[:max]
+ k.netlen -= 1
+
+        return res
+
+ def find(self, key, prefix_match=0, max=0):
+ """Return either the exact match of 'key', or the closest
+ supernet of 'key'. If prefix_match is True, then find all
+ supernets of 'key'"""
+
+ key = Cidr.valid_cidr(key)
+ if prefix_match == 0:
+ res = self.find_exact(key, max)
+
+ if not res:
+ # now do a modified supernet search that stops after
+ # the first proper supernet, but gets all values
+ # matching that supernet key
+ k = key.clone()
+ k.netlen -= 1
+ while not res and k.netlen >= 0:
+ k.calc()
+ res = self.find_exact(k, max)
+ k.netlen -= 1
+ return res
+
+ # for now, a prefix match means all supernets
+ return self.find_supernets(key, max)
+
+class ComboMemIndex:
+ """This is an in-memory map that contains both a normal string
+ index and a CIDR index. Valid CIDR values we be applied against
+ the CIDR index. Other values will be applied against the normal
+ index."""
+
+ def __init__(self):
+ self.normal_index = MemIndex()
+ self.cidr_index = CidrMemIndex()
+
+ def add(self, key, value = None):
+ """Add a key,value pair to the correct map. See MemIndex for
+ the behavior of this method"""
+
+ if isinstance(key, types.TupleType):
+ k = key[0]
+ else:
+ k = key
+        c = Cidr.valid_cidr(k)
+ if c:
+ self.cidr_index.add(key, value)
+ else:
+ self.normal_index.add(key, value)
+ return
+
+ def addlist(self, list):
+ """Add a list of elements or key, value tuples to the
+ appropriate maps."""
+
+ cidr_list = []
+ normal_list = []
+
+ for i in list:
+ if isinstance(i, element):
+ k, v = i.key, i.value
+ elif isinstance(i, types.TupleType):
+ k, v = i[:2]
+
+ c = Cidr.valid_cidr(k)
+ if c:
+ cidr_list.append((c, v))
+ else:
+ normal_list.append((k, v))
+
+ if cidr_list:
+ self.cidr_index.addlist(cidr_list)
+ if normal_list:
+ self.normal_index.addlist(normal_list)
+ return
+
+ def prepare(self):
+ """Prepare the internally held maps for searching."""
+
+ self.cidr_index.prepare()
+ self.normal_index.prepare()
+
+ def find(self, key, prefix_match=False, max=0):
+ """Return a list of values whose keys match 'key'."""
+
+ c = Cidr.valid_cidr(key)
+ if c:
+ return self.cidr_index.find(c, prefix_match, max)
+ return self.normal_index.find(key, prefix_match, max)
+
+ def find_exact(self, key, max = 0):
+ """Return a list of values whose keys match 'key'. if 'key'
+ is not a CIDR value, then this is the same as find()."""
+
+ c = Cidr.valid_cidr(key)
+ if c:
+ return self.cidr_index.find_exact(c, max)
+ return self.normal_index.find(key, False, max)
+
+ def find_subnets(self, key, max = 0):
+        """If 'key' is a CIDR value (either a Cidr object or a valid
+        CIDR string representation), do a find_subnets on the internal
+        CidrMemIndex, otherwise return None."""
+
+ c = Cidr.valid_cidr(key)
+ if c: return self.cidr_index.find_subnets(key, max)
+ return None
+
+ def find_supernets(self, key, max = 0):
+        """If 'key' is a CIDR value (either a Cidr object or a valid
+        CIDR string representation), do a find_supernets on the internal
+        CidrMemIndex, otherwise return None."""
+
+ c = Cidr.valid_cidr(key)
+ if c: return self.cidr_index.find_supernets(key, max)
+ return None
+
+class element:
+ """This is the base element class. It basically exists to
+ simplify sorting."""
+
+ def __init__(self, key, value):
+ self.key = key
+ self.value = value
+
+ def __cmp__(self, other):
+ """Compare only on the key."""
+
+ if not type(self.key) == type(other.key):
+ print "other is incompatible type?", repr(other.key), other.key
+ if self.key < other.key:
+ return -1
+ if self.key == other.key:
+ return 0
+ return 1
+
+ def __str__(self):
+ return "<" + str(self.key) + ", " + str(self.value) + ">"
+
+ def __repr__(self):
+ return "element" + str(self)
+
+ def __hash__(self):
+ return self.key.__hash__()
+
+ def equals(self, other, prefix_match=0):
+ if prefix_match:
+ return self.key == other.key[:len(self.key)]
+ return self.key == other.key
+
+ def total_equals(self, other):
+        if not isinstance(other, element): return False
+ return self.key == other.key and self.value == other.value
+
+if __name__ == "__main__":
+
+ source = [ ("foo", "foo-id"), ("bar", "bar-id"), ("baz", "baz-id"),
+ ("foobar", "foo-id-2"), ("barnone", "bar-id-2"),
+ ("zygnax", "z-id") ]
+
+ mi = MemIndex()
+ mi.addlist(source)
+
+ print "finding foobar:"
+ res = mi.find("foobar")
+ print res
+
+ print "finding foo*:"
+ res = mi.find("foo", 1)
+ print res
+
+ print "finding baz:"
+ res = mi.find("baz")
+ print res
+
+ print "adding bork"
+ mi.add("bork", "bork-id")
+
+ print "finding b*:"
+ res = mi.find("b", 1)
+ print res
+
+ ci = CidrMemIndex()
+
+    ci.add(Cidr.Cidr("127.0.0.1/24"), "net-local-1")
+    ci.add(Cidr.Cidr("127.0.0.1/32"), "net-local-2")
+ ci.add(Cidr.Cidr("216.168.224.0", 22), "net-vrsn-1")
+ ci.add(Cidr.Cidr("216.168.252.1", 32), "net-vrsn-2")
+ ci.add(Cidr.Cidr("24.36.191.0/24"), "net-foo-c")
+ ci.add(Cidr.Cidr("24.36.191.32/27"), "net-foo-sub-c")
+ ci.add(Cidr.Cidr("24.36/16"), "net-foo-b")
+
+ print "finding exactly 127.0.0.0/24"
+ res = ci.find(Cidr.Cidr("127.0.0.0/24"))
+ print res
+
+ print "finding exactly 127.0.0.16/32"
+ res = ci.find(Cidr.Cidr("127.0.0.16/32"))
+ print res
+
+ print "finding supernets of 127.0.0.16/32"
+ res = ci.find_supernets(Cidr.Cidr("127.0.0.16/32"))
+ print res
+
+ print "finding supernets of 24.36.191.32/27"
+ res = ci.find(Cidr.Cidr("24.36.191.32/27"), 1)
+ print res
+
+ print "finding supernets of 24.36.191.33/27"
+ res = ci.find_supernets(Cidr.Cidr("24.36.191.33/27"))
+ print res
+
+ print "finding supernets of 24.36.191.64/27"
+ res = ci.find_supernets(Cidr.Cidr("24.36.191.64/27"))
+ print res
+
+ print "finding subnets of 127.0/16"
+ res = ci.find_subnets(Cidr.Cidr("127.0/16"))
+ print res
--- /dev/null
+
+# queryparser.db must be set to a DB class instance.
+db = None
+
+# Define the Lexer for the RWhois query language
+
+tokens = (
+ 'VALUE',
+ 'QUOTEDVALUE',
+ 'CLASS',
+ 'ATTR',
+ 'AND',
+ 'OR',
+ 'EQ',
+ 'NEQ'
+ )
+
+# whitespace
+t_ignore = ' \t'
+# equality
+t_EQ = r'='
+# inequality
+t_NEQ = r'!='
+
+# for now, quoted values must have the wildcards inside the quotes.
+# I kind of wonder if anyone would ever notice this.
+t_QUOTEDVALUE = r'["\']\*?[^"*\n]+\*{0,2}["\']'
+
+def t_firstvalue(t):
+ r'^\*?[^\s"\'=*]+\*{0,2}'
+
+ if db.is_objectclass(t.value):
+ t.type = 'CLASS'
+ else:
+ t.type = 'VALUE'
+ return t
+
+def t_VALUE(t):
+ r'\*?[^\s"\'=!*]+\*{0,2}'
+
+ if t.value.upper() == 'AND':
+ t.type = 'AND'
+ t.value = t.value.upper()
+ return t
+ if t.value.upper() == 'OR':
+ t.type = 'OR'
+ t.value = t.value.upper()
+ return t
+ if db.is_attribute(t.value):
+ t.type = 'ATTR'
+ else:
+ t.type = 'VALUE'
+ return t
+
+
+def t_error(t):
+ pass
+ # print "Illegal character '%r'" % t.value[0]
+ # t.type = 'ERR'
+ # t.skip(1)
+
+# initalize the lexer
+import lex
+lex.lex()
+
+# Define the parser for the query language
+
+# 'value' productions are simple strings
+# 'querystr' productions are 3 element tuples: (attr, op, value)
+# 'query' productions are Query objects
+# 'total' productions are Query objects
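+#
+# For example (attribute and class names depend on the loaded schema),
+# the query string:
+#
+#   domain domain-name = foo*
+#
+# yields a Query with the single clause [("domain-name", "=", "foo*")]
+# and the class restriction "domain", which _prepare() later appends to
+# each clause as ("class-name", "=", "domain").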
+
+def p_total_class_query(t):
+ 'total : CLASS query'
+
+ t[0] = t[2]
+ t[0].set_class(t[1])
+
+def p_total_query(t):
+ 'total : query'
+
+ t[0] = t[1]
+
+
+def p_query_oper_querystr(t):
+ '''query : query AND querystr
+ | query OR querystr'''
+
+ t[0] = t[1]
+ if t[2] == 'OR':
+ t[0].cur_clause = [ t[3] ]
+ t[0].clauses.append(t[0].cur_clause)
+ else:
+ t[0].cur_clause.append(t[3])
+
+def p_query_querystr(t):
+ 'query : querystr'
+
+ t[0] = Query()
+ t[0].cur_clause = [ t[1] ]
+ t[0].clauses.append(t[0].cur_clause)
+
+def p_querystr_attr_value(t):
+ '''querystr : ATTR EQ value
+ | ATTR NEQ value'''
+
+ t[0] = (t[1], t[2], t[3])
+
+def p_querystr_attr(t):
+ 'querystr : ATTR'
+
+ t[0] = (None, '=', t[1])
+
+def p_querystr_value(t):
+ 'querystr : value'
+
+ t[0] = (None, '=', t[1])
+
+
+def p_value(t):
+ 'value : VALUE'
+
+ t[1] = t[1].strip()
+ if t[1]:
+ t[0] = t[1]
+
+def p_quotedvalue(t):
+ 'value : QUOTEDVALUE'
+
+    t[0] = t[1].strip("\"'")
+
+
+def p_error(t):
+    # print "Syntax error at '%s:%s'" % (t.type, t.value)
+    if t is None:
+        raise yacc.YaccError, "Syntax error at end of input"
+    raise yacc.YaccError, "Syntax error at %r" % t.value
+
+
+class Query:
+ """A representation of a parsed RWhois query."""
+
+ def __init__(self):
+ self.clauses = []
+ self.cur_clause = None
+ self.objectclass = None
+ self.prepared = False
+
+ def __str__(self):
+ self._prepare()
+ res = ''
+ for i in range(len(self.clauses)):
+ cl = self.clauses[i]
+ res += "clause %d:\n" % i
+ for item in cl:
+ res += " " + repr(item) + "\n"
+ return res
+
+ def __repr__(self):
+ return "<Query:\n" + str(self) + ">"
+
+    def _prepare(self):
+        """Prepare the query for use. For now, this means propagating
+        an objectclass restriction to all query clauses."""
+
+        if self.prepared: return
+        if self.objectclass:
+            for c in self.clauses:
+                c.append(("class-name", "=", self.objectclass))
+        self.prepared = True
+
+    def get_clauses(self):
+        """Return the query clauses. This is a list of AND clauses,
+        which are, in turn, lists of query terms. Query terms are 3
+        element tuples: (attr, op, value). (Named get_clauses because
+        the 'clauses' attribute set in __init__ would shadow a method
+        named clauses.)"""
+
+        return self.clauses
+
+ def set_class(self, objectclass):
+ """Set the query-wide objectclass restriction."""
+
+ # note: we don't allow the code to set this more than once,
+ # because we would have to code the removal of the previous
+ # class restriction from the query clauses, and it just isn't
+ # worth it. Queries are built, used and thrown away.
+ assert not self.prepared
+ self.objectclass = objectclass
+ return
+
+
+import yacc
+import Rwhois
+
+def get_parser():
+    """Return a parser instance. Parser objects should not be shared
+    amongst threads."""
+
+ return yacc.yacc()
+
+def parse(p, query):
+ """Parse a query, raising a RwhoisError in case of parse failure.
+ Returns a Query object."""
+
+    # before using any parser objects, the database backend must be
+    # set (and it is shared by all parsers).
+ assert db
+ try:
+ return p.parse(query)
+ except (lex.LexError, yacc.YaccError):
+ raise Rwhois.RwhoisError, (350, "")
+
+if __name__ == "__main__":
+ import sys
+ import MemDB
+
+ mydb = MemDB.MemDB()
+
+ print "loading schema:", sys.argv[1]
+ mydb.init_schema(sys.argv[1])
+ for data_file in sys.argv[2:]:
+ print "loading data file:", data_file
+ mydb.load_data(data_file)
+ mydb.index_data()
+
+ db = mydb
+ qp = get_parser()
+
+ for line in sys.stdin.readlines():
+ line = line.rstrip('\n')
+ line = line.strip()
+ if not line: continue
+ print 'inputting:', `line`
+ try:
+ res = qp.parse(line)
+ print repr(res)
+ except (lex.LexError, yacc.YaccError), x:
+ print "parse error occurred:", x
+ print "query:", line
+
+
+# lex.input(line)
+# while 1:
+# tok = lex.token()
+# if not tok: break
+# print tok
+
+
--- /dev/null
+import sys
+import Cidr, Rwhois, QueryParser
+
+class QueryProcessor:
+
+ def __init__(self, db):
+ self.db = db
+
+ def _filter_obj_term(self, obj, term):
+ """Given a rwhoisobject and a query term (a 3 element tuple:
+ attr, operator, value), determine if the object satisfies the
+ term. Returns True if the object matches the term, False if
+ not."""
+
+ attr, op, searchval = term
+ res = False
+
+ # filter by named attribute
+ if attr:
+ vals = obj.get_attr(attr)
+ if not vals:
+ res = False
+ else:
+ res = match_values(searchval, vals)
+ if op == "!=": return not res
+ return res
+ # filter by general term
+ else:
+ for val in obj.values():
+ if match_value(searchval, val):
+ return True
+ return False
+
+ def _filter_obj(self, obj, terms):
+ """Given a rwhoisobject and a list of query terms (i.e., a
+ whole AND clause), return True if the object satisfies the
+ terms."""
+
+ for term in terms:
+ if not self._filter_obj_term(obj, term): return False
+ return True
+
+ def _filter_results(self, reslist, terms):
+        """Given a list of result objects (not simply the ids returned
+ from the search) and a list of query terms (i.e., a query
+ clause), remove elements that do not satisfy the terms.
+ Returns a list of objects that satisfy the filters."""
+
+ if not terms: return reslist
+ return [ x for x in reslist if self._filter_obj(x, terms) ]
+
+ def process_query_clause(self, clause, max=0):
+ """Process a query clause (a grouping of terms ANDed
+ together). This is where the indexed searches actually get
+ done. The technique used here is to search on one index and
+ use the rest of the clause to filter the results. Returns a
+ QueryResult object"""
+
+ # the technique is to do an index search on the first (or
+ # maybe best) indexed term (bare terms are always considered
+ # indexed), and filter those results with the remaining terms.
+
+ # Note: this could be better if we found the "optimal" query
+ # term. One approach may be to create a cost function and
+ # search for the minimum cost term.
+
+ # Note: another approach might be to actually do indexed
+ # searches on all applicable terms (bare or using an indexed
+ # attribute) and find the intersection of the results.
+
+ # FIXME: need to put in the referral chasing logic here, I
+ # think.
+
+ st = None
+ sti = 0
+
+ # find the first searchable term:
+ for term, i in zip(clause, xrange(sys.maxint)):
+ attr, op, value = term
+ if op == "!=": continue
+ if not attr or self.db.is_indexed_attr(attr):
+ st, sti = term, i
+ break
+ if not st:
+ raise Rwhois.RwhoisError, (351, "No indexed terms in query clause")
+
+ # remove the search term from the clause, what remains is the
+ # filter.
+ del clause[sti]
+
+ # if we have an attribute name, search on that.
+ if st[0]:
+ res = self.db.search_attr(st[0], st[2], max)
+ else:
+ if Cidr.valid_cidr(st[2].strip("*")):
+ res = self.db.search_cidr(st[2], max)
+ else:
+ res = self.db.search_normal(st[2], max)
+
+ objs = self._filter_results(self.db.fetch_objects(res.list()), clause)
+
+ return QueryResult(objs)
+
+ def process_full_query(self, query, max=0):
+ """Given a parsed query object, process it by unioning the
+ results of the various ORed together clauses"""
+
+ # shortcut for the very common single clause case:
+ if len(query.clauses) == 1:
+ res = self.process_query_clause(query.clauses[0])
+ return res
+
+ res = QueryResult()
+ for clause in query.clauses:
+ res.extend(self.process_query_clause(clause))
+ if max and len(res) >= max:
+ res.truncate(max)
+ break
+
+ return res
+
+    def process_query(self, session, queryline):
+        """Given a session config and a query line, parse the query,
+        perform any searches, and write the results (objects and any
+        referrals) to the session."""
+
+ if not session.queryparser:
+ session.queryparser = QueryParser.get_parser()
+
+ # parse the query
+ try:
+ query = QueryParser.parse(session.queryparser, queryline)
+ except Rwhois.RwhoisError, x:
+ session.wfile.write(Rwhois.error_message(x))
+ return
+
+ max = session.limit
+ if max: max += 1
+
+ query_result = self.process_full_query(query, max)
+
+ objects = query_result.objects()
+ referrals = query_result.referrals()
+
+ if not objects and not referrals:
+ session.wfile.write(Rwhois.error_message(230))
+ # session.wfile.write("\r\n")
+ return
+
+ for obj in objects:
+ session.wfile.write(obj.to_wire_str())
+ session.wfile.write("\r\n")
+
+ if referrals:
+ session.wfile.write("\r\n".join(referrals))
+ session.wfile.write("\r\n")
+
+        if session.limit and len(objects) > session.limit:
+            session.wfile.write(Rwhois.error_message(330))
+ else:
+ session.wfile.write(Rwhois.ok())
+
+class QueryResult:
+
+    def __init__(self, objs=None, referrals=None):
+        # avoid mutable default arguments; the lists would be shared
+        # across instances
+        if objs is None: objs = []
+        if referrals is None: referrals = []
+        self.data = objs
+        self.ids = [ x.getid() for x in objs ]
+        self._dict = dict(zip(self.ids, self.ids))
+        self.refs = referrals
+
+ def extend(self, list):
+        if isinstance(list, QueryResult):
+ list = list.objects()
+ new_objs = [ x for x in list if not self._dict.has_key(x.getid()) ]
+ new_ids = [ x.getid() for x in new_objs ]
+ self.data.extend(new_objs)
+ self.ids.extend(new_ids)
+ self._dict.update(dict(zip(new_ids, new_ids)))
+
+ def add_referrals(self, referrals):
+ self.refs.extend(referrals)
+
+ def objects(self):
+ return self.data
+
+ def referrals(self):
+ return self.refs
+
+    def get_ids(self):
+        return self.ids
+
+ def truncate(self, n=0):
+ to_del = self.ids[n:]
+ for i in to_del: del self._dict[i]
+ self.ids = self.ids[:n]
+ self.data = self.data[:n]
+
+
+def match_value(searchval, val):
+    """Determine if a search value matches a data value. If both
+    matching terms are valid CIDR objects, then they are matched
+    according to the CIDR wildcard rules (i.e., a single trailing *
+    is a supernet search, ** is a subnet search). If the search
+    value is not wildcarded, then they are just tested for numeric
+    equality. Otherwise, the terms are compared using string
+    semantics (substring, prefix, suffix, and exact match)."""
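+
+    # For example:
+    #   match_value("*doe*", "john doe jr.")       -> True (substring)
+    #   match_value("10.2.0.0/16*", "10.0.0.0/8")  -> True (the data
+    #     value encloses the search value, a supernet match)
+    #   match_value("10.0.0.0/8**", "10.2.0.0/16") -> True (subnet match)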
+
+ if match_cidr(searchval, val): return True
+
+ # normalize the values for comparison.
+ searchval = searchval.lower()
+ val = val.lower()
+
+ # the substring case
+    if searchval.startswith("*") and searchval.endswith("*"):
+        sv = searchval.strip("*")
+        return val.find(sv) >= 0
+ # the suffix case
+ elif searchval.startswith("*"):
+ sv = searchval.lstrip("*")
+ return val.endswith(sv)
+ # the prefix case
+ elif searchval.endswith("*"):
+ sv = searchval.rstrip("*")
+ return val.startswith(sv)
+ # the exact match case
+ else:
+ return searchval == val
+
+def match_values(searchval, val_list):
+
+ for val in val_list:
+ if match_value(searchval, val): return True
+ return False
+
+def match_cidr(searchval, val):
+    """If both terms are valid CIDR values (minus any trailing
+    wildcards of the search value), compare according to the CIDR
+    wildcard rules: subnet, supernet, and exact match. If either
+    term is not a CIDR value, return False."""
+
+ sv = Cidr.valid_cidr(searchval.rstrip("*"))
+ rv = Cidr.valid_cidr(val)
+
+ if not sv or not rv: return False
+
+ if (searchval.endswith("**")):
+ return rv.is_subnet(sv)
+ elif (searchval.endswith("*")):
+ return rv.is_supernet(sv)
+ else:
+ return rv == sv
+
+
+if __name__ == '__main__':
+
+ import MemDB, Session
+
+ db = MemDB.MemDB()
+
+ print "loading schema:", sys.argv[1]
+ db.init_schema(sys.argv[1])
+ for data_file in sys.argv[2:]:
+ print "loading data file:", data_file
+ db.load_data(data_file)
+ db.index_data()
+
+ QueryParser.db = db
+ processor = QueryProcessor(db)
+
+ session = Session.Context()
+ session.wfile = sys.stdout
+
+ while 1:
+        line = sys.stdin.readline().strip()
+ if not line: break
+ if line.startswith("#"): continue
+
+ print "parsing: '%s'" % line
+ processor.process_query(session, line)
+        session.wfile.write("\r\n")
+ session.wfile.flush()
--- /dev/null
+# This module contains classes that are fairly general to RWhois
+# server operation.
+
+class RwhoisError(Exception):
+ pass
+
+# The RWhois error codes. Most of these won't ever be used.
+error_codes = { 120 : "Registration Deferred",
+ 130 : "Object Not Authoritative",
+ 230 : "No Objects Found",
+ 320 : "Invalid Attribute",
+ 321 : "Invalid Attribute Syntax",
+ 322 : "Required Attribute Missing",
+ 323 : "Object Reference Not Found",
+ 324 : "Primary Key Not Unique",
+ 325 : "Failed to Update Stale Object",
+ 330 : "Exceeded Response Limit",
+ 331 : "Invalid Limit",
+ 332 : "Nothing To Transfer",
+ 333 : "Not Master for Authority Area",
+ 336 : "Object Not Found",
+ 338 : "Invalid Directive Syntax",
+ 340 : "Invalid Authority Area",
+ 341 : "Invalid Class",
+ 342 : "Invalid Host/Port",
+ 350 : "Invalid Query Syntax",
+ 351 : "Query Too Complex",
+ 352 : "Invalid Security Method",
+ 353 : "Authentication Failed",
+ 354 : "Encryption Failed",
+ 360 : "Corrupt Data. Keyadd Failed",
+ 400 : "Directive Not Available",
+ 401 : "Not Authorized For Directive",
+ 402 : "Unidentified Error",
+ 420 : "Registration Not Authorized",
+ 436 : "Invalid Display Format",
+ 500 : "Memory Allocation Problem",
+ 501 : "Service Not Available",
+ 502 : "Unrecoverable Error",
+ 503 : "Idle Time Exceeded",
+ 560 : ""
+ }
+
+def error_message(value):
+ try:
+ code, msg = value
+ code = int(code)
+ except (TypeError, ValueError):
+ try:
+ code = int(value)
+ msg = None
+ except ValueError:
+ msg = value
+ code = 402
+ if msg:
+ return "%%error %d %s: %s\r\n" % \
+ (code, error_codes.get(code, 402), msg)
+ else:
+ return "%%error %d %s\r\n" % (code, error_codes.get(code, 402))
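+
+# For example:
+#   error_message(230)            -> "%error 230 No Objects Found\r\n"
+#   error_message((340, "a.com")) -> "%error 340 Invalid Authority Area: a.com\r\n"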
+
+def ok():
+ return "%ok\r\n"
+
+class rwhoisobject:
+ """This is the standard class for RWhois data objects."""
+
+ def __init__(self):
+ self.data = {}
+ self.attr_order = []
+
+ def get_attr(self, attr, default=None):
+ """This returns a list of values associated with a particular
+ attribute. The default value, if supplied, must be a single
+ (non-sequence) value."""
+
+ if default:
+ return self.data.get(attr.strip().lower(), [default])
+ return self.data.get(attr.strip().lower(), [])
+
+ def get_attr_value(self, attr, default=None):
+ """This returns a single value associated with the attribute.
+ If the attribute has multiple values, the first is
+ returned."""
+
+ return self.data.get(attr.strip().lower(), [default])[0]
+
+ def getid(self):
+ """Return the RWhois ID of this object."""
+
+ return self.get_attr_value("id")
+
+ def add_attr(self, attr, value):
+ """Adds an attribute to the object."""
+
+ attr = attr.strip().lower()
+ if self.data.has_key(attr): self.data[attr].append(value)
+ else:
+ self.attr_order.append(attr)
+ self.data.setdefault(attr, []).append(value)
+
+ def add_attrs(self, attr_list):
+ """Adds a list of (attribute, value) tuples to the object."""
+ for attr, value in attr_list:
+ self.add_attr(attr, value)
+
+ def items(self):
+        """Returns the list of (attribute, value) tuples (actually 2
+        element lists). Attributes with multiple values produce
+ multiple tuples. The items are returned in the same order
+ they were added to the object."""
+
+ return [ [x, y] for x in self.attr_order for y in self.data[x] ]
+
+ def values(self):
+ """Return the list of values in this object."""
+
+ return [ x for y in self.data.values() for x in y ]
+
+ def __str__(self):
+ """A convenient string representation of this object"""
+ return '\n'.join([':'.join(x) for x in self.items()])
+
+ def __repr__(self):
+ return "<rwhoisobject: " + self.getid() + ">"
+
+ def attrs_to_wire_str(self, attrs, prefix=None):
+ """Return specific attributes in a response formatted string
+ (classname:attr:value)"""
+
+ cn = self.get_attr_value("class-name", "unknown-class")
+        items = [ [cn, x, y] for x in attrs for y in self.data.get(x, []) ]
+
+ if prefix:
+ res = '\r\n'.join([ prefix + ':'.join(x) for x in items ])
+ else:
+ res = '\r\n'.join([ ':'.join(x) for x in items ])
+
+        return res
+
+ def to_wire_str(self, prefix=None):
+ """Return the response formatted string (classname:attr:value)"""
+
+ return self.attrs_to_wire_str(self.attr_order, prefix)
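+
+    # For example, an object whose attributes were added in the order
+    # id, class-name, name renders as:
+    #
+    #   contact:id:001
+    #   contact:class-name:contact
+    #   contact:name:John Doe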
+
+
+
+## A basic test driver
+if __name__ == '__main__':
+
+ obj = rwhoisobject()
+ obj.add_attr('id', '001')
+ obj.add_attr("class-name", 'contact')
+ obj.add_attr("class-name", "foo")
+ obj.add_attr('name', 'Aiden Quinn')
+ obj.add_attr('email', 'aquin@yahoo.com')
+ obj.add_attr('org-name', 'YoYoDyne Inc.')
+ obj.add_attr('email', 'aq@aol.net')
+ obj.add_attr('First-Name', 'Aiden ')
+
+ print "obj:\n", obj
+ print "wire:\n", obj.to_wire_str()
--- /dev/null
+#! /usr/bin/python
+
+import sys, socket, SocketServer
+
+import config, Session
+import QueryParser, QueryProcessor, DirectiveProcessor, Rwhois
+
+
+# server-wide variables
+
+query_processor = None
+directive_processor = None
+
+class RwhoisTCPServer(SocketServer.ThreadingTCPServer):
+ def __init__(self, server_address, RequestHandlerClass):
+ self.allow_reuse_address = True
+ SocketServer.TCPServer.__init__(self, server_address,
+ RequestHandlerClass)
+
+ def verify_request(self, request, client_address):
+ # implement access control here
+ return True
+
+class RwhoisHandler(SocketServer.StreamRequestHandler):
+
+    def readline(self):
+        try:
+            line = self.rfile.readline()
+            # an empty string (no newline at all) means EOF
+            if not line: return None
+            return line.strip()[:1024]
+        except KeyboardInterrupt:
+            self.quit_flag = True
+            return None
+
+ def handle(self):
+
+ self.quit_flag = False
+
+ # output a banner
+        self.wfile.write(config.banner_string)
+        self.wfile.write("\r\n")
+
+ # get a session.
+ session = Session.Context()
+ session.rfile = self.rfile
+ session.wfile = self.wfile
+
+ # first line
+ line = self.readline()
+
+        while line is not None and not self.rfile.closed:
+            if not line:
+                line = self.readline()
+                continue
+
+ if line.startswith("-"):
+ self.handle_directive(session, line)
+ else:
+ self.handle_query(session, line)
+ if not session.holdconnect:
+ self.quit_flag = True
+
+ self.wfile.flush()
+
+ # check to see if we were asked to quit
+ if self.quit_flag: break
+
+ # next line of input
+ line = self.readline()
+
+ print "done with", self.client_address
+
+ def handle_directive(self, session, line):
+ if (line.startswith("-quit")):
+ self.quit_flag = True
+ self.wfile.write(Rwhois.ok())
+ return
+ directive_processor.process_directive(session, line)
+
+ def handle_query(self, session, line):
+ query_processor.process_query(session, line)
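+
+    # A typical exchange with this handler, for illustration (the
+    # banner and object lines depend on config and loaded data):
+    #
+    #   S: %rwhois V-1.5 example.com (pyrwhoisd 0.1)
+    #   C: -limit 20
+    #   S: %ok
+    #   C: domain-name=a.com
+    #   S: domain:id:...
+    #   S: %ok
+    #   C: -quit
+    #   S: %ok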
+
+
+def usage(pname):
+ print """usage: %s schema_file data_file [data_file ...]""" % pname
+ sys.exit(64)
+
+def init(argv):
+ import MemDB
+
+ if len(argv) < 3: usage(argv[0])
+ schema_file = argv[1]
+ data_files = argv[2:]
+
+ db = MemDB.MemDB()
+
+ db.init_schema(schema_file)
+ for df in data_files:
+ db.load_data(df)
+ db.index_data()
+
+ QueryParser.db = db
+
+ global query_processor, directive_processor
+
+ query_processor = QueryProcessor.QueryProcessor(db)
+ directive_processor = DirectiveProcessor.DirectiveProcessor(db)
+
+def serve():
+ # initialize the TCP server
+ server = RwhoisTCPServer((config.server_address, config.port),
+ RwhoisHandler)
+
+ # and handle incoming connections
+ try:
+ if not config.server_address:
+ print "listening on port %d" % config.port
+ else:
+ print "listening on %s port %d" % \
+ (config.server_address, config.port)
+ server.serve_forever()
+ except (KeyboardInterrupt, SystemExit):
+ print "interrupted. exiting."
+
+ print "finished serving"
+ sys.exit(0)
+
+if __name__ == "__main__":
+
+ init(sys.argv)
+ serve()
+
--- /dev/null
+import config
+
+class Context:
+ """This class is used to hold session specific variables."""
+ def __init__(self):
+ # each session gets its own query parser.
+ self.queryparser = None
+
+        # these should be set by the handler
+        self.rfile = None
+        self.wfile = None
+
+ # set some default values.
+ self.limit = config.default_limit
+ self.holdconnect = False
--- /dev/null
+"""Server global variables should be set here."""
+
+import socket
+
+##### Editable Configuration Options
+
+# the port to listen on.
+port = 4321
+# the interface address to bind to. "" means INADDR_ANY.
+server_address = ""
+
+# the hostname to advertise in the banner.
+server_hostname = socket.getfqdn()
+
+# setting this here sets a default session response limit. 0
+# means no response limit. This should be between min_limit and
+# max_limit defined below.
+default_limit = 0
+# set this to the maximum value that the limit can be set to
+max_limit = 256
+# set this to the minimum value that the limit can be set to
+# if this is zero, you are allowing clients to disable query limits.
+min_limit = 0
+
+
+#### END Editable Configuration Options
+
+version = "0.1"
+banner_string = "%%rwhois V-1.5 %s (pyrwhoisd %s)" % (server_hostname, version)
+
--- /dev/null
+#-----------------------------------------------------------------------------
+# ply: lex.py
+#
+# Author: David M. Beazley (beazley@cs.uchicago.edu)
+# Department of Computer Science
+# University of Chicago
+# Chicago, IL 60637
+#
+# Copyright (C) 2001, David M. Beazley
+#
+# $Header: /home/davidb/src/cvs/python-rwhoisd/rwhoisd/lex.py,v 1.1 2003/04/22 02:19:07 davidb Exp $
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See the file COPYING for a complete copy of the LGPL.
+#
+#
+# This module automatically constructs a lexical analysis module from regular
+# expression rules defined in a user-defined module. The idea is essentially the same
+# as that used in John Aycock's Spark framework, but the implementation works
+# at the module level rather than requiring the use of classes.
+#
+# This module tries to provide an interface that is closely modeled after
+# the traditional lex interface in Unix. It also differs from Spark
+# in that:
+#
+# - It provides more extensive error checking and reporting if
+# the user supplies a set of regular expressions that can't
+# be compiled or if there is any other kind of a problem in
+# the specification.
+#
+# - The interface is geared towards LALR(1) and LR(1) parser
+#      generators.  That is, tokens are generated one at a time
+#      rather than being generated in advance all in one step.
+#
+# There are a few limitations of this module
+#
+# - The module interface makes it somewhat awkward to support more
+# than one lexer at a time. Although somewhat inelegant from a
+# design perspective, this is rarely a practical concern for
+# most compiler projects.
+#
+# - The lexer requires that the entire input text be read into
+# a string before scanning. I suppose that most machines have
+#      enough memory to make this a minor issue, but it makes
+# the lexer somewhat difficult to use in interactive sessions
+# or with streaming data.
+#
+#-----------------------------------------------------------------------------
+
+r"""
+lex.py
+
+This module builds lex-like scanners based on regular expression rules.
+To use the module, simply write a collection of regular expression rules
+and actions like this:
+
+# lexer.py
+import lex
+
+# Define a list of valid tokens
+tokens = (
+ 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS'
+ )
+
+# Define tokens as functions
+def t_IDENTIFIER(t):
+    r' [a-zA-Z_](\w|_)* '
+ return t
+
+def t_NUMBER(t):
+ r' \d+ '
+ return t
+
+# Some simple tokens with no actions
+t_PLUS = r'\+'
+t_MINUS = r'-'
+
+# Initialize the lexer
+lex.lex()
+
+The tokens list is required and contains a complete list of all valid
+token types that the lexer is allowed to produce. Token types are
+restricted to be valid identifiers. This means that 'MINUS' is a valid
+token type whereas '-' is not.
+
+Rules are defined by writing a function with a name of the form
+t_rulename. Each rule must accept a single argument which is
+a token object generated by the lexer. This token has the following
+attributes:
+
+ t.type = type string of the token. This is initially set to the
+ name of the rule without the leading t_
+ t.value = The value of the lexeme.
+ t.lineno = The value of the line number where the token was encountered
+
+For example, the t_NUMBER() rule above might be called with the following:
+
+ t.type = 'NUMBER'
+ t.value = '42'
+ t.lineno = 3
+
+Each rule returns the token object it would like to supply to the
+parser. In most cases, the token t is returned with few, if any
+modifications. To discard a token for things like whitespace or
+comments, simply return nothing. For instance:
+
+def t_whitespace(t):
+ r' \s+ '
+ pass
+
+For faster lexing, you can also define this in terms of the ignore set like this:
+
+t_ignore = ' \t'
+
+The characters in this string are ignored by the lexer. Use of this feature can speed
+up parsing significantly since scanning will immediately proceed to the next token.
+
+lex requires that the token returned by each rule has an attribute
+t.type. Other than this, rules are free to return any kind of token
+object that they wish and may construct a new type of token object
+from the attributes of t (provided the new object has the required
+type attribute).
+
+If illegal characters are encountered, the scanner executes the
+function t_error(t) where t is a token representing the rest of the
+string that hasn't been matched. If this function isn't defined, a
+LexError exception is raised. The .text attribute of this exception
+object contains the part of the string that wasn't matched.
+
+The t.skip(n) method can be used to skip ahead n characters in the
+input stream. This is usually only used in the error handling rule.
+For instance, the following rule would print an error message and
+continue:
+
+def t_error(t):
+ print "Illegal character in input %s" % t.value[0]
+ t.skip(1)
+
+Of course, a nice scanner might wish to skip more than one character
+if the input looks very corrupted.
+
+The lex module defines a t.lineno attribute on each token that can be used
+to track the current line number in the input. The value of this
+variable is not modified by lex so it is up to your lexer module
+to correctly update its value depending on the lexical properties
+of the input language. To do this, you might write rules such as
+the following:
+
+def t_newline(t):
+ r' \n+ '
+ t.lineno += t.value.count("\n")
+
+To initialize your lexer so that it can be used, simply call the lex.lex()
+function in your rule file. If there are any errors in your
+specification, warning messages or an exception will be generated to
+alert you to the problem.
+
+To use the newly constructed lexer from another module, simply do
+this:
+
+    import lex
+    import lexer            # The module containing your rules
+    lex.input("position = initial + rate*60")
+
+    while 1:
+        token = lex.token()       # Get a token
+        if not token: break       # No more tokens
+        ... do whatever ...
+
+Importing the rule module 'lexer' runs its call to lex.lex(), which
+installs the global lex.input() and lex.token() functions. Parsing
+modules can therefore import 'lex' alone, without the rule file or
+any additional information about the scanner you have defined.
+"""
+
+# -----------------------------------------------------------------------------
+
+
+__version__ = "1.3"
+
+import re, types, sys, copy
+
+# Exception thrown when invalid token encountered and no default
+class LexError(Exception):
+ def __init__(self,message,s):
+ self.args = (message,)
+ self.text = s
+
+# Token class
+class LexToken:
+ def __str__(self):
+ return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno)
+ def __repr__(self):
+ return str(self)
+ def skip(self,n):
+ try:
+ self._skipn += n
+ except AttributeError:
+ self._skipn = n
+
+# -----------------------------------------------------------------------------
+# Lexer class
+#
+# input() - Store a new string in the lexer
+# token() - Get the next token
+# -----------------------------------------------------------------------------
+
+class Lexer:
+ def __init__(self):
+ self.lexre = None # Master regular expression
+ self.lexdata = None # Actual input data (as a string)
+ self.lexpos = 0 # Current position in input text
+ self.lexlen = 0 # Length of the input text
+ self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types
+ self.lexerrorf = None # Error rule (if any)
+ self.lextokens = None # List of valid tokens
+ self.lexignore = None # Ignored characters
+ self.lineno = 1 # Current line number
+ self.debug = 0 # Debugging mode
+ self.optimize = 0 # Optimized mode
+ self.token = self.errtoken
+
+ def __copy__(self):
+ c = Lexer()
+ c.lexre = self.lexre
+ c.lexdata = self.lexdata
+ c.lexpos = self.lexpos
+ c.lexlen = self.lexlen
+        c.lexindexfunc = self.lexindexfunc
+ c.lexerrorf = self.lexerrorf
+ c.lextokens = self.lextokens
+ c.lexignore = self.lexignore
+ c.lineno = self.lineno
+ c.optimize = self.optimize
+        c.token = c.realtoken
+        return c
+
+ # ------------------------------------------------------------
+ # input() - Push a new string into the lexer
+ # ------------------------------------------------------------
+ def input(self,s):
+ if not isinstance(s,types.StringType):
+ raise ValueError, "Expected a string"
+ self.lexdata = s
+ self.lexpos = 0
+ self.lexlen = len(s)
+ self.token = self.realtoken
+
+ # Change the token routine to point to realtoken()
+ global token
+ if token == self.errtoken:
+ token = self.token
+
+ # ------------------------------------------------------------
+ # errtoken() - Return error if token is called with no data
+ # ------------------------------------------------------------
+ def errtoken(self):
+ raise RuntimeError, "No input string given with input()"
+
+ # ------------------------------------------------------------
+ # token() - Return the next token from the Lexer
+ #
+ # Note: This function has been carefully implemented to be as fast
+ # as possible. Don't make changes unless you really know what
+ # you are doing
+ # ------------------------------------------------------------
+ def realtoken(self):
+ # Make local copies of frequently referenced attributes
+ lexpos = self.lexpos
+ lexlen = self.lexlen
+ lexignore = self.lexignore
+ lexdata = self.lexdata
+
+ while lexpos < lexlen:
+ # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
+ if lexdata[lexpos] in lexignore:
+ lexpos += 1
+ continue
+
+ # Look for a regular expression match
+ m = self.lexre.match(lexdata,lexpos)
+ if m:
+ i = m.lastindex
+ lexpos = m.end()
+ tok = LexToken()
+ tok.value = m.group()
+ tok.lineno = self.lineno
+ tok.lexer = self
+ func,tok.type = self.lexindexfunc[i]
+ if not func:
+ self.lexpos = lexpos
+ return tok
+
+ # If token is processed by a function, call it
+ self.lexpos = lexpos
+ newtok = func(tok)
+ self.lineno = tok.lineno # Update line number
+
+                # Every function must return a token; if it returns nothing, we just move on to the next token
+ if not newtok: continue
+
+ # Verify type of the token. If not in the token map, raise an error
+ if not self.optimize:
+ if not self.lextokens.has_key(newtok.type):
+ raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
+ func.func_code.co_filename, func.func_code.co_firstlineno,
+ func.__name__, newtok.type),lexdata[lexpos:])
+
+ return newtok
+
+ # No match. Call t_error() if defined.
+ if self.lexerrorf:
+ tok = LexToken()
+ tok.value = self.lexdata[lexpos:]
+ tok.lineno = self.lineno
+ tok.type = "error"
+ tok.lexer = self
+ oldpos = lexpos
+ newtok = self.lexerrorf(tok)
+ lexpos += getattr(tok,"_skipn",0)
+ if oldpos == lexpos:
+ # Error method didn't change text position at all. This is an error.
+ self.lexpos = lexpos
+ raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
+ if not newtok: continue
+ self.lexpos = lexpos
+ return newtok
+
+ self.lexpos = lexpos
+ raise LexError, ("No match found", lexdata[lexpos:])
+
+ # No more input data
+ self.lexpos = lexpos + 1
+ return None
+
+
+# -----------------------------------------------------------------------------
+# validate_file()
+#
+# This checks to see if there are duplicated t_rulename() functions or strings
+# in the parser input file. This is done using a simple regular expression
+# match on each line of the file.
+# -----------------------------------------------------------------------------
+
+def validate_file(filename):
+ import os.path
+ base,ext = os.path.splitext(filename)
+ if ext != '.py': return 1 # No idea what the file is. Return OK
+
+ try:
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ return 1 # Oh well
+
+ fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
+ sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
+ counthash = { }
+ linen = 1
+ noerror = 1
+ for l in lines:
+ m = fre.match(l)
+ if not m:
+ m = sre.match(l)
+ if m:
+ name = m.group(1)
+ prev = counthash.get(name)
+ if not prev:
+ counthash[name] = linen
+ else:
+ print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
+ noerror = 0
+ linen += 1
+ return noerror
+
+# -----------------------------------------------------------------------------
+# _read_lextab(lexer, fdict, module)
+#
+# Reads lexer table from a lextab file instead of using introspection.
+# -----------------------------------------------------------------------------
+
+def _read_lextab(lexer, fdict, module):
+ exec "import %s as lextab" % module
+ lexer.lexre = re.compile(lextab._lexre, re.VERBOSE)
+ lexer.lexindexfunc = lextab._lextab
+ for i in range(len(lextab._lextab)):
+ t = lexer.lexindexfunc[i]
+ if t:
+ if t[0]:
+ lexer.lexindexfunc[i] = (fdict[t[0]],t[1])
+ lexer.lextokens = lextab._lextokens
+ lexer.lexignore = lextab._lexignore
+ if lextab._lexerrorf:
+ lexer.lexerrorf = fdict[lextab._lexerrorf]
+
+# -----------------------------------------------------------------------------
+# lex(module)
+#
+# Build all of the regular expression rules from definitions in the supplied module
+# -----------------------------------------------------------------------------
+def lex(module=None,debug=0,optimize=0,lextab="lextab"):
+ ldict = None
+ regex = ""
+ error = 0
+ files = { }
+ lexer = Lexer()
+ lexer.debug = debug
+ lexer.optimize = optimize
+ global token,input
+
+ if module:
+ if not isinstance(module, types.ModuleType):
+ raise ValueError,"Expected a module"
+
+ ldict = module.__dict__
+
+ else:
+ # No module given. We might be able to get information from the caller.
+ try:
+ raise RuntimeError
+ except RuntimeError:
+ e,b,t = sys.exc_info()
+ f = t.tb_frame
+ f = f.f_back # Walk out to our calling function
+ ldict = f.f_globals # Grab its globals dictionary
+
+ if optimize and lextab:
+ try:
+ _read_lextab(lexer,ldict, lextab)
+ if not lexer.lexignore: lexer.lexignore = ""
+ token = lexer.token
+ input = lexer.input
+ return lexer
+
+ except ImportError:
+ pass
+
+ # Get the tokens map
+ tokens = ldict.get("tokens",None)
+ if not tokens:
+ raise SyntaxError,"lex: module does not define 'tokens'"
+ if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
+ raise SyntaxError,"lex: tokens must be a list or tuple."
+
+ # Build a dictionary of valid token names
+ lexer.lextokens = { }
+ if not optimize:
+
+ # Utility function for verifying tokens
+ def is_identifier(s):
+ for c in s:
+ if not (c.isalnum() or c == '_'): return 0
+ return 1
+
+ for n in tokens:
+ if not is_identifier(n):
+ print "lex: Bad token name '%s'" % n
+ error = 1
+ if lexer.lextokens.has_key(n):
+ print "lex: Warning. Token '%s' multiply defined." % n
+ lexer.lextokens[n] = None
+ else:
+ for n in tokens: lexer.lextokens[n] = None
+
+
+ if debug:
+ print "lex: tokens = '%s'" % lexer.lextokens.keys()
+
+ # Get a list of symbols with the t_ prefix
+ tsymbols = [f for f in ldict.keys() if f[:2] == 't_']
+
+ # Now build up a list of functions and a list of strings
+ fsymbols = [ ]
+ ssymbols = [ ]
+ for f in tsymbols:
+ if isinstance(ldict[f],types.FunctionType):
+ fsymbols.append(ldict[f])
+ elif isinstance(ldict[f],types.StringType):
+ ssymbols.append((f,ldict[f]))
+ else:
+ print "lex: %s not defined as a function or string" % f
+ error = 1
+
+ # Sort the functions by line number
+ fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
+
+    # Sort the strings by decreasing regular expression length, so that
+    # longer patterns (e.g. '==') are tried before their prefixes (e.g. '=')
+    ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
+
+    # Make sure at least one rule was defined
+ if len(fsymbols) == 0 and len(ssymbols) == 0:
+ raise SyntaxError,"lex: no rules of the form t_rulename are defined."
+
+ # Add all of the rules defined with actions first
+ for f in fsymbols:
+
+ line = f.func_code.co_firstlineno
+ file = f.func_code.co_filename
+ files[file] = None
+
+ if not optimize:
+ if f.func_code.co_argcount > 1:
+ print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ if f.func_code.co_argcount < 1:
+ print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ if f.__name__ == 't_ignore':
+ print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ if f.__name__ == 't_error':
+ lexer.lexerrorf = f
+ continue
+
+ if f.__doc__:
+ if not optimize:
+ try:
+ c = re.compile(f.__doc__, re.VERBOSE)
+ except re.error,e:
+ print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e)
+ error = 1
+ continue
+
+ if debug:
+ print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__)
+
+ # Okay. The regular expression seemed okay. Let's append it to the master regular
+ # expression we're building
+
+ if (regex): regex += "|"
+ regex += "(?P<%s>%s)" % (f.__name__,f.__doc__)
+ else:
+ print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__)
+
+ # Now add all of the simple rules
+ for name,r in ssymbols:
+
+ if name == 't_ignore':
+ lexer.lexignore = r
+ continue
+
+ if not optimize:
+ if name == 't_error':
+ raise SyntaxError,"lex: Rule 't_error' must be defined as a function"
+
+ if not lexer.lextokens.has_key(name[2:]):
+ print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:])
+ error = 1
+ continue
+ try:
+ c = re.compile(r,re.VERBOSE)
+ except re.error,e:
+ print "lex: Invalid regular expression for rule '%s'. %s" % (name,e)
+ error = 1
+ continue
+ if debug:
+ print "lex: Adding rule %s -> '%s'" % (name,r)
+
+ if regex: regex += "|"
+ regex += "(?P<%s>%s)" % (name,r)
+
+ if not optimize:
+ for f in files.keys():
+ if not validate_file(f):
+ error = 1
+ try:
+ if debug:
+ print "lex: regex = '%s'" % regex
+ lexer.lexre = re.compile(regex, re.VERBOSE)
+
+ # Build the index to function map for the matching engine
+ lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1)
+ for f,i in lexer.lexre.groupindex.items():
+ handle = ldict[f]
+ if isinstance(handle,types.FunctionType):
+ lexer.lexindexfunc[i] = (handle,handle.__name__[2:])
+ else:
+            # If the rule was specified as a string, there is no action
+            # function; we record only the token type
+ lexer.lexindexfunc[i] = (None,f[2:])
+
+ # If a lextab was specified, we create a file containing the precomputed
+ # regular expression and index table
+
+ if lextab and optimize:
+ lt = open(lextab+".py","w")
+ lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab)
+ lt.write("_lexre = %s\n" % repr(regex))
+ lt.write("_lextab = [\n");
+ for i in range(0,len(lexer.lexindexfunc)):
+ t = lexer.lexindexfunc[i]
+ if t:
+ if t[0]:
+ lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1])))
+ else:
+ lt.write(" (None,%s),\n" % repr(t[1]))
+ else:
+ lt.write(" None,\n")
+
+ lt.write("]\n");
+ lt.write("_lextokens = %s\n" % repr(lexer.lextokens))
+ lt.write("_lexignore = %s\n" % repr(lexer.lexignore))
+ if (lexer.lexerrorf):
+ lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__))
+ else:
+ lt.write("_lexerrorf = None\n")
+ lt.close()
+
+ except re.error,e:
+ print "lex: Fatal error. Unable to compile regular expression rules. %s" % e
+ error = 1
+ if error:
+ raise SyntaxError,"lex: Unable to build lexer."
+ if not lexer.lexerrorf:
+ print "lex: Warning. no t_error rule is defined."
+
+ if not lexer.lexignore: lexer.lexignore = ""
+
+ # Create global versions of the token() and input() functions
+ token = lexer.token
+ input = lexer.input
+
+ return lexer
+
+# -----------------------------------------------------------------------------
+# runmain()
+#
+# This runs the lexer as a main program
+# -----------------------------------------------------------------------------
+
+def runmain(lexer=None,data=None):
+ if not data:
+ try:
+ filename = sys.argv[1]
+ f = open(filename)
+ data = f.read()
+ f.close()
+ except IndexError:
+ print "Reading from standard input (type EOF to end):"
+ data = sys.stdin.read()
+
+ if lexer:
+ _input = lexer.input
+ else:
+ _input = input
+ _input(data)
+ if lexer:
+ _token = lexer.token
+ else:
+ _token = token
+
+ while 1:
+ tok = _token()
+ if not tok: break
+ print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno)
+
+
+
+
--- /dev/null
+
+# parsetab.py
+# This file is automatically generated. Do not edit.
+
+_lr_method = 'SLR'
+
+_lr_signature = '\xd4p\x02\xe6^\xeb\x1b\xa7\xf4\r\xb2\xf7!\xf32\x1d'
+
+_lr_action_items = {'AND':([13,1,6,2,15,17,16,14,3,5,4,],[11,-11,-5,-8,-7,-4,-3,-6,-10,11,-9,]),'QUOTEDVALUE':([8,10,12,0,9,11,],[1,1,1,1,1,1,]),'ATTR':([12,0,8,11,],[2,2,2,2,]),'CLASS':([0,],[8,]),'NEQ':([2,],[10,]),'EQ':([2,],[9,]),'OR':([1,15,13,6,4,2,14,5,17,16,3,],[-11,-7,12,-5,-9,-8,-6,12,-4,-3,-10,]),'VALUE':([11,9,12,10,8,0,],[3,3,3,3,3,3,]),'$':([17,14,2,5,3,7,13,16,15,1,4,6,],[-4,-6,-8,-2,-10,0,-1,-3,-7,-11,-9,-5,]),}
+
+_lr_action = { }
+for _k, _v in _lr_action_items.items():
+ for _x,_y in zip(_v[0],_v[1]):
+ _lr_action[(_x,_k)] = _y
+del _lr_action_items
+
+_lr_goto_items = {'querystr':([0,11,12,8,],[6,16,17,6,]),'total':([0,],[7,]),'value':([12,11,10,8,9,0,],[4,4,15,4,14,4,]),'query':([8,0,],[13,5,]),}
+
+_lr_goto = { }
+for _k, _v in _lr_goto_items.items():
+ for _x,_y in zip(_v[0],_v[1]):
+ _lr_goto[(_x,_k)] = _y
+del _lr_goto_items
+_lr_productions = [
+ ("S'",1,None,None,None),
+ ('total',2,'p_total_class_query','QueryParser.py',74),
+ ('total',1,'p_total_query','QueryParser.py',80),
+ ('query',3,'p_query_oper_querystr','QueryParser.py',86),
+ ('query',3,'p_query_oper_querystr','QueryParser.py',87),
+ ('query',1,'p_query_querystr','QueryParser.py',97),
+ ('querystr',3,'p_querystr_attr_value','QueryParser.py',104),
+ ('querystr',3,'p_querystr_attr_value','QueryParser.py',105),
+ ('querystr',1,'p_querystr_attr','QueryParser.py',110),
+ ('querystr',1,'p_querystr_value','QueryParser.py',115),
+ ('value',1,'p_value','QueryParser.py',121),
+ ('value',1,'p_quotedvalue','QueryParser.py',128),
+]
--- /dev/null
+#-----------------------------------------------------------------------------
+# ply: yacc.py
+#
+# Author: David M. Beazley (beazley@cs.uchicago.edu)
+# Department of Computer Science
+# University of Chicago
+# Chicago, IL 60637
+#
+# Copyright (C) 2001, David M. Beazley
+#
+# $Header: /home/davidb/src/cvs/python-rwhoisd/rwhoisd/yacc.py,v 1.1 2003/04/22 02:19:07 davidb Exp $
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See the file COPYING for a complete copy of the LGPL.
+#
+#
+# This implements an LR parser that is constructed from grammar rules defined
+# as Python functions. Roughly speaking, this module is a cross between
+# John Aycock's Spark system and the GNU bison utility.
+#
+# Disclaimer: This is a work in progress. SLR parsing seems to work fairly
+# well and there is extensive error checking. LALR(1) is in progress. The
+# rest of this file is a bit of a mess. Please pardon the dust.
+#
+# The current implementation is only somewhat object-oriented. The
+# LR parser itself is defined in terms of an object (which allows multiple
+# parsers to co-exist). However, most of the variables used during table
+# construction are defined in terms of global variables. Users shouldn't
+# notice unless they are trying to define multiple parsers at the same
+# time using threads (in which case they should have their head examined).
+#-----------------------------------------------------------------------------
+
+__version__ = "1.3"
+
+#-----------------------------------------------------------------------------
+# === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish)
+#-----------------------------------------------------------------------------
+
+yaccdebug = 1 # Debugging mode. If set, yacc generates a
+ # a 'parser.out' file in the current directory
+
+debug_file = 'parser.out' # Default name of the debugging file
+tab_module = 'parsetab' # Default name of the table module
+default_lr = 'SLR' # Default LR table generation method
+
+error_count = 3 # Number of symbols that must be shifted to leave recovery mode
+
+import re, types, sys, cStringIO, md5, os.path
+
+# Exception raised for yacc-related errors
+class YaccError(Exception): pass
+
+#-----------------------------------------------------------------------------
+# === LR Parsing Engine ===
+#
+# The following classes are used for the LR parser itself. These are not
+# used during table construction and are independent of the actual LR
+# table generation algorithm
+#-----------------------------------------------------------------------------
+
+# This class is used to hold non-terminal grammar symbols during parsing.
+# It normally has the following attributes set:
+# .type = Grammar symbol type
+# .value = Symbol value
+# .lineno = Starting line number
+# .endlineno = Ending line number (optional, set automatically)
+
+class YaccSymbol:
+ def __str__(self): return self.type
+ def __repr__(self): return str(self)
+
+# This class is a wrapper around the objects actually passed to each
+# grammar rule. Index lookup and assignment actually assign the
+# .value attribute of the underlying YaccSymbol object.
+# The lineno() method returns the line number of a given
+# item (or 0 if not defined). The linespan() method returns
+# a tuple of (startline,endline) representing the range of lines
+# for a symbol.
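+#
+# For example, a grammar rule (an illustrative sketch) receives one of
+# these slice objects and assigns its result through index 0:
+#
+#     def p_expr_plus(t):
+#         'expr : expr PLUS expr'
+#         t[0] = t[1] + t[3]                 # combine the .value attributes
+#         startline,endline = t.linespan(2)  # line range of the PLUS token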
+
+class YaccSlice:
+ def __init__(self,s):
+ self.slice = s
+ self.pbstack = []
+
+ def __getitem__(self,n):
+ return self.slice[n].value
+
+ def __setitem__(self,n,v):
+ self.slice[n].value = v
+
+ def lineno(self,n):
+ return getattr(self.slice[n],"lineno",0)
+
+ def linespan(self,n):
+ startline = getattr(self.slice[n],"lineno",0)
+ endline = getattr(self.slice[n],"endlineno",startline)
+ return startline,endline
+
+ def pushback(self,n):
+ if n <= 0:
+ raise ValueError, "Expected a positive value"
+ if n > (len(self.slice)-1):
+ raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
+ for i in range(0,n):
+ self.pbstack.append(self.slice[-i-1])
+
+# The LR Parsing engine. This is defined as a class so that multiple parsers
+# can exist in the same process. A user never instantiates this directly.
+# Instead, the global yacc() function should be used to create a suitable Parser
+# object.
+
+class Parser:
+ def __init__(self,magic=None):
+
+ # This is a hack to keep users from trying to instantiate a Parser
+ # object directly.
+
+ if magic != "xyzzy":
+ raise YaccError, "Can't instantiate Parser. Use yacc() instead."
+
+ # Reset internal state
+ self.productions = None # List of productions
+ self.errorfunc = None # Error handling function
+ self.action = { } # LR Action table
+ self.goto = { } # LR goto table
+ self.require = { } # Attribute require table
+ self.method = "Unknown LR" # Table construction method used
+
+ def errok(self):
+ self.errorcount = 0
+
+ def restart(self):
+ del self.statestack[:]
+ del self.symstack[:]
+ sym = YaccSymbol()
+ sym.type = '$'
+ self.symstack.append(sym)
+ self.statestack.append(0)
+
+ def parse(self,input=None,lexer=None,debug=0):
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [ ] # Stack of lookahead symbols
+ actions = self.action # Local reference to action table
+ goto = self.goto # Local reference to goto table
+ prod = self.productions # Local reference to production list
+ pslice = YaccSlice(None) # Slice object passed to grammar rules
+ pslice.parser = self # Parser object
+ self.errorcount = 0 # Used during error recovery
+
+ # If no lexer was given, we will try to use the lex module
+ if not lexer:
+ import lex as lexer
+
+ pslice.lexer = lexer
+
+ # If input was supplied, pass to lexer
+ if input:
+ lexer.input(input)
+
+ # Tokenize function
+ get_token = lexer.token
+
+ statestack = [ ] # Stack of parsing states
+ self.statestack = statestack
+ symstack = [ ] # Stack of grammar symbols
+ self.symstack = symstack
+
+ errtoken = None # Err token
+
+ # The start state is assumed to be (0,$)
+ statestack.append(0)
+ sym = YaccSymbol()
+ sym.type = '$'
+ symstack.append(sym)
+
+ while 1:
+ # Get the next symbol on the input. If a lookahead symbol
+ # is already set, we just use that. Otherwise, we'll pull
+ # the next token off of the lookaheadstack or from the lexer
+ if not lookahead:
+ if not lookaheadstack:
+ lookahead = get_token() # Get the next token
+ else:
+ lookahead = lookaheadstack.pop()
+ if not lookahead:
+ lookahead = YaccSymbol()
+ lookahead.type = '$'
+ if debug:
+ print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])
+
+ # Check the action table
+ s = statestack[-1]
+ ltype = lookahead.type
+ t = actions.get((s,ltype),None)
+
+ if t is not None:
+ if t > 0:
+ # shift a symbol on the stack
+ if ltype == '$':
+ # Error, end of input
+ print "yacc: Parse error. EOF"
+ return
+ statestack.append(t)
+ symstack.append(lookahead)
+ lookahead = None
+
+ # Decrease error count on successful shift
+ if self.errorcount > 0:
+ self.errorcount -= 1
+
+ continue
+
+ if t < 0:
+ # reduce a symbol on the stack, emit a production
+ p = prod[-t]
+ pname = p.name
+ plen = p.len
+
+ # Get production function
+ sym = YaccSymbol()
+ sym.type = pname # Production name
+ sym.value = None
+
+ if plen:
+ targ = symstack[-plen-1:]
+ targ[0] = sym
+ try:
+ sym.lineno = targ[1].lineno
+ sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
+ except AttributeError:
+ sym.lineno = 0
+ del symstack[-plen:]
+ del statestack[-plen:]
+ else:
+ sym.lineno = 0
+ targ = [ sym ]
+ pslice.slice = targ
+ pslice.pbstack = []
+ # Call the grammar rule with our special slice object
+ p.func(pslice)
+
+ # Validate attributes of the resulting value attribute
+# if require:
+# try:
+# t0 = targ[0]
+# r = Requires.get(t0.type,None)
+# t0d = t0.__dict__
+# if r:
+# for field in r:
+# tn = t0
+# for fname in field:
+# try:
+# tf = tn.__dict__
+# tn = tf.get(fname)
+# except StandardError:
+# tn = None
+# if not tn:
+# print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
+# (p.file,p.line,p.name,".".join(field))
+# except TypeError,LookupError:
+# print "Bad requires directive " % r
+# pass
+
+
+ # If there was a pushback, put that on the stack
+ if pslice.pbstack:
+ lookaheadstack.append(lookahead)
+ for _t in pslice.pbstack:
+ lookaheadstack.append(_t)
+ lookahead = None
+
+ symstack.append(sym)
+ statestack.append(goto[statestack[-1],pname])
+ continue
+
+ if t == 0:
+ n = symstack[-1]
+ return getattr(n,"value",None)
+
+            if t is None:
+ # We have some kind of parsing error here. To handle this,
+ # we are going to push the current token onto the tokenstack
+ # and replace it with an 'error' token. If there are any synchronization
+ # rules, they may catch it.
+ #
+                # In addition to pushing the error token, we call the user defined p_error()
+ # function if this is the first syntax error. This function is only called
+ # if errorcount == 0.
+
+ if not self.errorcount:
+ self.errorcount = error_count
+ errtoken = lookahead
+ if errtoken.type == '$':
+ errtoken = None # End of file!
+ if self.errorfunc:
+ global errok,token,restart
+ errok = self.errok # Set some special functions available in error recovery
+ token = get_token
+ restart = self.restart
+ tok = self.errorfunc(errtoken)
+ del errok, token, restart # Delete special functions
+
+ if not self.errorcount:
+ # User must have done some kind of panic mode recovery on their own. The returned token
+ # is the next lookahead
+ lookahead = tok
+ errtoken = None
+ continue
+ else:
+ if errtoken:
+                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
+ else: lineno = 0
+ if lineno:
+ print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
+ else:
+ print "yacc: Syntax error, token=%s" % errtoken.type
+ else:
+ print "yacc: Parse error in input. EOF"
+ return
+
+ else:
+ self.errorcount = error_count
+
+ # case 1: the statestack only has 1 entry on it. If we're in this state, the
+ # entire parse has been rolled back and we're completely hosed. The token is
+ # discarded and we just keep going.
+
+ if len(statestack) <= 1 and lookahead.type != '$':
+ lookahead = None
+ errtoken = None
+ # Nuke the pushback stack
+ del lookaheadstack[:]
+ continue
+
+ # case 2: the statestack has a couple of entries on it, but we're
+ # at the end of the file. nuke the top entry and generate an error token
+
+ # Start nuking entries on the stack
+ if lookahead.type == '$':
+ # Whoa. We're really hosed here. Bail out
+ return
+
+ if lookahead.type != 'error':
+ sym = symstack[-1]
+ if sym.type == 'error':
+ # Hmmm. Error is on top of stack, we'll just nuke input
+ # symbol and continue
+ lookahead = None
+ continue
+ t = YaccSymbol()
+ t.type = 'error'
+ if hasattr(lookahead,"lineno"):
+ t.lineno = lookahead.lineno
+ t.value = lookahead
+ lookaheadstack.append(lookahead)
+ lookahead = t
+ else:
+ symstack.pop()
+ statestack.pop()
+
+ continue
+
+ # Call an error function here
+ raise RuntimeError, "yacc: internal parser error!!!\n"
+
+# -----------------------------------------------------------------------------
+# === Parser Construction ===
+#
+# The following functions and variables are used to implement the yacc() function
+# itself. This is pretty hairy stuff involving lots of error checking,
+# construction of LR items, kernels, and so forth. Although a lot of
+# this work is done using global variables, the resulting Parser object
+# is completely self contained--meaning that it is safe to repeatedly
+# call yacc() with different grammars in the same application.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# validate_file()
+#
+# This function checks to see if there are duplicated p_rulename() functions
+# in the parser module file. Without this function, it is really easy for
+# users to make mistakes by cutting and pasting code fragments (and it's a real
+# bugger to try and figure out why the resulting parser doesn't work). Therefore,
+# we just do a little regular expression pattern matching of def statements
+# to try and detect duplicates.
+# -----------------------------------------------------------------------------
+
+def validate_file(filename):
+ base,ext = os.path.splitext(filename)
+ if ext != '.py': return 1 # No idea. Assume it's okay.
+
+ try:
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ return 1 # Oh well
+
+ # Match def p_funcname(
+ fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
+ counthash = { }
+ linen = 1
+ noerror = 1
+ for l in lines:
+ m = fre.match(l)
+ if m:
+ name = m.group(1)
+ prev = counthash.get(name)
+ if not prev:
+ counthash[name] = linen
+ else:
+ print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
+ noerror = 0
+ linen += 1
+ return noerror
+
+# This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
+def validate_dict(d):
+ for n,v in d.items():
+ if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
+ if n[0:2] == 't_': continue
+
+ if n[0:2] == 'p_':
+ print "yacc: Warning. '%s' not defined as a function" % n
+ if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
+ try:
+ doc = v.__doc__.split(" ")
+ if doc[1] == ':':
+ print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n)
+ except StandardError:
+ pass
+
+# -----------------------------------------------------------------------------
+# === GRAMMAR FUNCTIONS ===
+#
+# The following global variables and functions are used to store, manipulate,
+# and verify the grammar rules specified by the user.
+# -----------------------------------------------------------------------------
+
+# Initialize all of the global variables used during grammar construction
+def initialize_vars():
+ global Productions, Prodnames, Prodmap, Terminals
+ global Nonterminals, First, Follow, Precedence, LRitems
+ global Errorfunc, Signature, Requires
+
+ Productions = [None] # A list of all of the productions. The first
+ # entry is always reserved for the purpose of
+ # building an augmented grammar
+
+ Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
+ # productions of that nonterminal.
+
+ Prodmap = { } # A dictionary that is only used to detect duplicate
+ # productions.
+
+ Terminals = { } # A dictionary mapping the names of terminal symbols to a
+ # list of the rules where they are used.
+
+ Nonterminals = { } # A dictionary mapping names of nonterminals to a list
+ # of rule numbers where they are used.
+
+ First = { } # A dictionary of precomputed FIRST(x) symbols
+
+ Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
+
+ Precedence = { } # Precedence rules for each terminal. Contains tuples of the
+ # form ('right',level) or ('nonassoc', level) or ('left',level)
+
+ LRitems = [ ] # A list of all LR items for the grammar. These are the
+ # productions with the "dot" like E -> E . PLUS E
+
+ Errorfunc = None # User defined error handler
+
+    Signature = md5.new()      # Digital signature of the grammar rules, precedence
+                               # and other information. Used to determine when a
+                               # parsing table needs to be regenerated.
+
+ Requires = { } # Requires list
+
+ # File objects used when creating the parser.out debugging file
+ global _vf, _vfc
+ _vf = cStringIO.StringIO()
+ _vfc = cStringIO.StringIO()
+
+# -----------------------------------------------------------------------------
+# class Production:
+#
+# This class stores the raw information about a single production or grammar rule.
+# It has a few required attributes:
+#
+# name - Name of the production (nonterminal)
+# prod - A list of symbols making up its production
+# number - Production number.
+#
+# In addition, a few additional attributes are used to help with debugging or
+# optimization of table generation.
+#
+# file - File where production action is defined.
+# lineno - Line number where action is defined
+# func - Action function
+# prec - Precedence level
+# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
+# then lr_next refers to 'E -> E PLUS . E'
+# lr_index - LR item index (location of the ".") in the prod list.
+# len - Length of the production (number of symbols on right hand side)
+# -----------------------------------------------------------------------------
+
+class Production:
+ def __init__(self,**kw):
+ for k,v in kw.items():
+ setattr(self,k,v)
+ self.lr_index = -1
+ self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure
+ self.usyms = [ ]
+
+ def __str__(self):
+ if self.prod:
+ s = "%s -> %s" % (self.name," ".join(self.prod))
+ else:
+ s = "%s -> <empty>" % self.name
+ return s
+
+ def __repr__(self):
+ return str(self)
+
+ # Compute lr_items from the production
+ def lr_item(self,n):
+ if n > len(self.prod): return None
+ p = Production()
+ p.name = self.name
+ p.prod = list(self.prod)
+ p.number = self.number
+ p.lr_index = n
+ p.prod.insert(n,".")
+ p.prod = tuple(p.prod)
+ p.len = len(p.prod)
+ p.usyms = self.usyms
+
+ # Precompute list of productions immediately following
+ try:
+ p.lrafter = Prodnames[p.prod[n+1]]
+ except (IndexError,KeyError),e:
+ p.lrafter = []
+ try:
+ p.lrbefore = p.prod[n-1]
+ except IndexError:
+ p.lrbefore = None
+
+ return p
+
+class MiniProduction:
+ pass
+
+# Utility function
+def is_identifier(s):
+ for c in s:
+ if not (c.isalnum() or c == '_'): return 0
+ return 1
+
+# -----------------------------------------------------------------------------
+# add_production()
+#
+# Given an action function, this function assembles a production rule.
+# The production rule is assumed to be found in the function's docstring.
+# This rule has the general syntax:
+#
+# name1 ::= production1
+# | production2
+# | production3
+# ...
+# | productionn
+# name2 ::= production1
+# | production2
+# ...
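+#
+# For instance (an illustrative sketch), a rule function whose docstring is
+#
+#     statement : expression
+#               | assignment
+#
+# results in two calls to add_production(), one per alternative, both
+# with prodname 'statement'.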
+# -----------------------------------------------------------------------------
+
+def add_production(f,file,line,prodname,syms):
+
+ if Terminals.has_key(prodname):
+ print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname)
+ return -1
+ if prodname == 'error':
+ print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname)
+ return -1
+
+ if not is_identifier(prodname):
+ print "%s:%d: Illegal rule name '%s'" % (file,line,prodname)
+ return -1
+
+ for s in syms:
+ if not is_identifier(s) and s != '%prec':
+ print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)
+ return -1
+
+ # See if the rule is already in the rulemap
+ map = "%s -> %s" % (prodname,syms)
+ if Prodmap.has_key(map):
+ m = Prodmap[map]
+ print "%s:%d: Duplicate rule %s." % (file,line, m)
+ print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line)
+ return -1
+
+ p = Production()
+ p.name = prodname
+ p.prod = syms
+ p.file = file
+ p.line = line
+ p.func = f
+ p.number = len(Productions)
+
+
+ Productions.append(p)
+ Prodmap[map] = p
+ if not Nonterminals.has_key(prodname):
+ Nonterminals[prodname] = [ ]
+
+ # Add all terminals to Terminals
+ i = 0
+ while i < len(p.prod):
+ t = p.prod[i]
+ if t == '%prec':
+ try:
+ precname = p.prod[i+1]
+ except IndexError:
+ print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line)
+ return -1
+
+ prec = Precedence.get(precname,None)
+ if not prec:
+ print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname)
+ return -1
+ else:
+ p.prec = prec
+ del p.prod[i]
+ del p.prod[i]
+ continue
+
+ if Terminals.has_key(t):
+ Terminals[t].append(p.number)
+ # Is a terminal. We'll assign a precedence to p based on this
+ if not hasattr(p,"prec"):
+ p.prec = Precedence.get(t,('right',0))
+ else:
+ if not Nonterminals.has_key(t):
+ Nonterminals[t] = [ ]
+ Nonterminals[t].append(p.number)
+ i += 1
+
+ if not hasattr(p,"prec"):
+ p.prec = ('right',0)
+
+ # Set final length of productions
+ p.len = len(p.prod)
+ p.prod = tuple(p.prod)
+
+ # Calculate unique syms in the production
+ p.usyms = [ ]
+ for s in p.prod:
+ if s not in p.usyms:
+ p.usyms.append(s)
+
+ # Add to the global productions list
+ try:
+ Prodnames[p.name].append(p)
+ except KeyError:
+ Prodnames[p.name] = [ p ]
+ return 0
+
+# Given a raw rule function, this function rips out its doc string
+# and adds rules to the grammar
+
+def add_function(f):
+ line = f.func_code.co_firstlineno
+ file = f.func_code.co_filename
+ error = 0
+
+ if f.func_code.co_argcount > 1:
+ print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
+ return -1
+
+ if f.func_code.co_argcount < 1:
+ print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
+ return -1
+
+ if f.__doc__:
+ # Split the doc string into lines
+ pstrings = f.__doc__.splitlines()
+ lastp = None
+ dline = line
+ for ps in pstrings:
+ dline += 1
+ p = ps.split()
+ if not p: continue
+ try:
+ if p[0] == '|':
+ # This is a continuation of a previous rule
+ if not lastp:
+ print "%s:%d: Misplaced '|'." % (file,dline)
+ return -1
+ prodname = lastp
+ if len(p) > 1:
+ syms = p[1:]
+ else:
+ syms = [ ]
+ else:
+ prodname = p[0]
+ lastp = prodname
+ assign = p[1]
+ if len(p) > 2:
+ syms = p[2:]
+ else:
+ syms = [ ]
+ if assign != ':' and assign != '::=':
+ print "%s:%d: Syntax error. Expected ':'" % (file,dline)
+ return -1
+ e = add_production(f,file,dline,prodname,syms)
+ error += e
+ except StandardError:
+ print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps)
+ error -= 1
+ else:
+ print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__)
+ return error
+
+
+# Cycle checking code (Michael Dyck)
+
+def compute_reachable():
+ '''
+ Find each symbol that can be reached from the start symbol.
+ Print a warning for any nonterminals that can't be reached.
+ (Unused terminals have already had their warning.)
+ '''
+ Reachable = { }
+ for s in Terminals.keys() + Nonterminals.keys():
+ Reachable[s] = 0
+
+ mark_reachable_from( Productions[0].prod[0], Reachable )
+
+ for s in Nonterminals.keys():
+ if not Reachable[s]:
+ print "yacc: Symbol '%s' is unreachable." % s
+
+def mark_reachable_from(s, Reachable):
+ '''
+ Mark all symbols that are reachable from symbol s.
+ '''
+ if Reachable[s]:
+ # We've already reached symbol s.
+ return
+ Reachable[s] = 1
+ for p in Prodnames.get(s,[]):
+ for r in p.prod:
+ mark_reachable_from(r, Reachable)
+
+# -----------------------------------------------------------------------------
+# compute_terminates()
+#
+# This function looks at the various parsing rules and tries to detect
+# infinite recursion cycles (grammar rules where there is no possible way
+# to derive a string of only terminals).
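+#
+# For example (an illustrative sketch), a grammar whose only rule for A is
+#
+#     A : A PLUS A
+#
+# can never derive a string of pure terminals, so 'A' is reported here.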
+# -----------------------------------------------------------------------------
+def compute_terminates():
+ '''
+ Raise an error for any symbols that don't terminate.
+ '''
+ Terminates = {}
+
+ # Terminals:
+ for t in Terminals.keys():
+ Terminates[t] = 1
+
+ Terminates['$'] = 1
+
+ # Nonterminals:
+
+ # Initialize to false:
+ for n in Nonterminals.keys():
+ Terminates[n] = 0
+
+ # Then propagate termination until no change:
+ while 1:
+ some_change = 0
+ for (n,pl) in Prodnames.items():
+ # Nonterminal n terminates iff any of its productions terminates.
+ for p in pl:
+ # Production p terminates iff all of its rhs symbols terminate.
+ for s in p.prod:
+ if not Terminates[s]:
+ # The symbol s does not terminate,
+ # so production p does not terminate.
+ p_terminates = 0
+ break
+ else:
+ # didn't break from the loop,
+ # so every symbol s terminates
+ # so production p terminates.
+ p_terminates = 1
+
+ if p_terminates:
+ # symbol n terminates!
+ if not Terminates[n]:
+ Terminates[n] = 1
+ some_change = 1
+ # Don't need to consider any more productions for this n.
+ break
+
+ if not some_change:
+ break
+
+ some_error = 0
+ for (s,terminates) in Terminates.items():
+ if not terminates:
+ if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
+ # s is used-but-not-defined, and we've already warned of that,
+ # so it would be overkill to say that it's also non-terminating.
+ pass
+ else:
+ print "yacc: Infinite recursion detected for symbol '%s'." % s
+ some_error = 1
+
+ return some_error
+
+# -----------------------------------------------------------------------------
+# verify_productions()
+#
+# This function examines all of the supplied rules to see if they seem valid.
+# -----------------------------------------------------------------------------
+def verify_productions(cycle_check=1):
+ error = 0
+ for p in Productions:
+ if not p: continue
+
+ for s in p.prod:
+ if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
+ print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s)
+ error = 1
+ continue
+
+ unused_tok = 0
+ # Now verify all of the tokens
+ if yaccdebug:
+ _vf.write("Unused terminals:\n\n")
+ for s,v in Terminals.items():
+ if s != 'error' and not v:
+ print "yacc: Warning. Token '%s' defined, but not used." % s
+ if yaccdebug: _vf.write(" %s\n"% s)
+ unused_tok += 1
+
+ # Print out all of the productions
+ if yaccdebug:
+ _vf.write("\nGrammar\n\n")
+ for i in range(1,len(Productions)):
+ _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
+
+ unused_prod = 0
+ # Verify the use of all productions
+ for s,v in Nonterminals.items():
+ if not v:
+ p = Prodnames[s][0]
+ print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s)
+ unused_prod += 1
+
+
+ if unused_tok == 1:
+ print "yacc: Warning. There is 1 unused token."
+ if unused_tok > 1:
+ print "yacc: Warning. There are %d unused tokens." % unused_tok
+
+ if unused_prod == 1:
+ print "yacc: Warning. There is 1 unused rule."
+ if unused_prod > 1:
+ print "yacc: Warning. There are %d unused rules." % unused_prod
+
+ if yaccdebug:
+ _vf.write("\nTerminals, with rules where they appear\n\n")
+ ks = Terminals.keys()
+ ks.sort()
+ for k in ks:
+ _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
+ _vf.write("\nNonterminals, with rules where they appear\n\n")
+ ks = Nonterminals.keys()
+ ks.sort()
+ for k in ks:
+ _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))
+
+ if (cycle_check):
+ compute_reachable()
+ error += compute_terminates()
+# error += check_cycles()
+ return error
+
+# -----------------------------------------------------------------------------
+# build_lritems()
+#
+# This function walks the list of productions and builds a complete set of the
+# LR items. The LR items are stored in two ways: First, they are uniquely
+# numbered and placed in the list _lritems. Second, a linked list of LR items
+# is built for each production. For example:
+#
+# E -> E PLUS E
+#
+# Creates the list
+#
+# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
+# -----------------------------------------------------------------------------
+
+def build_lritems():
+ for p in Productions:
+ lastlri = p
+        i = 0
+ while 1:
+ lri = p.lr_item(i)
+ lastlri.lr_next = lri
+ if not lri: break
+ lri.lr_num = len(LRitems)
+ LRitems.append(lri)
+ lastlri = lri
+ i += 1
+
+ # In order for the rest of the parser generator to work, we need to
+ # guarantee that no more lritems are generated. Therefore, we nuke
+ # the p.lr_item method. (Only used in debugging)
+ # Production.lr_item = None
+
+# -----------------------------------------------------------------------------
+# add_precedence()
+#
+# Given a list of precedence rules, add to the precedence table.
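+#
+# For example (an illustrative sketch), a caller might pass:
+#
+#     precedence = (
+#         ('left', 'PLUS', 'MINUS'),
+#         ('left', 'TIMES', 'DIVIDE'),
+#     )
+#
+# which records PLUS and MINUS at level 1 and TIMES and DIVIDE at
+# level 2, so the later entries bind more tightly.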
+# -----------------------------------------------------------------------------
+
+def add_precedence(plist):
+ plevel = 0
+ error = 0
+ for p in plist:
+ plevel += 1
+ try:
+ prec = p[0]
+ terms = p[1:]
+ if prec != 'left' and prec != 'right' and prec != 'nonassoc':
+ print "yacc: Invalid precedence '%s'" % prec
+ return -1
+ for t in terms:
+ if Precedence.has_key(t):
+ print "yacc: Precedence already specified for terminal '%s'" % t
+ error += 1
+ continue
+ Precedence[t] = (prec,plevel)
+ except:
+ print "yacc: Invalid precedence table."
+ error += 1
+
+ return error
+
+# -----------------------------------------------------------------------------
+# augment_grammar()
+#
+# Compute the augmented grammar. This is just a rule S' -> start where start
+# is the starting symbol.
+# -----------------------------------------------------------------------------
+
+def augment_grammar(start=None):
+ if not start:
+ start = Productions[1].name
+ Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
+ Productions[0].usyms = [ start ]
+ Nonterminals[start].append(0)
+
+
+# -------------------------------------------------------------------------
+# first()
+#
+# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
+#
+# During execution of compute_first1, the result may be incomplete.
+# Afterward (e.g., when called from compute_follow()), it will be complete.
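+#
+# For example (an illustrative sketch), if First['B'] = ['x','<empty>']
+# and First['C'] = ['y'], then first(('B','C')) returns ['x','y']:
+# B can vanish, so C's FIRST symbols are included, but '<empty>' itself
+# is only added when every symbol in beta can produce empty.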
+# -------------------------------------------------------------------------
+def first(beta):
+
+ # We are computing First(x1,x2,x3,...,xn)
+ result = [ ]
+ for x in beta:
+ x_produces_empty = 0
+
+ # Add all the non-<empty> symbols of First[x] to the result.
+ for f in First[x]:
+ if f == '<empty>':
+ x_produces_empty = 1
+ else:
+ if f not in result: result.append(f)
+
+ if x_produces_empty:
+ # We have to consider the next x in beta,
+ # i.e. stay in the loop.
+ pass
+ else:
+ # We don't have to consider any further symbols in beta.
+ break
+ else:
+ # There was no 'break' from the loop,
+ # so x_produces_empty was true for all x in beta,
+ # so beta produces empty as well.
+ result.append('<empty>')
+
+ return result
+
+
+# FOLLOW(x)
+# Given a non-terminal. This function computes the set of all symbols
+# that might follow it. Dragon book, p. 189.
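+#
+# For example (an illustrative sketch), with the productions
+#
+#     E : T PLUS E
+#     E : T
+#
+# the first rule puts PLUS into FOLLOW(T), and because T also ends a
+# production for E, everything in FOLLOW(E) is folded into FOLLOW(T).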
+
+def compute_follow(start=None):
+ # Add '$' to the follow list of the start symbol
+ for k in Nonterminals.keys():
+ Follow[k] = [ ]
+
+ if not start:
+ start = Productions[1].name
+
+ Follow[start] = [ '$' ]
+
+ while 1:
+ didadd = 0
+ for p in Productions[1:]:
+ # Here is the production set
+ for i in range(len(p.prod)):
+ B = p.prod[i]
+ if Nonterminals.has_key(B):
+ # Okay. We got a non-terminal in a production
+ fst = first(p.prod[i+1:])
+ hasempty = 0
+ for f in fst:
+ if f != '<empty>' and f not in Follow[B]:
+ Follow[B].append(f)
+ didadd = 1
+ if f == '<empty>':
+ hasempty = 1
+ if hasempty or i == (len(p.prod)-1):
+ # Add elements of follow(a) to follow(b)
+ for f in Follow[p.name]:
+ if f not in Follow[B]:
+ Follow[B].append(f)
+ didadd = 1
+ if not didadd: break
+
+ if 0 and yaccdebug:
+ _vf.write('\nFollow:\n')
+ for k in Nonterminals.keys():
+ _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
+
+# -------------------------------------------------------------------------
+# compute_first1()
+#
+# Compute the value of FIRST1(X) for all symbols
+# -------------------------------------------------------------------------
+def compute_first1():
+
+ # Terminals:
+ for t in Terminals.keys():
+ First[t] = [t]
+
+ First['$'] = ['$']
+    First['#'] = ['#'] # '#' is used as a marker pseudo-terminal by the LALR(1) code
+
+ # Nonterminals:
+
+ # Initialize to the empty set:
+ for n in Nonterminals.keys():
+ First[n] = []
+
+ # Then propagate symbols until no change:
+ while 1:
+ some_change = 0
+ for n in Nonterminals.keys():
+ for p in Prodnames[n]:
+ for f in first(p.prod):
+ if f not in First[n]:
+ First[n].append( f )
+ some_change = 1
+ if not some_change:
+ break
+
+ if 0 and yaccdebug:
+ _vf.write('\nFirst:\n')
+ for k in Nonterminals.keys():
+ _vf.write("%-20s : %s\n" %
+ (k, " ".join([str(s) for s in First[k]])))
+
+# -----------------------------------------------------------------------------
+# === SLR Generation ===
+#
+# The following functions are used to construct SLR (Simple LR) parsing tables
+# as described on p.221-229 of the dragon book.
+# -----------------------------------------------------------------------------
+
+# Global variables for the LR parsing engine
+def lr_init_vars():
+ global _lr_action, _lr_goto, _lr_method
+ global _lr_goto_cache
+
+ _lr_action = { } # Action table
+ _lr_goto = { } # Goto table
+ _lr_method = "Unknown" # LR method used
+ _lr_goto_cache = { }
+
+# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
+# prodlist is a list of productions.
+
+_add_count = 0 # Counter used to detect cycles
+
+def lr0_closure(I):
+ global _add_count
+
+ _add_count += 1
+ prodlist = Productions
+
+ # Add everything in I to J
+ J = I[:]
+ didadd = 1
+ while didadd:
+ didadd = 0
+ for j in J:
+ for x in j.lrafter:
+ if x.lr0_added == _add_count: continue
+ # Add B --> .G to J
+ J.append(x.lr_next)
+ x.lr0_added = _add_count
+ didadd = 1
+
+ return J
+
+# Compute the LR(0) goto function goto(I,X) where I is a set
+# of LR(0) items and X is a grammar symbol. This function is written
+# in a way that guarantees uniqueness of the generated goto sets
+# (i.e. the same goto set will never be returned as two different Python
+# objects). With uniqueness, we can later do fast set comparisons using
+# id(obj) instead of element-wise comparison.
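+#
+# For example (an illustrative sketch), two calls lr0_goto(I,'PLUS')
+# on the same item set I hand back the identical list object, so the
+# table construction code can compare goto sets with id(g1) == id(g2).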
+
+def lr0_goto(I,x):
+ # First we look for a previously cached entry
+ g = _lr_goto_cache.get((id(I),x),None)
+ if g: return g
+
+ # Now we generate the goto set in a way that guarantees uniqueness
+ # of the result
+
+ s = _lr_goto_cache.get(x,None)
+ if not s:
+ s = { }
+ _lr_goto_cache[x] = s
+
+ gs = [ ]
+ for p in I:
+ n = p.lr_next
+ if n and n.lrbefore == x:
+ s1 = s.get(id(n),None)
+ if not s1:
+ s1 = { }
+ s[id(n)] = s1
+ gs.append(n)
+ s = s1
+ g = s.get('$',None)
+ if not g:
+ if gs:
+ g = lr0_closure(gs)
+ s['$'] = g
+ else:
+ s['$'] = gs
+ _lr_goto_cache[(id(I),x)] = g
+ return g
+
+# Compute the kernel of a set of LR(0) items
+def lr0_kernel(I):
+ KI = [ ]
+ for p in I:
+ if p.name == "S'" or p.lr_index > 0 or p.len == 0:
+ KI.append(p)
+
+ return KI
+
+_lr0_cidhash = { }
+
+# Compute the LR(0) sets of item function
+def lr0_items():
+
+ C = [ lr0_closure([Productions[0].lr_next]) ]
+ i = 0
+ for I in C:
+ _lr0_cidhash[id(I)] = i
+ i += 1
+
+ # Loop over the items in C and each grammar symbols
+ i = 0
+ while i < len(C):
+ I = C[i]
+ i += 1
+
+ # Collect all of the symbols that could possibly be in the goto(I,X) sets
+ asyms = { }
+ for ii in I:
+ for s in ii.usyms:
+ asyms[s] = None
+
+ for x in asyms.keys():
+ g = lr0_goto(I,x)
+ if not g: continue
+ if _lr0_cidhash.has_key(id(g)): continue
+ _lr0_cidhash[id(g)] = len(C)
+ C.append(g)
+
+ return C
+
+# -----------------------------------------------------------------------------
+# slr_parse_table()
+#
+# This function constructs an SLR table.
+# -----------------------------------------------------------------------------
+def slr_parse_table():
+ global _lr_method
+ goto = _lr_goto # Goto array
+ action = _lr_action # Action array
+ actionp = { } # Action production array (temporary)
+
+ _lr_method = "SLR"
+
+ n_srconflict = 0
+ n_rrconflict = 0
+
+ print "yacc: Generating SLR parsing table..."
+ if yaccdebug:
+ _vf.write("\n\nParsing method: SLR\n\n")
+
+ # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
+ # This determines the number of states
+
+ C = lr0_items()
+
+ # Build the parser table, state by state
+ st = 0
+ for I in C:
+ # Loop over each production in I
+ actlist = [ ] # List of actions
+
+ if yaccdebug:
+ _vf.write("\nstate %d\n\n" % st)
+ for p in I:
+ _vf.write(" (%d) %s\n" % (p.number, str(p)))
+ _vf.write("\n")
+
+ for p in I:
+ try:
+ if p.prod[-1] == ".":
+ if p.name == "S'":
+ # Start symbol. Accept!
+ action[st,"$"] = 0
+ actionp[st,"$"] = p
+ else:
+ # We are at the end of a production. Reduce!
+ for a in Follow[p.name]:
+ actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
+ r = action.get((st,a),None)
+ if r is not None:
+ # Whoa. Have a shift/reduce or reduce/reduce conflict
+ if r > 0:
+ # Need to decide on shift or reduce here
+ # By default we favor shifting. Need to add
+ # some precedence rules here.
+ sprec,slevel = Productions[actionp[st,a].number].prec
+ rprec,rlevel = Precedence.get(a,('right',0))
+ if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
+ # We really need to reduce here.
+ action[st,a] = -p.number
+ actionp[st,a] = p
+ if not slevel and not rlevel:
+ _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
+ _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
+ n_srconflict += 1
+ elif (slevel == rlevel) and (rprec == 'nonassoc'):
+ action[st,a] = None
+ else:
+ # Hmmm. Guess we'll keep the shift
+ if not slevel and not rlevel:
+ _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
+ _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
+ n_srconflict +=1
+ elif r < 0:
+ # Reduce/reduce conflict. In this case, we favor the rule
+ # that was defined first in the grammar file
+ oldp = Productions[-r]
+ pp = Productions[p.number]
+ if oldp.line > pp.line:
+ action[st,a] = -p.number
+ actionp[st,a] = p
+ # print "Reduce/reduce conflict in state %d" % st
+ n_rrconflict += 1
+ _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
+ _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
+ else:
+ print "Unknown conflict in state %d" % st
+ else:
+ action[st,a] = -p.number
+ actionp[st,a] = p
+ else:
+ i = p.lr_index
+ a = p.prod[i+1] # Get symbol right after the "."
+ if Terminals.has_key(a):
+ g = lr0_goto(I,a)
+ j = _lr0_cidhash.get(id(g),-1)
+ if j >= 0:
+ # We are in a shift state
+ actlist.append((a,p,"shift and go to state %d" % j))
+ r = action.get((st,a),None)
+ if r is not None:
+ # Whoa have a shift/reduce or shift/shift conflict
+ if r > 0:
+ if r != j:
+ print "Shift/shift conflict in state %d" % st
+ elif r < 0:
+ # Do a precedence check.
+ # - if precedence of reduce rule is higher, we reduce.
+ # - if precedence of reduce is same and left assoc, we reduce.
+ # - otherwise we shift
+ rprec,rlevel = Productions[actionp[st,a].number].prec
+ sprec,slevel = Precedence.get(a,('right',0))
+ if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
+ # We decide to shift here... highest precedence to shift
+ action[st,a] = j
+ actionp[st,a] = p
+ if not slevel and not rlevel:
+ n_srconflict += 1
+ _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
+ _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
+ elif (slevel == rlevel) and (rprec == 'nonassoc'):
+ action[st,a] = None
+ else:
+ # Hmmm. Guess we'll keep the reduce
+ if not slevel and not rlevel:
+ n_srconflict +=1
+ _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
+ _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
+
+ else:
+ print "Unknown conflict in state %d" % st
+ else:
+ action[st,a] = j
+ actionp[st,a] = p
+
+ except StandardError,e:
+            raise YaccError, "Hosed in slr_parse_table: %s" % e
+
+ # Print the actions associated with each terminal
+ if yaccdebug:
+ for a,p,m in actlist:
+ if action.has_key((st,a)):
+ if p is actionp[st,a]:
+ _vf.write(" %-15s %s\n" % (a,m))
+ _vf.write("\n")
+ for a,p,m in actlist:
+ if action.has_key((st,a)):
+ if p is not actionp[st,a]:
+ _vf.write(" ! %-15s [ %s ]\n" % (a,m))
+
+ # Construct the goto table for this state
+ if yaccdebug:
+ _vf.write("\n")
+ nkeys = { }
+ for ii in I:
+ for s in ii.usyms:
+ if Nonterminals.has_key(s):
+ nkeys[s] = None
+ for n in nkeys.keys():
+ g = lr0_goto(I,n)
+ j = _lr0_cidhash.get(id(g),-1)
+ if j >= 0:
+ goto[st,n] = j
+ if yaccdebug:
+ _vf.write(" %-15s shift and go to state %d\n" % (n,j))
+
+ st += 1
+
+ if n_srconflict == 1:
+ print "yacc: %d shift/reduce conflict" % n_srconflict
+ if n_srconflict > 1:
+ print "yacc: %d shift/reduce conflicts" % n_srconflict
+ if n_rrconflict == 1:
+ print "yacc: %d reduce/reduce conflict" % n_rrconflict
+ if n_rrconflict > 1:
+ print "yacc: %d reduce/reduce conflicts" % n_rrconflict
+
+
+# -----------------------------------------------------------------------------
+# ==== LALR(1) Parsing ====
+# **** UNFINISHED! 6/16/01
+# -----------------------------------------------------------------------------
+
+
+# Compute the lr1_closure of a set I.  I is a list of tuples (p,a) where
+# p is an LR(0) item and a is a terminal.
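+#
+# A worked sketch of the closure step: closing [(S' -> . S, '$')] adds
+# (S -> . gamma, b) for every production S -> gamma reachable right after
+# the dot, with b drawn from first(beta a) -- here beta is empty, so
+# b = '$'.  The worklist loop below repeats this until no new
+# (item, lookahead) pairs appear.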
+
+_lr1_add_count = 0
+
+def lr1_closure(I):
+ global _lr1_add_count
+
+ _lr1_add_count += 1
+
+ J = I[:]
+
+ # Loop over items (p,a) in I.
+ ji = 0
+ while ji < len(J):
+ p,a = J[ji]
+ # p = [ A -> alpha . B beta]
+
+ # For each production B -> gamma
+ for B in p.lr1_after:
+ f = tuple(p.lr1_beta + (a,))
+
+ # For each terminal b in first(Beta a)
+ for b in first(f):
+ # Check if (B -> . gamma, b) is in J
+ # Only way this can happen is if the add count mismatches
+ pn = B.lr_next
+ if pn.lr_added.get(b,0) == _lr1_add_count: continue
+ pn.lr_added[b] = _lr1_add_count
+ J.append((pn,b))
+ ji += 1
+
+ return J
+
+def lalr_parse_table():
+
+ # Compute some lr1 information about all of the productions
+ for p in LRitems:
+ try:
+ after = p.prod[p.lr_index + 1]
+ p.lr1_after = Prodnames[after]
+ p.lr1_beta = p.prod[p.lr_index + 2:]
+ except LookupError:
+ p.lr1_after = [ ]
+ p.lr1_beta = [ ]
+ p.lr_added = { }
+
+ # Compute the LR(0) items
+ C = lr0_items()
+ CK = []
+ for I in C:
+ CK.append(lr0_kernel(I))
+
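+ # Debugging output -- the LALR(1) construction is unfinished (see above)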
+ print CK
+
+# -----------------------------------------------------------------------------
+# ==== LR Utility functions ====
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# lr_write_tables()
+#
+# This function writes the LR parsing tables to a file
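+#
+# The generated module looks roughly like this (a sketch; the exact
+# entries depend on the grammar):
+#
+#   _lr_method = 'SLR'
+#   _lr_signature = '...'
+#   _lr_action = { (0,'NUMBER') : 4, ... }   # >0 = shift, <0 = reduce
+#   _lr_goto = { (0,'expr') : 1, ... }
+#   _lr_productions = [ ('expr',3,'p_expr','calc.py',12), ... ]
+#
+# When the table-compaction option below is on, _lr_action and _lr_goto
+# are instead rebuilt from factored _lr_action_items/_lr_goto_items.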
+# -----------------------------------------------------------------------------
+
+def lr_write_tables(modulename=tab_module):
+ filename = modulename + ".py"
+ try:
+ f = open(filename,"w")
+
+ f.write("""
+# %s
+# This file is automatically generated. Do not edit.
+
+_lr_method = %s
+
+_lr_signature = %s
+""" % (filename, repr(_lr_method), repr(Signature.digest())))
+
+ # Change smaller to 0 to go back to original tables
+ smaller = 1
+
+ # Factor out names to try to make the output file smaller
+ if smaller:
+ items = { }
+
+ for k,v in _lr_action.items():
+ i = items.get(k[1])
+ if not i:
+ i = ([],[])
+ items[k[1]] = i
+ i[0].append(k[0])
+ i[1].append(v)
+
+ f.write("\n_lr_action_items = {")
+ for k,v in items.items():
+ f.write("%r:([" % k)
+ for i in v[0]:
+ f.write("%r," % i)
+ f.write("],[")
+ for i in v[1]:
+ f.write("%r," % i)
+
+ f.write("]),")
+ f.write("}\n")
+
+ f.write("""
+_lr_action = { }
+for _k, _v in _lr_action_items.items():
+ for _x,_y in zip(_v[0],_v[1]):
+ _lr_action[(_x,_k)] = _y
+del _lr_action_items
+""")
+
+ else:
+ f.write("\n_lr_action = { ");
+ for k,v in _lr_action.items():
+ f.write("(%r,%r):%r," % (k[0],k[1],v))
+ f.write("}\n");
+
+ if smaller:
+ # Factor out names to try to make the output file smaller
+ items = { }
+
+ for k,v in _lr_goto.items():
+ i = items.get(k[1])
+ if not i:
+ i = ([],[])
+ items[k[1]] = i
+ i[0].append(k[0])
+ i[1].append(v)
+
+ f.write("\n_lr_goto_items = {")
+ for k,v in items.items():
+ f.write("%r:([" % k)
+ for i in v[0]:
+ f.write("%r," % i)
+ f.write("],[")
+ for i in v[1]:
+ f.write("%r," % i)
+
+ f.write("]),")
+ f.write("}\n")
+
+ f.write("""
+_lr_goto = { }
+for _k, _v in _lr_goto_items.items():
+ for _x,_y in zip(_v[0],_v[1]):
+ _lr_goto[(_x,_k)] = _y
+del _lr_goto_items
+""")
+ else:
+ f.write("\n_lr_goto = { ");
+ for k,v in _lr_goto.items():
+ f.write("(%r,%r):%r," % (k[0],k[1],v))
+ f.write("}\n");
+
+ # Write production table
+ f.write("_lr_productions = [\n")
+ for p in Productions:
+ if p:
+ if (p.func):
+ f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
+ else:
+ f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
+ else:
+ f.write(" None,\n")
+ f.write("]\n")
+ f.close()
+
+ except IOError,e:
+ print "Unable to create '%s'" % filename
+ print e
+ return
+
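+# -----------------------------------------------------------------------------
+# lr_read_tables()
+#
+# Try to import a previously generated table module.  Returns 1 if the
+# tables were loaded (the signature matches, or optimize is set),
+# 0 otherwise.
+# -----------------------------------------------------------------------------
+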
+def lr_read_tables(module=tab_module,optimize=0):
+ global _lr_action, _lr_goto, _lr_productions, _lr_method
+ try:
+ exec "import %s as parsetab" % module
+
+ if (optimize) or (Signature.digest() == parsetab._lr_signature):
+ _lr_action = parsetab._lr_action
+ _lr_goto = parsetab._lr_goto
+ _lr_productions = parsetab._lr_productions
+ _lr_method = parsetab._lr_method
+ return 1
+ else:
+ return 0
+
+ except (ImportError,AttributeError):
+ return 0
+
+# -----------------------------------------------------------------------------
+# yacc(module)
+#
+# Build the parser module
+# -----------------------------------------------------------------------------
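+#
+# Typical use from a grammar module (a sketch; the grammar and names are
+# illustrative only):
+#
+#   import yacc
+#   tokens = ( 'NUMBER', 'PLUS' )    # token list shared with the lexer
+#   def p_expr_plus(t):
+#       'expr : expr PLUS NUMBER'
+#       t[0] = t[1] + t[3]
+#   def p_expr_number(t):
+#       'expr : NUMBER'
+#       t[0] = t[1]
+#   def p_error(t):
+#       print "Syntax error at '%s'" % t.value
+#   parser = yacc.yacc()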
+
+def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
+ global yaccdebug
+ yaccdebug = debug
+
+ initialize_vars()
+ files = { }
+ error = 0
+
+ # Add starting symbol to signature
+ if start:
+ Signature.update(start)
+
+ # Try to figure out what module we are working with
+ if module:
+ # User supplied a module object.
+ if not isinstance(module, types.ModuleType):
+ raise ValueError,"Expected a module"
+
+ ldict = module.__dict__
+
+ else:
+ # No module given. We might be able to get information from the caller.
+ # Throw an exception and unwind the traceback to get the globals
+
+ try:
+ raise RuntimeError
+ except RuntimeError:
+ e,b,t = sys.exc_info()
+ f = t.tb_frame
+ f = f.f_back # Walk out to our calling function
+ ldict = f.f_globals # Grab its globals dictionary
+
+ # If running in optimized mode, try to read the parse tables directly
+ # from the pre-generated table module and skip normal grammar processing.
+
+ if (optimize and lr_read_tables(tabmodule,1)):
+ # Read parse table
+ del Productions[:]
+ for p in _lr_productions:
+ if not p:
+ Productions.append(None)
+ else:
+ m = MiniProduction()
+ m.name = p[0]
+ m.len = p[1]
+ m.file = p[3]
+ m.line = p[4]
+ if p[2]:
+ m.func = ldict[p[2]]
+ Productions.append(m)
+
+ else:
+ # Get the tokens map
+ tokens = ldict.get("tokens",None)
+
+ if not tokens:
+ raise YaccError,"module does not define a list 'tokens'"
+ if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
+ raise YaccError,"tokens must be a list or tuple."
+
+ # Check to see if a 'require' dictionary is defined.
+ requires = ldict.get("require",None)
+ if requires:
+ if not (isinstance(requires,types.DictType)):
+ raise YaccError,"require must be a dictionary."
+
+ for r,v in requires.items():
+ try:
+ if not (isinstance(v,types.ListType)):
+ raise TypeError
+ v1 = [x.split(".") for x in v]
+ Requires[r] = v1
+ except StandardError:
+ print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
+
+
+ # Build the dictionary of terminals.  We record an empty list for
+ # each terminal; the lists are later used to track where each
+ # terminal is actually used in the grammar.
+
+ if 'error' in tokens:
+ print "yacc: Illegal token 'error'. Is a reserved word."
+ raise YaccError,"Illegal token name"
+
+ for n in tokens:
+ if Terminals.has_key(n):
+ print "yacc: Warning. Token '%s' multiply defined." % n
+ Terminals[n] = [ ]
+
+ Terminals['error'] = [ ]
+
+ # Get the precedence map (if any)
+ prec = ldict.get("precedence",None)
+ if prec:
+ if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
+ raise YaccError,"precedence must be a list or tuple."
+ add_precedence(prec)
+ Signature.update(repr(prec))
+
+ for n in tokens:
+ if not Precedence.has_key(n):
+ Precedence[n] = ('right',0) # Default, right associative, 0 precedence
+
+ # Look for error handler
+ ef = ldict.get('p_error',None)
+ if ef:
+ if not isinstance(ef,types.FunctionType):
+ raise YaccError,"'p_error' defined, but is not a function."
+ eline = ef.func_code.co_firstlineno
+ efile = ef.func_code.co_filename
+ files[efile] = None
+
+ if (ef.func_code.co_argcount != 1):
+ raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
+ global Errorfunc
+ Errorfunc = ef
+ else:
+ print "yacc: Warning. no p_error() function is defined."
+
+ # Get the list of grammar rule functions (names starting with 'p_')
+ symbols = [ldict[f] for f in ldict.keys()
+ if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
+ and ldict[f].__name__ != 'p_error')]
+
+ # Make sure we found at least one grammar rule
+ if len(symbols) == 0:
+ raise YaccError,"no rules of the form p_rulename are defined."
+
+ # Sort the symbols by line number
+ symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
+
+ # Add all of the symbols to the grammar
+ for f in symbols:
+ if (add_function(f)) < 0:
+ error += 1
+ else:
+ files[f.func_code.co_filename] = None
+
+ # Make a signature of the docstrings
+ for f in symbols:
+ if f.__doc__:
+ Signature.update(f.__doc__)
+
+ lr_init_vars()
+
+ if error:
+ raise YaccError,"Unable to construct parser."
+
+ if not lr_read_tables(tabmodule):
+
+ # Validate files
+ for filename in files.keys():
+ if not validate_file(filename):
+ error = 1
+
+ # Validate dictionary
+ validate_dict(ldict)
+
+ if start and not Prodnames.has_key(start):
+ raise YaccError,"Bad starting symbol '%s'" % start
+
+ augment_grammar(start)
+ error = verify_productions(cycle_check=check_recursion)
+ otherfunc = [ldict[f] for f in ldict.keys()
+ if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')]
+
+ if error:
+ raise YaccError,"Unable to construct parser."
+
+ build_lritems()
+ compute_first1()
+ compute_follow(start)
+
+ if method == 'SLR':
+ slr_parse_table()
+ elif method == 'LALR1':
+ lalr_parse_table()
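+ # NOTE: LALR(1) construction is unfinished (see above); no parser is built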
+ return
+ else:
+ raise YaccError, "Unknown parsing method '%s'" % method
+
+ lr_write_tables(tabmodule)
+
+ if yaccdebug:
+ try:
+ f = open(debug_file,"w")
+ f.write(_vfc.getvalue())
+ f.write("\n\n")
+ f.write(_vf.getvalue())
+ f.close()
+ except IOError,e:
+ print "yacc: can't create '%s'" % debug_file,e
+
+ # Made it here. Create a parser object and set up its internal state.
+ # Set global parse() method to bound method of parser object.
+
+ p = Parser("xyzzy")
+ p.productions = Productions
+ p.errorfunc = Errorfunc
+ p.action = _lr_action
+ p.goto = _lr_goto
+ p.method = _lr_method
+ p.require = Requires
+
+ global parse
+ parse = p.parse
+
+ # Clean up all of the globals we created
+ if (not optimize):
+ yacc_cleanup()
+ return p
+
+# yacc_cleanup function. Delete all of the global variables
+# used during table construction
+
+def yacc_cleanup():
+ global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
+ del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
+
+ global Productions, Prodnames, Prodmap, Terminals
+ global Nonterminals, First, Follow, Precedence, LRitems
+ global Errorfunc, Signature, Requires
+
+ del Productions, Prodnames, Prodmap, Terminals
+ del Nonterminals, First, Follow, Precedence, LRitems
+ del Errorfunc, Signature, Requires
+
+ global _vf, _vfc
+ del _vf, _vfc
+
+
+# Stub that raises an error if parsing is attempted without first calling yacc()
+def parse(*args,**kwargs):
+ raise YaccError, "yacc: No parser built with yacc()"
+
--- /dev/null
+## Data file:
+## This is much like a rwhoisd-1.5.x series data file. It consists
+## of "attribute: value" pairs, separated by lines of "---" or (for
+## this file format) blank lines. Unlike the rwhoisd file format,
+## the Class-Name attribute is required.
+
+ID: 666.10.0.0.0/8
+Class-Name: network
+Auth-Area: 10.0.0.0/8
+Network-Name: A-NET
+IP-Network: 10.0.0.0/8
+Organization:777.a.com
+Tech-Contact:222.a.com
+Admin-Contact:222.a.com
+Created: 19961022
+Updated: 19961023
+Updated-By: hostmaster@a.com
+
+ID:333.a.com
+Auth-Area:a.com
+Class-Name: domain
+Guardian:444.a.com
+Domain-Name: a.com
+Primary-Server:5551.a.com
+Secondary-Server:5552.a.com
+Organization:777.a.com
+Admin-Contact:222.a.com
+Tech-Contact:222.a.com
+Billing-Contact:222.a.com
+Created:19961022
+Updated:19961023
+Updated-By:hostmaster@a.com
+
+ID:222.a.com
+Auth-Area: a.com
+Class-Name: contact
+Name:Public, John Q.
+Email:johnq@a.com
+Type:I
+First-Name:John
+Last-Name:Public
+Phone:(847)-391-7926
+Fax:(847)-338-0340
+Organization:777.a.com
+See-Also:http://www.a.com/~johnq
+Created:19961022
+Updated:19961023
+Updated-By:hostmaster@a.com
+
+ID:223.a.com
+Auth-Area:a.com
+Class-Name: contact
+Name:Doe, Jane
+Email:janed@a.com
+Type:I
+First-Name:Jane
+Last-Name:Doe
+Phone:(847)-391-7943
+Fax:(847)-338-0340
+Organization:777.a.com
+Created:19961025
+Updated:19961025
+Updated-By:hostmaster@a.com
+
+ID: 5552.a.com
+Auth-Area: a.com
+Class-Name: host
+Host-Name: ns2.a.com
+IP-Address: 10.0.0.2
+Created: 19961022
+Updated: 19961023
+Updated-By: hostmaster@a.com
+
+ID: 777.a.com
+Auth-Area: a.com
+Class-Name: organization
+Org-Name: A Communications, Inc.
+Street-Address: #600 - 1380 Burrars St.
+City: Vaner
+State: CM
+Postal-Code: V6Z 2H3
+Country-Code: NL
+Phone: (401) 555-6721
+Created: 19961022
+Updated: 19961023
+Updated-By: hostmaster@a.com
+
+ID:888.a.com
+Auth-Area: a.com
+Class-Name: referral
+Guardian:444.a.com
+Referral:rwhois://rwhois.second.a.com:4321/Auth-Area=fddi.a.com
+Organization:777.a.com
+Referred-Auth-Area:fddi.a.com
+Created:19961022
+Updated:19961023
+Updated-By:hostmaster@a.com
--- /dev/null
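+## Sample queries: one query per line.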
+#10.131.252.227
+network 10.131.252/24
+network 24.210.66.0/24
+domain a.com
+domain domain-Name = a.com
+contact doe
+contact public and type = "I"
+contact public and type = O or doe and type = I
+contact Pub*
+contact pub*
+contact "Pub*"
+network 11/8
+network foo=bar
\ No newline at end of file
--- /dev/null
+network class-name = bar and foo OR ip-address != foo and "1 2 3 4" and First-Name = " Bob**"
+Bob
+network 10.0.0.0/8
+domain domain-name="example*" and domain-name=*ple.net and host-name="f*"
+domain organization="foo"
--- /dev/null
+## Schema description file:
+## consists of <attribute_name> = <index_type> pairs, one per line.
+## index_type is one of N, C, or A.
+## N = normal, string valued index.
+## C = cidr index (for ip addresses and netblocks)
+## A = all, both a normal and a cidr index
+
+domain-name = N
+email = N
+host-name = N
+ip-address = C
+ip-network = C
+last-name = N
+name = N
+network-name = N
+org-name = N
--- /dev/null
+#!/usr/bin/env python
+
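+# Standard distutils setup script.  Typical invocation (standard
+# distutils usage, nothing package-specific):
+#
+#   python setup.py install
+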
+from distutils.core import setup
+import os
+
+setup(name="python-rwhoisd",
+ version="0.1",
+ description="A lightweight RWhois server written in Python",
+ author="David Blacka",
+ author_email="davidb@verisignlabs.com",
+ url="http://www.rwhois.net/",
+ packages=['rwhoisd'],
+ scripts=[os.path.join('bin', f) for f in os.listdir('bin')],
+ data_files=[('sample_data',
+ ['sample_data/example_schema',
+ 'sample_data/example_data'])]
+ )