import bisect, types
import MemIndex, Cidr
from Rwhois import rwhoisobject

class MemDB:
    """In-memory database of rwhoisobjects with per-attribute indexes.

    Objects are stored in a main index keyed by lowercase object id.
    Attributes named in the schema additionally get a search index
    (string, CIDR, or a combination), which the search_* methods
    consult.  Search methods return IndexResult objects holding object
    ids; use fetch_objects() to turn ids into objects.
    """

    def __init__(self):
        """Create an empty database with no schema and no objects."""

        # a dictionary holding the various attribute indexes.  The keys
        # are lowercase attribute names, values are MemIndex,
        # CidrMemIndex or ComboMemIndex objects.
        self.indexes = {}

        # a dictionary holding the actual rwhoisobjects.  keys are
        # lowercase string IDs, values are rwhoisobject instances.
        self.main_index = {}

        # dictionary holding all of the seen attributes.  keys are
        # lowercase attribute names, value is a character indicating
        # the index type (if indexed), or None if not indexed.  Index
        # type characters are 'N' for a normal string index, 'C' for a
        # CIDR index, 'A' for both, and 'R' for a referral index.
        self.attrs = {}

        # Lists containing attribute names that have indexes by type.
        # This exists so unconstrained searches can just iterate over
        # them.
        self.normal_indexes = []
        self.cidr_indexes   = []

        # dictionary holding all of the seen class names.  keys are
        # lowercase classnames, value is always None.
        self.classes = {}

        # dictionary holding all of the seen auth-areas.  keys are
        # lowercase authority area names, value is always None.
        self.authareas = {}

    def init_schema(self, schema_file):
        """Initialize the schema from a schema file.  Currently the
        schema file is a list of 'attribute_name = index_type' pairs,
        one per line; blank lines and lines starting with '#' are
        skipped.  Only the first character of index_type is used
        (case-insensitively):

          N -- normal string index
          C -- CIDR index
          A -- "all": searched both as a normal and as a CIDR index
          R -- referral index: a combined index that is only searched
               when named explicitly

        Any other character is recorded for the attribute but creates
        no index.

        Raises ValueError for a non-comment line without '=' and
        IndexError for a line whose index_type is empty.

        It should be noted that this database implementation
        implements a global namespace for attributes, which isn't
        really correct according to RFC 2167.  RFC 2167 dictates that
        different authority areas are actually autonomous and thus
        have separate schemas."""

        # initialize base schema

        self.attrs['id']         = "N"
        self.attrs['auth-area']  = None
        self.attrs['class-name'] = None
        self.attrs['updated']    = None
        self.attrs['referred-auth-area'] = "R"

        sf = open(schema_file, "r")
        try:
            for line in sf:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Split on the first '=' only; everything after the
                # first character of the type is ignored anyway.
                attr, it = line.split("=", 1)
                self.attrs[attr.strip().lower()] = it.strip()[0].upper()
        finally:
            # close the schema file even if a line fails to parse
            sf.close()

        for attr, index_type in self.attrs.items():
            if index_type == "N":
                # normal index
                self.indexes[attr] = MemIndex.MemIndex()
                self.normal_indexes.append(attr)
            elif index_type == "A":
                # "all" index -- both a normal and a cidr index
                self.indexes[attr] = MemIndex.ComboMemIndex()
                self.normal_indexes.append(attr)
                self.cidr_indexes.append(attr)
            elif index_type == "R":
                # referral index, an all index that must be searched
                # explicitly by attribute
                self.indexes[attr] = MemIndex.ComboMemIndex()
            elif index_type == "C":
                # a cidr index
                self.indexes[attr] = MemIndex.CidrMemIndex()
                self.cidr_indexes.append(attr)

    def add_object(self, obj):
        """Add an rwhoisobject to the raw indexes, including the
        master index.  Objects without an id are silently ignored.
        Ids, attribute names and values are lowercased before they are
        stored or indexed."""

        # add the object to the main index
        obj_id = obj.getid()
        if not obj_id:
            return
        obj_id = obj_id.lower()

        self.main_index[obj_id] = obj

        for attr, value in obj.items():
            # self.attrs is keyed by lowercase names, so normalize
            # before noting the attribute.
            attr = attr.lower()
            index_type = self.attrs.setdefault(attr, None)
            value = value.lower()

            # make sure that we note the auth-area and class
            if attr == 'auth-area':
                self.authareas.setdefault(value, None)
            elif attr == 'class-name':
                self.classes.setdefault(value, None)

            if index_type:
                # A schema line with an unrecognized type character
                # leaves a truthy index_type with no index behind it;
                # skip those rather than raising KeyError.
                index = self.indexes.get(attr)
                if index is not None:
                    index.add(value, obj_id)

    def load_data(self, data_file):
        """Load data from rwhoisd-style TXT files (i.e., attr:value,
        records separated with a "---" bare line).  Blank lines also
        end a record and lines starting with '#' are skipped.

        Raises ValueError for a data line that contains no ':'."""

        obj = rwhoisobject()

        df = open(data_file, "r")
        try:
            for line in df:
                line = line.strip()
                if line.startswith("#"):
                    continue
                if not line or line.startswith("---"):
                    # we've reached the end of an object, so index it.
                    self.add_object(obj)
                    # reset obj
                    obj = rwhoisobject()
                    continue

                a, v = line.split(":", 1)
                obj.add_attr(a, v.lstrip())
        finally:
            # close the data file even if a line fails to parse
            df.close()

        # index the last record (the file need not end in a separator)
        self.add_object(obj)

    def index_data(self):
        """Prepare the indexes for searching.  Currently, this isn't
        strictly necessary (the indexes will prepare themselves when
        necessary), but it should eliminate a penalty on initial
        searches"""

        for index in self.indexes.values():
            index.prepare()

    def is_attribute(self, attr):
        """Return True if attr (case-insensitive) is a known attribute."""
        return attr.lower() in self.attrs

    def is_indexed_attr(self, attr):
        """Return the index type character recorded for attr, None if
        the attribute is known but not indexed, or False if the
        attribute is unknown."""
        return self.attrs.get(attr.lower(), False)

    def is_objectclass(self, objectclass):
        """Return True if objectclass (case-insensitive) has been seen."""
        return objectclass.lower() in self.classes

    def is_autharea(self, aa):
        """Return True if the authority area aa (case-insensitive) has
        been seen."""
        return aa.lower() in self.authareas

    def get_authareas(self):
        """Return a list of the lowercase authority area names seen."""
        return list(self.authareas)

    def fetch_objects(self, id_list):
        """Return the objects for the ids in id_list, in order.  Ids
        that are not in the main index are skipped."""
        return [ self.main_index[x] for x in id_list
                 if x in self.main_index ]

    def _as_result(self, found):
        """Wrap raw index output in an IndexResult (unless it already
        is one) so every search path returns the same type."""
        if isinstance(found, IndexResult):
            return found
        return IndexResult(found)

    def search_attr(self, attr, value, max = 0):

        """Search for a value in a particular attribute's index.  If
        the attribute is cidr indexed, an attempt to convert value
        into a Cidr object will be made.  A value ending in '*'
        requests a prefix match; a value ending in '**' on a cidr
        indexed attribute searches for subnets instead.  Returns an
        IndexResult of object ids (empty if the attribute has no index
        or nothing was found)."""

        attr = attr.lower()
        index_type = self.attrs.get(attr)
        index = self.indexes.get(attr)
        if not index:
            return IndexResult()

        prefix_match = False
        super_prefix_match = False

        # Only strings carry wildcard syntax; a ready-made Cidr object
        # is handed to the index untouched instead of being run through
        # string methods it does not have.
        if not isinstance(value, Cidr.Cidr):
            # strip first so trailing whitespace cannot hide a wildcard
            value = value.strip()
            super_prefix_match = value.endswith("**")
            if value.endswith("*"):
                value = value.rstrip("*").strip()
                prefix_match = True

            if index_type == 'C':
                # NOTE(review): the result of valid_cidr is passed on
                # unchecked; confirm how the index treats an invalid
                # address.
                value = Cidr.valid_cidr(value)
            else:
                value = value.lower()

        if index_type == 'C' and super_prefix_match:
            return self._as_result(index.find_subnets(value, max))

        return self._as_result(index.find(value, prefix_match, max))

    def _search_indexes(self, attrs, value, max):
        """Search each attribute in attrs in turn, merging the ids
        found, and stop as soon as max (if non-zero) ids are held."""

        res = IndexResult()
        for attr in attrs:
            res.extend(self.search_attr(attr, value, max))
            # measure the underlying list; this does not rely on
            # IndexResult supporting len()
            if max and len(res.list()) >= max:
                res.truncate(max)
                break
        return res

    def search_normal(self, value, max = 0):
        """Search for a value in the 'normal' (string keyed) indexes.
        Returns an IndexResult of object ids (empty if nothing was
        found), holding at most max ids when max is non-zero."""

        return self._search_indexes(self.normal_indexes, value, max)

    def search_cidr(self, value, max = 0):
        """Search for a value in the cidr indexes.  Returns an
        IndexResult of object ids (empty if nothing was found),
        holding at most max ids when max is non-zero."""

        return self._search_indexes(self.cidr_indexes, value, max)

    def search_referral(self, value, max = 0):
        """Given a hierarchical value, search for referrals.  Returns
        an IndexResult of object ids (possibly empty)."""

        return self.search_attr("referred-auth-area", value, max)

    def object_iterator(self):
        """Return an iterator over every stored rwhoisobject."""
        # values() rather than itervalues() so this also runs where
        # dicts have no itervalues method.
        return iter(self.main_index.values())

class IndexResult:
    """An ordered collection of search result ids.

    The ids are kept in first-seen order in self.data; self._dict
    mirrors them as keys so extend() can skip ids that are already
    present.  An empty result has length zero and is therefore falsy.
    """

    def __init__(self, list=None):
        """Start from the ids in list (any iterable), or empty when
        list is None or empty.  The ids are copied, so later changes
        to this result never modify the caller's sequence."""
        # The parameter shadows the builtin of the same name, so copy
        # with a comprehension instead of calling list(...).
        if not list:
            self.data = []
        else:
            self.data = [x for x in list]
        self._dict = dict.fromkeys(self.data)

    def __len__(self):
        """Return the number of ids held."""
        return len(self.data)

    def __iter__(self):
        """Iterate over the ids in order."""
        return iter(self.data)

    def extend(self, list):
        """Append the ids in list (a sequence or another IndexResult)
        that are not already present, preserving their order."""
        if isinstance(list, IndexResult):
            list = list.list()
        # Record each id as it is added so duplicates inside the
        # incoming sequence are dropped too.
        for x in list:
            if x not in self._dict:
                self._dict[x] = None
                self.data.append(x)

    def list(self):
        """Return the underlying list of ids (not a copy)."""
        return self.data

    def truncate(self, n=0):
        """Keep only the first n ids."""
        self.data = self.data[:n]
        # Rebuild the lookup from what is left; deleting the dropped
        # ids one by one fails when an id occurs more than once.
        self._dict = dict.fromkeys(self.data)


# test driver
if __name__ == "__main__":
    # Manual smoke test: load a schema and any number of data files,
    # dump what was learned, then run a few fixed searches.
    import sys

    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s schema_file [data_file ...]\n"
                         % sys.argv[0])
        sys.exit(1)

    def result_ids(res):
        # A search may hand back a plain list instead of an
        # IndexResult; accept either.
        if isinstance(res, IndexResult):
            return res.list()
        return res

    def show(res, with_objects=True):
        # Print the ids found and, optionally, the objects they name.
        ids = result_ids(res)
        print(ids)
        if with_objects:
            print([ str(x) for x in db.fetch_objects(ids) ])

    db = MemDB()

    # Every print below takes a single pre-formatted argument so the
    # output is the same whether print is a statement or a function.
    print("loading schema: %s" % sys.argv[1])
    db.init_schema(sys.argv[1])
    for data_file in sys.argv[2:]:
        print("loading data file: %s" % data_file)
        db.load_data(data_file)
    db.index_data()

    print("Schema: authority areas")
    for a in db.authareas:
        print("   %s" % a)
    print("Schema: classes")
    for c in db.classes:
        print("   %s" % c)
    print("Schema: attributes")
    for a in db.attrs:
        print("   %s" % a)

    print("Is 'Network' a class? %s" % db.is_objectclass("Network"))

    print("searching for a.com")
    show(db.search_attr("domain-name", "a.com"))

    print("searching for doe")
    show(db.search_normal("doe"))

    print("searching for 10.0.0.2")
    show(db.search_cidr("10.0.0.2"))

    print("searching for fddi.a.com")
    show(db.search_normal("fddi.a.com"), with_objects=False)

    print("searching referral index for fddi.a.com")
    show(db.search_attr("referred-auth-area", "fddi.a.com"))