rwhoisd/MemDB.py

   1 # This file is part of python-rwhoisd
   2 #
   3 # Copyright (C) 2003, David E. Blacka
   4 #
   5 # $Id: MemDB.py,v 1.3 2003/04/28 16:44:09 davidb Exp $
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 # General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  20 # USA
  21
  22 import bisect, types
  23 import MemIndex, Cidr
  24 from Rwhois import rwhoisobject
  25
  26 class MemDB:
  27
  28     def __init__(self):
  29
  30         # a dictonary holding the various attribute indexes.  The keys
  31         # are lowercase attribute names, values are MemIndex or
  32         # CidrMemIndex objects.
  33         self.indexes = {}
  34
  35         # a dictonary holding the actual rwhoisobjects.  keys are
  36         # string IDs, values are rwhoisobject instances.
  37         self.main_index = {}
  38
  39         # dictonary holding all of the seen attributes.  keys are
  40         # lowercase attribute names, value is a character indicating
  41         # the index type (if indexed), or None if not indexed.  Index
  42         # type characters a 'N' for normal string index, 'C' for CIDR
  43         # index.
  44         self.attrs = {}
  45
  46         # Lists containing attribute names that have indexes by type.
  47         # This exists so unconstrained searches can just iterate over
  48         # them.
  49         self.normal_indexes = []
  50         self.cidr_indexes   = []
  51
  52         # dictonary holding all of the seen class names.  keys are
  53         # lowercase classnames, value is always None.
  54         self.classes = {}
  55
  56         # dictionary holding all of the seen auth-areas.  keys are
  57         # lowercase authority area names, value is always None.
  58         self.authareas = {}
  59
  60     def init_schema(self, schema_file):
  61         """Initialize the schema from a schema file.  Currently the
  62         schema file is a list of 'attribute_name = index_type' pairs,
  63         one per line.  index_type is one of N or C, where N means a
  64         normal string index, and C means a CIDR index.
  65
  66         It should be noted that this database implementation
  67         implements a global namespace for attributes, which isn't
  68         really correct according to RFC 2167.  RFC 2167 dictates that
  69         different authority area are actually autonomous and thus have
  70         separate schemas."""
  71
  72         # initialize base schema
  73
  74         self.attrs['id']         = "N"
  75         self.attrs['auth-area']  = None
  76         self.attrs['class-name'] = None
  77         self.attrs['updated']    = None
  78         self.attrs['referred-auth-area'] = "R"
  79
  80         sf = open(schema_file, "r")
  81
  82         for line in sf.xreadlines():
  83             line = line.strip()
  84             if not line or line.startswith("#"): continue
  85
  86             attr, it = line.split("=")
  87             self.attrs[attr.strip().lower()] = it.strip()[0].upper()
  88
  89         for attr, index_type in self.attrs.items():
  90             if index_type == "N":
  91                 # normal index
  92                 self.indexes[attr] = MemIndex.MemIndex()
  93                 self.normal_indexes.append(attr)
  94             elif index_type == "A":
  95                 # "all" index -- both a normal and a cidr index
  96                 self.indexes[attr] = MemIndex.ComboMemIndex()
  97                 self.normal_indexes.append(attr)
  98                 self.cidr_indexes.append(attr)
  99             elif index_type == "R":
 100                 # referral index, an all index that must be searched
 101                 # explictly by attribute
 102                 self.indexes[attr] = MemIndex.ComboMemIndex()
 103             elif index_type == "C":
 104                 # a cidr index
 105                 self.indexes[attr] = MemIndex.CidrMemIndex()
 106                 self.cidr_indexes.append(attr)
 107         return
 108
 109     def add_object(self, obj):
 110         """Add an rwhoisobject to the raw indexes, including the
 111         master index."""
 112
 113         # add the object to the main index
 114         id = obj.getid()
 115         if not id: return
 116         id = id.lower()
 117
 118         self.main_index[id] = obj
 119
 120         for a,v in obj.items():
 121             # note the attribute.
 122             index_type = self.attrs.setdefault(a, None)
 123             v = v.lower()
 124             # make sure that we note the auth-area and class
 125             if a == 'auth-area':
 126                 self.authareas.setdefault(v, None)
 127             elif a == 'class-name':
 128                 self.classes.setdefault(v, None)
 129
 130             if index_type:
 131                 index = self.indexes[a]
 132                 index.add(v, id)
 133
 134     def load_data(self, data_file):
 135         """Load data from rwhoisd-style TXT files (i.e., attr:value,
 136         records separated with a "---" bare line)."""
 137
 138         df = open(data_file, "r")
 139         obj = rwhoisobject()
 140
 141         for line in df.xreadlines():
 142             line = line.strip()
 143             if line.startswith("#"): continue
 144             if not line or line.startswith("---"):
 145                 # we've reached the end of an object, so index it.
 146                 self.add_object(obj)
 147                 # reset obj
 148                 obj = rwhoisobject()
 149                 continue
 150
 151             a, v = line.split(":", 1)
 152             obj.add_attr(a, v.lstrip())
 153
 154         self.add_object(obj)
 155         return
 156
 157     def index_data(self):
 158         """Prepare the indexes for searching.  Currently, this isn't
 159         strictly necessary (the indexes will prepare themselves when
 160         necessary), but it should elminate a penalty on initial
 161         searches"""
 162
 163         for i in self.indexes.values():
 164             i.prepare()
 165         return
 166
 167     def is_attribute(self, attr):
 168         return self.attrs.has_key(attr.lower())
 169
 170     def is_indexed_attr(self, attr):
 171         if self.is_attribute(attr):
 172             return self.attrs[attr.lower()]
 173         return False
 174
 175     def is_objectclass(self, objectclass):
 176         return self.classes.has_key(objectclass.lower())
 177
 178     def is_autharea(self, aa):
 179         return self.authareas.has_key(aa.lower())
 180
 181     def get_authareas(self):
 182         return self.authareas.keys()
 183
 184     def fetch_objects(self, id_list):
 185         return [ self.main_index[x] for x in id_list
 186                  if self.main_index.has_key(x) ]
 187
 188     def search_attr(self, attr, value, max = 0):
 189
 190         """Search for a value in a particular attribute's index.  If
 191         the attribute is cidr indexed, an attempt to convert value
 192         into a Cidr object will be made.  Returns a list of object ids
 193         (or an empty list if nothing was found)"""
 194
 195         attr = attr.lower()
 196         index_type = self.attrs.get(attr)
 197         index = self.indexes.get(attr)
 198         if not index: return []
 199
 200         super_prefix_match = False
 201         if value.endswith("**"):
 202             super_prefix_match = True
 203
 204         prefix_match = False
 205         if value.endswith("*"):
 206             value = value.rstrip("*")
 207             prefix_match = True
 208
 209         if index_type == 'C' and not isinstance(value, Cidr.Cidr):
 210             value = Cidr.valid_cidr(value)
 211         else:
 212             value = value.strip().lower()
 213
 214         if index_type == 'C' and super_prefix_match:
 215             return index.find_subnets(value, max)
 216
 217         res = index.find(value, prefix_match, max)
 218         return IndexResult(res)
 219
 220     def search_normal(self, value, max = 0):
 221         """Search for a value in the 'normal' (string keyed) indexes.
 222         Returns a list of object ids, or an empty list if nothing was
 223         found."""
 224
 225         res = IndexResult()
 226
 227         for attr in self.normal_indexes:
 228             res.extend(self.search_attr(attr, value, max))
 229             if max:
 230                 if len(res) >= max:
 231                     res.truncate(max)
 232                     return res
 233         return res
 234
 235     def search_cidr(self, value, max = 0):
 236         """Search for a value in the cidr indexes.  Returns a list of
 237         object ids, or an empty list if nothing was found."""
 238
 239         res = IndexResult()
 240         for attr in self.cidr_indexes:
 241             res.extend(self.search_attr(attr, value, max))
 242             if max:
 243                 if len(res) >= max:
 244                     res.truncate(max)
 245                     return res
 246         return res
 247
 248     def search_referral(self, value, max = 0):
 249         """Given a heirarchal value, search for referrals.  Returns a
 250         list of object ids or an empty list."""
 251
 252         return self.search_attr("referred-auth-area", value, max)
 253
 254     def object_iterator(self):
 255         return self.main_index.itervalues()
 256
 257 class IndexResult:
 258     def __init__(self, list=None):
 259         if not list: list = []
 260         self.data = list
 261         self._dict = dict(zip(self.data, self.data))
 262
 263     def __len__(self):
 264         return len(self.data)
 265
 266     def extend(self, list):
 267         if isinstance(list, type(self)):
 268             list = list.list()
 269         new_els = [ x for x in list if not self._dict.has_key(x) ]
 270         self.data.extend(new_els)
 271         self._dict.update(dict(zip(new_els, new_els)))
 272
 273     def list(self):
 274         return self.data
 275
 276     def truncate(self, n=0):
 277         to_del = self.data[n:]
 278         for i in to_del: del self._dict[i]
 279         self.data = self.data[:n]
 280
 281
 282 # test driver
 283 if __name__ == "__main__":
 284     import sys
 285     db = MemDB()
 286
 287     print "loading schema:", sys.argv[1]
 288     db.init_schema(sys.argv[1])
 289     for data_file in sys.argv[2:]:
 290         print "loading data file:", data_file
 291         db.load_data(data_file)
 292     db.index_data()
 293
 294     print "Schema: authority areas"
 295     for a in db.authareas.keys():
 296         print "   %s" % a
 297     print "Schema: classes"
 298     for c in db.classes.keys():
 299         print "   %s" % c
 300     print "Schema: attributes"
 301     for a in db.attrs.keys():
 302         print "   %s" % a
 303
 304     print "Is 'Network' a class?", db.is_objectclass("Network")
 305
 306 #    for k, v in db.main_index.items():
 307 #        print "main_index[", k, "]:", v
 308
 309     print "searching for a.com"
 310     res = db.search_attr("domain-name", "a.com")
 311     print res.list()
 312     print [ str(x) for x in db.fetch_objects(res.list()) ]
 313
 314     print "searching for doe"
 315     res = db.search_normal("doe")
 316     print res.list()
 317     print [ str(x) for x in db.fetch_objects(res.list()) ]
 318
 319     print "searching for 10.0.0.2"
 320     res = db.search_cidr("10.0.0.2")
 321     print res.list()
 322     print [ str(x) for x in db.fetch_objects(res.list()) ]
 323
 324     print "searching for fddi.a.com"
 325     res = db.search_normal("fddi.a.com")
 326     print res.list()
 327
 328     print "searching referral index for fddi.a.com"
 329     res = db.search_attr("referred-auth-area", "fddi.a.com")
 330     print res.list()
 331     print [ str(x) for x in db.fetch_objects(res.list()) ]
 332
 333