1 # This file is part of python-rwhoisd
3 # Copyright (C) 2003, David E. Blacka
5 # $Id: MemDB.py,v 1.3 2003/04/28 16:44:09 davidb Exp $
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24 from Rwhois import rwhoisobject
30 # a dictionary holding the various attribute indexes. The keys
31 # are lowercase attribute names, values are MemIndex or
32 # CidrMemIndex objects.
35 # a dictionary holding the actual rwhoisobjects. keys are
36 # string IDs, values are rwhoisobject instances.
39 # dictionary holding all of the seen attributes. keys are
40 # lowercase attribute names, value is a character indicating
41 # the index type (if indexed), or None if not indexed. Index
42 # type characters are 'N' for normal string index, 'C' for CIDR
46 # Lists containing attribute names that have indexes by type.
47 # This exists so unconstrained searches can just iterate over
49 self.normal_indexes = []
50 self.cidr_indexes = []
52 # dictionary holding all of the seen class names. keys are
53 # lowercase classnames, value is always None.
56 # dictionary holding all of the seen auth-areas. keys are
57 # lowercase authority area names, value is always None.
60 def init_schema(self, schema_file):
61 """Initialize the schema from a schema file. Currently the
62 schema file is a list of 'attribute_name = index_type' pairs,
63 one per line. index_type is one of N or C, where N means a
64 normal string index, and C means a CIDR index.
66 It should be noted that this database implementation
67 implements a global namespace for attributes, which isn't
68 really correct according to RFC 2167. RFC 2167 dictates that
69 different authority areas are actually autonomous and thus have
72 # initialize base schema
74 self.attrs['id'] = "N"
75 self.attrs['auth-area'] = None
76 self.attrs['class-name'] = None
77 self.attrs['updated'] = None
78 self.attrs['referred-auth-area'] = "R"
80 sf = open(schema_file, "r")
82 for line in sf.xreadlines():
84 if not line or line.startswith("#"): continue
86 attr, it = line.split("=")
87 self.attrs[attr.strip().lower()] = it.strip()[0].upper()
89 for attr, index_type in self.attrs.items():
92 self.indexes[attr] = MemIndex.MemIndex()
93 self.normal_indexes.append(attr)
94 elif index_type == "A":
95 # "all" index -- both a normal and a cidr index
96 self.indexes[attr] = MemIndex.ComboMemIndex()
97 self.normal_indexes.append(attr)
98 self.cidr_indexes.append(attr)
99 elif index_type == "R":
100 # referral index, an all index that must be searched
101 # explicitly by attribute
102 self.indexes[attr] = MemIndex.ComboMemIndex()
103 elif index_type == "C":
105 self.indexes[attr] = MemIndex.CidrMemIndex()
106 self.cidr_indexes.append(attr)
109 def add_object(self, obj):
110 """Add an rwhoisobject to the raw indexes, including the
113 # add the object to the main index
118 self.main_index[id] = obj
120 for a,v in obj.items():
121 # note the attribute.
122 index_type = self.attrs.setdefault(a, None)
124 # make sure that we note the auth-area and class
126 self.authareas.setdefault(v, None)
127 elif a == 'class-name':
128 self.classes.setdefault(v, None)
131 index = self.indexes[a]
134 def load_data(self, data_file):
135 """Load data from rwhoisd-style TXT files (i.e., attr:value,
136 records separated with a "---" bare line)."""
138 df = open(data_file, "r")
141 for line in df.xreadlines():
143 if line.startswith("#"): continue
144 if not line or line.startswith("---"):
145 # we've reached the end of an object, so index it.
151 a, v = line.split(":", 1)
152 obj.add_attr(a, v.lstrip())
157 def index_data(self):
158 """Prepare the indexes for searching. Currently, this isn't
159 strictly necessary (the indexes will prepare themselves when
160 necessary), but it should eliminate a penalty on initial
163 for i in self.indexes.values():
def is_attribute(self, attr):
    """Return True if attr names an attribute this database has seen.

    attr -- attribute name; matched case-insensitively, because
            self.attrs is keyed by lowercase attribute names.
    """
    # The 'in' operator replaces dict.has_key(), which is deprecated and
    # no longer exists on Python 3 dictionaries.
    return attr.lower() in self.attrs
170 def is_indexed_attr(self, attr):
171 if self.is_attribute(attr):
172 return self.attrs[attr.lower()]
def is_objectclass(self, objectclass):
    """Return True if objectclass is a class name this database has seen.

    objectclass -- class name; matched case-insensitively, because
                   self.classes is keyed by lowercase class names.
    """
    # The 'in' operator replaces dict.has_key(), which is deprecated and
    # no longer exists on Python 3 dictionaries.
    return objectclass.lower() in self.classes
def is_autharea(self, aa):
    """Return True if aa is an authority area this database has seen.

    aa -- authority area name; matched case-insensitively, because
          self.authareas is keyed by lowercase authority area names.
    """
    # The 'in' operator replaces dict.has_key(), which is deprecated and
    # no longer exists on Python 3 dictionaries.
    return aa.lower() in self.authareas
def get_authareas(self):
    """Return a new list of the authority area names seen so far.

    The names are the keys of self.authareas. A fresh list is built on
    every call, so the caller may modify it freely.
    """
    # list(d) yields the keys as a real list on both Python 2 and 3;
    # d.keys() would hand back a live view object on Python 3.
    return list(self.authareas)
def fetch_objects(self, id_list):
    """Return the stored objects for the ids in id_list.

    id_list -- an iterable of keys into self.main_index.

    The result follows the order of id_list. Ids that have no entry in
    self.main_index are silently skipped, so the result may be shorter
    than id_list (or empty).
    """
    # The 'in' operator replaces dict.has_key(), which is deprecated and
    # no longer exists on Python 3 dictionaries.
    return [self.main_index[x] for x in id_list if x in self.main_index]
188 def search_attr(self, attr, value, max = 0):
190 """Search for a value in a particular attribute's index. If
191 the attribute is cidr indexed, an attempt to convert value
192 into a Cidr object will be made. Returns a list of object ids
193 (or an empty list if nothing was found)"""
196 index_type = self.attrs.get(attr)
197 index = self.indexes.get(attr)
198 if not index: return []
200 super_prefix_match = False
201 if value.endswith("**"):
202 super_prefix_match = True
205 if value.endswith("*"):
206 value = value.rstrip("*")
209 if index_type == 'C' and not isinstance(value, Cidr.Cidr):
210 value = Cidr.valid_cidr(value)
212 value = value.strip().lower()
214 if index_type == 'C' and super_prefix_match:
215 return index.find_subnets(value, max)
217 res = index.find(value, prefix_match, max)
218 return IndexResult(res)
220 def search_normal(self, value, max = 0):
221 """Search for a value in the 'normal' (string keyed) indexes.
222 Returns a list of object ids, or an empty list if nothing was
227 for attr in self.normal_indexes:
228 res.extend(self.search_attr(attr, value, max))
235 def search_cidr(self, value, max = 0):
236 """Search for a value in the cidr indexes. Returns a list of
237 object ids, or an empty list if nothing was found."""
240 for attr in self.cidr_indexes:
241 res.extend(self.search_attr(attr, value, max))
def search_referral(self, value, max = 0):
    """Given a hierarchical value, search for referrals.

    Delegates to search_attr() on the "referred-auth-area" attribute and
    returns its result unchanged (search_attr documents that result as a
    list of object ids, or an empty list if nothing was found).

    value -- the value to look up; forwarded to search_attr() as-is.
    max   -- forwarded to search_attr() as-is; presumably a cap on the
             number of results, with 0 meaning no cap -- TODO confirm.
    """
    return self.search_attr("referred-auth-area", value, max)
def object_iterator(self):
    """Return an iterator over the values stored in self.main_index."""
    # NOTE(review): dict.itervalues() exists only on Python 2; on
    # Python 3 the equivalent is iter(self.main_index.values()).
    return self.main_index.itervalues()
258 def __init__(self, list=None):
259 if not list: list = []
261 self._dict = dict(zip(self.data, self.data))
264 return len(self.data)
266 def extend(self, list):
267 if isinstance(list, type(self)):
269 new_els = [ x for x in list if not self._dict.has_key(x) ]
270 self.data.extend(new_els)
271 self._dict.update(dict(zip(new_els, new_els)))
def truncate(self, n=0):
    """Keep only the first n elements of the result; drop the rest.

    n -- number of leading elements of self.data to keep. The default
         of 0 empties the result.

    Both self.data and the self._dict membership table are replaced so
    that they describe exactly the kept elements.
    """
    kept = self.data[:n]
    # Rebuild the membership table from the kept prefix rather than
    # deleting the dropped elements one by one. Per-element deletion
    # raises KeyError when a dropped value occurs twice, and wrongly
    # forgets a value that is dropped once but also still kept.
    self._dict = dict(zip(kept, kept))
    self.data = kept
283 if __name__ == "__main__":
287 print "loading schema:", sys.argv[1]
288 db.init_schema(sys.argv[1])
289 for data_file in sys.argv[2:]:
290 print "loading data file:", data_file
291 db.load_data(data_file)
294 print "Schema: authority areas"
295 for a in db.authareas.keys():
297 print "Schema: classes"
298 for c in db.classes.keys():
300 print "Schema: attributes"
301 for a in db.attrs.keys():
304 print "Is 'Network' a class?", db.is_objectclass("Network")
306 # for k, v in db.main_index.items():
307 # print "main_index[", k, "]:", v
309 print "searching for a.com"
310 res = db.search_attr("domain-name", "a.com")
312 print [ str(x) for x in db.fetch_objects(res.list()) ]
314 print "searching for doe"
315 res = db.search_normal("doe")
317 print [ str(x) for x in db.fetch_objects(res.list()) ]
319 print "searching for 10.0.0.2"
320 res = db.search_cidr("10.0.0.2")
322 print [ str(x) for x in db.fetch_objects(res.list()) ]
324 print "searching for fddi.a.com"
325 res = db.search_normal("fddi.a.com")
328 print "searching referral index for fddi.a.com"
329 res = db.search_attr("referred-auth-area", "fddi.a.com")
331 print [ str(x) for x in db.fetch_objects(res.list()) ]