1 # This file is part of python-rwhoisd
3 # Copyright (C) 2003, David E. Blacka
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22 from Rwhois import rwhoisobject
28 # a dictionary holding the various attribute indexes. The keys
29 # are lowercase attribute names, values are MemIndex or
30 # CidrMemIndex objects.
33 # a dictionary holding the actual rwhoisobjects. keys are
34 # string IDs, values are rwhoisobject instances.
37 # dictionary holding all of the seen attributes. keys are
38 # lowercase attribute names, value is a character indicating
39 # the index type (if indexed), or None if not indexed. Index
40 # type characters are 'N' for normal string index, 'C' for CIDR
44 # Lists containing attribute names that have indexes by type.
45 # This exists so unconstrained searches can just iterate over
47 self.normal_indexes = []
48 self.cidr_indexes = []
50 # dictionary holding all of the seen class names. keys are
51 # lowercase classnames, value is always None.
54 # dictionary holding all of the seen auth-areas. keys are
55 # lowercase authority area names, value is always None.
58 def init_schema(self, schema_file):
59 """Initialize the schema from a schema file. Currently the
60 schema file is a list of 'attribute_name = index_type' pairs,
61 one per line. index_type is one of N or C, where N means a
62 normal string index, and C means a CIDR index.
64 It should be noted that this database implementation
65 implements a global namespace for attributes, which isn't
66 really correct according to RFC 2167. RFC 2167 dictates that
67 different authority areas are actually autonomous and thus have
70 # initialize base schema
72 self.attrs['id'] = "N"
73 self.attrs['auth-area'] = None
74 self.attrs['class-name'] = None
75 self.attrs['updated'] = None
76 self.attrs['referred-auth-area'] = "R"
78 sf = open(schema_file, "r")
80 for line in sf.xreadlines():
82 if not line or line.startswith("#"): continue
84 attr, it = line.split("=")
85 self.attrs[attr.strip().lower()] = it.strip()[0].upper()
87 for attr, index_type in self.attrs.items():
90 self.indexes[attr] = MemIndex.MemIndex()
91 self.normal_indexes.append(attr)
92 elif index_type == "A":
93 # "all" index -- both a normal and a cidr index
94 self.indexes[attr] = MemIndex.ComboMemIndex()
95 self.normal_indexes.append(attr)
96 self.cidr_indexes.append(attr)
97 elif index_type == "R":
98 # referral index, an all index that must be searched
99 # explicitly by attribute
100 self.indexes[attr] = MemIndex.ComboMemIndex()
101 elif index_type == "C":
103 self.indexes[attr] = MemIndex.CidrMemIndex()
104 self.cidr_indexes.append(attr)
107 def add_object(self, obj):
108 """Add an rwhoisobject to the raw indexes, including the
111 # add the object to the main index
116 self.main_index[id] = obj
118 for a,v in obj.items():
119 # note the attribute.
120 index_type = self.attrs.setdefault(a, None)
122 # make sure that we note the auth-area and class
124 self.authareas.setdefault(v, None)
125 elif a == 'class-name':
126 self.classes.setdefault(v, None)
129 index = self.indexes[a]
132 def load_data(self, data_file):
133 """Load data from rwhoisd-style TXT files (i.e., attr:value,
134 records separated with a "---" bare line)."""
136 df = open(data_file, "r")
139 for line in df.xreadlines():
141 if line.startswith("#"): continue
142 if not line or line.startswith("---"):
143 # we've reached the end of an object, so index it.
149 a, v = line.split(":", 1)
150 obj.add_attr(a, v.lstrip())
155 def index_data(self):
156 """Prepare the indexes for searching. Currently, this isn't
157 strictly necessary (the indexes will prepare themselves when
158 necessary), but it should eliminate a penalty on initial
161 for i in self.indexes.values():
def is_attribute(self, attr):
    """Report whether `attr` is an attribute name recorded in self.attrs.

    The name is lower-cased before the lookup, so matching against the
    lower-case keys of self.attrs is case-insensitive.  Returns True or
    False.
    """
    # The `in` operator replaces dict.has_key(), which was deprecated
    # and no longer exists in Python 3.
    return attr.lower() in self.attrs
168 def is_indexed_attr(self, attr):
169 if self.is_attribute(attr):
170 return self.attrs[attr.lower()]
def is_objectclass(self, objectclass):
    """Report whether `objectclass` is a class name recorded in self.classes.

    The name is lower-cased before the lookup, so matching against the
    lower-case keys of self.classes is case-insensitive.  Returns True
    or False.
    """
    # The `in` operator replaces dict.has_key(), which was deprecated
    # and no longer exists in Python 3.
    return objectclass.lower() in self.classes
def is_autharea(self, aa):
    """Report whether `aa` is an authority area recorded in self.authareas.

    The name is lower-cased before the lookup, so matching against the
    lower-case keys of self.authareas is case-insensitive.  Returns True
    or False.
    """
    # The `in` operator replaces dict.has_key(), which was deprecated
    # and no longer exists in Python 3.
    return aa.lower() in self.authareas
def get_authareas(self):
    """Return a list of the keys of self.authareas (the authority-area
    names seen so far)."""
    return list(self.authareas)
def fetch_objects(self, id_list):
    """Return the objects stored in self.main_index for the given ids.

    Ids that are not present in self.main_index are silently skipped.
    The result is a new list that follows the order of `id_list`.
    """
    index = self.main_index
    # The `in` operator replaces dict.has_key(), which was deprecated
    # and no longer exists in Python 3.
    return [index[obj_id] for obj_id in id_list if obj_id in index]
186 def search_attr(self, attr, value, max = 0):
188 """Search for a value in a particular attribute's index. If
189 the attribute is cidr indexed, an attempt to convert value
190 into a Cidr object will be made. Returns a list of object ids
191 (or an empty list if nothing was found)"""
194 index_type = self.attrs.get(attr)
195 index = self.indexes.get(attr)
196 if not index: return []
198 super_prefix_match = False
199 if value.endswith("**"):
200 super_prefix_match = True
203 if value.endswith("*"):
204 value = value.rstrip("*")
207 if index_type == 'C' and not isinstance(value, Cidr.Cidr):
208 value = Cidr.valid_cidr(value)
210 value = value.strip().lower()
212 if index_type == 'C' and super_prefix_match:
213 return index.find_subnets(value, max)
215 res = index.find(value, prefix_match, max)
216 return IndexResult(res)
218 def search_normal(self, value, max = 0):
219 """Search for a value in the 'normal' (string keyed) indexes.
220 Returns a list of object ids, or an empty list if nothing was
225 for attr in self.normal_indexes:
226 res.extend(self.search_attr(attr, value, max))
233 def search_cidr(self, value, max = 0):
234 """Search for a value in the cidr indexes. Returns a list of
235 object ids, or an empty list if nothing was found."""
238 for attr in self.cidr_indexes:
239 res.extend(self.search_attr(attr, value, max))
246 def search_referral(self, value, max = 0):
247 """Given a hierarchical value, search for referrals. Returns a
248 list of object ids or an empty list."""
250 return self.search_attr("referred-auth-area", value, max)
def object_iterator(self):
    """Return an iterator over every value held in self.main_index."""
    stored = self.main_index
    return stored.itervalues()
256 def __init__(self, list=None):
257 if not list: list = []
259 self._dict = dict(zip(self.data, self.data))
262 return len(self.data)
264 def extend(self, list):
265 if isinstance(list, type(self)):
267 new_els = [ x for x in list if not self._dict.has_key(x) ]
268 self.data.extend(new_els)
269 self._dict.update(dict(zip(new_els, new_els)))
def truncate(self, n=0):
    """Keep only the first `n` entries of self.data, in place.

    With the default n=0 the result is emptied.  self._dict is brought
    back in sync with the surviving entries.  Returns None.
    """
    self.data = self.data[:n]
    # Rebuild the membership dict from what was kept instead of deleting
    # one key per removed element: if self.data holds a value more than
    # once, per-element deletion either raises KeyError (value removed
    # twice) or drops the key of an entry that is still in self.data.
    self._dict = dict(zip(self.data, self.data))
281 if __name__ == "__main__":
285 print "loading schema:", sys.argv[1]
286 db.init_schema(sys.argv[1])
287 for data_file in sys.argv[2:]:
288 print "loading data file:", data_file
289 db.load_data(data_file)
292 print "Schema: authority areas"
293 for a in db.authareas.keys():
295 print "Schema: classes"
296 for c in db.classes.keys():
298 print "Schema: attributes"
299 for a in db.attrs.keys():
302 print "Is 'Network' a class?", db.is_objectclass("Network")
304 # for k, v in db.main_index.items():
305 # print "main_index[", k, "]:", v
307 print "searching for a.com"
308 res = db.search_attr("domain-name", "a.com")
310 print [ str(x) for x in db.fetch_objects(res.list()) ]
312 print "searching for doe"
313 res = db.search_normal("doe")
315 print [ str(x) for x in db.fetch_objects(res.list()) ]
317 print "searching for 10.0.0.2"
318 res = db.search_cidr("10.0.0.2")
320 print [ str(x) for x in db.fetch_objects(res.list()) ]
322 print "searching for fddi.a.com"
323 res = db.search_normal("fddi.a.com")
326 print "searching referral index for fddi.a.com"
327 res = db.search_attr("referred-auth-area", "fddi.a.com")
329 print [ str(x) for x in db.fetch_objects(res.list()) ]