1
2 """
3 Pure Python GeoIP API
4
5 @author: Jennifer Ennis <zaylea@gmail.com>
6 @author: William Tisäter <william@defunct.cc>
7
8 @license: Copyright(C) 2004 MaxMind LLC
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
22 """
23
24 import os
25 import socket
26 import codecs
27 from math import floor
28 from threading import Lock
29
30 try:
31 import mmap
32 except ImportError:
33 mmap = None
34
35 try:
36 from StringIO import StringIO
37 range = xrange
38 except ImportError:
39 from io import StringIO, BytesIO
40
41 from pygeoip import util, const
42 from pygeoip.const import PY2, PY3
43 from pygeoip.timezone import time_zone_by_country_and_region
44
45
# Cache-mode flags accepted by GeoIP.__init__(flags=...), aliased from
# pygeoip.const under module-level names.
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE

# Encoding used both to open the database file and to decode raw reads.
ENCODING = const.ENCODING
55
81
84 __metaclass__ = _GeoIPMetaclass
85
def __init__(self, filename, flags=0, cache=True):
    """
    Open a GeoIP database file and prepare it for lookups.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    @param cache: Used in tests to skip instance caching. It is not read
        by this method; presumably the metaclass consumes it (TODO confirm).
    @type cache: bool
    """
    self._flags = flags
    self._netmask = None

    if self._flags & const.MMAP_CACHE and mmap is None:
        # The mmap module could not be imported: warn and drop the flag so
        # one of the remaining access modes is used instead.
        import warnings
        warnings.warn("MMAP_CACHE cannot be used without a mmap module")
        self._flags &= ~const.MMAP_CACHE

    if self._flags & const.MMAP_CACHE:
        f = codecs.open(filename, 'rb', ENCODING)
        try:
            self._fp = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        finally:
            # Only the mapping is kept; close the file even if mmap() fails
            # so the descriptor cannot leak.
            f.close()
        self._type = 'MMAP_CACHE'
    elif self._flags & const.MEMORY_CACHE:
        f = codecs.open(filename, 'rb', ENCODING)
        try:
            self._memory = f.read()
        finally:
            f.close()
        self._fp = self._str_to_fp(self._memory)
        self._type = 'MEMORY_CACHE'
    else:
        self._fp = codecs.open(filename, 'rb', ENCODING)
        self._type = 'STANDARD'

    self._lock = Lock()
    try:
        self._setup_segments()
    except Exception:
        # Do not keep the database handle open when the file cannot be
        # parsed; the original error is re-raised unchanged.
        self._fp.close()
        raise
126
@classmethod
def _str_to_fp(cls, data):
    """
    Wrap already-loaded database contents in an in-memory file object.

    @param data: string data
    @type data: str
    @return: file handle object
    @rtype: StringIO or BytesIO
    """
    if not PY3:
        return StringIO(data)
    # On Python 3 the text is encoded back to bytes before being wrapped.
    return BytesIO(bytearray(data, ENCODING))
138
def _setup_segments(self):
    """
    Parses the database file to determine what kind of database is
    being used and setup segment sizes and start points that will
    be used by the seek*() methods later.

    Sets self._databaseType, self._recordLength and
    self._databaseSegments. The file position found on entry is restored
    before returning.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * CITY_EDITION_REV1_V6
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION
    * ASNUM_EDITION_V6

    """
    # Defaults, kept when no structure-info marker is found.
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # A run of three 0xFF characters precedes the database-type byte.
    # Built once here instead of on every loop iteration.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    segment_editions = (const.CITY_EDITION_REV0,
                        const.CITY_EDITION_REV1,
                        const.CITY_EDITION_REV1_V6,
                        const.ORG_EDITION,
                        const.ISP_EDITION,
                        const.ASNUM_EDITION,
                        const.ASNUM_EDITION_V6)
    long_record_editions = (const.ORG_EDITION, const.ISP_EDITION)

    # "with" guarantees the lock is released even when a seek/read fails
    # (e.g. on a truncated file); otherwise later lookups would block.
    with self._lock:
        filepos = self._fp.tell()
        self._fp.seek(-3, os.SEEK_END)

        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            delim = self._fp.read(3)

            if PY3 and isinstance(delim, bytes):
                delim = delim.decode(ENCODING)

            if PY2 and isinstance(delim, str):
                delim = delim.decode(ENCODING)

            if delim != marker:
                # 3 bytes were just read; stepping back 4 moves the
                # window one byte towards the start of the file.
                self._fp.seek(-4, os.SEEK_CUR)
                continue

            self._databaseType = ord(self._fp.read(1))

            # NOTE(review): presumably compatibility with older databases
            # that stored the type shifted by 105 -- confirm.
            if self._databaseType >= 106:
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0

            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1

            elif self._databaseType in segment_editions:
                buf = self._fp.read(const.SEGMENT_RECORD_LENGTH)

                if PY3 and isinstance(buf, bytes):
                    buf = buf.decode(ENCODING)

                # Segment count is stored least-significant byte first.
                self._databaseSegments = 0
                for j in range(const.SEGMENT_RECORD_LENGTH):
                    self._databaseSegments += (ord(buf[j]) << (j * 8))

                if self._databaseType in long_record_editions:
                    self._recordLength = const.ORG_RECORD_LENGTH
            break

        self._fp.seek(filepos, os.SEEK_SET)
219
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    Each node holds two pointers of self._recordLength bytes, stored
    least-significant byte first. Starting from the highest bit of ipnum,
    each bit selects the second (bit set) or first (bit clear) pointer.
    The search stops at the first pointer that is not below
    self._databaseSegments; self._netmask then records how many bits
    were consumed.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if the data runs out, cannot be decoded, or no
        terminating pointer is found
    """
    record_length = self._recordLength
    node_size = 2 * record_length

    try:
        offset = 0
        # NOTE(review): the depth is guessed from the number of decimal
        # digits of ipnum (more than 10 digits => 128-bit search). Kept
        # unchanged; deriving it from the database type may be more robust.
        seek_depth = 127 if len(str(ipnum)) > 10 else 31

        for depth in range(seek_depth, -1, -1):
            start = node_size * offset
            if self._flags & const.MEMORY_CACHE:
                buf = self._memory[start:start + node_size]
            else:
                # "with" releases the lock even if seek()/read() raises.
                with self._lock:
                    self._fp.seek(start, os.SEEK_SET)
                    buf = self._fp.read(node_size)

            if PY3 and isinstance(buf, bytes):
                buf = buf.decode(ENCODING)

            x = [0, 0]
            for i in range(2):
                for j in range(record_length):
                    x[i] += ord(buf[record_length * i + j]) << (j * 8)

            child = x[1] if ipnum & (1 << depth) else x[0]
            if child >= self._databaseSegments:
                self._netmask = seek_depth - depth + 1
                return child
            offset = child
    except (IndexError, UnicodeDecodeError):
        # Short or undecodable node: reported as corruption below.
        pass

    raise GeoIPError('Corrupt database')
269
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the search ends exactly at
        self._databaseSegments
    @rtype: str
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" releases the lock even if seek()/read() raises.
    with self._lock:
        self._fp.seek(seek_org + read_length, os.SEEK_SET)
        buf = self._fp.read(const.MAX_ORG_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    # The name ends at the first NUL character; index() raises ValueError
    # if the buffer contains none.
    return buf[:buf.index(chr(0))]
292
def _get_region(self, ipnum):
    """
    Seek and return the region information.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_code; either value
        is None when it cannot be determined
    @rtype: dict
    """
    region_code = None
    country_code = None
    seek_country = self._seek_country(ipnum)

    def get_region_code(offset):
        # Two capital letters: offset written in base 26 with 'A' as zero.
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region_code = get_region_code(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]
    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region_code = get_region_code(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region_code = get_region_code(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # COUNTRY_CODES is looked up by integer id everywhere else in
            # this class. The former "index in COUNTRY_CODES" test compares
            # the id against the stored values when the table is a
            # sequence, so the lookup was never reached. Index directly
            # and treat an out-of-range id as "unknown".
            # NOTE(review): assumes an int-indexed table -- confirm in const.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass
    elif self._databaseType in const.CITY_EDITIONS:
        rec = self._get_record(ipnum)
        region_code = rec.get('region_code')
        country_code = rec.get('country_code')

    return {'country_code': country_code, 'region_code': region_code}
338
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with city, region_code, area_code, time_zone,
        dma_code, metro_code, country_code3, latitude, postal_code,
        longitude, country_code, country_name, continent; an empty dict
        when the search ends exactly at self._databaseSegments
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return {}

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" releases the lock even if seek()/read() raises.
    with self._lock:
        self._fp.seek(seek_country + read_length, os.SEEK_SET)
        buf = self._fp.read(const.FULL_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': None,
        'postal_code': None
    }

    # The first character of the record is the country id.
    char = ord(buf[0])
    record['country_code'] = const.COUNTRY_CODES[char]
    record['country_code3'] = const.COUNTRY_CODES3[char]
    record['country_name'] = const.COUNTRY_NAMES[char]
    record['continent'] = const.CONTINENT_NAMES[char]

    def read_data(buf, pos):
        # Return (index of terminating NUL, text before it or None if empty).
        cur = pos
        while buf[cur] != '\0':
            cur += 1
        return cur, buf[pos:cur] if cur > pos else None

    def read_uint24(pos):
        # Three characters, least-significant first, combined into an int.
        return sum(ord(buf[pos + j]) << (j * 8) for j in range(3))

    offset, record['region_code'] = read_data(buf, 1)
    offset, record['city'] = read_data(buf, offset + 1)
    offset, record['postal_code'] = read_data(buf, offset + 1)
    offset = offset + 1

    latitude = read_uint24(offset)
    longitude = read_uint24(offset + 3)

    record['latitude'] = (latitude / 10000.0) - 180.0
    record['longitude'] = (longitude / 10000.0) - 180.0

    if self._databaseType in (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6):
        if record['country_code'] == 'US':
            # One combined value: dma_code * 1000 + area_code.
            dma_area = read_uint24(offset + 6)

            record['dma_code'] = dma_area // 1000
            record['area_code'] = dma_area % 1000
            record['metro_code'] = const.DMA_MAP.get(record['dma_code'])

    params = (record['country_code'], record['region_code'])
    record['time_zone'] = time_zone_by_country_and_region(*params)

    return record
413
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an address string matching the loaded database.

    @param hostname: Hostname
    @type hostname: str
    @return: IPv6 address when the database type is one of the IPv6
        editions, otherwise the result of socket.gethostbyname()
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    # Use the first getaddrinfo() result; the address is the first item
    # of its sockaddr tuple.
    results = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
    _family, _socktype, _proto, _canonname, sockaddr = results[0]
    address, _port, _flow, _scope = sockaddr
    return address
422
def _id_by_addr(self, addr):
    """
    Looks up the index for the country which is the key for the
    code and name.

    @param addr: IPv4 or IPv6 address
    @type addr: str
    @return: country index (the tree search result minus COUNTRY_BEGIN)
    @rtype: int
    @raise GeoIPError: if the address family does not match the database
    """
    # An address containing ':' is treated as IPv6.
    is_ipv6 = ':' in addr

    if not is_ipv6 and self._databaseType != const.COUNTRY_EDITION:
        raise GeoIPError('Invalid database type; expected IPv6 address')
    if is_ipv6 and self._databaseType != const.COUNTRY_EDITION_V6:
        raise GeoIPError('Invalid database type; expected IPv4 address')

    return self._seek_country(util.ip2long(addr)) - const.COUNTRY_BEGIN
441
def last_netmask(self):
    """
    Return the netmask depth of the last lookup.

    The value is whatever the most recent tree search stored in
    self._netmask; it is None until a lookup has stored one.

    @return: network depth
    @rtype: int
    """
    return self._netmask
450
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: if the database is of any other type
    """
    db_type = self._databaseType

    if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
        return const.COUNTRY_CODES[self._id_by_addr(addr)]

    if db_type in const.REGION_CITY_EDITIONS:
        return self.region_by_addr(addr).get('country_code')

    raise GeoIPError('Invalid database type, expected Country, City or Region')
469
def country_code_by_name(self, hostname):
    """
    Returns 2-letter country code (e.g. 'US') for specified hostname.
    Use this method if you have a Country, Region, or City database.

    The hostname is resolved first, then looked up by address.

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
482
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name
    @rtype: str
    @raise GeoIPError: if the database is of any other type
    """
    db_type = self._databaseType

    if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
        return const.COUNTRY_NAMES[self._id_by_addr(addr)]

    if db_type in const.CITY_EDITIONS:
        return self.record_by_addr(addr).get('country_name')

    raise GeoIPError('Invalid database type, expected Country or City')
502
def country_name_by_name(self, hostname):
    """
    Returns full country name for specified hostname.

    The hostname is resolved first, then looked up by address, so the
    same database types as for the address lookup apply.

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
515
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: if the database is of any other type
    """
    accepted = (const.ORG_EDITION, const.ISP_EDITION,
                const.ASNUM_EDITION, const.ASNUM_EDITION_V6)

    if self._databaseType in accepted:
        return self._get_org(util.ip2long(addr))

    raise GeoIPError('Invalid database type, expected Org, ISP or ASNum')
533
def org_by_name(self, hostname):
    """
    Lookup the organization (or ISP) for hostname.
    Use this method if you have an Organization/ISP database.

    The hostname is resolved first, then looked up by address.

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    return self.org_by_addr(self._gethostbyname(hostname))
546
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary with country_code, country_code3, country_name,
        continent, city, postal_code, latitude, longitude, dma_code,
        metro_code, area_code, region_code, time_zone; None when the
        lookup produces an empty record
    @rtype: dict
    @raise GeoIPError: if the database is not a City edition
    """
    if self._databaseType in const.CITY_EDITIONS:
        rec = self._get_record(util.ip2long(addr))
        # An empty record is reported as None rather than {}.
        return rec if rec else None

    raise GeoIPError('Invalid database type, expected City')
569
def record_by_name(self, hostname):
    """
    Look up the record for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first, then looked up by address; the
    result is whatever the address lookup returns.

    @param hostname: Hostname
    @type hostname: str
    @return: Record dictionary of the resolved address
    @rtype: dict
    """
    return self.record_by_addr(self._gethostbyname(hostname))
584
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_code
    @rtype: dict
    @raise GeoIPError: if the database is neither a Region nor a City
        edition
    """
    if self._databaseType in const.REGION_CITY_EDITIONS:
        return self._get_region(util.ip2long(addr))

    raise GeoIPError('Invalid database type, expected Region or City')
601
def region_by_name(self, hostname):
    """
    Lookup the region for given hostname.
    Use this method if you have a Region or City database.

    The hostname is resolved first, then looked up by address; the
    result is whatever the address lookup returns.

    @param hostname: Hostname
    @type hostname: str
    @return: Region dictionary of the resolved address
    @rtype: dict
    """
    return self.region_by_addr(self._gethostbyname(hostname))
614
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone, or None when the record has no time_zone entry
    @rtype: str
    @raise GeoIPError: if the database is not a City edition
    """
    if self._databaseType in const.CITY_EDITIONS:
        return self._get_record(util.ip2long(addr)).get('time_zone')

    raise GeoIPError('Invalid database type, expected City')
631
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a given hostname.

    The hostname is resolved first, then looked up by address, so the
    same database types as for the address lookup apply.

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    return self.time_zone_by_addr(self._gethostbyname(hostname))
644