Package pygeoip
[hide private]
[frames | no frames]

Source Code for Package pygeoip

  1  """ 
  2  Pure Python GeoIP API. The API is based off of U{MaxMind's C-based Python API<http://www.maxmind.com/app/python>}, 
  3  but the code itself is based on the U{pure PHP5 API<http://pear.php.net/package/Net_GeoIP/>} 
  4  by Jim Winstead and Hans Lellelid. 
  5   
  6  It is mostly a drop-in replacement, except the 
  7  C{new} and C{open} methods are gone. You should instantiate the L{GeoIP} class yourself: 
  8   
  9  C{gi = GeoIP('/path/to/GeoIP.dat', pygeoip.MEMORY_CACHE)} 
 10   
 11  @author: Jennifer Ennis <zaylea at gmail dot com> 
 12   
 13  @license: 
 14  Copyright(C) 2004 MaxMind LLC 
 15   
 16  This program is free software: you can redistribute it and/or modify 
 17  it under the terms of the GNU Lesser General Public License as published by 
 18  the Free Software Foundation, either version 3 of the License, or 
 19  (at your option) any later version. 
 20   
 21  This program is distributed in the hope that it will be useful, 
 22  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 24  GNU Lesser General Public License for more details. 
 25   
 26  You should have received a copy of the GNU Lesser General Public License 
 27  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 28  """ 
 29   
 30  from __future__ import with_statement, division 
 31  import os 
 32  import six 
 33  import math 
 34  import socket 
 35  import mmap 
 36  import gzip 
 37  import codecs 
 38   
 39  try: 
 40      from StringIO import StringIO 
 41  except ImportError: 
 42      from io import StringIO 
 43   
 44  import const 
 45  from util import ip2long 
 46  from timezone import time_zone_by_country_and_region 
 47   
 48   
 49  MMAP_CACHE = const.MMAP_CACHE  # flag re-exported for callers: access the database file via mmap
 50  MEMORY_CACHE = const.MEMORY_CACHE  # flag re-exported for callers: preload the whole file into memory
 51  STANDARD = const.STANDARD  # flag re-exported for callers: the default mode
 52   
 53   
class GeoIPError(Exception):
    """
    Error raised by this package when a lookup cannot be served, e.g. the
    opened database is of the wrong edition for the requested method or the
    argument of a C{*_by_addr} method is not an IP address.
    """

class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per database filename.

    The caching hook is C{__call__}, which is what runs when a class that
    uses this metaclass is instantiated. (Hooking C{__new__} on the metaclass
    only intercepts I{class creation}, so it would cache classes by their
    name and never cache instances.)
    """

    # Maps (class, filename) -> already constructed instance. The class is
    # part of the key so that two different classes opened on the same file
    # never receive each other's instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Singleton method to get an instance without reparsing the db. Unique
        instances are instantiated based on the filename of the db. Flags are
        ignored for this, i.e. if you initialize one with STANDARD flag
        (default) and then try later to initialize with MEMORY_CACHE, it will
        still return the STANDARD one.

        @param args: positional constructor arguments; the first one is
            taken as the filename.
        @param kwargs: keyword constructor arguments; C{filename} is used
            when no positional argument was given.
        @return: the cached instance for this class and filename, created on
            first use.
        """
        if args:
            filename = args[0]
        elif 'filename' in kwargs:
            filename = kwargs['filename']
        else:
            # No filename to key on: fall back to ordinary construction so
            # the class's own __init__ reports the missing argument.
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        key = (cls, filename)
        instances = GeoIPMetaclass._instances

        if key not in instances:
            instances[key] = super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        return instances[key]


# Created by calling the metaclass directly so the same line works on both
# Python 2 and Python 3 without metaclass-specific class syntax.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})


class GeoIP(GeoIPBase):
    """
    Reader for a GeoIP binary database file.

    All raw data is handled as bytes (wrapped in C{bytearray} so that
    indexing yields integers on both Python 2 and Python 3). Text fields
    (organization, region, city, postal code) are decoded as latin-1.
    """

    # Message shared by every *_by_addr method when the argument is not an IP.
    _NOT_AN_IP = ('*_by_addr methods only accept IP addresses. '
                  'Use *_by_name for hostnames. (Address: %s)')

    # Message used whenever the file content does not have the expected shape.
    _CORRUPT = 'Error traversing database - perhaps it is corrupt?'

    def __init__(self, filename, flags=0):
        """
        Initialize the class.

        @param filename: path to a geoip database. If MEMORY_CACHE is used,
            the file can be gzipped.
        @type filename: str
        @param flags: flags that affect how the database is processed.
            Currently the only supported flags are STANDARD (the default),
            MEMORY_CACHE (preload the whole file into memory), and
            MMAP_CACHE (access the file via mmap).
        @type flags: int
        """
        self._filename = filename
        self._flags = flags
        # Only populated in MEMORY_CACHE mode. Readers test this attribute
        # rather than the flag, because MMAP_CACHE wins when both are set.
        self._memoryBuffer = None

        if flags & const.MMAP_CACHE:
            with open(filename, 'rb') as f:
                self._filehandle = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        elif flags & const.MEMORY_CACHE:
            # Local import: BytesIO accepts the bytes returned by read() on
            # both Python 2 and Python 3 (io.StringIO rejects bytes).
            from io import BytesIO

            opener = gzip.open if filename.endswith('.gz') else open

            with opener(filename, 'rb') as f:
                self._memoryBuffer = f.read()

            self._filehandle = BytesIO(self._memoryBuffer)

        else:
            # Plain binary handle: a decoding wrapper would return text and
            # the byte-wise delimiter search in _setup_segments would never
            # match.
            self._filehandle = open(filename, 'rb')

        self._setup_segments()

    def _read_at(self, offset, length):
        """
        Read up to C{length} raw bytes starting at absolute C{offset}.

        @param offset: absolute position in the database
        @type offset: int
        @param length: maximum number of bytes to read
        @type length: int
        @return: the bytes read (possibly fewer than requested at EOF)
        @rtype: bytearray
        """
        if self._memoryBuffer is not None:
            return bytearray(self._memoryBuffer[offset:offset + length])

        self._filehandle.seek(offset, os.SEEK_SET)
        return bytearray(self._filehandle.read(length))

    @staticmethod
    def _le_int(buf, start, length):
        """
        Decode C{length} bytes of C{buf} at C{start} as a little-endian
        unsigned integer.

        @raise GeoIPError: if C{buf} is too short to hold the value.
        """
        if start + length > len(buf):
            raise GeoIPError(GeoIP._CORRUPT)

        value = 0
        for j in range(length):
            value += buf[start + j] << (j * 8)
        return value

    @staticmethod
    def _read_cstring(buf, pos):
        """
        Read a NUL-terminated latin-1 string from C{buf} starting at C{pos}.

        @return: tuple of (decoded text, position just past the terminator).
            When no terminator is present the rest of the buffer is taken.
        """
        end = buf.find(b'\x00', pos)
        if end == -1:
            end = len(buf)
        return buf[pos:end].decode('latin_1'), end + 1

    @staticmethod
    def _region_code(index):
        """
        Turn a zero-based region index into its two-letter code
        (0 -> 'AA', 1 -> 'AB', 26 -> 'BA', ...).
        """
        return chr(index // 26 + 65) + chr(index % 26 + 65)

    def _is_city_db(self):
        """Tell whether the opened database is a City edition."""
        return self._databaseType in (const.CITY_EDITION_REV0,
                                      const.CITY_EDITION_REV1)

    def _is_region_db(self):
        """Tell whether the opened database is a Region edition."""
        return self._databaseType in (const.REGION_EDITION_REV0,
                                      const.REGION_EDITION_REV1)

    def _addr_to_ipnum(self, addr):
        """
        Convert an IP address string with ip2long.

        Only the conversion itself is guarded, so that errors raised later
        during the lookup are not misreported as a bad address.

        @param addr: IP address
        @type addr: str
        @return: result of ip2long conversion
        @rtype: int
        @raise GeoIPError: if ip2long raises ValueError or returns a falsy
            value.
        """
        try:
            ipnum = ip2long(addr)
        except ValueError:
            ipnum = None

        if not ipnum:
            raise GeoIPError(self._NOT_AN_IP % addr)

        return ipnum

    def _setup_segments(self):
        """
        Parses the database file to determine what kind of database is being
        used and setup segment sizes and start points that will be used by
        the seek*() methods later.
        """
        self._databaseType = const.COUNTRY_EDITION
        self._recordLength = const.STANDARD_RECORD_LENGTH
        # Stays None for editions this class does not know how to read; the
        # public methods reject those before any traversal happens.
        self._databaseSegments = None

        handle = self._filehandle
        filepos = handle.tell()
        handle.seek(-3, os.SEEK_END)

        marker = bytearray([255, 255, 255])

        # Walk backwards from the end of the file, one byte at a time,
        # looking for the three 0xFF bytes that precede the type byte.
        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            if bytearray(handle.read(3)) != marker:
                # read(3) advanced by three; step back four to slide the
                # window one byte towards the start of the file.
                handle.seek(-4, os.SEEK_CUR)
                continue

            self._databaseType = bytearray(handle.read(1))[0]

            if self._databaseType >= 106:
                # backwards compatibility with databases from April 2003 and earlier
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0

            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1

            elif self._databaseType in (const.CITY_EDITION_REV0,
                                        const.CITY_EDITION_REV1,
                                        const.ORG_EDITION,
                                        const.ISP_EDITION,
                                        const.ASNUM_EDITION):
                # These editions store the segment count right after the
                # type byte as a little-endian integer.
                raw = bytearray(handle.read(const.SEGMENT_RECORD_LENGTH))
                self._databaseSegments = self._le_int(
                    raw, 0, const.SEGMENT_RECORD_LENGTH)

                if self._databaseType in (const.ORG_EDITION, const.ISP_EDITION):
                    self._recordLength = const.ORG_RECORD_LENGTH

            break

        if self._databaseType == const.COUNTRY_EDITION:
            self._databaseSegments = const.COUNTRY_BEGIN

        handle.seek(filepos, os.SEEK_SET)

    def _lookup_country_id(self, addr):
        """
        Get the country index.

        This method is called by the country_code_by_addr and
        country_name_by_addr methods. It looks up the index ('id') for the
        country which is the key for the code and name.

        @param addr: The IP address
        @type addr: str
        @return: index into the country code / country name tables
        @rtype: int
        @raise ValueError: if the address cannot be converted
        @raise GeoIPError: if the database is not a Country edition
        """
        ipnum = ip2long(addr)

        if not ipnum:
            raise ValueError("Invalid IP address: %s" % addr)

        if self._databaseType != const.COUNTRY_EDITION:
            raise GeoIPError('Invalid database type; country_* methods expect '
                             'Country database')

        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    def _seek_country(self, ipnum):
        """
        Using the record length and appropriate start points, seek to the
        country that corresponds to the converted IP address integer.

        @param ipnum: result of ip2long conversion
        @type ipnum: int
        @return: offset of start of record
        @rtype: int
        @raise GeoIPError: if the tree cannot be walked to a leaf
        """
        record_length = self._recordLength
        node_size = 2 * record_length
        offset = 0

        # One tree level per address bit, most significant bit first.
        for depth in range(31, -1, -1):
            node = self._read_at(node_size * offset, node_size)

            # Each node holds two pointers: the first is followed when the
            # bit is 0, the second when it is 1.
            branch = 1 if ipnum & (1 << depth) else 0
            pointer = self._le_int(node, branch * record_length, record_length)

            if pointer >= self._databaseSegments:
                return pointer

            offset = pointer

        raise GeoIPError(self._CORRUPT)

    def _get_org(self, ipnum):
        """
        Seek and return organization (or ISP) name for converted IP addr.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: org/isp name, or None when the database has no entry
        @rtype: str
        """
        seek_org = self._seek_country(ipnum)
        if seek_org == self._databaseSegments:
            return None

        record_pointer = seek_org + (2 * self._recordLength - 1) * self._databaseSegments
        org_buf = self._read_at(record_pointer, const.MAX_ORG_RECORD_LENGTH)

        name, _ = self._read_cstring(org_buf, 0)
        return name

    def _get_region(self, ipnum):
        """
        Seek and return the region info (dict containing country_code and
        region_name).

        @param ipnum: converted IP address
        @type ipnum: int
        @return: dict containing country_code and region_name; both are
            empty strings when nothing is known
        @rtype: dict
        """
        country_code = ''
        region = ''

        if self._databaseType == const.REGION_EDITION_REV0:
            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV0
            if seek_region >= 1000:
                country_code = 'US'
                # The US state index is the distance past 1000 (as in
                # MaxMind's C API), not the quotient by 1000.
                region = self._region_code(seek_region - 1000)
            else:
                country_code = const.COUNTRY_CODES[seek_region]

        elif self._databaseType == const.REGION_EDITION_REV1:
            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV1
            if seek_region < const.US_OFFSET:
                pass  # unknown: keep the empty defaults
            elif seek_region < const.CANADA_OFFSET:
                country_code = 'US'
                region = self._region_code(seek_region - const.US_OFFSET)
            elif seek_region < const.WORLD_OFFSET:
                country_code = 'CA'
                region = self._region_code(seek_region - const.CANADA_OFFSET)
            else:
                i = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
                if i < len(const.COUNTRY_CODES):
                    country_code = const.COUNTRY_CODES[i]

        elif self._is_city_db():
            rec = self._get_record(ipnum)
            # _get_record returns None when the database has no record.
            if rec is not None:
                country_code = rec.get('country_code', '')
                region = rec.get('region_name', '')

        return {'country_code': country_code, 'region_name': region}

    def _get_record(self, ipnum):
        """
        Populate location dict for converted IP.

        @param ipnum: converted IP address
        @type ipnum: int
        @return: dict with country_code, country_code3, country_name,
            city, postal_code, latitude, longitude, metro_code, time_zone,
            plus region_name when the record has a region and
            dma_code / area_code for CITY_EDITION_REV1 databases;
            None when the database has no record for the address
        @rtype: dict
        """
        seek_country = self._seek_country(ipnum)
        if seek_country == self._databaseSegments:
            return None

        record_pointer = seek_country + (2 * self._recordLength - 1) * self._databaseSegments
        buf = self._read_at(record_pointer, const.FULL_RECORD_LENGTH)

        if not buf:
            raise GeoIPError(self._CORRUPT)

        # First byte: index into the country tables.
        country_index = buf[0]
        record = {
            'country_code': const.COUNTRY_CODES[country_index],
            'country_code3': const.COUNTRY_CODES3[country_index],
            'country_name': const.COUNTRY_NAMES[country_index],
        }
        pos = 1

        # Three consecutive NUL-terminated strings: region, city, postal code.
        region, pos = self._read_cstring(buf, pos)
        if region:
            record['region_name'] = region

        city, pos = self._read_cstring(buf, pos)
        record['city'] = city

        postal_code, pos = self._read_cstring(buf, pos)
        record['postal_code'] = postal_code or None

        # Coordinates are 3-byte little-endian integers, scaled by 10000
        # and shifted by 180 degrees.
        record['latitude'] = (self._le_int(buf, pos, 3) / 10000.0) - 180.0
        pos += 3
        record['longitude'] = (self._le_int(buf, pos, 3) / 10000.0) - 180.0
        pos += 3

        if self._databaseType == const.CITY_EDITION_REV1:
            if record['country_code'] == 'US':
                # DMA code and area code are packed into one integer.
                dmaarea_combo = self._le_int(buf, pos, 3)
                pos += 3
                record['dma_code'] = dmaarea_combo // 1000
                record['area_code'] = dmaarea_combo % 1000
            else:
                record['dma_code'] = 0
                record['area_code'] = 0

        if 'dma_code' in record and record['dma_code'] in const.DMA_MAP:
            record['metro_code'] = const.DMA_MAP[record['dma_code']]
        else:
            record['metro_code'] = ''

        record['time_zone'] = time_zone_by_country_and_region(
            record['country_code'], record.get('region_name')) or ''

        return record

    def country_code_by_addr(self, addr):
        """
        Returns 2-letter country code (e.g. 'US') for specified IP address.
        Use this method if you have a Country, Region, or City database.

        @param addr: IP address
        @type addr: str
        @return: 2-letter country code
        @rtype: str
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        if self._databaseType == const.COUNTRY_EDITION:
            try:
                country_id = self._lookup_country_id(addr)
            except ValueError:
                raise GeoIPError(self._NOT_AN_IP % addr)
            return const.COUNTRY_CODES[country_id]

        if self._is_region_db() or self._is_city_db():
            return self.region_by_addr(addr)['country_code']

        raise GeoIPError('Invalid database type; country_* methods expect '
                         'Country, City, or Region database')

    def country_code_by_name(self, hostname):
        """
        Returns 2-letter country code (e.g. 'US') for specified hostname.
        Use this method if you have a Country, Region, or City database.

        @param hostname: host name
        @type hostname: str
        @return: 2-letter country code
        @rtype: str
        """
        addr = socket.gethostbyname(hostname)
        return self.country_code_by_addr(addr)

    def country_name_by_addr(self, addr):
        """
        Returns full country name for specified IP address.
        Use this method if you have a Country or City database.

        @param addr: IP address
        @type addr: str
        @return: country name; an empty string when a City database has no
            record for the address
        @rtype: str
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        if self._databaseType == const.COUNTRY_EDITION:
            try:
                country_id = self._lookup_country_id(addr)
            except ValueError:
                raise GeoIPError(self._NOT_AN_IP % addr)
            return const.COUNTRY_NAMES[country_id]

        if self._is_city_db():
            record = self.record_by_addr(addr)
            # record_by_addr returns None when nothing is known.
            return record['country_name'] if record else ''

        raise GeoIPError('Invalid database type; country_* methods expect '
                         'Country or City database')

    def country_name_by_name(self, hostname):
        """
        Returns full country name for specified hostname.
        Use this method if you have a Country or City database.

        @param hostname: host name
        @type hostname: str
        @return: country name
        @rtype: str
        """
        addr = socket.gethostbyname(hostname)
        return self.country_name_by_addr(addr)

    def org_by_addr(self, addr):
        """
        Lookup the organization (or ISP) for given IP address.
        Use this method if you have an Organization/ISP database.

        @param addr: IP address
        @type addr: str
        @return: organization or ISP name, or None when not found
        @rtype: str
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        ipnum = self._addr_to_ipnum(addr)

        if self._databaseType not in (const.ORG_EDITION, const.ISP_EDITION,
                                      const.ASNUM_EDITION):
            raise GeoIPError('Invalid database type; org_* methods expect '
                             'Org/ISP database')

        return self._get_org(ipnum)

    def org_by_name(self, hostname):
        """
        Lookup the organization (or ISP) for hostname.
        Use this method if you have an Organization/ISP database.

        @param hostname: host name
        @type hostname: str
        @return: organization or ISP name
        @rtype: str
        """
        addr = socket.gethostbyname(hostname)
        return self.org_by_addr(addr)

    def record_by_addr(self, addr):
        """
        Look up the record for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: dict as built by _get_record, or None when the database
            has no record for the address
        @rtype: dict
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        ipnum = self._addr_to_ipnum(addr)

        if not self._is_city_db():
            raise GeoIPError('Invalid database type; record_* methods expect City database')

        return self._get_record(ipnum)

    def record_by_name(self, hostname):
        """
        Look up the record for a given hostname.
        Use this method if you have a City database.

        @param hostname: host name
        @type hostname: str
        @return: dict as built by _get_record, or None when the database
            has no record for the address
        @rtype: dict
        """
        addr = socket.gethostbyname(hostname)
        return self.record_by_addr(addr)

    def region_by_addr(self, addr):
        """
        Lookup the region for given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: dict containing country_code and region_name
        @rtype: dict
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        ipnum = self._addr_to_ipnum(addr)

        if not (self._is_region_db() or self._is_city_db()):
            raise GeoIPError('Invalid database type; region_* methods expect '
                             'Region or City database')

        return self._get_region(ipnum)

    def region_by_name(self, hostname):
        """
        Lookup the region for given hostname.
        Use this method if you have a Region or City database.

        @param hostname: host name
        @type hostname: str
        @return: dict containing country_code and region_name
        @rtype: dict
        """
        addr = socket.gethostbyname(hostname)
        return self.region_by_addr(addr)

    def time_zone_by_addr(self, addr):
        """
        Look up the time zone for a given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: Time zone, or an empty string when none can be determined
        @rtype: str
        @raise GeoIPError: on a non-IP argument or unsupported database
        """
        ipnum = self._addr_to_ipnum(addr)

        if self._is_city_db():
            record = self._get_record(ipnum)
            # _get_record returns None when nothing is known.
            return record['time_zone'] if record else ''

        if self._is_region_db():
            # Region databases have no city records to parse; derive the
            # zone from the region info instead. An empty region is passed
            # as None, mirroring how _get_record calls the helper when a
            # record has no region.
            info = self._get_region(ipnum)
            return time_zone_by_country_and_region(
                info['country_code'], info['region_name'] or None) or ''

        raise GeoIPError('Invalid database type; region_* methods expect '
                         'Region or City database')

    def time_zone_by_name(self, hostname):
        """
        Look up the time zone for a given hostname.
        Use this method if you have a Region or City database.

        @param hostname: host name
        @type hostname: str
        @return: Time zone
        @rtype: str
        """
        addr = socket.gethostbyname(hostname)
        return self.time_zone_by_addr(addr)
