Package pygeoip
[hide private]
[frames] | no frames]

Source Code for Package pygeoip

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Pure Python GeoIP API 
  4   
  5  The API is based on U{MaxMind's C-based Python 
  6  API<http://www.maxmind.com/app/python>}, but the code itself is based on 
  7  the U{pure PHP5 API<http://pear.php.net/package/Net_GeoIP/>} by Jim Winstead 
  8  and Hans Lellelid. 
  9   
 10  It is mostly a drop-in replacement, except the C{new} and C{open} methods 
 11  are gone. You should instantiate the L{GeoIP} class yourself: 
 12   
 13  C{gi = GeoIP('/path/to/GeoIP.dat', pygeoip.MEMORY_CACHE)} 
 14   
 15  @author: Jennifer Ennis <zaylea at gmail dot com> 
 16  @author: William Tisäter <william@defunct.cc> 
 17   
 18  @license: 
 19  Copyright(C) 2004 MaxMind LLC 
 20   
 21  This program is free software: you can redistribute it and/or modify 
 22  it under the terms of the GNU Lesser General Public License as published by 
 23  the Free Software Foundation, either version 3 of the License, or 
 24  (at your option) any later version. 
 25   
 26  This program is distributed in the hope that it will be useful, 
 27  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 28  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 29  GNU General Public License for more details. 
 30   
 31  You should have received a copy of the GNU Lesser General Public License 
 32  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 33  """ 
 34   
 35  import os 
 36  import math 
 37  import socket 
 38  import mmap 
 39  import codecs 
 40  from threading import Lock 
 41   
 42  try: 
 43      from StringIO import StringIO 
 44  except ImportError: 
 45      from io import StringIO 
 46   
 47  import pygeoip.const 
 48  from pygeoip import util 
 49  from pygeoip.const import PY2, PY3 
 50  from pygeoip.timezone import time_zone_by_country_and_region 
 51   
 52   
 53  MMAP_CACHE = const.MMAP_CACHE 
 54  MEMORY_CACHE = const.MEMORY_CACHE 
 55  STANDARD = const.STANDARD 
 56   
 57   
class GeoIPError(Exception):
    """
    Error raised by this package for corrupt databases, unsupported
    database types and failed address lookups.
    """
60 61
class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per class and database
    filename.
    """

    # Maps (class, filename) to the instance already built for that pair.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Singleton method to get an instance without reparsing the db. Unique
        instances are instantiated based on the filename of the db. Flags are
        ignored for this, i.e. if you initialize one with STANDARD
        flag (default) and then try later to initialize with MEMORY_CACHE, it
        will still return the STANDARD one.

        The hook is C{__call__} rather than C{__new__}: a metaclass
        C{__new__} only runs while a class is being created, so it never
        sees the filename that is passed to the constructor.

        @param args: Positional constructor arguments; the first one is
            taken as the filename.
        @param kwargs: Keyword constructor arguments; C{filename} is used
            when no positional argument is given.
        @return: the cached instance for this class and filename, created
            on first use
        """
        if len(args) > 0:
            filename = args[0]
        elif 'filename' in kwargs:
            filename = kwargs['filename']
        else:
            # No filename supplied: construct normally so the usual
            # TypeError for the missing argument is raised.
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        key = (cls, filename)
        instances = GeoIPMetaclass._instances
        if key not in instances:
            # Only cached once construction succeeded.
            instances[key] = super(GeoIPMetaclass, cls).__call__(*args,
                                                                 **kwargs)

        return instances[key]


# Base class created by calling the metaclass directly, which works on both
# Python 2 and Python 3 without version-specific metaclass syntax.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
88 -class GeoIP(GeoIPBase):
89 - def __init__(self, filename, flags=0):
90 """ 91 Initialize the class. 92 93 @param filename: Path to a geoip database. 94 @type filename: str 95 @param flags: Flags that affect how the database is processed. 96 Currently supported flags are STANDARD (the default), 97 MEMORY_CACHE (preload the whole file into memory) and 98 MMAP_CACHE (access the file via mmap). 99 @type flags: int 100 """ 101 self._filename = filename 102 self._flags = flags 103 104 if self._flags & const.MMAP_CACHE: 105 f = open(filename, 'rb') 106 access = mmap.ACCESS_READ 107 self._filehandle = mmap.mmap(f.fileno(), 0, access=access) 108 f.close() 109 110 elif self._flags & const.MEMORY_CACHE: 111 f = open(filename, 'rb') 112 self._memoryBuffer = f.read() 113 self._filehandle = StringIO(self._memoryBuffer) 114 f.close() 115 116 else: 117 self._filehandle = codecs.open(filename, 'rb', 'iso-8859-1') 118 119 self._lock = Lock() 120 self._setup_segments()
121
122 - def _setup_segments(self):
123 """ 124 Parses the database file to determine what kind of database is 125 being used and setup segment sizes and start points that will 126 be used by the seek*() methods later. 127 128 Supported databases: 129 130 * COUNTRY_EDITION 131 * REGION_EDITION_REV0 132 * REGION_EDITION_REV1 133 * CITY_EDITION_REV0 134 * CITY_EDITION_REV1 135 * ORG_EDITION 136 * ISP_EDITION 137 * ASNUM_EDITION 138 139 """ 140 self._databaseType = const.COUNTRY_EDITION 141 self._recordLength = const.STANDARD_RECORD_LENGTH 142 self._databaseSegments = const.COUNTRY_BEGIN 143 144 self._lock.acquire() 145 filepos = self._filehandle.tell() 146 self._filehandle.seek(-3, os.SEEK_END) 147 148 for i in range(const.STRUCTURE_INFO_MAX_SIZE): 149 chars = chr(255) * 3 150 flag = 'unicode_escape' 151 delim = self._filehandle.read(3) 152 if (delim == chars) if PY3 else (delim == unicode(chars, flag)): 153 self._databaseType = ord(self._filehandle.read(1)) 154 155 # Compatibility with databases from April 2003 and earlier 156 if (self._databaseType >= 106): 157 self._databaseType -= 105 158 159 if self._databaseType == const.REGION_EDITION_REV0: 160 self._databaseSegments = const.STATE_BEGIN_REV0 161 162 elif self._databaseType == const.REGION_EDITION_REV1: 163 self._databaseSegments = const.STATE_BEGIN_REV1 164 165 elif self._databaseType in (const.CITY_EDITION_REV0, 166 const.CITY_EDITION_REV1, 167 const.ORG_EDITION, 168 const.ISP_EDITION, 169 const.ASNUM_EDITION): 170 self._databaseSegments = 0 171 buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH) 172 173 for j in range(const.SEGMENT_RECORD_LENGTH): 174 self._databaseSegments += (ord(buf[j]) << (j * 8)) 175 176 LONG_RECORDS = (const.ORG_EDITION, const.ISP_EDITION) 177 if self._databaseType in LONG_RECORDS: 178 self._recordLength = const.ORG_RECORD_LENGTH 179 break 180 else: 181 self._filehandle.seek(-4, os.SEEK_CUR) 182 183 self._filehandle.seek(filepos, os.SEEK_SET) 184 self._lock.release()
185
186 - def _seek_country(self, ipnum):
187 """ 188 Using the record length and appropriate start points, seek to the 189 country that corresponds to the converted IP address integer. 190 191 @param ipnum: result of ip2long conversion 192 @type ipnum: int 193 @return: offset of start of record 194 @rtype: int 195 """ 196 offset = 0 197 seek_depth = 127 if len(str(ipnum)) > 10 else 31 198 199 for depth in range(seek_depth, -1, -1): 200 if self._flags & const.MEMORY_CACHE: 201 startIndex = 2 * self._recordLength * offset 202 endIndex = startIndex + (2 * self._recordLength) 203 buf = self._memoryBuffer[startIndex:endIndex] 204 else: 205 startIndex = 2 * self._recordLength * offset 206 readLength = 2 * self._recordLength 207 self._lock.acquire() 208 self._filehandle.seek(startIndex, os.SEEK_SET) 209 buf = self._filehandle.read(readLength) 210 self._lock.release() 211 212 x = [0, 0] 213 for i in range(2): 214 for j in range(self._recordLength): 215 x[i] += ord(buf[self._recordLength * i + j]) << (j * 8) 216 if ipnum & (1 << depth): 217 if x[1] >= self._databaseSegments: 218 return x[1] 219 offset = x[1] 220 else: 221 if x[0] >= self._databaseSegments: 222 return x[0] 223 offset = x[0] 224 225 raise GeoIPError('Corrupt database')
226
227 - def _get_org(self, ipnum):
228 """ 229 Seek and return organization or ISP name for ipnum. 230 @param ipnum: Converted IP address 231 @type ipnum: int 232 @return: org/isp name 233 @rtype: str 234 """ 235 seek_org = self._seek_country(ipnum) 236 if seek_org == self._databaseSegments: 237 return None 238 239 read_length = (2 * self._recordLength - 1) * self._databaseSegments 240 self._lock.acquire() 241 self._filehandle.seek(seek_org + read_length, os.SEEK_SET) 242 org_buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH) 243 self._lock.release() 244 245 return org_buf[:org_buf.index(chr(0))]
246
247 - def _get_region(self, ipnum):
248 """ 249 Seek and return the region info (dict containing country_code 250 and region_name). 251 252 @param ipnum: Converted IP address 253 @type ipnum: int 254 @return: dict containing country_code and region_name 255 @rtype: dict 256 """ 257 region = '' 258 country_code = '' 259 seek_country = self._seek_country(ipnum) 260 261 def get_region_name(offset): 262 region1 = chr(offset // 26 + 65) 263 region2 = chr(offset % 26 + 65) 264 return ''.join([region1, region2])
265 266 if self._databaseType == const.REGION_EDITION_REV0: 267 seek_region = seek_country - const.STATE_BEGIN_REV0 268 if seek_region >= 1000: 269 country_code = 'US' 270 region = get_region_name(seek_region - 1000) 271 else: 272 country_code = const.COUNTRY_CODES[seek_region] 273 elif self._databaseType == const.REGION_EDITION_REV1: 274 seek_region = seek_country - const.STATE_BEGIN_REV1 275 if seek_region < const.US_OFFSET: 276 pass 277 elif seek_region < const.CANADA_OFFSET: 278 country_code = 'US' 279 region = get_region_name(seek_region - const.US_OFFSET) 280 elif seek_region < const.WORLD_OFFSET: 281 country_code = 'CA' 282 region = get_region_name(seek_region - const.CANADA_OFFSET) 283 else: 284 index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE 285 if index in const.COUNTRY_CODES: 286 country_code = const.COUNTRY_CODES[index] 287 elif self._databaseType in const.CITY_EDITIONS: 288 rec = self._get_record(ipnum) 289 country_code = rec['country_code'] if 'country_code' in rec else '' 290 region = rec['region_name'] if 'region_name' in rec else '' 291 292 return {'country_code': country_code, 'region_name': region}
293
294 - def _get_record(self, ipnum):
295 """ 296 Populate location dict for converted IP. 297 298 @param ipnum: Converted IP address 299 @type ipnum: int 300 @return: dict with country_code, country_code3, country_name, 301 region, city, postal_code, latitude, longitude, 302 dma_code, metro_code, area_code, region_name, time_zone 303 @rtype: dict 304 """ 305 seek_country = self._seek_country(ipnum) 306 if seek_country == self._databaseSegments: 307 return None 308 309 read_length = (2 * self._recordLength - 1) * self._databaseSegments 310 self._lock.acquire() 311 self._filehandle.seek(seek_country + read_length, os.SEEK_SET) 312 record_buf = self._filehandle.read(const.FULL_RECORD_LENGTH) 313 self._lock.release() 314 315 record = { 316 'dma_code': 0, 317 'area_code': 0, 318 'metro_code': '', 319 'postal_code': '' 320 } 321 322 latitude = 0 323 longitude = 0 324 record_buf_pos = 0 325 326 # Get country 327 char = ord(record_buf[record_buf_pos]) 328 record['country_code'] = const.COUNTRY_CODES[char] 329 record['country_code3'] = const.COUNTRY_CODES3[char] 330 record['country_name'] = const.COUNTRY_NAMES[char] 331 record_buf_pos += 1 332 333 def get_data(record_buf, record_buf_pos): 334 offset = record_buf_pos 335 char = ord(record_buf[offset]) 336 while (char != 0): 337 offset += 1 338 char = ord(record_buf[offset]) 339 if offset > record_buf_pos: 340 return (offset, record_buf[record_buf_pos:offset]) 341 return (offset, '')
342 343 offset, record['region_name'] = get_data(record_buf, record_buf_pos) 344 offset, record['city'] = get_data(record_buf, offset + 1) 345 offset, record['postal_code'] = get_data(record_buf, offset + 1) 346 record_buf_pos = offset + 1 347 348 for j in range(3): 349 char = ord(record_buf[record_buf_pos]) 350 record_buf_pos += 1 351 latitude += (char << (j * 8)) 352 353 for j in range(3): 354 char = ord(record_buf[record_buf_pos]) 355 record_buf_pos += 1 356 longitude += (char << (j * 8)) 357 358 record['latitude'] = (latitude / 10000.0) - 180.0 359 record['longitude'] = (longitude / 10000.0) - 180.0 360 361 if self._databaseType == const.CITY_EDITION_REV1: 362 dmaarea_combo = 0 363 if record['country_code'] == 'US': 364 for j in range(3): 365 char = ord(record_buf[record_buf_pos]) 366 dmaarea_combo += (char << (j * 8)) 367 record_buf_pos += 1 368 369 record['dma_code'] = int(math.floor(dmaarea_combo / 1000)) 370 record['area_code'] = dmaarea_combo % 1000 371 372 if record['dma_code'] in const.DMA_MAP: 373 record['metro_code'] = const.DMA_MAP[record['dma_code']] 374 375 params = (record['country_code'], record['region_name']) 376 record['time_zone'] = time_zone_by_country_and_region(*params) 377 378 return record 379
380 - def _gethostbyname(self, hostname):
381 if self._databaseType in const.IPV6_EDITIONS: 382 try: 383 response = socket.getaddrinfo(hostname, 0, socket.AF_INET6) 384 except socket.gaierror: 385 return '' 386 family, socktype, proto, canonname, sockaddr = response[0] 387 address, port, flow, scope = sockaddr 388 return address 389 else: 390 return socket.gethostbyname(hostname)
391
392 - def id_by_addr(self, addr):
393 """ 394 Get the country index. 395 Looks up the index for the country which is the key for 396 the code and name. 397 398 @param addr: The IP address 399 @type addr: str 400 @return: network byte order 32-bit integer 401 @rtype: int 402 """ 403 ipnum = util.ip2long(addr) 404 if not ipnum: 405 raise ValueError("Invalid IP address: %s" % addr) 406 407 COUNTY_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 408 if self._databaseType not in COUNTY_EDITIONS: 409 message = 'Invalid database type, expected Country' 410 raise GeoIPError(message) 411 412 return self._seek_country(ipnum) - const.COUNTRY_BEGIN
413
414 - def country_code_by_addr(self, addr):
415 """ 416 Returns 2-letter country code (e.g. 'US') for specified IP address. 417 Use this method if you have a Country, Region, or City database. 418 419 @param addr: IP address 420 @type addr: str 421 @return: 2-letter country code 422 @rtype: str 423 """ 424 try: 425 VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 426 if self._databaseType in VALID_EDITIONS: 427 ipv = 6 if addr.find(':') >= 0 else 4 428 429 if ipv == 4 and self._databaseType != const.COUNTRY_EDITION: 430 message = 'Invalid database type; expected IPv6 address' 431 raise ValueError(message) 432 if ipv == 6 and self._databaseType != const.COUNTRY_EDITION_V6: 433 message = 'Invalid database type; expected IPv4 address' 434 raise ValueError(message) 435 436 country_id = self.id_by_addr(addr) 437 438 return const.COUNTRY_CODES[country_id] 439 elif self._databaseType in const.REGION_CITY_EDITIONS: 440 return self.region_by_addr(addr)['country_code'] 441 442 message = 'Invalid database type, expected Country, City or Region' 443 raise GeoIPError(message) 444 except ValueError: 445 raise GeoIPError('Failed to lookup address %s' % addr)
446
447 - def country_code_by_name(self, hostname):
448 """ 449 Returns 2-letter country code (e.g. 'US') for specified hostname. 450 Use this method if you have a Country, Region, or City database. 451 452 @param hostname: Hostname 453 @type hostname: str 454 @return: 2-letter country code 455 @rtype: str 456 """ 457 addr = self._gethostbyname(hostname) 458 return self.country_code_by_addr(addr)
459
460 - def country_name_by_addr(self, addr):
461 """ 462 Returns full country name for specified IP address. 463 Use this method if you have a Country or City database. 464 465 @param addr: IP address 466 @type addr: str 467 @return: country name 468 @rtype: str 469 """ 470 try: 471 VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 472 if self._databaseType in VALID_EDITIONS: 473 return const.COUNTRY_NAMES[self.id_by_addr(addr)] 474 elif self._databaseType in const.CITY_EDITIONS: 475 return self.record_by_addr(addr)['country_name'] 476 else: 477 message = 'Invalid database type, expected Country or City' 478 raise GeoIPError(message) 479 except ValueError: 480 raise GeoIPError('Failed to lookup address %s' % addr)
481
482 - def country_name_by_name(self, hostname):
483 """ 484 Returns full country name for specified hostname. 485 Use this method if you have a Country database. 486 487 @param hostname: Hostname 488 @type hostname: str 489 @return: country name 490 @rtype: str 491 """ 492 addr = self._gethostbyname(hostname) 493 return self.country_name_by_addr(addr)
494
495 - def org_by_addr(self, addr):
496 """ 497 Lookup Organization, ISP or ASNum for given IP address. 498 Use this method if you have an Organization, ISP or ASNum database. 499 500 @param addr: IP address 501 @type addr: str 502 @return: organization or ISP name 503 @rtype: str 504 """ 505 try: 506 ipnum = util.ip2long(addr) 507 if not ipnum: 508 raise ValueError('Invalid IP address') 509 510 valid = (const.ORG_EDITION, const.ISP_EDITION, const.ASNUM_EDITION) 511 if self._databaseType not in valid: 512 message = 'Invalid database type, expected Org, ISP or ASNum' 513 raise GeoIPError(message) 514 515 return self._get_org(ipnum) 516 except ValueError: 517 raise GeoIPError('Failed to lookup address %s' % addr)
518
519 - def org_by_name(self, hostname):
520 """ 521 Lookup the organization (or ISP) for hostname. 522 Use this method if you have an Organization/ISP database. 523 524 @param hostname: Hostname 525 @type hostname: str 526 @return: Organization or ISP name 527 @rtype: str 528 """ 529 addr = socket.gethostbyname(hostname) 530 return self.org_by_addr(addr)
531
532 - def record_by_addr(self, addr):
533 """ 534 Look up the record for a given IP address. 535 Use this method if you have a City database. 536 537 @param addr: IP address 538 @type addr: str 539 @return: Dictionary with country_code, country_code3, country_name, 540 region, city, postal_code, latitude, longitude, dma_code, 541 metro_code, area_code, region_name, time_zone 542 @rtype: dict 543 """ 544 try: 545 ipnum = util.ip2long(addr) 546 if not ipnum: 547 raise ValueError('Invalid IP address') 548 549 if self._databaseType not in const.CITY_EDITIONS: 550 message = 'Invalid database type, expected City' 551 raise GeoIPError(message) 552 553 return self._get_record(ipnum) 554 except ValueError: 555 raise GeoIPError('Failed to lookup address %s' % addr)
556
557 - def record_by_name(self, hostname):
558 """ 559 Look up the record for a given hostname. 560 Use this method if you have a City database. 561 562 @param hostname: Hostname 563 @type hostname: str 564 @return: Dictionary with country_code, country_code3, country_name, 565 region, city, postal_code, latitude, longitude, dma_code, 566 metro_code, area_code, region_name, time_zone 567 @rtype: dict 568 """ 569 addr = socket.gethostbyname(hostname) 570 return self.record_by_addr(addr)
571
572 - def region_by_addr(self, addr):
573 """ 574 Lookup the region for given IP address. 575 Use this method if you have a Region database. 576 577 @param addr: IP address 578 @type addr: str 579 @return: Dictionary containing country_code, region and region_name 580 @rtype: dict 581 """ 582 try: 583 ipnum = util.ip2long(addr) 584 if not ipnum: 585 raise ValueError('Invalid IP address') 586 587 if self._databaseType not in const.REGION_CITY_EDITIONS: 588 message = 'Invalid database type, expected Region or City' 589 raise GeoIPError(message) 590 591 return self._get_region(ipnum) 592 except ValueError: 593 raise GeoIPError('Failed to lookup address %s' % addr)
594
595 - def region_by_name(self, hostname):
596 """ 597 Lookup the region for given hostname. 598 Use this method if you have a Region database. 599 600 @param hostname: Hostname 601 @type hostname: str 602 @return: Dictionary containing country_code, region, and region_name 603 @rtype: dict 604 """ 605 addr = socket.gethostbyname(hostname) 606 return self.region_by_addr(addr)
607
608 - def time_zone_by_addr(self, addr):
609 """ 610 Look up the time zone for a given IP address. 611 Use this method if you have a Region or City database. 612 613 @param addr: IP address 614 @type addr: str 615 @return: Time zone 616 @rtype: str 617 """ 618 try: 619 ipnum = util.ip2long(addr) 620 if not ipnum: 621 raise ValueError('Invalid IP address') 622 623 if self._databaseType not in const.CITY_EDITIONS: 624 message = 'Invalid database type, expected City' 625 raise GeoIPError(message) 626 627 return self._get_record(ipnum)['time_zone'] 628 except ValueError: 629 raise GeoIPError('Failed to lookup address %s' % addr)
630
631 - def time_zone_by_name(self, hostname):
632 """ 633 Look up the time zone for a given hostname. 634 Use this method if you have a Region or City database. 635 636 @param hostname: Hostname 637 @type hostname: str 638 @return: Time zone 639 @rtype: str 640 """ 641 addr = socket.gethostbyname(hostname) 642 return self.time_zone_by_addr(addr)
643