Package pygeoip
[frames | no frames]

Source Code for Package pygeoip

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Pure Python GeoIP API 
  4   
  5  The API is based on MaxMind's C-based Python API, but the code itself is 
  6  ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid. 
  7   
  8  @author: Jennifer Ennis <zaylea@gmail.com> 
  9   
 10  @license: Copyright(C) 2004 MaxMind LLC 
 11   
 12  This program is free software: you can redistribute it and/or modify 
 13  it under the terms of the GNU Lesser General Public License as published by 
 14  the Free Software Foundation, either version 3 of the License, or 
 15  (at your option) any later version. 
 16   
 17  This program is distributed in the hope that it will be useful, 
 18  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 20  GNU General Public License for more details. 
 21   
 22  You should have received a copy of the GNU Lesser General Public License 
 23  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 24  """ 
 25   
 26  import os 
 27  import math 
 28  import socket 
 29  import codecs 
 30  from threading import Lock 
 31   
 32  try: 
 33      import mmap 
 34  except ImportError: 
 35      mmap = None 
 36   
 37  try: 
 38      from StringIO import StringIO 
 39  except ImportError: 
 40      from io import StringIO, BytesIO 
 41   
 42  from pygeoip import util, const 
 43  from pygeoip.const import PY2, PY3 
 44  from pygeoip.timezone import time_zone_by_country_and_region 
 45   
 46   
# Flags accepted by GeoIP(filename, flags), re-exported from pygeoip.const
# so they can be referenced from this package's namespace.
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE

# Encoding used in this module to decode data read from the database file.
ENCODING = const.ENCODING
 52   
 53   
54 -class _GeoIPMetaclass(type):
55 - def __new__(cls, *args, **kwargs):
56 """ Singleton method to gets an instance without reparsing 57 the database, the filename is being used as cache key. 58 """ 59 if not hasattr(cls, '_instances'): 60 cls._instances = {} 61 62 if len(args) > 0: 63 filename = args[0] 64 elif 'filename' in kwargs: 65 filename = kwargs['filename'] 66 67 if filename not in cls._instances: 68 cls._instances[filename] = type.__new__(cls, *args, **kwargs) 69 70 return cls._instances[filename]
71 72 73 _GeoIPBase = _GeoIPMetaclass('GeoIPBase', (object,), {}) 74 75
class GeoIPError(Exception):
    """
    Error raised by this module when a database looks corrupt, has the
    wrong type for the requested lookup, or an address lookup fails.
    """
78 79
80 -class GeoIP(_GeoIPBase):
def __init__(self, filename, flags=0):
    """
    Open a GeoIP database and read its structure information.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    """
    self._filename = filename
    self._flags = flags

    if self._flags & const.MMAP_CACHE and mmap is None:
        import warnings
        warnings.warn("MMAP_CACHE cannot be used without a mmap module")
        # Drop the unusable flag and fall through to the remaining modes
        # below; otherwise no file handle would ever be opened and
        # _setup_segments() would fail on the missing attribute.
        self._flags &= ~const.MMAP_CACHE

    if self._flags & const.MMAP_CACHE:
        # The mapping is created while the file is open; the file object
        # itself is closed again right away, as before.
        with open(filename, 'rb') as f:
            self._filehandle = mmap.mmap(f.fileno(), 0,
                                         access=mmap.ACCESS_READ)

    elif self._flags & const.MEMORY_CACHE:
        with open(filename, 'rb') as f:
            self._memoryBuffer = f.read()
        # Wrap the preloaded data in a file-like object so the seek/read
        # based code paths work on it too.
        iohandle = BytesIO if PY3 else StringIO
        self._filehandle = iohandle(self._memoryBuffer)

    else:
        self._filehandle = codecs.open(filename, 'rb', ENCODING)

    self._lock = Lock()
    self._setup_segments()
119
def _setup_segments(self):
    """
    Parses the database file to determine what kind of database is
    being used and setup segment sizes and start points that will
    be used by the seek*() methods later.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * CITY_EDITION_REV1_V6
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION
    * ASNUM_EDITION_V6

    The file position found on entry is restored before returning.
    """
    # Defaults, kept when no structure-info marker is found.
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # The marker searched for is three chr(255) characters; it is the same
    # for every probe, so it is built once.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    # Editions whose segment count is stored in the file after the type byte.
    segmented_editions = (const.CITY_EDITION_REV0,
                          const.CITY_EDITION_REV1,
                          const.CITY_EDITION_REV1_V6,
                          const.ORG_EDITION,
                          const.ISP_EDITION,
                          const.ASNUM_EDITION,
                          const.ASNUM_EDITION_V6)
    long_record_editions = (const.ORG_EDITION, const.ISP_EDITION)

    # "with" guarantees the lock is released even if a read/seek fails.
    with self._lock:
        filepos = self._filehandle.tell()
        self._filehandle.seek(-3, os.SEEK_END)

        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            delim = self._filehandle.read(3)

            if PY3 and type(delim) is bytes:
                delim = delim.decode(ENCODING)
            if PY2 and type(delim) is str:
                delim = delim.decode(ENCODING)

            if delim != marker:
                # Three characters were just read; moving back four makes
                # the next probe start one position earlier.
                self._filehandle.seek(-4, os.SEEK_CUR)
                continue

            byte = self._filehandle.read(1)
            self._databaseType = ord(byte)

            # Compatibility with databases from April 2003 and earlier
            if self._databaseType >= 106:
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0

            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1

            elif self._databaseType in segmented_editions:
                buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH)

                if PY3 and type(buf) is bytes:
                    buf = buf.decode(ENCODING)

                # Segment count is stored least-significant byte first.
                self._databaseSegments = 0
                for j in range(const.SEGMENT_RECORD_LENGTH):
                    self._databaseSegments += (ord(buf[j]) << (j * 8))

                if self._databaseType in long_record_editions:
                    self._recordLength = const.ORG_RECORD_LENGTH
            break

        self._filehandle.seek(filepos, os.SEEK_SET)
200
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    The bits of ipnum are examined from the most significant one down;
    each bit selects one of the two values stored in the current node,
    until a value at or beyond the segment count is reached.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if the walk fails or runs out of bits without
        reaching a value at or beyond the segment count
    """
    try:
        offset = 0
        # Numbers with more than 10 decimal digits do not fit in 32 bits
        # and are walked as 128-bit addresses.
        # NOTE(review): a 128-bit address with a small numeric value is
        # therefore walked as 32-bit; confirm whether that is intended.
        seek_depth = 127 if len(str(ipnum)) > 10 else 31
        node_size = 2 * self._recordLength

        for depth in range(seek_depth, -1, -1):
            start = node_size * offset
            if self._flags & const.MEMORY_CACHE:
                buf = self._memoryBuffer[start:start + node_size]
            else:
                # "with" releases the lock even when seek/read raises, so
                # a failed lookup can no longer block later lookups.
                with self._lock:
                    self._filehandle.seek(start, os.SEEK_SET)
                    buf = self._filehandle.read(node_size)

            if PY3 and type(buf) is bytes:
                buf = buf.decode(ENCODING)

            # Decode the two values of the node, least-significant byte
            # first.
            x = [0, 0]
            for i in range(2):
                for j in range(self._recordLength):
                    byte = buf[self._recordLength * i + j]
                    x[i] += ord(byte) << (j * 8)

            branch = x[1] if ipnum & (1 << depth) else x[0]
            if branch >= self._databaseSegments:
                return branch
            offset = branch
    except Exception:
        # Any failure while walking is reported as a corrupt database
        # below; KeyboardInterrupt/SystemExit are no longer swallowed.
        pass

    raise GeoIPError('Corrupt database')
248
249 - def _get_org(self, ipnum):
250 """ 251 Seek and return organization or ISP name for ipnum. 252 @param ipnum: Converted IP address 253 @type ipnum: int 254 @return: org/isp name 255 @rtype: str 256 """ 257 seek_org = self._seek_country(ipnum) 258 if seek_org == self._databaseSegments: 259 return None 260 261 read_length = (2 * self._recordLength - 1) * self._databaseSegments 262 self._lock.acquire() 263 self._filehandle.seek(seek_org + read_length, os.SEEK_SET) 264 buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH) 265 self._lock.release() 266 267 if PY3 and type(buf) is bytes: 268 buf = buf.decode(ENCODING) 269 270 return buf[:buf.index(chr(0))]
271
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name; either value
        is an empty string when it could not be determined
    @rtype: dict
    """
    region = ''
    country_code = ''
    seek_country = self._seek_country(ipnum)

    def get_region_name(offset):
        # Two upper-case letters: offset expressed in base 26, 'A' = 0.
        region1 = chr(offset // 26 + 65)
        region2 = chr(offset % 26 + 65)
        return ''.join([region1, region2])

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]
    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # COUNTRY_CODES is indexed by integer elsewhere in this class,
            # so "index in COUNTRY_CODES" would compare the integer with
            # the stored codes rather than check that the index is valid.
            # Looking the entry up directly works whether the table is a
            # sequence or a mapping; an out-of-range index leaves
            # country_code empty.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass
    elif self._databaseType in const.CITY_EDITIONS:
        rec = self._get_record(ipnum)
        region = rec.get('region_name', '')
        country_code = rec.get('country_code', '')

    return {'country_code': country_code, 'region_name': region}
318
319 - def _get_record(self, ipnum):
320 """ 321 Populate location dict for converted IP. 322 323 @param ipnum: Converted IP address 324 @type ipnum: int 325 @return: dict with country_code, country_code3, country_name, 326 region, city, postal_code, latitude, longitude, 327 dma_code, metro_code, area_code, region_name, time_zone 328 @rtype: dict 329 """ 330 seek_country = self._seek_country(ipnum) 331 if seek_country == self._databaseSegments: 332 return {} 333 334 read_length = (2 * self._recordLength - 1) * self._databaseSegments 335 self._lock.acquire() 336 self._filehandle.seek(seek_country + read_length, os.SEEK_SET) 337 buf = self._filehandle.read(const.FULL_RECORD_LENGTH) 338 self._lock.release() 339 340 if PY3 and type(buf) is bytes: 341 buf = buf.decode(ENCODING) 342 343 record = { 344 'dma_code': 0, 345 'area_code': 0, 346 'metro_code': '', 347 'postal_code': '' 348 } 349 350 latitude = 0 351 longitude = 0 352 buf_pos = 0 353 354 # Get country 355 char = ord(buf[buf_pos]) 356 record['country_code'] = const.COUNTRY_CODES[char] 357 record['country_code3'] = const.COUNTRY_CODES3[char] 358 record['country_name'] = const.COUNTRY_NAMES[char] 359 record['continent'] = const.CONTINENT_NAMES[char] 360 361 buf_pos += 1 362 def get_data(buf, buf_pos): 363 offset = buf_pos 364 char = ord(buf[offset]) 365 while (char != 0): 366 offset += 1 367 char = ord(buf[offset]) 368 if offset > buf_pos: 369 return (offset, buf[buf_pos:offset]) 370 return (offset, '')
371 372 offset, record['region_name'] = get_data(buf, buf_pos) 373 offset, record['city'] = get_data(buf, offset + 1) 374 offset, record['postal_code'] = get_data(buf, offset + 1) 375 buf_pos = offset + 1 376 377 for j in range(3): 378 char = ord(buf[buf_pos]) 379 buf_pos += 1 380 latitude += (char << (j * 8)) 381 382 for j in range(3): 383 char = ord(buf[buf_pos]) 384 buf_pos += 1 385 longitude += (char << (j * 8)) 386 387 record['latitude'] = (latitude / 10000.0) - 180.0 388 record['longitude'] = (longitude / 10000.0) - 180.0 389 390 if self._databaseType in (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6): 391 dmaarea_combo = 0 392 if record['country_code'] == 'US': 393 for j in range(3): 394 char = ord(buf[buf_pos]) 395 dmaarea_combo += (char << (j * 8)) 396 buf_pos += 1 397 398 record['dma_code'] = int(math.floor(dmaarea_combo / 1000)) 399 record['area_code'] = dmaarea_combo % 1000 400 401 record['metro_code'] = const.DMA_MAP.get(record['dma_code']) 402 params = (record['country_code'], record['region_name']) 403 record['time_zone'] = time_zone_by_country_and_region(*params) 404 405 return record 406
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an address string.

    For the IPv6 database editions the first AF_INET6 result of
    getaddrinfo is used; otherwise socket.gethostbyname is used.

    @param hostname: Hostname
    @type hostname: str
    @return: resolved address, or an empty string when the IPv6
        resolution fails with socket.gaierror
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        results = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
        _family, _socktype, _proto, _canonname, sockaddr = results[0]
        address, _port, _flow, _scope = sockaddr
    except socket.gaierror:
        return ''
    return address
418
def id_by_addr(self, addr):
    """
    Get the country index.
    Looks up the index for the country which is the key for
    the code and name.

    @param addr: The IP address
    @type addr: str
    @return: country index (offset found for the address, relative to
        COUNTRY_BEGIN)
    @rtype: int
    @raise ValueError: if the address converts to a false value
    @raise GeoIPError: if the database is not a Country edition
    """
    ipnum = util.ip2long(addr)
    if not ipnum:
        raise ValueError("Invalid IP address: %s" % addr)

    country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType in country_editions:
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    raise GeoIPError('Invalid database type, expected Country')
440
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: if the database type is unsupported, or the
        lookup fails with a ValueError (including an address family
        that does not match a Country database)
    """
    try:
        db_type = self._databaseType
        if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
            # An address containing ':' is treated as IPv6.
            is_ipv6 = addr.find(':') >= 0

            if not is_ipv6 and db_type != const.COUNTRY_EDITION:
                raise ValueError(
                    'Invalid database type; expected IPv6 address')
            if is_ipv6 and db_type != const.COUNTRY_EDITION_V6:
                raise ValueError(
                    'Invalid database type; expected IPv4 address')

            return const.COUNTRY_CODES[self.id_by_addr(addr)]

        if db_type in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr).get('country_code')

        raise GeoIPError(
            'Invalid database type, expected Country, City or Region')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
472
def country_code_by_name(self, hostname):
    """
    Returns 2-letter country code (e.g. 'US') for specified hostname.
    Use this method if you have a Country, Region, or City database.

    The hostname is resolved first and the result is passed to
    country_code_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
485
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name, or None when a City database holds no
        record for the address
    @rtype: str
    @raise GeoIPError: if the database type is unsupported or the
        lookup fails with a ValueError
    """
    try:
        VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in VALID_EDITIONS:
            country_id = self.id_by_addr(addr)
            return const.COUNTRY_NAMES[country_id]
        elif self._databaseType in const.CITY_EDITIONS:
            record = self.record_by_addr(addr)
            # record_by_addr returns None when there is no record;
            # calling .get on that would raise AttributeError.
            if record is None:
                return None
            return record.get('country_name')
        else:
            message = 'Invalid database type, expected Country or City'
            raise GeoIPError(message)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
508
def country_name_by_name(self, hostname):
    """
    Returns full country name for specified hostname.
    Use this method if you have a Country database.

    The hostname is resolved first and the result is passed to
    country_name_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
521
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: if the database type is unsupported or the
        lookup fails with a ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        org_editions = (const.ORG_EDITION, const.ISP_EDITION,
                        const.ASNUM_EDITION, const.ASNUM_EDITION_V6)
        if self._databaseType in org_editions:
            return self._get_org(ipnum)

        raise GeoIPError(
            'Invalid database type, expected Org, ISP or ASNum')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
545
def org_by_name(self, hostname):
    """
    Lookup the organization (or ISP) for hostname.
    Use this method if you have an Organization/ISP database.

    The hostname is resolved first and the result is passed to
    org_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    return self.org_by_addr(self._gethostbyname(hostname))
558
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary describing the location found for the address,
        or None when the database holds no record for it
    @rtype: dict
    @raise GeoIPError: if the database is not a City edition or the
        lookup fails with a ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            # An empty result is reported as None.
            return self._get_record(ipnum) or None

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
587
def record_by_name(self, hostname):
    """
    Look up the record for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the result is passed to
    record_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: whatever record_by_addr returns for the resolved address
    @rtype: dict
    """
    return self.record_by_addr(self._gethostbyname(hostname))
602
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: if the database is neither a Region nor a City
        edition, or the lookup fails with a ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.REGION_CITY_EDITIONS:
            return self._get_region(ipnum)

        raise GeoIPError('Invalid database type, expected Region or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
625
def region_by_name(self, hostname):
    """
    Lookup the region for given hostname.
    Use this method if you have a Region database.

    The hostname is resolved first and the result is passed to
    region_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: whatever region_by_addr returns for the resolved address
    @rtype: dict
    """
    return self.region_by_addr(self._gethostbyname(hostname))
638
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone, or None when the record has no time_zone entry
    @rtype: str
    @raise GeoIPError: if the database is not a City edition or the
        lookup fails with a ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            record = self._get_record(ipnum)
            return record.get('time_zone')

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
661
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the result is passed to
    time_zone_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    return self.time_zone_by_addr(self._gethostbyname(hostname))
674