Package pygeoip
[frames | no frames]

Source Code for Package pygeoip

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Pure Python GeoIP API 
  4   
  5  @author: Jennifer Ennis <zaylea@gmail.com> 
  6  @author: William Tisäter <william@defunct.cc> 
  7   
  8  @license: Copyright(C) 2004 MaxMind LLC 
  9   
 10  This program is free software: you can redistribute it and/or modify 
 11  it under the terms of the GNU Lesser General Public License as published by 
 12  the Free Software Foundation, either version 3 of the License, or 
 13  (at your option) any later version. 
 14   
 15  This program is distributed in the hope that it will be useful, 
 16  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  GNU General Public License for more details. 
 19   
 20  You should have received a copy of the GNU Lesser General Public License 
 21  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 22  """ 
 23   
 24  import os 
 25  import socket 
 26  import codecs 
 27  from math import floor 
 28  from threading import Lock 
 29   
 30  try: 
 31      import mmap 
 32  except ImportError:  # pragma: no cover 
 33      mmap = None 
 34   
 35  try: 
 36      from StringIO import StringIO 
 37      range = xrange  # Use xrange for Python 2 
 38  except ImportError: 
 39      from io import StringIO, BytesIO 
 40   
 41  from pygeoip import util, const 
 42  from pygeoip.const import PY2, PY3 
 43  from pygeoip.timezone import time_zone_by_country_and_region 
 44   
 45   
 46  STANDARD = const.STANDARD 
 47  MMAP_CACHE = const.MMAP_CACHE 
 48  MEMORY_CACHE = const.MEMORY_CACHE 
 49   
 50  ENCODING = const.ENCODING 
class GeoIPError(Exception):
    """Raised for a corrupt database or an unsupported database type."""
55
56 57 -class _GeoIPMetaclass(type):
58 _instances = {} 59 _instance_lock = Lock() 60
61 - def __call__(cls, *args, **kwargs):
62 """ Singleton method to gets an instance without reparsing 63 the database, the filename is being used as cache key. 64 """ 65 if len(args) > 0: 66 filename = args[0] 67 elif 'filename' in kwargs: 68 filename = kwargs['filename'] 69 else: 70 return None 71 72 if not kwargs.get('cache', True): 73 return super(_GeoIPMetaclass, cls).__call__(*args, **kwargs) 74 75 cls._instance_lock.acquire() 76 if filename not in cls._instances: 77 cls._instances[filename] = super(_GeoIPMetaclass, cls).__call__(*args, **kwargs) 78 cls._instance_lock.release() 79 80 return cls._instances[filename]
81
class GeoIP(_GeoIPMetaclass('_GeoIPBase', (object,), {})):
    """
    Reader for GeoIP binary databases (Country, Region, City, Org, ISP and
    ASNum editions).

    The metaclass is applied through an intermediate base class rather than
    a C{__metaclass__} attribute: Python 3 ignores that attribute, so the
    per-filename instance caching would otherwise only work on Python 2.
    """

    def __init__(self, filename, flags=0, cache=True):
        """
        Initialize the class.

        @param filename: Path to a geoip database.
        @type filename: str
        @param flags: Flags that affect how the database is processed.
            Currently supported flags are STANDARD (the default),
            MEMORY_CACHE (preload the whole file into memory) and
            MMAP_CACHE (access the file via mmap).
        @type flags: int
        @param cache: Used in tests to skip instance caching; it is read
            by the metaclass and not used by this method.
        @type cache: bool
        """
        self._flags = flags
        self._netmask = None

        if self._flags & const.MMAP_CACHE and mmap is None:  # pragma: no cover
            import warnings
            warnings.warn("MMAP_CACHE cannot be used without a mmap module")
            self._flags &= ~const.MMAP_CACHE

        if self._flags & const.MMAP_CACHE:
            f = codecs.open(filename, 'rb', ENCODING)
            try:
                self._fp = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            finally:
                # Close the temporary handle even if mapping fails.
                f.close()
            self._type = 'MMAP_CACHE'
        elif self._flags & const.MEMORY_CACHE:
            f = codecs.open(filename, 'rb', ENCODING)
            try:
                self._memory = f.read()
            finally:
                f.close()
            self._fp = self._str_to_fp(self._memory)
            self._type = 'MEMORY_CACHE'
        else:
            # The handle stays open: every lookup seeks and reads from it.
            self._fp = codecs.open(filename, 'rb', ENCODING)
            self._type = 'STANDARD'

        self._lock = Lock()
        self._setup_segments()

    @classmethod
    def _str_to_fp(cls, data):
        """
        Convert bytes data to file handle object

        @param data: string data
        @type data: str
        @return: file handle object
        @rtype: StringIO or BytesIO
        """
        return BytesIO(bytearray(data, ENCODING)) if PY3 else StringIO(data)

    @staticmethod
    def _decode_int(buf, start, length):
        """
        Decode a little-endian unsigned integer from a character buffer.

        @param buf: buffer whose items are single characters
        @type buf: str
        @param start: index of the least significant byte
        @type start: int
        @param length: number of bytes to combine
        @type length: int
        @return: decoded integer
        @rtype: int
        @raise IndexError: if the buffer is shorter than start + length
        """
        value = 0
        for j in range(length):
            value += ord(buf[start + j]) << (j * 8)
        return value

    def _setup_segments(self):
        """
        Parses the database file to determine what kind of database is
        being used and setup segment sizes and start points that will
        be used by the seek*() methods later.

        Supported databases:

        * COUNTRY_EDITION
        * COUNTRY_EDITION_V6
        * REGION_EDITION_REV0
        * REGION_EDITION_REV1
        * CITY_EDITION_REV0
        * CITY_EDITION_REV1
        * CITY_EDITION_REV1_V6
        * ORG_EDITION
        * ISP_EDITION
        * ASNUM_EDITION
        * ASNUM_EDITION_V6

        """
        self._databaseType = const.COUNTRY_EDITION
        self._recordLength = const.STANDARD_RECORD_LENGTH
        self._databaseSegments = const.COUNTRY_BEGIN

        # Three 0xFF characters mark the start of the structure info; the
        # marker is loop-invariant, so build it once.
        marker = chr(255) * 3
        if PY2:
            marker = marker.decode(ENCODING)

        segment_editions = (const.CITY_EDITION_REV0,
                            const.CITY_EDITION_REV1,
                            const.CITY_EDITION_REV1_V6,
                            const.ORG_EDITION,
                            const.ISP_EDITION,
                            const.ASNUM_EDITION,
                            const.ASNUM_EDITION_V6)
        long_records = (const.ORG_EDITION, const.ISP_EDITION)

        # The context manager releases the lock even if a read/seek fails.
        with self._lock:
            filepos = self._fp.tell()
            self._fp.seek(-3, os.SEEK_END)

            for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
                delim = self._fp.read(3)

                # ``bytes`` is ``str`` on Python 2, so this covers both the
                # Python 3 bytes case and the Python 2 byte-string case.
                if isinstance(delim, bytes):
                    delim = delim.decode(ENCODING)

                if delim != marker:
                    # Step back one byte relative to the previous window.
                    self._fp.seek(-4, os.SEEK_CUR)
                    continue

                self._databaseType = ord(self._fp.read(1))

                # Compatibility with databases from April 2003 and earlier
                if self._databaseType >= 106:
                    self._databaseType -= 105

                if self._databaseType == const.REGION_EDITION_REV0:
                    self._databaseSegments = const.STATE_BEGIN_REV0

                elif self._databaseType == const.REGION_EDITION_REV1:
                    self._databaseSegments = const.STATE_BEGIN_REV1

                elif self._databaseType in segment_editions:
                    buf = self._fp.read(const.SEGMENT_RECORD_LENGTH)

                    if PY3 and isinstance(buf, bytes):
                        buf = buf.decode(ENCODING)

                    self._databaseSegments = self._decode_int(
                        buf, 0, const.SEGMENT_RECORD_LENGTH)

                    if self._databaseType in long_records:
                        self._recordLength = const.ORG_RECORD_LENGTH
                break

            self._fp.seek(filepos, os.SEEK_SET)

    def _seek_country(self, ipnum):
        """
        Using the record length and appropriate start points, seek to the
        country that corresponds to the converted IP address integer.

        Also stores the netmask depth of the match in C{self._netmask}.

        @param ipnum: result of ip2long conversion
        @type ipnum: int
        @return: offset of start of record
        @rtype: int
        @raise GeoIPError: if no leaf is reached or a node cannot be read
        """
        try:
            offset = 0
            # More than 10 decimal digits cannot be a 32-bit value, so the
            # address is walked as 128 bits.
            seek_depth = 127 if len(str(ipnum)) > 10 else 31
            record_length = self._recordLength
            node_length = 2 * record_length

            for depth in range(seek_depth, -1, -1):
                start = node_length * offset
                if self._flags & const.MEMORY_CACHE:
                    buf = self._memory[start:start + node_length]
                else:
                    with self._lock:
                        self._fp.seek(start, os.SEEK_SET)
                        buf = self._fp.read(node_length)

                if PY3 and isinstance(buf, bytes):
                    buf = buf.decode(ENCODING)

                # Each node holds two pointers: [0] for a 0 bit, [1] for a
                # 1 bit. Both are decoded so a short node is always detected.
                left = self._decode_int(buf, 0, record_length)
                right = self._decode_int(buf, record_length, record_length)

                target = right if ipnum & (1 << depth) else left
                if target >= self._databaseSegments:
                    self._netmask = seek_depth - depth + 1
                    return target
                offset = target
        except (IndexError, UnicodeDecodeError):
            # A truncated or undecodable node is reported as corruption below.
            pass

        raise GeoIPError('Corrupt database')

    def _get_org(self, ipnum):
        """
        Seek and return organization or ISP name for ipnum.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: org/isp name, or None when there is no record
        @rtype: str
        """
        seek_org = self._seek_country(ipnum)
        if seek_org == self._databaseSegments:
            return None

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        with self._lock:
            self._fp.seek(seek_org + read_length, os.SEEK_SET)
            buf = self._fp.read(const.MAX_ORG_RECORD_LENGTH)

        if PY3 and isinstance(buf, bytes):
            buf = buf.decode(ENCODING)

        # The name is NUL-terminated inside the fixed-size read.
        return buf[:buf.index(chr(0))]

    def _get_region(self, ipnum):
        """
        Seek and return the region information.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict containing country_code and region_code
        @rtype: dict
        """
        region_code = None
        country_code = None
        seek_country = self._seek_country(ipnum)

        def get_region_code(offset):
            # Two uppercase letters encoding the offset in base 26.
            region1 = chr(offset // 26 + 65)
            region2 = chr(offset % 26 + 65)
            return ''.join([region1, region2])

        if self._databaseType == const.REGION_EDITION_REV0:
            seek_region = seek_country - const.STATE_BEGIN_REV0
            if seek_region >= 1000:
                country_code = 'US'
                region_code = get_region_code(seek_region - 1000)
            else:
                country_code = const.COUNTRY_CODES[seek_region]
        elif self._databaseType == const.REGION_EDITION_REV1:
            seek_region = seek_country - const.STATE_BEGIN_REV1
            if seek_region < const.US_OFFSET:
                pass
            elif seek_region < const.CANADA_OFFSET:
                country_code = 'US'
                region_code = get_region_code(seek_region - const.US_OFFSET)
            elif seek_region < const.WORLD_OFFSET:
                country_code = 'CA'
                region_code = get_region_code(seek_region - const.CANADA_OFFSET)
            else:
                index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
                # Look the code up by position. An ``index in COUNTRY_CODES``
                # test would check membership among the stored values, not
                # whether the index is valid. NOTE(review): COUNTRY_CODES is
                # presumably a sequence of code strings; this lookup is
                # correct for a sequence or a mapping.
                try:
                    country_code = const.COUNTRY_CODES[index]
                except (IndexError, KeyError):
                    pass
        elif self._databaseType in const.CITY_EDITIONS:
            rec = self._get_record(ipnum)
            region_code = rec.get('region_code')
            country_code = rec.get('country_code')

        return {'country_code': country_code, 'region_code': region_code}

    def _get_record(self, ipnum):
        """
        Populate location dict for converted IP.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict with city, region_code, area_code, time_zone,
            dma_code, metro_code, country_code3, latitude, postal_code,
            longitude, country_code, country_name, continent; an empty
            dict when there is no record
        @rtype: dict
        """
        seek_country = self._seek_country(ipnum)
        if seek_country == self._databaseSegments:
            return {}

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        with self._lock:
            self._fp.seek(seek_country + read_length, os.SEEK_SET)
            buf = self._fp.read(const.FULL_RECORD_LENGTH)

        if PY3 and isinstance(buf, bytes):
            buf = buf.decode(ENCODING)

        record = {
            'dma_code': 0,
            'area_code': 0,
            'metro_code': None,
            'postal_code': None
        }

        # The first byte indexes the country tables.
        char = ord(buf[0])
        record['country_code'] = const.COUNTRY_CODES[char]
        record['country_code3'] = const.COUNTRY_CODES3[char]
        record['country_name'] = const.COUNTRY_NAMES[char]
        record['continent'] = const.CONTINENT_NAMES[char]

        def read_data(buf, pos):
            # Return (index of the terminating NUL, field text or None).
            cur = pos
            while buf[cur] != '\0':
                cur += 1
            return cur, buf[pos:cur] if cur > pos else None

        offset, record['region_code'] = read_data(buf, 1)
        offset, record['city'] = read_data(buf, offset + 1)
        offset, record['postal_code'] = read_data(buf, offset + 1)
        offset = offset + 1

        latitude = self._decode_int(buf, offset, 3)
        longitude = self._decode_int(buf, offset + 3, 3)

        record['latitude'] = (latitude / 10000.0) - 180.0
        record['longitude'] = (longitude / 10000.0) - 180.0

        rev1_editions = (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6)
        if self._databaseType in rev1_editions:
            if record['country_code'] == 'US':
                # DMA and area code share one 3-byte value: dma * 1000 + area.
                dma_area = self._decode_int(buf, offset + 6, 3)

                record['dma_code'] = dma_area // 1000
                record['area_code'] = dma_area % 1000
                record['metro_code'] = const.DMA_MAP.get(record['dma_code'])

        params = (record['country_code'], record['region_code'])
        record['time_zone'] = time_zone_by_country_and_region(*params)

        return record

    def _gethostbyname(self, hostname):
        """
        Resolve a hostname to an address matching the database family.

        @param hostname: Hostname
        @type hostname: str
        @return: IPv6 address for IPv6 editions, otherwise IPv4 address
        @rtype: str
        """
        if self._databaseType in const.IPV6_EDITIONS:
            response = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
            family, socktype, proto, canonname, sockaddr = response[0]
            address, port, flow, scope = sockaddr
            return address
        else:
            return socket.gethostbyname(hostname)

    def _id_by_addr(self, addr):
        """
        Looks up the index for the country which is the key for the
        code and name.

        @param addr: IPv4 or IPv6 address
        @type addr: str
        @return: index into the country code/name tables
        @rtype: int
        @raise GeoIPError: if the address family does not match the database
        """
        ipv = 6 if ':' in addr else 4
        if ipv == 4 and self._databaseType != const.COUNTRY_EDITION:
            raise GeoIPError('Invalid database type; expected IPv6 address')
        if ipv == 6 and self._databaseType != const.COUNTRY_EDITION_V6:
            raise GeoIPError('Invalid database type; expected IPv4 address')

        ipnum = util.ip2long(addr)
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    def last_netmask(self):
        """
        Return the netmask depth of the last lookup.

        @return: network depth
        @rtype: int
        """
        return self._netmask

    def country_code_by_addr(self, addr):
        """
        Returns 2-letter country code (e.g. 'US') for specified IP address.
        Use this method if you have a Country, Region, or City database.

        @param addr: IP address
        @type addr: str
        @return: 2-letter country code
        @rtype: str
        @raise GeoIPError: if the database is of another type
        """
        VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in VALID_EDITIONS:
            country_id = self._id_by_addr(addr)
            return const.COUNTRY_CODES[country_id]
        elif self._databaseType in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr).get('country_code')

        raise GeoIPError('Invalid database type, expected Country, City or Region')

    def country_code_by_name(self, hostname):
        """
        Returns 2-letter country code (e.g. 'US') for specified hostname.
        Use this method if you have a Country, Region, or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: 2-letter country code
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_code_by_addr(addr)

    def country_name_by_addr(self, addr):
        """
        Returns full country name for specified IP address.
        Use this method if you have a Country or City database.

        @param addr: IP address
        @type addr: str
        @return: country name
        @rtype: str
        @raise GeoIPError: if the database is of another type
        """
        VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in VALID_EDITIONS:
            country_id = self._id_by_addr(addr)
            return const.COUNTRY_NAMES[country_id]
        elif self._databaseType in const.CITY_EDITIONS:
            return self.record_by_addr(addr).get('country_name')
        else:
            message = 'Invalid database type, expected Country or City'
            raise GeoIPError(message)

    def country_name_by_name(self, hostname):
        """
        Returns full country name for specified hostname.
        Use this method if you have a Country or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: country name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_name_by_addr(addr)

    def org_by_addr(self, addr):
        """
        Lookup Organization, ISP or ASNum for given IP address.
        Use this method if you have an Organization, ISP or ASNum database.

        @param addr: IP address
        @type addr: str
        @return: organization or ISP name
        @rtype: str
        @raise GeoIPError: if the database is of another type
        """
        valid = (const.ORG_EDITION, const.ISP_EDITION,
                 const.ASNUM_EDITION, const.ASNUM_EDITION_V6)
        if self._databaseType not in valid:
            message = 'Invalid database type, expected Org, ISP or ASNum'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_org(ipnum)

    def org_by_name(self, hostname):
        """
        Lookup the organization (or ISP) for hostname.
        Use this method if you have an Organization/ISP database.

        @param hostname: Hostname
        @type hostname: str
        @return: Organization or ISP name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.org_by_addr(addr)

    def record_by_addr(self, addr):
        """
        Look up the record for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary with country_code, country_code3, country_name,
            region, city, postal_code, latitude, longitude, dma_code,
            metro_code, area_code, region_code, time_zone; None when there
            is no record
        @rtype: dict
        @raise GeoIPError: if the database is not a City edition
        """
        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        # An empty record means "not found" and is reported as None.
        return self._get_record(ipnum) or None

    def record_by_name(self, hostname):
        """
        Look up the record for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary with country_code, country_code3, country_name,
            region, city, postal_code, latitude, longitude, dma_code,
            metro_code, area_code, region_code, time_zone
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.record_by_addr(addr)

    def region_by_addr(self, addr):
        """
        Lookup the region for given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary containing country_code and region_code
        @rtype: dict
        @raise GeoIPError: if the database is of another type
        """
        if self._databaseType not in const.REGION_CITY_EDITIONS:
            message = 'Invalid database type, expected Region or City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_region(ipnum)

    def region_by_name(self, hostname):
        """
        Lookup the region for given hostname.
        Use this method if you have a Region or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary containing country_code and region_code
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.region_by_addr(addr)

    def time_zone_by_addr(self, addr):
        """
        Look up the time zone for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Time zone, or None when there is no record
        @rtype: str
        @raise GeoIPError: if the database is not a City edition
        """
        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_record(ipnum).get('time_zone')

    def time_zone_by_name(self, hostname):
        """
        Look up the time zone for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Time zone
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.time_zone_by_addr(addr)
644