Package pygeoip
[frames | no frames]

Source Code for Package pygeoip

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Pure Python GeoIP API 
  4   
  5  @author: Jennifer Ennis <zaylea@gmail.com> 
  6  @author: William Tisäter <william@defunct.cc> 
  7   
  8  @license: Copyright(C) 2004 MaxMind LLC 
  9   
 10  This program is free software: you can redistribute it and/or modify 
 11  it under the terms of the GNU Lesser General Public License as published by 
 12  the Free Software Foundation, either version 3 of the License, or 
 13  (at your option) any later version. 
 14   
 15  This program is distributed in the hope that it will be useful, 
 16  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  GNU General Public License for more details. 
 19   
 20  You should have received a copy of the GNU Lesser General Public License 
 21  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 22  """ 
 23   
 24  import os 
 25  import math 
 26  import socket 
 27  import codecs 
 28  from threading import Lock 
 29   
 30  try: 
 31      import mmap 
 32  except ImportError: 
 33      mmap = None 
 34   
 35  try: 
 36      from StringIO import StringIO 
 37  except ImportError: 
 38      from io import StringIO, BytesIO 
 39   
 40  from pygeoip import util, const 
 41  from pygeoip.const import PY2, PY3 
 42  from pygeoip.timezone import time_zone_by_country_and_region 
 43   
 44   
 45  STANDARD = const.STANDARD 
 46  MMAP_CACHE = const.MMAP_CACHE 
 47  MEMORY_CACHE = const.MEMORY_CACHE 
 48   
 49  ENCODING = const.ENCODING 
class GeoIPError(Exception):
    """
    Error raised by this package, e.g. for a corrupt database or when a
    lookup is attempted against an unsuitable database type.
    """
54
55 56 -class _GeoIPMetaclass(type):
57 _instances = {} 58 _instance_lock = Lock() 59
60 - def __call__(cls, *args, **kwargs):
61 """ Singleton method to gets an instance without reparsing 62 the database, the filename is being used as cache key. 63 """ 64 if len(args) > 0: 65 filename = args[0] 66 elif 'filename' in kwargs: 67 filename = kwargs['filename'] 68 else: 69 return None 70 71 if not kwargs.get('cache', True): 72 return super(_GeoIPMetaclass, cls).__call__(*args, **kwargs) 73 74 cls._instance_lock.acquire() 75 if filename not in cls._instances: 76 cls._instances[filename] = super(_GeoIPMetaclass, cls).__call__(*args, **kwargs) 77 cls._instance_lock.release() 78 79 return cls._instances[filename]
80
class GeoIP(object):
    """
    Reader for a GeoIP database file.

    Lookups are available by IP address (``*_by_addr``) and by hostname
    (``*_by_name``); which lookups are valid depends on the database
    edition detected when the file is opened.
    """
    # NOTE(review): a ``__metaclass__`` class attribute is only honoured
    # by Python 2; Python 3 ignores it, so instance caching is not applied
    # there. Left unchanged to avoid altering construction semantics.
    __metaclass__ = _GeoIPMetaclass

    def __init__(self, filename, flags=0, cache=True):
        """
        Initialize the class.

        @param filename: Path to a geoip database.
        @type filename: str
        @param flags: Flags that affect how the database is processed.
            Currently supported flags are STANDARD (the default),
            MEMORY_CACHE (preload the whole file into memory) and
            MMAP_CACHE (access the file via mmap).
        @type flags: int
        @param cache: Used in tests to skip instance caching. It is read
            by the metaclass and not used by this method.
        @type cache: bool
        """
        self._flags = flags

        if self._flags & const.MMAP_CACHE and mmap is None:
            import warnings
            warnings.warn("MMAP_CACHE cannot be used without a mmap module")
            self._flags &= ~const.MMAP_CACHE

        if self._flags & const.MMAP_CACHE:
            f = codecs.open(filename, 'rb', ENCODING)
            try:
                self._fp = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            finally:
                # Close the source handle whether or not mapping succeeded.
                f.close()
            self._type = 'MMAP_CACHE'
        elif self._flags & const.MEMORY_CACHE:
            f = codecs.open(filename, 'rb', ENCODING)
            try:
                self._memory = f.read()
            finally:
                f.close()
            self._fp = self._str_to_fp(self._memory)
            self._type = 'MEMORY_CACHE'
        else:
            self._fp = codecs.open(filename, 'rb', ENCODING)
            self._type = 'STANDARD'

        self._lock = Lock()
        try:
            self._setup_segments()
        except Exception:
            # Do not leak the open handle when the file cannot be parsed.
            self._fp.close()
            raise

    @classmethod
    def _str_to_fp(cls, data):
        """
        Convert string data to a file handle object.

        @param data: string data
        @type data: str
        @return: file handle object
        @rtype: StringIO or BytesIO
        """
        return BytesIO(bytearray(data, ENCODING)) if PY3 else StringIO(data)

    @staticmethod
    def _to_text(buf):
        """
        Return buf decoded with ENCODING when running on Python 3 and buf
        is a bytes object; otherwise return buf unchanged.
        """
        if PY3 and isinstance(buf, bytes):
            return buf.decode(ENCODING)
        return buf

    @staticmethod
    def _le_int(buf, start, length):
        """
        Decode ``length`` characters of buf, beginning at ``start``, as a
        little-endian unsigned integer (one byte value per character).

        Raises IndexError when buf is too short.
        """
        value = 0
        for j in range(length):
            value += ord(buf[start + j]) << (j * 8)
        return value

    @staticmethod
    def _read_cstring(buf, start):
        """
        Read a NUL-terminated string from buf beginning at ``start``.

        @return: tuple of (index of the terminating NUL, string contents)
        Raises IndexError when no terminator is found before the end.
        """
        end = start
        while ord(buf[end]) != 0:
            end += 1
        return end, buf[start:end]

    @staticmethod
    def _region_name(offset):
        """
        Convert a numeric offset into a two-letter code, 'AA' for 0,
        'AB' for 1 and so on (base 26 using upper-case letters).
        """
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    def _setup_segments(self):
        """
        Parses the database file to determine what kind of database is
        being used and setup segment sizes and start points that will
        be used by the seek*() methods later.

        Supported databases:

        * COUNTRY_EDITION
        * COUNTRY_EDITION_V6
        * REGION_EDITION_REV0
        * REGION_EDITION_REV1
        * CITY_EDITION_REV0
        * CITY_EDITION_REV1
        * CITY_EDITION_REV1_V6
        * ORG_EDITION
        * ISP_EDITION
        * ASNUM_EDITION
        * ASNUM_EDITION_V6

        """
        # Defaults used when no structure-info marker is found.
        self._databaseType = const.COUNTRY_EDITION
        self._recordLength = const.STANDARD_RECORD_LENGTH
        self._databaseSegments = const.COUNTRY_BEGIN

        # The marker searched for is three characters of value 255.
        marker = chr(255) * 3
        if PY2:
            marker = marker.decode(ENCODING)

        segmented_editions = (const.CITY_EDITION_REV0,
                              const.CITY_EDITION_REV1,
                              const.CITY_EDITION_REV1_V6,
                              const.ORG_EDITION,
                              const.ISP_EDITION,
                              const.ASNUM_EDITION,
                              const.ASNUM_EDITION_V6)
        long_records = (const.ORG_EDITION, const.ISP_EDITION)

        with self._lock:
            filepos = self._fp.tell()
            self._fp.seek(-3, os.SEEK_END)

            # Scan backwards from the end of the file, one position per
            # iteration, for at most STRUCTURE_INFO_MAX_SIZE positions.
            for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
                delim = self._fp.read(3)

                if PY3 and isinstance(delim, bytes):
                    delim = delim.decode(ENCODING)
                if PY2 and isinstance(delim, str):
                    delim = delim.decode(ENCODING)

                if delim != marker:
                    # Step back over the 3 characters just read plus one.
                    self._fp.seek(-4, os.SEEK_CUR)
                    continue

                # The character after the marker is the database type.
                self._databaseType = ord(self._fp.read(1))

                # Compatibility with databases from April 2003 and earlier
                if self._databaseType >= 106:
                    self._databaseType -= 105

                if self._databaseType == const.REGION_EDITION_REV0:
                    self._databaseSegments = const.STATE_BEGIN_REV0

                elif self._databaseType == const.REGION_EDITION_REV1:
                    self._databaseSegments = const.STATE_BEGIN_REV1

                elif self._databaseType in segmented_editions:
                    buf = self._fp.read(const.SEGMENT_RECORD_LENGTH)
                    buf = self._to_text(buf)
                    self._databaseSegments = self._le_int(
                        buf, 0, const.SEGMENT_RECORD_LENGTH)

                    if self._databaseType in long_records:
                        self._recordLength = const.ORG_RECORD_LENGTH
                break

            self._fp.seek(filepos, os.SEEK_SET)

    def _seek_country(self, ipnum):
        """
        Using the record length and appropriate start points, seek to the
        country that corresponds to the converted IP address integer.

        @param ipnum: result of ip2long conversion
        @type ipnum: int
        @return: offset of start of record
        @rtype: int
        @raise GeoIPError: if no record is reached or the data cannot be
            read or decoded
        """
        record_length = self._recordLength
        node_size = 2 * record_length

        try:
            offset = 0
            # 127 bit positions are walked when the decimal form of ipnum
            # has more than 10 digits, otherwise 31.
            # NOTE(review): this is a digit-count heuristic; a numerically
            # small IPv6 address would be walked with depth 31 - confirm.
            seek_depth = 127 if len(str(ipnum)) > 10 else 31

            for depth in range(seek_depth, -1, -1):
                start = node_size * offset
                if self._flags & const.MEMORY_CACHE:
                    buf = self._memory[start:start + node_size]
                else:
                    with self._lock:
                        self._fp.seek(start, os.SEEK_SET)
                        buf = self._fp.read(node_size)

                buf = self._to_text(buf)

                # Each node holds two little-endian pointers; the second
                # is followed when the current bit of ipnum is set.
                left = self._le_int(buf, 0, record_length)
                right = self._le_int(buf, record_length, record_length)
                pointer = right if ipnum & (1 << depth) else left

                if pointer >= self._databaseSegments:
                    return pointer
                offset = pointer
        except (IndexError, UnicodeDecodeError):
            pass

        raise GeoIPError('Corrupt database')

    def _get_org(self, ipnum):
        """
        Seek and return organization or ISP name for ipnum.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: org/isp name, or None when the lookup ends exactly at
            the segment boundary
        @rtype: str
        """
        seek_org = self._seek_country(ipnum)
        if seek_org == self._databaseSegments:
            return None

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        with self._lock:
            self._fp.seek(seek_org + read_length, os.SEEK_SET)
            buf = self._fp.read(const.MAX_ORG_RECORD_LENGTH)

        buf = self._to_text(buf)

        # The name runs up to the first NUL character.
        return buf[:buf.index(chr(0))]

    def _get_region(self, ipnum):
        """
        Seek and return the region info (dict containing country_code
        and region_name).

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict containing country_code and region_name
        @rtype: dict
        """
        region = ''
        country_code = ''

        if self._databaseType == const.REGION_EDITION_REV0:
            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV0
            if seek_region >= 1000:
                country_code = 'US'
                region = self._region_name(seek_region - 1000)
            else:
                country_code = const.COUNTRY_CODES[seek_region]
        elif self._databaseType == const.REGION_EDITION_REV1:
            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV1
            if seek_region < const.US_OFFSET:
                pass
            elif seek_region < const.CANADA_OFFSET:
                country_code = 'US'
                region = self._region_name(seek_region - const.US_OFFSET)
            elif seek_region < const.WORLD_OFFSET:
                country_code = 'CA'
                region = self._region_name(seek_region - const.CANADA_OFFSET)
            else:
                index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
                # COUNTRY_CODES is indexed by integer elsewhere in this
                # class, so look the index up directly and treat a miss
                # as "unknown" rather than testing membership of an int.
                try:
                    country_code = const.COUNTRY_CODES[index]
                except (IndexError, KeyError):
                    pass
        elif self._databaseType in const.CITY_EDITIONS:
            # _get_record performs its own seek, so none is done here.
            rec = self._get_record(ipnum)
            region = rec.get('region_name', '')
            country_code = rec.get('country_code', '')

        return {'country_code': country_code, 'region_name': region}

    def _get_record(self, ipnum):
        """
        Populate location dict for converted IP.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude,
            longitude, dma_code, metro_code, area_code, time_zone;
            an empty dict when the lookup ends exactly at the segment
            boundary
        @rtype: dict
        """
        seek_country = self._seek_country(ipnum)
        if seek_country == self._databaseSegments:
            return {}

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        with self._lock:
            self._fp.seek(seek_country + read_length, os.SEEK_SET)
            buf = self._fp.read(const.FULL_RECORD_LENGTH)

        buf = self._to_text(buf)

        record = {
            'dma_code': 0,
            'area_code': 0,
            'metro_code': '',
            'postal_code': ''
        }

        # Get country: the first character is an index into the tables.
        country = ord(buf[0])
        record['country_code'] = const.COUNTRY_CODES[country]
        record['country_code3'] = const.COUNTRY_CODES3[country]
        record['country_name'] = const.COUNTRY_NAMES[country]
        record['continent'] = const.CONTINENT_NAMES[country]

        # Three consecutive NUL-terminated strings follow.
        buf_pos = 1
        for key in ('region_name', 'city', 'postal_code'):
            end, record[key] = self._read_cstring(buf, buf_pos)
            buf_pos = end + 1

        # Latitude and longitude are 3-character little-endian integers.
        latitude = self._le_int(buf, buf_pos, 3)
        buf_pos += 3
        longitude = self._le_int(buf, buf_pos, 3)
        buf_pos += 3

        record['latitude'] = (latitude / 10000.0) - 180.0
        record['longitude'] = (longitude / 10000.0) - 180.0

        rev1_editions = (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6)
        if self._databaseType in rev1_editions:
            if record['country_code'] == 'US':
                # One more 3-character integer packs dma and area codes.
                dmaarea_combo = self._le_int(buf, buf_pos, 3)
                record['dma_code'] = dmaarea_combo // 1000
                record['area_code'] = dmaarea_combo % 1000

        record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
        params = (record['country_code'], record['region_name'])
        record['time_zone'] = time_zone_by_country_and_region(*params)

        return record

    def _gethostbyname(self, hostname):
        """
        Resolve hostname to an address string.

        For IPv6 database editions the first AF_INET6 result of
        getaddrinfo is used and an empty string is returned when
        resolution fails; otherwise socket.gethostbyname is used.

        @param hostname: Hostname
        @type hostname: str
        @return: IP address
        @rtype: str
        """
        if self._databaseType in const.IPV6_EDITIONS:
            try:
                response = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
                family, socktype, proto, canonname, sockaddr = response[0]
                address, port, flow, scope = sockaddr
                return address
            except socket.gaierror:
                return ''
        else:
            return socket.gethostbyname(hostname)

    def _id_by_addr(self, addr):
        """
        Looks up the index for the country which is the key for the
        code and name.

        @param addr: IPv4 or IPv6 address
        @type addr: str
        @return: country index (seek result minus COUNTRY_BEGIN)
        @rtype: int
        @raise GeoIPError: if the database is not a Country edition or
            the address family does not match the database
        """
        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType not in country_editions:
            raise GeoIPError('Invalid database type, expected Country')

        # An address containing a colon is treated as IPv6.
        ipv = 6 if ':' in addr else 4
        if ipv == 4 and self._databaseType != const.COUNTRY_EDITION:
            raise GeoIPError('Invalid database type; expected IPv6 address')
        if ipv == 6 and self._databaseType != const.COUNTRY_EDITION_V6:
            raise GeoIPError('Invalid database type; expected IPv4 address')

        ipnum = util.ip2long(addr)
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    def country_code_by_addr(self, addr):
        """
        Returns 2-letter country code (e.g. 'US') for specified IP address.
        Use this method if you have a Country, Region, or City database.

        @param addr: IP address
        @type addr: str
        @return: 2-letter country code
        @rtype: str
        @raise GeoIPError: for any other database type
        """
        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in country_editions:
            country_id = self._id_by_addr(addr)
            return const.COUNTRY_CODES[country_id]
        elif self._databaseType in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr).get('country_code')

        raise GeoIPError('Invalid database type, expected Country, City or Region')

    def country_code_by_name(self, hostname):
        """
        Returns 2-letter country code (e.g. 'US') for specified hostname.
        Use this method if you have a Country, Region, or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: 2-letter country code
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_code_by_addr(addr)

    def country_name_by_addr(self, addr):
        """
        Returns full country name for specified IP address.
        Use this method if you have a Country or City database.

        @param addr: IP address
        @type addr: str
        @return: country name
        @rtype: str
        @raise GeoIPError: for any other database type
        """
        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in country_editions:
            country_id = self._id_by_addr(addr)
            return const.COUNTRY_NAMES[country_id]
        elif self._databaseType in const.CITY_EDITIONS:
            return self.record_by_addr(addr).get('country_name')
        else:
            message = 'Invalid database type, expected Country or City'
            raise GeoIPError(message)

    def country_name_by_name(self, hostname):
        """
        Returns full country name for specified hostname.
        Use this method if you have a Country or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: country name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_name_by_addr(addr)

    def org_by_addr(self, addr):
        """
        Lookup Organization, ISP or ASNum for given IP address.
        Use this method if you have an Organization, ISP or ASNum database.

        @param addr: IP address
        @type addr: str
        @return: organization or ISP name
        @rtype: str
        @raise GeoIPError: for any other database type
        """
        valid = (const.ORG_EDITION, const.ISP_EDITION,
                 const.ASNUM_EDITION, const.ASNUM_EDITION_V6)
        if self._databaseType not in valid:
            message = 'Invalid database type, expected Org, ISP or ASNum'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_org(ipnum)

    def org_by_name(self, hostname):
        """
        Lookup the organization (or ISP) for hostname.
        Use this method if you have an Organization/ISP database.

        @param hostname: Hostname
        @type hostname: str
        @return: Organization or ISP name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.org_by_addr(addr)

    def record_by_addr(self, addr):
        """
        Look up the record for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude,
            longitude, dma_code, metro_code, area_code, time_zone;
            None when no record is found
        @rtype: dict
        @raise GeoIPError: if the database is not a City edition
        """
        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        rec = self._get_record(ipnum)
        # An empty dict means "no record"; report that as None.
        return rec or None

    def record_by_name(self, hostname):
        """
        Look up the record for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude,
            longitude, dma_code, metro_code, area_code, time_zone;
            None when no record is found
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.record_by_addr(addr)

    def region_by_addr(self, addr):
        """
        Lookup the region for given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary containing country_code and region_name
        @rtype: dict
        @raise GeoIPError: for any other database type
        """
        if self._databaseType not in const.REGION_CITY_EDITIONS:
            message = 'Invalid database type, expected Region or City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_region(ipnum)

    def region_by_name(self, hostname):
        """
        Lookup the region for given hostname.
        Use this method if you have a Region or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary containing country_code and region_name
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.region_by_addr(addr)

    def time_zone_by_addr(self, addr):
        """
        Look up the time zone for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Time zone, or None when no record is found
        @rtype: str
        @raise GeoIPError: if the database is not a City edition
        """
        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        ipnum = util.ip2long(addr)
        return self._get_record(ipnum).get('time_zone')

    def time_zone_by_name(self, hostname):
        """
        Look up the time zone for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Time zone
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.time_zone_by_addr(addr)
653