1
2 """
3 Pure Python GeoIP API
4
5 The API is based on U{MaxMind's C-based Python
6 API<http://www.maxmind.com/app/python>}, but the code itself is based on
7 the U{pure PHP5 API<http://pear.php.net/package/Net_GeoIP/>} by Jim Winstead
8 and Hans Lellelid.
9
10 It is mostly a drop-in replacement, except the C{new} and C{open} methods
11 are gone. You should instantiate the L{GeoIP} class yourself:
12
13 C{gi = GeoIP('/path/to/GeoIP.dat', pygeoip.MEMORY_CACHE)}
14
15 @author: Jennifer Ennis <zaylea at gmail dot com>
16 @author: William Tisäter <william@defunct.cc>
17
18 @license:
19 Copyright(C) 2004 MaxMind LLC
20
21 This program is free software: you can redistribute it and/or modify
22 it under the terms of the GNU Lesser General Public License as published by
23 the Free Software Foundation, either version 3 of the License, or
24 (at your option) any later version.
25
26 This program is distributed in the hope that it will be useful,
27 but WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 GNU General Public License for more details.
30
31 You should have received a copy of the GNU Lesser General Public License
32 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
33 """
34
35 import os
36 import math
37 import socket
38 import mmap
39 import codecs
40 from threading import Lock
41
42 try:
43 from StringIO import StringIO
44 except ImportError:
45 from io import StringIO
46
47 import pygeoip.const
48 from pygeoip import util
49 from pygeoip.const import PY2, PY3
50 from pygeoip.timezone import time_zone_by_country_and_region
51
52
# Re-export the cache-mode flags at module level so callers can pass them to
# the constructor without importing the constants module themselves.
# NOTE(review): `const` is bound here by `import pygeoip.const` only if this
# module is the `pygeoip` package itself (submodule imports set an attribute
# on the parent package) -- confirm, otherwise add an explicit import.
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE
STANDARD = const.STANDARD
56
57
60
61
83
84
# Build the base class by calling the metaclass directly with
# (name, bases, namespace).
# NOTE(review): GeoIPMetaclass is defined outside the visible part of this
# file; presumably this form is used to stay source-compatible with both
# Python 2 and Python 3 -- confirm.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
86
87
def __init__(self, filename, flags=STANDARD):
    """
    Initialize the class.

    Opens the database in the way selected by C{flags}, creates the lock
    that serializes access to the shared file handle and finally calls
    C{self._setup_segments()}.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    """
    self._filename = filename
    self._flags = flags

    if self._flags & const.MMAP_CACHE:
        # The temporary file object is only needed to create the mapping.
        # `with` closes it right after mmap() returns and, unlike a manual
        # close(), also when mmap() raises (e.g. on an empty file).
        with open(filename, 'rb') as db_file:
            self._filehandle = mmap.mmap(db_file.fileno(), 0,
                                         access=mmap.ACCESS_READ)

    elif self._flags & const.MEMORY_CACHE:
        # Keep the raw contents for direct slicing and also expose them
        # through a file-like handle for the seek()/read() code paths.
        with open(filename, 'rb') as db_file:
            self._memoryBuffer = db_file.read()
        self._filehandle = StringIO(self._memoryBuffer)

    else:
        # iso-8859-1 maps every byte to exactly one character, so the
        # decoded text can still be indexed like the raw file.
        self._filehandle = codecs.open(filename, 'rb', 'iso-8859-1')

    self._lock = Lock()
    self._setup_segments()
121
185
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    The lookup walks the node table at the start of the database one
    address bit at a time, most significant bit first. Every node holds
    two pointers of C{self._recordLength} bytes each; the first is taken
    for a 0 bit and the second for a 1 bit. A pointer that is greater
    than or equal to C{self._databaseSegments} ends the walk and is
    returned.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if all bits are consumed without reaching such a
        pointer ('Corrupt database')
    """
    record_length = self._recordLength
    node_length = 2 * record_length
    segments = self._databaseSegments
    in_memory = self._flags & const.MEMORY_CACHE

    # Values whose decimal form is longer than 10 digits are walked as
    # 128-bit addresses, everything else as 32-bit addresses.
    seek_depth = 127 if len(str(ipnum)) > 10 else 31

    offset = 0
    for depth in range(seek_depth, -1, -1):
        start = node_length * offset
        if in_memory:
            buf = self._memoryBuffer[start:start + node_length]
        else:
            # `with` releases the lock even when seek()/read() raises, so
            # one failed lookup cannot block every later caller.
            with self._lock:
                self._filehandle.seek(start, os.SEEK_SET)
                buf = self._filehandle.read(node_length)

        # Decode only the pointer for the branch selected by this bit.
        # Bytes are stored least significant first.
        base = record_length if ipnum & (1 << depth) else 0
        pointer = 0
        for j in range(record_length):
            pointer += ord(buf[base + j]) << (j * 8)

        if pointer >= segments:
            return pointer
        offset = pointer

    raise GeoIPError('Corrupt database')
226
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the lookup ends exactly on
        C{self._databaseSegments}
    @rtype: str
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # `with` releases the lock even when seek()/read() raises.
    with self._lock:
        self._filehandle.seek(seek_org + read_length, os.SEEK_SET)
        org_buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH)

    # The name ends at the first NUL character; index() raises ValueError
    # when the buffer contains none.
    return org_buf[:org_buf.index(chr(0))]
246
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    For database types that are neither a Region nor a City edition both
    values are returned as empty strings.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name
    @rtype: dict
    """
    def get_region_name(offset):
        # Encode the offset as two upper-case letters (base 26, 'A' == 0).
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    country_code = ''
    region = ''

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]

    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # Look the index up directly. The previous
            # `index in const.COUNTRY_CODES` test checked the container's
            # values, not its valid positions.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass

    elif self._databaseType in const.CITY_EDITIONS:
        # _get_record() does its own tree walk and may return None when
        # there is no record for the address.
        rec = self._get_record(ipnum) or {}
        country_code = rec.get('country_code', '')
        region = rec.get('region_name', '')

    return {'country_code': country_code, 'region_name': region}
293
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with country_code, country_code3, country_name,
        region_name, city, postal_code, latitude, longitude,
        dma_code, metro_code, area_code, time_zone; None when the lookup
        ends exactly on C{self._databaseSegments}
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # `with` releases the lock even when seek()/read() raises.
    with self._lock:
        self._filehandle.seek(seek_country + read_length, os.SEEK_SET)
        record_buf = self._filehandle.read(const.FULL_RECORD_LENGTH)

    def read_string(start):
        # Return (position of the terminating NUL, text in front of it).
        end = start
        while ord(record_buf[end]) != 0:
            end += 1
        return end, record_buf[start:end]

    def read_uint24(start):
        # Three bytes, least significant byte first.
        return sum(ord(record_buf[start + j]) << (j * 8) for j in range(3))

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': '',
        'postal_code': '',
    }

    # The first byte indexes the country tables.
    country_index = ord(record_buf[0])
    record['country_code'] = const.COUNTRY_CODES[country_index]
    record['country_code3'] = const.COUNTRY_CODES3[country_index]
    record['country_name'] = const.COUNTRY_NAMES[country_index]

    # Three consecutive NUL-terminated strings follow; each one starts
    # right after the terminator of the previous one.
    pos, record['region_name'] = read_string(1)
    pos, record['city'] = read_string(pos + 1)
    pos, record['postal_code'] = read_string(pos + 1)
    pos += 1

    # Coordinates are stored as value * 10000, shifted by +180 degrees.
    record['latitude'] = (read_uint24(pos) / 10000.0) - 180.0
    record['longitude'] = (read_uint24(pos + 3) / 10000.0) - 180.0
    pos += 6

    if (self._databaseType == const.CITY_EDITION_REV1
            and record['country_code'] == 'US'):
        # One combined integer: dma_code * 1000 + area_code.
        dmaarea_combo = read_uint24(pos)
        record['dma_code'] = dmaarea_combo // 1000
        record['area_code'] = dmaarea_combo % 1000

    if record['dma_code'] in const.DMA_MAP:
        record['metro_code'] = const.DMA_MAP[record['dma_code']]

    record['time_zone'] = time_zone_by_country_and_region(
        record['country_code'], record['region_name'])

    return record
379
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an address of the family the database stores.

    @param hostname: Hostname
    @type hostname: str
    @return: IPv6 address for databases listed in const.IPV6_EDITIONS
        (empty string if that resolution fails), otherwise the result of
        socket.gethostbyname()
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        response = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
    except socket.gaierror:
        return ''

    # Each entry is (family, type, proto, canonname, sockaddr); for
    # AF_INET6 the sockaddr starts with the address string.
    return response[0][4][0]
391
def id_by_addr(self, addr):
    """
    Get the country index.
    Looks up the index for the country which is the key for
    the code and name.

    @param addr: The IP address
    @type addr: str
    @return: country index (the seek result minus const.COUNTRY_BEGIN)
    @rtype: int
    @raise ValueError: if util.ip2long() yields a false value for addr
    @raise GeoIPError: if the database is not a Country edition
    """
    ipnum = util.ip2long(addr)
    if not ipnum:
        raise ValueError("Invalid IP address: %s" % addr)

    country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType not in country_editions:
        message = 'Invalid database type, expected Country'
        raise GeoIPError(message)

    return self._seek_country(ipnum) - const.COUNTRY_BEGIN
413
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: if the database type is not supported, or if a
        ValueError occurs during the lookup (this includes an address
        family that does not match a Country database)
    """
    try:
        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in country_editions:
            # A colon can only appear in an IPv6 address.
            is_ipv6 = ':' in addr

            if not is_ipv6 and self._databaseType != const.COUNTRY_EDITION:
                message = 'Invalid database type; expected IPv6 address'
                raise ValueError(message)
            if is_ipv6 and self._databaseType != const.COUNTRY_EDITION_V6:
                message = 'Invalid database type; expected IPv4 address'
                raise ValueError(message)

            return const.COUNTRY_CODES[self.id_by_addr(addr)]

        if self._databaseType in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr)['country_code']

        message = 'Invalid database type, expected Country, City or Region'
        raise GeoIPError(message)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
446
def country_code_by_name(self, hostname):
    """
    Returns 2-letter country code (e.g. 'US') for specified hostname.
    Use this method if you have a Country, Region, or City database.

    The hostname is resolved with C{self._gethostbyname()} and the result
    is passed to C{self.country_code_by_addr()}.

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
459
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name; None if a City database holds no record for
        the address
    @rtype: str
    @raise GeoIPError: if the database is neither a Country nor a City
        edition, or if a ValueError occurs during the lookup
    """
    try:
        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType in country_editions:
            return const.COUNTRY_NAMES[self.id_by_addr(addr)]

        if self._databaseType in const.CITY_EDITIONS:
            # record_by_addr() can return None; subscripting that would
            # raise TypeError instead of reporting "no data".
            record = self.record_by_addr(addr)
            return record['country_name'] if record else None

        message = 'Invalid database type, expected Country or City'
        raise GeoIPError(message)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
481
def country_name_by_name(self, hostname):
    """
    Returns full country name for specified hostname.
    Use this method if you have a Country database.

    The hostname is resolved with C{self._gethostbyname()} and the result
    is passed to C{self.country_name_by_addr()}.

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
494
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name (whatever C{self._get_org()}
        returns, which may be None)
    @rtype: str
    @raise GeoIPError: if the database is not an Org, ISP or ASNum
        edition, or if a ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        org_editions = (const.ORG_EDITION, const.ISP_EDITION,
                        const.ASNUM_EDITION)
        if self._databaseType not in org_editions:
            message = 'Invalid database type, expected Org, ISP or ASNum'
            raise GeoIPError(message)

        return self._get_org(ipnum)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
518
def org_by_name(self, hostname):
    """
    Lookup the organization (or ISP) for hostname.
    Use this method if you have an Organization/ISP database.

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    # Resolve through the shared helper (instead of calling
    # socket.gethostbyname directly) so every *_by_name lookup picks the
    # address family the same way.
    return self.org_by_addr(self._gethostbyname(hostname))
531
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary with country_code, country_code3, country_name,
        region_name, city, postal_code, latitude, longitude, dma_code,
        metro_code, area_code, time_zone (whatever C{self._get_record()}
        returns, which may be None)
    @rtype: dict
    @raise GeoIPError: if the database is not a City edition, or if a
        ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        return self._get_record(ipnum)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
556
def record_by_name(self, hostname):
    """
    Look up the record for a given hostname.
    Use this method if you have a City database.

    @param hostname: Hostname
    @type hostname: str
    @return: Dictionary with country_code, country_code3, country_name,
        region_name, city, postal_code, latitude, longitude, dma_code,
        metro_code, area_code, time_zone
    @rtype: dict
    """
    # Resolve through the shared helper (instead of calling
    # socket.gethostbyname directly) so databases listed in
    # const.IPV6_EDITIONS get an IPv6 address.
    return self.record_by_addr(self._gethostbyname(hostname))
571
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: if the database is neither a Region nor a City
        edition, or if a ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType not in const.REGION_CITY_EDITIONS:
            message = 'Invalid database type, expected Region or City'
            raise GeoIPError(message)

        return self._get_region(ipnum)
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
594
def region_by_name(self, hostname):
    """
    Lookup the region for given hostname.
    Use this method if you have a Region or City database.

    @param hostname: Hostname
    @type hostname: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    """
    # Resolve through the shared helper (instead of calling
    # socket.gethostbyname directly) so every *_by_name lookup picks the
    # address family the same way.
    return self.region_by_addr(self._gethostbyname(hostname))
607
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone; None if the database holds no record for the
        address
    @rtype: str
    @raise GeoIPError: if the database is not a City edition, or if a
        ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        # _get_record() can return None; subscripting that would raise
        # TypeError instead of reporting "no data".
        record = self._get_record(ipnum)
        return record['time_zone'] if record else None
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
630
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a given hostname.
    Use this method if you have a City database.

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    # Resolve through the shared helper (instead of calling
    # socket.gethostbyname directly) so databases listed in
    # const.IPV6_EDITIONS get an IPv6 address.
    return self.time_zone_by_addr(self._gethostbyname(hostname))
643