1
2 """
3 Pure Python GeoIP API
4
5 The API is based on MaxMind's C-based Python API, but the code itself is
6 ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid.
7
8 @author: Jennifer Ennis <zaylea@gmail.com>
9
10 @license: Copyright(C) 2004 MaxMind LLC
11
12 This program is free software: you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU Lesser General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
24 """
25
26 import os
27 import math
28 import socket
29 import codecs
30 from threading import Lock
31
32 try:
33 import mmap
34 except ImportError:
35 mmap = None
36
37 try:
38 from StringIO import StringIO
39 except ImportError:
40 from io import StringIO, BytesIO
41
42 from pygeoip import util, const
43 from pygeoip.const import PY2, PY3
44 from pygeoip.timezone import time_zone_by_country_and_region
45
46
47 STANDARD = const.STANDARD
48 MMAP_CACHE = const.MMAP_CACHE
49 MEMORY_CACHE = const.MEMORY_CACHE
50
51 ENCODING = const.ENCODING
52
53
71
72
73 _GeoIPBase = _GeoIPMetaclass('GeoIPBase', (object,), {})
74
75
78
79
def __init__(self, filename, flags=STANDARD):
    """
    Open a GeoIP database and read its structure information.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    """
    self._filename = filename
    self._flags = flags

    # Fall back when mmap is unavailable. This has to be a standalone
    # check: as the head of the if/elif chain below it would clear the
    # flag and then skip every branch that opens the file, leaving
    # self._filehandle undefined.
    if self._flags & const.MMAP_CACHE and mmap is None:
        import warnings
        warnings.warn("MMAP_CACHE cannot be used without a mmap module")
        self._flags &= ~const.MMAP_CACHE

    if self._flags & const.MMAP_CACHE:
        # The file object is only needed to create the mapping; "with"
        # closes it even if mmap() raises.
        with open(filename, 'rb') as f:
            self._filehandle = mmap.mmap(f.fileno(), 0,
                                         access=mmap.ACCESS_READ)
    elif self._flags & const.MEMORY_CACHE:
        with open(filename, 'rb') as f:
            self._memoryBuffer = f.read()
        iohandle = BytesIO if PY3 else StringIO
        self._filehandle = iohandle(self._memoryBuffer)
    else:
        self._filehandle = codecs.open(filename, 'rb', ENCODING)

    # Serializes seek+read pairs on the shared file handle.
    self._lock = Lock()
    self._setup_segments()
119
def _setup_segments(self):
    """
    Parse the database file to determine what kind of database is
    being used and set up the record length and segment start point
    used by the seek methods later.

    Scans backwards from the end of the file for a delimiter of three
    0xFF characters; the byte after it is the database type. Sets
    self._databaseType, self._recordLength and self._databaseSegments.
    If no delimiter is found the COUNTRY_EDITION defaults are kept.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * CITY_EDITION_REV1_V6
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION
    * ASNUM_EDITION_V6
    """
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # Editions that store their segment count right after the type byte.
    segment_editions = (const.CITY_EDITION_REV0,
                        const.CITY_EDITION_REV1,
                        const.CITY_EDITION_REV1_V6,
                        const.ORG_EDITION,
                        const.ISP_EDITION,
                        const.ASNUM_EDITION,
                        const.ASNUM_EDITION_V6)
    long_records = (const.ORG_EDITION, const.ISP_EDITION)

    # The delimiter is loop-invariant, so build it once.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    # "with" and "finally" guarantee the lock is released and the file
    # position restored even when a read, seek or decode fails.
    with self._lock:
        filepos = self._filehandle.tell()
        try:
            self._filehandle.seek(-3, os.SEEK_END)

            for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
                delim = self._filehandle.read(3)

                if PY3 and isinstance(delim, bytes):
                    delim = delim.decode(ENCODING)
                if PY2 and isinstance(delim, str):
                    delim = delim.decode(ENCODING)

                if delim != marker:
                    # Step back over the 3 bytes just read plus one more.
                    self._filehandle.seek(-4, os.SEEK_CUR)
                    continue

                self._databaseType = ord(self._filehandle.read(1))

                # Type values of 106 and above are shifted down by 105.
                if self._databaseType >= 106:
                    self._databaseType -= 105

                if self._databaseType == const.REGION_EDITION_REV0:
                    self._databaseSegments = const.STATE_BEGIN_REV0
                elif self._databaseType == const.REGION_EDITION_REV1:
                    self._databaseSegments = const.STATE_BEGIN_REV1
                elif self._databaseType in segment_editions:
                    buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH)
                    if PY3 and isinstance(buf, bytes):
                        buf = buf.decode(ENCODING)

                    # Little-endian integer: byte j carries bits 8j..8j+7.
                    self._databaseSegments = 0
                    for j in range(const.SEGMENT_RECORD_LENGTH):
                        self._databaseSegments += ord(buf[j]) << (j * 8)

                    if self._databaseType in long_records:
                        self._recordLength = const.ORG_RECORD_LENGTH
                break
        finally:
            self._filehandle.seek(filepos, os.SEEK_SET)
200
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, walk the
    search tree along the bits of the converted IP address integer.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: first pointer that is >= self._databaseSegments
    @rtype: int
    @raise GeoIPError: if no such pointer is reached or a node cannot
        be read or decoded
    """
    try:
        offset = 0
        # NOTE(review): tree depth is chosen from the decimal length of
        # ipnum, so a numerically small IPv6 address gets depth 31 --
        # confirm whether the database type should decide instead.
        seek_depth = 127 if len(str(ipnum)) > 10 else 31

        for depth in range(seek_depth, -1, -1):
            node_length = 2 * self._recordLength
            start = node_length * offset

            if self._flags & const.MEMORY_CACHE:
                buf = self._memoryBuffer[start:start + node_length]
            else:
                # "with" releases the lock even if seek/read raises;
                # otherwise the handler below would hide the error while
                # the lock stayed held and the next lookup would hang.
                with self._lock:
                    self._filehandle.seek(start, os.SEEK_SET)
                    buf = self._filehandle.read(node_length)

            if PY3 and isinstance(buf, bytes):
                buf = buf.decode(ENCODING)

            # Each node holds two little-endian pointers: [left, right].
            x = [
                sum(ord(buf[self._recordLength * i + j]) << (j * 8)
                    for j in range(self._recordLength))
                for i in range(2)
            ]

            # A set bit follows the right pointer, a clear bit the left.
            branch = x[1] if ipnum & (1 << depth) else x[0]
            if branch >= self._databaseSegments:
                return branch
            offset = branch
    except Exception:
        # Any read/decode failure is reported as a corrupt database.
        # Unlike a bare "except:", this lets KeyboardInterrupt and
        # SystemExit propagate.
        pass

    raise GeoIPError('Corrupt database')
248
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the lookup ends exactly at
        self._databaseSegments
    @rtype: str
    @raise ValueError: if the data read contains no NUL terminator
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments

    # "with" releases the lock even if the seek or read raises.
    with self._lock:
        self._filehandle.seek(seek_org + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    # The name is everything before the first NUL character.
    return buf[:buf.index(chr(0))]
271
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name; either value
        is '' when it cannot be determined
    @rtype: dict
    """
    region = ''
    country_code = ''
    seek_country = self._seek_country(ipnum)

    def get_region_name(offset):
        # Two letters A-Z encoding the offset in base 26.
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]
    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # Look the position up directly. "index in COUNTRY_CODES"
            # tests the table's values, so for a sequence of codes it
            # never matched an integer position.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass
    elif self._databaseType in const.CITY_EDITIONS:
        rec = self._get_record(ipnum)
        region = rec.get('region_name', '')
        country_code = rec.get('country_code', '')

    return {'country_code': country_code, 'region_name': region}
318
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with country_code, country_code3, country_name,
        continent, region_name, city, postal_code, latitude, longitude,
        dma_code, area_code, metro_code, time_zone; an empty dict when
        the lookup ends exactly at self._databaseSegments
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return {}

    read_length = (2 * self._recordLength - 1) * self._databaseSegments

    # "with" releases the lock even if the seek or read raises.
    with self._lock:
        self._filehandle.seek(seek_country + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.FULL_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': '',
        'postal_code': ''
    }

    def get_data(buf, buf_pos):
        # Read a NUL-terminated string starting at buf_pos; returns the
        # position of the terminator and the text before it.
        offset = buf_pos
        while ord(buf[offset]) != 0:
            offset += 1
        if offset > buf_pos:
            return (offset, buf[buf_pos:offset])
        return (offset, '')

    def read_uint24(pos):
        # Three-byte little-endian unsigned integer at pos.
        return sum(ord(buf[pos + j]) << (j * 8) for j in range(3))

    # The first byte indexes the country tables.
    char = ord(buf[0])
    record['country_code'] = const.COUNTRY_CODES[char]
    record['country_code3'] = const.COUNTRY_CODES3[char]
    record['country_name'] = const.COUNTRY_NAMES[char]
    record['continent'] = const.CONTINENT_NAMES[char]

    # Three consecutive NUL-terminated strings follow.
    offset, record['region_name'] = get_data(buf, 1)
    offset, record['city'] = get_data(buf, offset + 1)
    offset, record['postal_code'] = get_data(buf, offset + 1)
    buf_pos = offset + 1

    # Coordinates are stored as (degrees + 180) * 10000.
    latitude = read_uint24(buf_pos)
    longitude = read_uint24(buf_pos + 3)
    buf_pos += 6
    record['latitude'] = (latitude / 10000.0) - 180.0
    record['longitude'] = (longitude / 10000.0) - 180.0

    rev1_editions = (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6)
    if self._databaseType in rev1_editions:
        if record['country_code'] == 'US':
            # DMA and area code are packed as dma * 1000 + area.
            dmaarea_combo = read_uint24(buf_pos)
            record['dma_code'] = dmaarea_combo // 1000
            record['area_code'] = dmaarea_combo % 1000

    record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
    params = (record['country_code'], record['region_name'])
    record['time_zone'] = time_zone_by_country_and_region(*params)

    return record
406
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an address string for the loaded database.

    For databases listed in const.IPV6_EDITIONS the first AF_INET6
    result of socket.getaddrinfo is used, and '' is returned when that
    call raises socket.gaierror. For every other database the result
    of socket.gethostbyname is returned and its errors propagate.

    @param hostname: Hostname
    @type hostname: str
    @return: IP address
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        results = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
        _family, _socktype, _proto, _canonname, sockaddr = results[0]
        address, _port, _flow, _scope = sockaddr
    except socket.gaierror:
        return ''
    return address
418
def id_by_addr(self, addr):
    """
    Get the country index for an IP address.

    The index is the tree lookup result minus const.COUNTRY_BEGIN.

    @param addr: The IP address
    @type addr: str
    @return: country index
    @rtype: int
    @raise ValueError: if util.ip2long returns a false value for addr
    @raise GeoIPError: if the database is not a Country edition
    """
    ipnum = util.ip2long(addr)
    if not ipnum:
        raise ValueError("Invalid IP address: %s" % addr)

    country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType in country_editions:
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    raise GeoIPError('Invalid database type, expected Country')
440
def country_code_by_addr(self, addr):
    """
    Return the 2-letter country code (e.g. 'US') for an IP address.
    Works with Country, Region and City databases.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError (including an address family that
        does not match a Country database)
    """
    try:
        country_editions = (const.COUNTRY_EDITION,
                            const.COUNTRY_EDITION_V6)

        if self._databaseType in country_editions:
            is_ipv6 = addr.find(':') >= 0

            if not is_ipv6 and self._databaseType != const.COUNTRY_EDITION:
                raise ValueError(
                    'Invalid database type; expected IPv6 address')
            if is_ipv6 and self._databaseType != const.COUNTRY_EDITION_V6:
                raise ValueError(
                    'Invalid database type; expected IPv4 address')

            return const.COUNTRY_CODES[self.id_by_addr(addr)]

        if self._databaseType in const.REGION_CITY_EDITIONS:
            region_info = self.region_by_addr(addr)
            return region_info.get('country_code')

        raise GeoIPError(
            'Invalid database type, expected Country, City or Region')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
472
def country_code_by_name(self, hostname):
    """
    Return the 2-letter country code (e.g. 'US') for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    country_code_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
485
def country_name_by_addr(self, addr):
    """
    Return the full country name for an IP address.
    Works with Country and City databases.

    @param addr: IP address
    @type addr: str
    @return: country name
    @rtype: str
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError
    """
    try:
        country_editions = (const.COUNTRY_EDITION,
                            const.COUNTRY_EDITION_V6)

        if self._databaseType in country_editions:
            return const.COUNTRY_NAMES[self.id_by_addr(addr)]

        if self._databaseType in const.CITY_EDITIONS:
            record = self.record_by_addr(addr)
            return record.get('country_name')

        raise GeoIPError('Invalid database type, expected Country or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
508
def country_name_by_name(self, hostname):
    """
    Return the full country name for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    country_name_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
521
def org_by_addr(self, addr):
    """
    Look up the Organization, ISP or ASNum entry for an IP address.
    Works with Organization, ISP and ASNum databases.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        org_editions = (const.ORG_EDITION,
                        const.ISP_EDITION,
                        const.ASNUM_EDITION,
                        const.ASNUM_EDITION_V6)
        if self._databaseType in org_editions:
            return self._get_org(ipnum)

        raise GeoIPError('Invalid database type, expected Org, ISP or ASNum')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
545
def org_by_name(self, hostname):
    """
    Look up the organization (or ISP) for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    org_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    return self.org_by_addr(self._gethostbyname(hostname))
558
def record_by_addr(self, addr):
    """
    Look up the location record for an IP address.
    Works with City databases only.

    @param addr: IP address
    @type addr: str
    @return: the dictionary built by _get_record, or None when that
        dictionary is empty
    @rtype: dict
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            # An empty record is reported as None.
            return self._get_record(ipnum) or None

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
587
def record_by_name(self, hostname):
    """
    Look up the location record for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    record_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: same value as record_by_addr for the resolved address
    @rtype: dict
    """
    return self.record_by_addr(self._gethostbyname(hostname))
602
def region_by_addr(self, addr):
    """
    Look up the region for an IP address.
    Works with Region and City databases.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.REGION_CITY_EDITIONS:
            return self._get_region(ipnum)

        raise GeoIPError('Invalid database type, expected Region or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
625
def region_by_name(self, hostname):
    """
    Look up the region for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    region_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    """
    return self.region_by_addr(self._gethostbyname(hostname))
638
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for an IP address.
    Works with City databases only.

    @param addr: IP address
    @type addr: str
    @return: Time zone, or None when the record has no time_zone entry
    @rtype: str
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            record = self._get_record(ipnum)
            return record.get('time_zone')

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
661
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a hostname.

    Resolves the hostname with _gethostbyname and delegates to
    time_zone_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    return self.time_zone_by_addr(self._gethostbyname(hostname))
674