1
2 """
3 Pure Python GeoIP API
4
5 @author: Jennifer Ennis <zaylea@gmail.com>
6 @author: William Tisäter <william@defunct.cc>
7
8 @license: Copyright(C) 2004 MaxMind LLC
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
22 """
23
24 import os
25 import math
26 import socket
27 import codecs
28 from threading import Lock
29
30 try:
31 import mmap
32 except ImportError:
33 mmap = None
34
35 try:
36 from StringIO import StringIO
37 except ImportError:
38 from io import StringIO, BytesIO
39
40 from pygeoip import util, const
41 from pygeoip.const import PY2, PY3
42 from pygeoip.timezone import time_zone_by_country_and_region
43
44
45 STANDARD = const.STANDARD
46 MMAP_CACHE = const.MMAP_CACHE
47 MEMORY_CACHE = const.MEMORY_CACHE
48
49 ENCODING = const.ENCODING
54
80
83 __metaclass__ = _GeoIPMetaclass
84
def __init__(self, filename, flags=0, cache=True):
    """
    Initialize the class.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    @param cache: Used in tests to skip instance caching. Not read by
        this method itself (presumably consumed by the metaclass;
        verify against _GeoIPMetaclass).
    @type cache: bool
    """
    self._flags = flags

    # Silently fall back to the non-mmap path when mmap is unavailable.
    if self._flags & const.MMAP_CACHE and mmap is None:
        import warnings
        warnings.warn("MMAP_CACHE cannot be used without a mmap module")
        self._flags &= ~const.MMAP_CACHE

    if self._flags & const.MMAP_CACHE:
        f = codecs.open(filename, 'rb', ENCODING)
        try:
            # The temporary handle is only needed to create the mapping.
            self._fp = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        finally:
            # Close the handle even if mmap() fails so it is not leaked.
            f.close()
        self._type = 'MMAP_CACHE'
    elif self._flags & const.MEMORY_CACHE:
        f = codecs.open(filename, 'rb', ENCODING)
        try:
            self._memory = f.read()
        finally:
            # Close the handle even if read() fails so it is not leaked.
            f.close()
        self._fp = self._str_to_fp(self._memory)
        self._type = 'MEMORY_CACHE'
    else:
        # STANDARD mode keeps the file open for the instance's lifetime.
        self._fp = codecs.open(filename, 'rb', ENCODING)
        self._type = 'STANDARD'

    self._lock = Lock()
    self._setup_segments()
124
@classmethod
def _str_to_fp(cls, data):
    """
    Wrap in-memory database contents in a file-like object.

    @param data: string data
    @type data: str
    @return: file handle object
    @rtype: StringIO or BytesIO
    """
    if PY3:
        # Re-encode the text so the handle serves raw bytes.
        encoded = bytearray(data, ENCODING)
        return BytesIO(encoded)
    return StringIO(data)
136
def _setup_segments(self):
    """
    Parses the database file to determine what kind of database is
    being used and setup segment sizes and start points that will
    be used by the seek*() methods later.

    Sets self._databaseType, self._recordLength and
    self._databaseSegments. The file position found on entry is
    restored before returning.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * CITY_EDITION_REV1_V6
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION
    * ASNUM_EDITION_V6

    """
    # Defaults used when no structure-info marker is found.
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # The marker searched for: three 0xFF characters. The byte that
    # follows it holds the database type. Built once, outside the loop.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    # "with" guarantees the lock is released even if a seek/read fails.
    with self._lock:
        filepos = self._fp.tell()
        self._fp.seek(-3, os.SEEK_END)

        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            delim = self._fp.read(3)

            # Normalise to text so the comparison works for every
            # file-handle flavour on both major Python versions.
            if PY3 and type(delim) is bytes:
                delim = delim.decode(ENCODING)
            if PY2 and type(delim) is str:
                delim = delim.decode(ENCODING)

            if delim != marker:
                # Undo the 3 characters just read and step one further
                # back, i.e. slide the window one position towards the
                # start of the file.
                self._fp.seek(-4, os.SEEK_CUR)
                continue

            self._databaseType = ord(self._fp.read(1))

            # Type values of 106 and above are shifted down by 105
            # (presumably a legacy numbering scheme - confirm).
            if self._databaseType >= 106:
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0

            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1

            elif self._databaseType in (const.CITY_EDITION_REV0,
                                        const.CITY_EDITION_REV1,
                                        const.CITY_EDITION_REV1_V6,
                                        const.ORG_EDITION,
                                        const.ISP_EDITION,
                                        const.ASNUM_EDITION,
                                        const.ASNUM_EDITION_V6):
                buf = self._fp.read(const.SEGMENT_RECORD_LENGTH)

                if PY3 and type(buf) is bytes:
                    buf = buf.decode(ENCODING)

                # Segment count is stored least-significant byte first.
                self._databaseSegments = sum(
                    ord(buf[j]) << (j * 8)
                    for j in range(const.SEGMENT_RECORD_LENGTH)
                )

                LONG_RECORDS = (const.ORG_EDITION, const.ISP_EDITION)
                if self._databaseType in LONG_RECORDS:
                    self._recordLength = const.ORG_RECORD_LENGTH
            break

        self._fp.seek(filepos, os.SEEK_SET)
217
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if the tree cannot be walked to a leaf (data
        too short, undecodable, or no leaf reached within the depth)
    """
    try:
        offset = 0
        # 2**32 - 1 has 10 decimal digits, so anything longer cannot be
        # an IPv4 value and is walked as a 128-bit address.
        # NOTE(review): IPv6 values with 10 or fewer digits (e.g. ::1)
        # therefore take the 32-bit path.
        seek_depth = 127 if len(str(ipnum)) > 10 else 31
        # Each tree node holds two records: left (bit 0), right (bit 1).
        node_size = 2 * self._recordLength

        for depth in range(seek_depth, -1, -1):
            start = node_size * offset
            if self._flags & const.MEMORY_CACHE:
                buf = self._memory[start:start + node_size]
            else:
                # "with" releases the lock even when seek/read raises.
                with self._lock:
                    self._fp.seek(start, os.SEEK_SET)
                    buf = self._fp.read(node_size)

            if PY3 and type(buf) is bytes:
                buf = buf.decode(ENCODING)

            # Decode both records, least-significant byte first.
            x = [0, 0]
            for i in range(2):
                for j in range(self._recordLength):
                    byte = buf[self._recordLength * i + j]
                    x[i] += ord(byte) << (j * 8)

            # Follow the branch selected by the current address bit; a
            # value at or beyond the segment count is returned as-is.
            branch = x[1] if ipnum & (1 << depth) else x[0]
            if branch >= self._databaseSegments:
                return branch
            offset = branch
    except (IndexError, UnicodeDecodeError):
        pass

    raise GeoIPError('Corrupt database')
265
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the lookup ends on the
        "no record" value (the database segment count)
    @rtype: str
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" releases the lock even when seek/read raises.
    with self._lock:
        self._fp.seek(seek_org + read_length, os.SEEK_SET)
        buf = self._fp.read(const.MAX_ORG_RECORD_LENGTH)

    if PY3 and type(buf) is bytes:
        buf = buf.decode(ENCODING)

    # The name ends at the first NUL. partition() returns the whole
    # buffer when no NUL is present instead of raising ValueError.
    return buf.partition(chr(0))[0]
288
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name (both are
        empty strings when nothing could be determined)
    @rtype: dict
    """
    def get_region_name(offset):
        # Two upper-case letters: offset written in base 26, 'A' == 0.
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    region = ''
    country_code = ''

    if self._databaseType in const.CITY_EDITIONS:
        # _get_record performs its own tree walk, so seeking here as
        # well would only repeat the work.
        rec = self._get_record(ipnum)
        region = rec.get('region_name', '')
        country_code = rec.get('country_code', '')
        return {'country_code': country_code, 'region_name': region}

    seek_country = self._seek_country(ipnum)

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]
    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # Look the code up by position. The previous
            # "index in COUNTRY_CODES" compared the integer against
            # the stored values rather than checking the position.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass

    return {'country_code': country_code, 'region_name': region}
335
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with country_code, country_code3, country_name,
        continent, region_name, city, postal_code, latitude,
        longitude, dma_code, metro_code, area_code, time_zone;
        an empty dict when the lookup ends on the "no record" value
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return {}

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" releases the lock even when seek/read raises.
    with self._lock:
        self._fp.seek(seek_country + read_length, os.SEEK_SET)
        buf = self._fp.read(const.FULL_RECORD_LENGTH)

    if PY3 and type(buf) is bytes:
        buf = buf.decode(ENCODING)

    def get_data(buf, buf_pos):
        # Scan to the next NUL; return its position and the text
        # before it ('' when the field is empty).
        offset = buf_pos
        while ord(buf[offset]) != 0:
            offset += 1
        if offset > buf_pos:
            return offset, buf[buf_pos:offset]
        return offset, ''

    def read_uint24(pos):
        # Three-byte unsigned integer, least-significant byte first.
        return sum(ord(buf[pos + j]) << (j * 8) for j in range(3))

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': '',
        'postal_code': ''
    }

    # The first byte indexes the country tables.
    char = ord(buf[0])
    record['country_code'] = const.COUNTRY_CODES[char]
    record['country_code3'] = const.COUNTRY_CODES3[char]
    record['country_name'] = const.COUNTRY_NAMES[char]
    record['continent'] = const.CONTINENT_NAMES[char]

    # Three consecutive NUL-terminated strings follow.
    offset, record['region_name'] = get_data(buf, 1)
    offset, record['city'] = get_data(buf, offset + 1)
    offset, record['postal_code'] = get_data(buf, offset + 1)
    buf_pos = offset + 1

    latitude = read_uint24(buf_pos)
    longitude = read_uint24(buf_pos + 3)
    buf_pos += 6

    # Coordinates are stored scaled by 10000 and offset by 180.
    record['latitude'] = (latitude / 10000.0) - 180.0
    record['longitude'] = (longitude / 10000.0) - 180.0

    if self._databaseType in (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6):
        if record['country_code'] == 'US':
            # One 3-byte value packs both codes: dma * 1000 + area.
            dmaarea_combo = read_uint24(buf_pos)
            record['dma_code'] = dmaarea_combo // 1000
            record['area_code'] = dmaarea_combo % 1000

    record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
    params = (record['country_code'], record['region_name'])
    record['time_zone'] = time_zone_by_country_and_region(*params)

    return record
424
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an address of the family the loaded
    database expects.

    @param hostname: Hostname
    @type hostname: str
    @return: IPv6 address for IPv6 databases (empty string when the
        IPv6 resolution fails), otherwise the result of
        socket.gethostbyname
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        candidates = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
        # Only the first result is used; its last field is the sockaddr.
        family, socktype, proto, canonname, sockaddr = candidates[0]
        address, port, flow, scope = sockaddr
    except socket.gaierror:
        return ''
    return address
436
def _id_by_addr(self, addr):
    """
    Looks up the index for the country which is the key for the
    code and name.

    @param addr: IPv4 or IPv6 address
    @type addr: str
    @return: country index (tree lookup result minus COUNTRY_BEGIN)
    @rtype: int
    @raise GeoIPError: if the database is not a Country edition or the
        address family does not match the database
    """
    db_type = self._databaseType
    if db_type != const.COUNTRY_EDITION and db_type != const.COUNTRY_EDITION_V6:
        raise GeoIPError('Invalid database type, expected Country')

    # A colon can only appear in an IPv6 literal.
    if ':' in addr:
        if db_type != const.COUNTRY_EDITION_V6:
            raise GeoIPError('Invalid database type; expected IPv4 address')
    elif db_type != const.COUNTRY_EDITION:
        raise GeoIPError('Invalid database type; expected IPv6 address')

    return self._seek_country(util.ip2long(addr)) - const.COUNTRY_BEGIN
459
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: if the database is of another type
    """
    db_type = self._databaseType

    if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
        return const.COUNTRY_CODES[self._id_by_addr(addr)]

    if db_type in const.REGION_CITY_EDITIONS:
        region_info = self.region_by_addr(addr)
        return region_info.get('country_code')

    raise GeoIPError('Invalid database type, expected Country, City or Region')
478
480 """
481 Returns 2-letter country code (e.g. 'US') for specified hostname.
482 Use this method if you have a Country, Region, or City database.
483
484 @param hostname: Hostname
485 @type hostname: str
486 @return: 2-letter country code
487 @rtype: str
488 """
489 addr = self._gethostbyname(hostname)
490 return self.country_code_by_addr(addr)
491
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name; None when a City database has no record
        for the address
    @rtype: str
    @raise GeoIPError: if the database is neither Country nor City
    """
    VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType in VALID_EDITIONS:
        country_id = self._id_by_addr(addr)
        return const.COUNTRY_NAMES[country_id]

    if self._databaseType in const.CITY_EDITIONS:
        record = self.record_by_addr(addr)
        # record_by_addr returns None for addresses without a record;
        # calling .get() on that would raise AttributeError.
        if record is None:
            return None
        return record.get('country_name')

    message = 'Invalid database type, expected Country or City'
    raise GeoIPError(message)
511
513 """
514 Returns full country name for specified hostname.
515 Use this method if you have a Country database.
516
517 @param hostname: Hostname
518 @type hostname: str
519 @return: country name
520 @rtype: str
521 """
522 addr = self._gethostbyname(hostname)
523 return self.country_name_by_addr(addr)
524
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: if the database is of another type
    """
    supported = (
        const.ORG_EDITION,
        const.ISP_EDITION,
        const.ASNUM_EDITION,
        const.ASNUM_EDITION_V6,
    )
    if self._databaseType in supported:
        return self._get_org(util.ip2long(addr))

    raise GeoIPError('Invalid database type, expected Org, ISP or ASNum')
542
544 """
545 Lookup the organization (or ISP) for hostname.
546 Use this method if you have an Organization/ISP database.
547
548 @param hostname: Hostname
549 @type hostname: str
550 @return: Organization or ISP name
551 @rtype: str
552 """
553 addr = self._gethostbyname(hostname)
554 return self.org_by_addr(addr)
555
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: the dictionary built by _get_record, or None when that
        dictionary is empty
    @rtype: dict
    @raise GeoIPError: if the database is not a City edition
    """
    if self._databaseType in const.CITY_EDITIONS:
        record = self._get_record(util.ip2long(addr))
        # An empty dict means "no record"; expose that as None.
        return record if record else None

    raise GeoIPError('Invalid database type, expected City')
578
580 """
581 Look up the record for a given hostname.
582 Use this method if you have a City database.
583
584 @param hostname: Hostname
585 @type hostname: str
586 @return: Dictionary with country_code, country_code3, country_name,
587 region, city, postal_code, latitude, longitude, dma_code,
588 metro_code, area_code, region_name, time_zone
589 @rtype: dict
590 """
591 addr = self._gethostbyname(hostname)
592 return self.record_by_addr(addr)
593
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: if the database is neither Region nor City
    """
    if self._databaseType in const.REGION_CITY_EDITIONS:
        return self._get_region(util.ip2long(addr))

    raise GeoIPError('Invalid database type, expected Region or City')
610
612 """
613 Lookup the region for given hostname.
614 Use this method if you have a Region database.
615
616 @param hostname: Hostname
617 @type hostname: str
618 @return: Dictionary containing country_code, region, and region_name
619 @rtype: dict
620 """
621 addr = self._gethostbyname(hostname)
622 return self.region_by_addr(addr)
623
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone (None when the record has no time_zone entry)
    @rtype: str
    @raise GeoIPError: if the database is not a City edition
    """
    if self._databaseType in const.CITY_EDITIONS:
        record = self._get_record(util.ip2long(addr))
        return record.get('time_zone')

    raise GeoIPError('Invalid database type, expected City')
640
642 """
643 Look up the time zone for a given hostname.
644 Use this method if you have a Region or City database.
645
646 @param hostname: Hostname
647 @type hostname: str
648 @return: Time zone
649 @rtype: str
650 """
651 addr = self._gethostbyname(hostname)
652 return self.time_zone_by_addr(addr)
653