ASN maxmind integration #2282

Merged
36 commits merged on Apr 29, 2024
Commits (36 — changes shown from 35 commits)
814a251
Changed library from maxminddb to geoip2
fgibertoni Apr 19, 2024
5002680
Refactoring maxmind not finished
fgibertoni Apr 19, 2024
3557647
Refactoring maxmind analyzer
fgibertoni Apr 22, 2024
50375ef
Added methods for query db
fgibertoni Apr 22, 2024
a4eb906
Made a method 'private'
fgibertoni Apr 22, 2024
00d4c1a
Renamed method
fgibertoni Apr 22, 2024
60f396c
Made attributes 'private'
fgibertoni Apr 22, 2024
7a38689
Added return type
fgibertoni Apr 22, 2024
51d72db
Improved log message
fgibertoni Apr 22, 2024
fef2d4b
Renamed back to update() because of updating cron
fgibertoni Apr 22, 2024
a1f9114
Fixed media_root settings
fgibertoni Apr 22, 2024
dcb3928
Added log to tar extraction
fgibertoni Apr 22, 2024
8cb688d
Removed unnecessary variable
fgibertoni Apr 22, 2024
d45d1da
Improved log messages
fgibertoni Apr 22, 2024
3a55feb
Readded maxminddb library
fgibertoni Apr 23, 2024
e3f57ef
Update api_app/analyzers_manager/observable_analyzers/maxmind.py
fgibertoni Apr 23, 2024
6c71e33
Fix_ci (#2284)
0ssigeno Apr 23, 2024
4c9340e
Changed library from maxminddb to geoip2
fgibertoni Apr 19, 2024
e450fd0
Refactoring maxmind not finished
fgibertoni Apr 19, 2024
0e3d20a
Refactoring maxmind analyzer
fgibertoni Apr 22, 2024
32054e1
Added methods for query db
fgibertoni Apr 22, 2024
384b999
Made a method 'private'
fgibertoni Apr 22, 2024
a84344c
Renamed method
fgibertoni Apr 22, 2024
1f3fef5
Made attributes 'private'
fgibertoni Apr 22, 2024
ea44316
Added return type
fgibertoni Apr 22, 2024
61528e0
Improved log message
fgibertoni Apr 22, 2024
b7a4fcc
Renamed back to update() because of updating cron
fgibertoni Apr 22, 2024
247137b
Fixed media_root settings
fgibertoni Apr 22, 2024
c04ba9d
Added log to tar extraction
fgibertoni Apr 22, 2024
ff4621d
Removed unnecessary variable
fgibertoni Apr 22, 2024
3b8d10d
Improved log messages
fgibertoni Apr 22, 2024
8d9bbea
Readded maxminddb library
fgibertoni Apr 23, 2024
a888564
Merge branch 'asn-maxmind-integration' of github.com:intelowlproject/…
fgibertoni Apr 23, 2024
ce58906
Moved functions and improved logs
fgibertoni Apr 24, 2024
c9d8cbc
Changed error handling
fgibertoni Apr 26, 2024
07a3bf4
Fixed deepsource warning
fgibertoni Apr 26, 2024
256 changes: 158 additions & 98 deletions api_app/analyzers_manager/observable_analyzers/maxmind.py
@@ -6,69 +6,100 @@
import os
import shutil
import tarfile
from typing import Optional

import maxminddb
import requests
from django.conf import settings
from geoip2.database import Reader
from geoip2.errors import AddressNotFoundError, GeoIP2Error
from geoip2.models import ASN, City, Country

from api_app.analyzers_manager import classes
from api_app.analyzers_manager.exceptions import (
AnalyzerConfigurationException,
AnalyzerRunException,
)
from api_app.models import PluginConfig
from tests.mock_utils import if_mock_connections, patch

logger = logging.getLogger(__name__)

db_names = ["GeoLite2-Country.mmdb", "GeoLite2-City.mmdb"]

class MaxmindDBManager:
_supported_dbs: [str] = ["GeoLite2-Country", "GeoLite2-City", "GeoLite2-ASN"]
_default_db_extension: str = ".mmdb"

class Maxmind(classes.ObservableAnalyzer):
_api_key_name: str
@classmethod
def get_supported_dbs(cls) -> [str]:
return [db_name + cls._default_db_extension for db_name in cls._supported_dbs]

def run(self):
maxmind_final_result = {}
for db in db_names:
try:
db_location = _get_db_location(db)
if not os.path.isfile(db_location) and not self._update_db(
db, self._api_key_name
):
raise AnalyzerRunException(
f"failed extraction of maxmind db {db},"
" reached max number of attempts"
)
if not os.path.exists(db_location):
raise maxminddb.InvalidDatabaseError(
"database location does not exist"
)
reader = maxminddb.open_database(db_location)
maxmind_result = reader.get(self.observable_name)
reader.close()
except maxminddb.InvalidDatabaseError as e:
error_message = f"Invalid database error: {e}"
logger.exception(error_message)
maxmind_result = {"error": error_message}
logger.info(f"maxmind result: {maxmind_result}")
if maxmind_result:
@classmethod
def update_all_dbs(cls, api_key: str) -> bool:
return all(cls._update_db(db, api_key) for db in cls._supported_dbs)

def query_all_dbs(self, observable_query: str, api_key: str) -> (dict, dict):
maxmind_final_result: {} = {}
maxmind_errors: [] = []
for db in self._supported_dbs:
maxmind_result, maxmind_error = self._query_single_db(
observable_query, db, api_key
)

if maxmind_error:
maxmind_errors.append(maxmind_error["error"])
elif maxmind_result:
logger.info(f"maxmind result: {maxmind_result} in {db=}")
maxmind_final_result.update(maxmind_result)
else:
logger.warning("maxmind result not available")
logger.warning(f"maxmind result not available in {db=}")

return maxmind_final_result
return maxmind_final_result, maxmind_errors

@classmethod
def _get_api_key(cls) -> Optional[str]:
for plugin in PluginConfig.objects.filter(
parameter__python_module=cls.python_module,
parameter__is_secret=True,
parameter__name="api_key_name",
):
if plugin.value:
return plugin.value
return None
def _get_physical_location(cls, db: str) -> str:
return f"{settings.MEDIA_ROOT}/{db}{cls._default_db_extension}"

def _query_single_db(
self, query_ip: str, db_name: str, api_key: str
) -> (dict, dict):
result: ASN | City | Country
db_path: str = self._get_physical_location(db_name)
self._check_and_update_db(api_key, db_name)

logger.info(f"Query {db_name=} for {query_ip=}")
with Reader(db_path) as reader:
try:
if "ASN" in db_name:
result = reader.asn(query_ip)
elif "Country" in db_name:
result = reader.country(query_ip)
elif "City" in db_name:
result = reader.city(query_ip)
except AddressNotFoundError:
reader.close()
logger.info(
f"Query for observable '{query_ip}' "
"didn't produce any results in any db."
)
return {}, {}
except (GeoIP2Error, maxminddb.InvalidDatabaseError) as e:
error_message = f"GeoIP2 database error: {e}"
logger.exception(error_message)
return {}, {"error": error_message}
else:
reader.close()
return result.raw, {}

def _check_and_update_db(self, api_key: str, db_name: str):
db_path = self._get_physical_location(db_name)
if not os.path.isfile(db_path) and not self._update_db(db_name, api_key):
raise AnalyzerRunException(
f"failed extraction of maxmind db {db_name},"
" reached max number of attempts"
)
if not os.path.exists(db_path):
raise maxminddb.InvalidDatabaseError(
f"database location '{db_path}' does not exist"
)

@classmethod
def _update_db(cls, db: str, api_key: str) -> bool:
@@ -77,78 +108,107 @@ def _update_db(cls, db: str, api_key: str) -> bool:
f"Unable to find api key for {cls.__name__}"
)

db_location = _get_db_location(db)
try:
db_name_wo_ext = db[:-5]
logger.info(f"starting download of db {db_name_wo_ext} from maxmind")
url = (
"https://download.maxmind.com/app/geoip_download?edition_id="
f"{db_name_wo_ext}&license_key={api_key}&suffix=tar.gz"
)
r = requests.get(url)
if r.status_code >= 300:
raise AnalyzerRunException(
f"failed request for new maxmind db {db_name_wo_ext}."
f" Status code: {r.status_code}"
)
logger.info(f"starting download of {db=} from maxmind")

tar_db_path = f"/tmp/{db_name_wo_ext}.tar.gz"
with open(tar_db_path, "wb") as f:
f.write(r.content) # lgtm [py/clear-text-storage-sensitive-data]

tf = tarfile.open(tar_db_path)
directory_to_extract_files = settings.MEDIA_ROOT
tf.extractall(str(directory_to_extract_files))

today = datetime.datetime.now().date()
counter = 0
directory_found = False
downloaded_db_path = ""
# this is because we do not know the exact date of the db we downloaded
while counter < 10 or not directory_found:
date_to_check = today - datetime.timedelta(days=counter)
formatted_date = date_to_check.strftime("%Y%m%d")
downloaded_db_path = (
f"{directory_to_extract_files}/"
f"{db_name_wo_ext}_{formatted_date}/{db}"
)
try:
os.rename(downloaded_db_path, db_location)
except FileNotFoundError:
logger.debug(
f"{downloaded_db_path} not found move to the day before"
)
counter += 1
else:
directory_found = True
shutil.rmtree(
f"{directory_to_extract_files}/"
f"{db_name_wo_ext}_{formatted_date}"
)

if directory_found:
logger.info(f"maxmind directory found {downloaded_db_path}")
else:
tar_db_path = cls._download_db(db, api_key)
cls._extract_db_to_media_root(tar_db_path)
directory_found = cls._remove_old_db(db)

if not directory_found:
return False

logger.info(f"ended download of db {db_name_wo_ext} from maxmind")
logger.info(f"ended download of {db=} from maxmind")
return True

except Exception as e:
logger.exception(e)
return False

@classmethod
def _download_db(cls, db_name: str, api_key: str) -> str:
url = (
"https://download.maxmind.com/app/geoip_download?edition_id="
f"{db_name}&license_key={api_key}&suffix=tar.gz"
)
response = requests.get(url)
if response.status_code >= 300:
raise AnalyzerRunException(
f"failed request for new maxmind db {db_name}."
f" Status code: {response.status_code}"
f"\nResponse: {response.raw}"
)

return cls._write_db_to_filesystem(db_name, response.content)

@classmethod
def _write_db_to_filesystem(cls, db_name: str, content: bytes) -> str:
tar_db_path = f"/tmp/{db_name}.tar.gz"
logger.info(
f"starting writing db {db_name} downloaded from maxmind to {tar_db_path}"
)
with open(tar_db_path, "wb") as f:
f.write(content)

return tar_db_path

@classmethod
def _extract_db_to_media_root(cls, tar_db_path: str):
logger.info(f"Started extracting {tar_db_path} to {settings.MEDIA_ROOT}.")
tf = tarfile.open(tar_db_path)
tf.extractall(str(settings.MEDIA_ROOT))
logger.info(f"Finished extracting {tar_db_path} to {settings.MEDIA_ROOT}.")

@classmethod
def _remove_old_db(cls, db: str) -> bool:
physical_db_location = cls._get_physical_location(db)
today = datetime.datetime.now().date()
counter = 0
directory_found = False
# this is because we do not know the exact date of the db we downloaded
while counter < 10 or not directory_found:
formatted_date = (today - datetime.timedelta(days=counter)).strftime(
"%Y%m%d"
)
downloaded_db_path = (
f"{settings.MEDIA_ROOT}/"
f"{db}_{formatted_date}/{db}{cls._default_db_extension}"
)
try:
os.rename(downloaded_db_path, physical_db_location)
except FileNotFoundError:
logger.debug(f"{downloaded_db_path} not found move to the day before")
counter += 1
else:
directory_found = True
shutil.rmtree(f"{settings.MEDIA_ROOT}/" f"{db}_{formatted_date}")
logger.info(f"maxmind directory found {downloaded_db_path}")
return directory_found


class Maxmind(classes.ObservableAnalyzer):
_api_key_name: str
_maxmind_db_manager: "MaxmindDBManager" = MaxmindDBManager()

def run(self):
maxmind_final_result, maxmind_errors = self._maxmind_db_manager.query_all_dbs(
self.observable_name, self._api_key_name
)
if maxmind_errors:
[self.report.errors.append(error_msg) for error_msg in maxmind_errors]
self.report.save()
return maxmind_final_result

@classmethod
def get_db_names(cls) -> [str]:
return cls._maxmind_db_manager.get_supported_dbs()

@classmethod
def update(cls) -> bool:
api_key = cls._get_api_key()
return all(cls._update_db(db, api_key) for db in db_names)
return cls._maxmind_db_manager.update_all_dbs(cls._api_key_name)

@classmethod
def _monkeypatch(cls):
# completely skip because does not work without connection.
patches = [if_mock_connections(patch.object(cls, "run", return_value={}))]
return super()._monkeypatch(patches=patches)


def _get_db_location(db):
return f"{settings.MEDIA_ROOT}/{db}"
2 changes: 1 addition & 1 deletion docker/Dockerfile
@@ -28,7 +28,7 @@ RUN mkdir -p ${LOG_PATH} \
${LOG_PATH}/django \
${LOG_PATH}/uwsgi \
${LOG_PATH}/asgi \
/opt/deploy/files_required /opt/deploy/files_required/yara /opt/deploy/configuration
/opt/deploy/files_required /opt/deploy/configuration

# install required packages. some notes about:o
# python3-psycopg2 is required to use PostgresSQL with Django
6 changes: 4 additions & 2 deletions intel_owl/settings/commons.py
@@ -1,6 +1,6 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import os
from pathlib import Path, PosixPath

from ._util import get_secret
@@ -14,7 +14,9 @@
BASE_STATIC_PATH = PROJECT_LOCATION / "static"
MEDIA_ROOT = BASE_DIR / "files_required"
CONFIG_ROOT = PROJECT_LOCATION / "configuration"
YARA_RULES_PATH = MEDIA_ROOT / "yara"
YARA_RULES_PATH = MEDIA_ROOT / "yara" # path for manual yara rules
os.makedirs(YARA_RULES_PATH, exist_ok=True)

LOG_DIR = Path("/") / "var" / "log" / "intel_owl"
# test / ci
MOCK_CONNECTIONS = get_secret("MOCK_CONNECTIONS", False) == "True"
6 changes: 3 additions & 3 deletions intel_owl/settings/logging.py
@@ -10,9 +10,9 @@
DJANGO_LOG_DIRECTORY = LOG_DIR / "django"
UWSGI_LOG_DIRECTORY = LOG_DIR / "uwsgi"
ASGI_LOG_DIRECTORY = LOG_DIR / "asgi"
if not STAGE_CI:
for path in [DJANGO_LOG_DIRECTORY, UWSGI_LOG_DIRECTORY, ASGI_LOG_DIRECTORY]:
os.makedirs(path, exist_ok=True)
for path in [DJANGO_LOG_DIRECTORY, UWSGI_LOG_DIRECTORY, ASGI_LOG_DIRECTORY]:
os.makedirs(path, exist_ok=True)
if not STAGE_CI:
set_permissions(path)

DISABLE_LOGGING_TEST = secrets.get_secret("DISABLE_LOGGING_TEST", False) == "True"
1 change: 1 addition & 0 deletions requirements/project-requirements.txt
@@ -47,6 +47,7 @@ google-cloud-webrisk==1.14.0
intezer-sdk==1.20
lief==0.14.0
maxminddb==2.6.0
geoip2==4.8.0
mwdblib==4.5.0
git+https://github.com/decalage2/oletools.git@ccf99d1a8f85e552f5cc130fbaa504cfe5725a92
OTXv2==1.5.12
2 changes: 1 addition & 1 deletion tests/test_crons.py
@@ -71,7 +71,7 @@ def test_remove_old_jobs(self):
@if_mock_connections(skip("not working without connection"))
def test_maxmind_updater(self):
maxmind.Maxmind.update()
for db in maxmind.db_names:
for db in maxmind.Maxmind.get_db_names():
self.assertTrue(os.path.exists(db))

@if_mock_connections(
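For reference, the updater exercised by test_maxmind_updater downloads each GeoLite2 edition from MaxMind and unpacks it under MEDIA_ROOT. A rough sketch of that step, assuming a valid license key (the timeout and destination directory are placeholders; the download URL format is the one used in the PR):

# Rough sketch of the download-and-extract step; license_key and dest_dir are placeholders.
import tarfile

import requests

def download_geolite2(edition: str, license_key: str, dest_dir: str) -> None:
    url = (
        "https://download.maxmind.com/app/geoip_download?"
        f"edition_id={edition}&license_key={license_key}&suffix=tar.gz"
    )
    response = requests.get(url, timeout=60)
    response.raise_for_status()  # the PR instead checks status_code >= 300 and raises AnalyzerRunException
    tar_path = f"/tmp/{edition}.tar.gz"
    with open(tar_path, "wb") as f:
        f.write(response.content)
    with tarfile.open(tar_path) as tf:
        tf.extractall(dest_dir)  # the archive unpacks into a dated folder, e.g. GeoLite2-ASN_YYYYMMDD/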