Skip to content

Commit

Permalink
Merge pull request #19 from jackfromeast/patch-pull-request
Browse files Browse the repository at this point in the history
Support querying neo4j through ineo for DOM Clobbering
  • Loading branch information
SoheilKhodayari authored Apr 22, 2024
2 parents a3dcc93 + b5e1e0b commit 2a0011c
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 25 deletions.
142 changes: 135 additions & 7 deletions analyses/domclobbering/domc_neo4j_traversals.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,24 @@
import os
import sys
import time
import json
import constants as constantsModule
import utils.io as IOModule
from utils.logging import logger

import analyses.domclobbering.domc_cypher_queries as DOMCTraversalsModule
import hpg_neo4j.db_utility as neo4jDatabaseUtilityModule
import hpg_neo4j.query_utility as neo4jQueryUtilityModule
# import hpg_neo4j.db_utility as neo4jDatabaseUtilityModule
# import hpg_neo4j.query_utility as neo4jQueryUtilityModule
import hpg_neo4j.db_utility as DU
import hpg_neo4j.query_utility as QU
import docker.neo4j.manage_container as dockerModule

def get_url_for_webpage(webpage_directory):
	"""Return the seed URL recorded for a crawled webpage.

	Reads the `url.out` file that the crawler stores inside each
	per-webpage data folder and returns its raw contents (no
	whitespace stripping is performed).

	Args:
		webpage_directory: path to the per-webpage data folder.

	Returns:
		The contents of `url.out` as a string.

	Raises:
		OSError: if `url.out` does not exist or cannot be read.
	"""
	# use a context manager so the file is closed even if read() raises
	with open(os.path.join(webpage_directory, "url.out"), "r") as fd:
		return fd.read()


def get_name_from_url(url):
Expand Down Expand Up @@ -89,15 +98,16 @@ def build_and_analyze_hpg(seed_url):
database_name = 'neo4j'
container_name = 'neo4j_container_'

# de-compress the hpg
IOModule.decompress_graph(webpage)

for each_webpage in webapp_pages:

relative_import_path = os.path.join(webapp_folder_name, each_webpage)
container_name = container_name + each_webpage
webpage = os.path.join(webapp_data_directory, each_webpage)
logger.warning('HPG for: %s'%(webpage))

# de-compress the hpg
IOModule.decompress_graph(webpage)


# import the CSV files into an active neo4j database inside a docker container
Expand All @@ -124,13 +134,13 @@ def build_and_analyze_hpg(seed_url):
logger.info('importing data inside container.')
dockerModule.import_data_inside_container(container_name, database_name, relative_import_path, 'CSV')
logger.info('waiting for the tcp port 7474 of the neo4j container to be ready...')
connection_success = neo4jDatabaseUtilityModule.wait_for_neo4j_bolt_connection(timeout=150)
connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150)
if not connection_success:
sys.exit(1)
else:
dockerModule.start_neo4j_container(container_name)
logger.info('waiting for the tcp port 7474 of the neo4j container to be ready...')
connection_success = neo4jDatabaseUtilityModule.wait_for_neo4j_bolt_connection(timeout=150)
connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150)
if not connection_success:
sys.exit(1)

Expand All @@ -141,7 +151,7 @@ def build_and_analyze_hpg(seed_url):

# step3: run the vulnerability detection queries
if query:
neo4jDatabaseUtilityModule.exec_fn_within_transaction(DOMCTraversalsModule.run_traversals, webpage)
DU.exec_fn_within_transaction(DOMCTraversalsModule.run_traversals, webpage)


# stop the neo4j docker container
Expand All @@ -151,6 +161,124 @@ def build_and_analyze_hpg(seed_url):



def build_and_analyze_hpg_local(seed_url, overwrite=False, conn_timeout=None, compress_hpg=True):
	"""Build and analyze the HPG of every crawled page of a site on a local, ineo-managed neo4j.

	For each webpage folder of the site identified by `seed_url`, this function:
	  1. de-compresses the nodes/rels CSV files when only the `.gz` versions exist,
	  2. creates a dedicated neo4j instance via ineo and imports the graph with neo4j-admin,
	  3. runs the DOM Clobbering traversal queries inside a transaction, and
	  4. stops and removes the neo4j instance.

	Parameters:
		seed_url: seed URL of the target web application.
		overwrite: when its string form is 'false', pages that already have a
			`sinks.flows.out` result file are skipped.
		conn_timeout: NOTE(review): accepted but never read in this function —
			presumably kept for interface parity with similar entry points; confirm.
		compress_hpg: when its string form is 'true', re-compresses the hpg CSV
			files after the database import.

	Returns:
		-1 if the data directory for the site does not exist; otherwise None.
	"""

	webapp_folder_name = get_name_from_url(seed_url)
	webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name)
	if not os.path.exists(webapp_data_directory):
		logger.error("[TR] did not found the directory for HPG analysis: "+str(webapp_data_directory))
		return -1

	# the crawler stores the list of collected pages in `webpages.json`
	webpages_json_file = os.path.join(webapp_data_directory, "webpages.json")

	if os.path.exists(webpages_json_file):
		logger.info('[TR] reading webpages.json')
		fd = open(webpages_json_file, 'r')
		webapp_pages = json.load(fd)
		fd.close()

	else:
		logger.info('[TR] webpages.json does not exist; falling back to filesystem.')
		# fall back to analyzing all pages if the `webpages.json` file is missing
		webapp_pages = os.listdir(webapp_data_directory)
		# the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler)
		webapp_pages = [item for item in webapp_pages if len(item) == 64]


	for webpage in webapp_pages:
		webpage_folder = os.path.join(webapp_data_directory, webpage)
		if os.path.exists(webpage_folder):

			logger.warning('[TR] HPG analyis for: %s'%(webpage_folder))

			if str(overwrite).lower() == 'false':
				# do NOT re-analyze webpages
				OUTPUT_FILE = os.path.join(webpage_folder, "sinks.flows.out")
				if os.path.exists(OUTPUT_FILE):
					logger.info('[TR] analyis results already exists for webpage: %s'%webpage_folder)
					continue

			# requirement: the database name must have a length between 3 and 63 characters
			# must always import into the default neo4j database
			neo4j_database_name = 'neo4j'

			# ineo instance name: one instance per (site, page) pair
			database_name = '{0}_{1}'.format(webapp_folder_name, webpage)

			nodes_file = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME)
			rels_file = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME)

			nodes_file_gz = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME +'.gz')
			rels_file_gz = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME +'.gz')

			if os.path.exists(nodes_file) and os.path.exists(rels_file):
				logger.info('[TR] hpg files exist in decompressed format, skipping de-compression.')

			elif os.path.exists(nodes_file_gz) and os.path.exists(rels_file_gz):
				logger.info('[TR] de-compressing hpg.')
				# de-compress the hpg
				IOModule.decompress_graph(webpage_folder)
			else:
				# neither the plain nor the compressed CSVs are present: nothing to import
				logger.error('[TR] The nodes/rels.csv files do not exist in %s, skipping.'%webpage_folder)
				continue

			neo4j_http_port = constantsModule.NEO4J_HTTP_PORT
			neo4j_bolt_port = constantsModule.NEO4J_BOLT_PORT

			# start from a clean slate in case a previous run left an instance behind
			logger.warning('[TR] removing any previous neo4j instance for %s'%str(database_name))
			DU.ineo_remove_db_instance(database_name)

			logger.info('[TR] creating db %s with http port %s'%(database_name, neo4j_http_port))
			DU.ineo_create_db_instance(database_name, neo4j_http_port)

			# check if the bolt port requested by the config.yaml is not the default one
			# (ineo's default bolt port is http port + 2)
			if not ( int(neo4j_http_port) + 2 == int(neo4j_bolt_port) ):
				logger.info('[TR] setting the requested bolt port %s for db %s'%(neo4j_bolt_port, database_name))
				DU.ineo_set_bolt_port_for_db_instance(database_name, neo4j_bolt_port)

			logger.info('[TR] importing the database with neo4j-admin.')
			DU.neoadmin_import_db_instance(database_name, neo4j_database_name, nodes_file, rels_file)

			logger.info('[TR] changing the default neo4j password to enable programmatic access.')
			DU.ineo_set_initial_password_and_restart(database_name, password=constantsModule.NEO4J_PASS)

			if str(compress_hpg).lower() == 'true':
				# compress the hpg after the model import
				IOModule.compress_graph(webpage_folder)

			logger.info('[TR] waiting for the neo4j connection to be ready...')
			# give the restarted instance a head start before polling the port
			time.sleep(10)
			logger.info('[TR] connection: %s'%constantsModule.NEO4J_CONN_HTTP_STRING)
			connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150, conn=constantsModule.NEO4J_CONN_HTTP_STRING)
			if not connection_success:
				# connection failed: best-effort teardown of the instance, then skip this page
				try:
					logger.info('[TR] stopping neo4j for %s'%str(database_name))
					DU.ineo_stop_db_instance(database_name)

					## remove db after analysis
					DU.ineo_remove_db_instance(database_name)
				except:
					logger.info('[TR] ran into exception while prematurely stopping neo4j for %s'%str(database_name))
				continue

			logger.info('[TR] starting to run the queries.')
			# NOTE(review): webpage_url is computed but not used below — confirm whether
			# it should be passed to the traversal queries
			webpage_url = get_url_for_webpage(webpage_folder)
			try:
				DU.exec_fn_within_transaction(DOMCTraversalsModule.run_traversals, webpage_folder)
			except Exception as e:
				logger.error(e)
				logger.error('[TR] neo4j connection error.')
				# persist the error so the overwrite-skip logic above does not retry forever
				outfile = os.path.join(webpage_folder, "sinks.flows.out")
				if not os.path.exists(outfile):
					with open(outfile, 'w+') as fd:
						error_json = {"error": str(e)}
						json.dump(error_json, fd, ensure_ascii=False, indent=4)

			logger.info('[TR] stopping neo4j for %s'%str(database_name))
			DU.ineo_stop_db_instance(database_name)

			## remove db after analysis
			logger.info('[TR] removing neo4j for %s'%str(database_name))
			DU.ineo_remove_db_instance(database_name)



Expand Down
4 changes: 3 additions & 1 deletion constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
PATTERN_DIR = os.path.join(OUTPUTS_DIR, "patterns")
DATA_DIR_UNREPONSIVE_DOMAINS = os.path.join(DATA_DIR, "unresponsive")

INEO_HOME= os.path.join(BASE_DIR, "ineo")


# ------------------------------------------------------------------------------------------ #
# Neo4j Config
Expand All @@ -54,7 +56,7 @@

# ports
NEO4J_HTTP_PORT = '7474'
NEO4J_BOLT_PORT = '7687'
NEO4J_BOLT_PORT = '7476'

# http connection string
NEO4J_CONN_HTTP_STRING = "http://127.0.0.1:%s"%str(NEO4J_HTTP_PORT)
Expand Down
12 changes: 8 additions & 4 deletions engine/core/io/graphexporter.js
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,10 @@ GraphExporter.prototype.compressGraph = function (webpageFolder){
cmd = `pigz ${relsFile}`;
execSync(cmd);

cmd = `pigz ${relsFileDynamic}`;
execSync(cmd);
if (fs.existsSync(relsFileDynamic)){
cmd = `pigz ${relsFileDynamic}`;
execSync(cmd);
}
}

GraphExporter.prototype.decompressGraph = function (webpageFolder){
Expand All @@ -433,8 +435,10 @@ GraphExporter.prototype.decompressGraph = function (webpageFolder){
cmd = `pigz -d ${relsFile}`;
execSync(cmd);

cmd = `pigz -d ${relsFileDynamic}`;
execSync(cmd);
if (fs.existsSync(relsFileDynamic)){
cmd = `pigz -d ${relsFileDynamic}`;
execSync(cmd);
}

}

Expand Down
7 changes: 4 additions & 3 deletions hpg_neo4j/db_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ def wait_for_neo4j_bolt_connection(timeout=60, conn=constantsModule.NEO4J_CONN_H
def ineo_create_db_instance(db_name, port, neo4j_version='4.2.3'):
	"""Create a new ineo-managed neo4j database instance.

	Parameters:
		db_name: name of the ineo instance to create.
		port: http port for the new instance.
		neo4j_version: neo4j version to install for this instance.
	"""

	INEO_BIN = constantsModule.INEO_BIN
	# export INEO_HOME so ineo operates on the project-local installation directory;
	# the INEO_BIN placeholder is substituted with the configured binary path below.
	# (removed a dead `command =` assignment that was immediately overwritten)
	command = "export INEO_HOME='{3}' && INEO_BIN create -v {0} -p{1} {2}".format(neo4j_version, port, db_name, constantsModule.INEO_HOME)
	command = command.replace("INEO_BIN", INEO_BIN)
	run_os_command(command)

Expand Down Expand Up @@ -300,15 +301,15 @@ def neoadmin_import_db_instance(ineo_db_name, neo4j_db_name, nodes_file, rels_fi
def ineo_set_bolt_port_for_db_instance(db_name, port_string):
	"""Set the bolt port of an existing ineo-managed neo4j instance.

	Parameters:
		db_name: name of the ineo instance to reconfigure.
		port_string: bolt port to assign (passed to `ineo set-port -b`).
	"""

	INEO_BIN = constantsModule.INEO_BIN
	# export INEO_HOME so ineo operates on the project-local installation directory;
	# the INEO_BIN placeholder is substituted with the configured binary path below.
	# (removed a dead `command =` assignment that was immediately overwritten)
	command = "export INEO_HOME='{2}' && INEO_BIN set-port -b {0} {1}".format(db_name, port_string, constantsModule.INEO_HOME)
	command = command.replace("INEO_BIN", INEO_BIN)
	run_os_command(command)


def ineo_restart_neo4j(db_name):
	"""Restart an ineo-managed neo4j database instance.

	Parameters:
		db_name: name of the ineo instance to restart.
	"""

	INEO_BIN = constantsModule.INEO_BIN
	# export INEO_HOME so ineo operates on the project-local installation directory;
	# the INEO_BIN placeholder is substituted with the configured binary path below.
	# (removed a dead `command =` assignment that was immediately overwritten)
	command = "export INEO_HOME='{1}' && INEO_BIN restart {0}".format(db_name, constantsModule.INEO_HOME)
	command = command.replace("INEO_BIN", INEO_BIN)
	run_os_command(command)

Expand Down
10 changes: 5 additions & 5 deletions run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,13 @@ def main():
# static analysis over neo4j
if config['domclobbering']["passes"]["static_neo4j"]:
LOGGER.info("HPG construction and analysis over neo4j for site %s."%(website_url))
DOMCTraversalsModule.build_and_analyze_hpg(website_url)
DOMCTraversalsModule.build_and_analyze_hpg_local(website_url)
LOGGER.info("finished HPG construction and analysis over neo4j for site %s."%(website_url))

# dynamic verification
if config['domclobbering']["passes"]["dynamic"]:
LOGGER.info("Running dynamic verifier for site %s."%(website_url))
cmd = node_force_execution.replace('SEED_URL', website_url)
cmd = node_force_execution.replace('SITE_URL', website_url)
IOModule.run_os_command(cmd, cwd=force_execution_command_cwd, timeout= force_execution_timeout)
LOGGER.info("Dynamic verification completed for site %s."%(website_url))

Expand Down Expand Up @@ -326,7 +326,7 @@ def main():
# dynamic verification
if config['request_hijacking']['passes']['verification']:
LOGGER.info("dynamic data flow verification for site %s."%(website_url))
cmd = node_dynamic_verifier.replace("SITE_URL", website_url)
cmd = node_dynamic_verifier.replace("SEED_URL", website_url)
request_hijacking_verification_api.start_verification_for_site(cmd, website_url, cwd=dynamic_verifier_command_cwd, timeout=verification_pass_timeout, overwrite=False)
LOGGER.info("sucessfully finished dynamic data flow verification for site %s."%(website_url))

Expand Down Expand Up @@ -394,13 +394,13 @@ def main():

if config['domclobbering']["passes"]["static_neo4j"]:
LOGGER.info("HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
DOMCTraversalsModule.build_and_analyze_hpg(website_url)
DOMCTraversalsModule.build_and_analyze_hpg_local(website_url)
LOGGER.info("finished HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))

# dynamic verification
if config['domclobbering']["passes"]["dynamic"]:
LOGGER.info("Running dynamic verifier for site %s - %s"%(website_rank, website_url))
cmd = node_force_execution.replace('SEED_URL', website_url)
cmd = node_force_execution.replace('SITE_URL', website_url)
IOModule.run_os_command(cmd, cwd=force_execution_command_cwd, timeout= force_execution_timeout)
LOGGER.info("Dynamic verification completed for site %s - %s"%(website_rank, website_url))

Expand Down
14 changes: 9 additions & 5 deletions utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,24 @@ def compress_graph(webpage_folder_path, node_file=constantsModule.NODE_INPUT_FIL

cmd1="pigz %s"%(os.path.join(webpage_folder_path, node_file))
cmd2="pigz %s"%(os.path.join(webpage_folder_path, edge_file))
cmd3="pigz %s"%(os.path.join(webpage_folder_path, edges_file_dynamic))

bash_command(cmd1)
bash_command(cmd2)
bash_command(cmd3)

if os.path.exists(os.path.join(webpage_folder_path, edges_file_dynamic)):
cmd3="pigz %s"%(os.path.join(webpage_folder_path, edges_file_dynamic))
bash_command(cmd3)

def decompress_graph(webpage_folder_path, node_file=constantsModule.NODE_INPUT_FILE_NAME, edge_file=constantsModule.RELS_INPUT_FILE_NAME, edges_file_dynamic=constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME):
	"""De-compress the hpg CSV files of a webpage folder with pigz.

	The nodes and static-relations files are always decompressed; the
	dynamic-relations file is optional and only decompressed when its
	compressed `.gz` version is present.

	Parameters:
		webpage_folder_path: path to the per-webpage data folder.
		node_file: file name of the nodes CSV.
		edge_file: file name of the static relations CSV.
		edges_file_dynamic: file name of the optional dynamic relations CSV.
	"""

	cmd1="pigz -d %s"%(os.path.join(webpage_folder_path, node_file))
	cmd2="pigz -d %s"%(os.path.join(webpage_folder_path, edge_file))

	bash_command(cmd1)
	bash_command(cmd2)

	dynamic_rels_file = os.path.join(webpage_folder_path, edges_file_dynamic)
	# BUGFIX: this branch previously (i) ran plain `pigz` (i.e., compression) inside a
	# decompress function and (ii) checked for the plain file, which does not exist
	# while the graph is compressed — so the dynamic edges were never decompressed.
	if os.path.exists(dynamic_rels_file + '.gz'):
		cmd3="pigz -d %s"%(dynamic_rels_file)
		bash_command(cmd3)


0 comments on commit 2a0011c

Please sign in to comment.