diff --git a/README.md b/README.md
index f185dd9..1526c36 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ JAW has a Github pages website available at .
diff --git a/analyses/open_redirect/analyze_hpg_api.py b/analyses/open_redirect/analyze_hpg_api.py
new file mode 100644
--- /dev/null
+++ b/analyses/open_redirect/analyze_hpg_api.py
+# -*- coding: utf-8 -*-
+
+"""
+	Copyright (C) 2024 Soheil Khodayari, CISPA
+	This program is free software: you can redistribute it and/or modify
+	it under the terms of the GNU Affero General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU Affero General Public License for more details.
+	You should have received a copy of the GNU Affero General Public License
+	along with this program. If not, see .
+
+
+ Description:
+ ------------
+ Creates a neo4j graph db for a given webpage and runs the analysis queries
+
+
+ Usage:
+ ------------
+ > python3 -m analyses.open_redirect.analyze_hpg_api --seedurl=http://example.com --webpage=xyz
+
+"""
+
+import os
+import sys
+import time
+import argparse
+import json
+import constants as constantsModule
+import utils.io as IOModule
+import utils.utility as utilityModule
+import docker.neo4j.manage_container as dockerModule
+import hpg_neo4j.db_utility as DU
+import hpg_neo4j.query_utility as QU
+import analyses.open_redirect.traversals_cypher as traversals_cypher
+from utils.logging import logger as LOGGER
+
+
+def get_url_for_webpage(webpage_directory):
+	# read the webpage url stored by the crawler in `url.out`
+	content = None
+	with open(os.path.join(webpage_directory, "url.out"), "r") as fd:
+		content = fd.read()
+	return content
+
+def main():
+
+
+	BASE_DIR = constantsModule.BASE_DIR
+
+	p = argparse.ArgumentParser(description='Creates a neo4j graph db for a given webpage and runs the open redirect analysis queries.')
+
+
+
+ p.add_argument('--seedurl', "-U",
+ default='http://example.com',
+ help='the seed URL of the app to analyze (default: %(default)s)',
+ type=str)
+
+ p.add_argument('--webpage', "-W",
+ default='xyz',
+ help='webpage folder name (default: %(default)s)',
+ type=str)
+
+ p.add_argument('--httpport', "-H",
+ default=constantsModule.NEO4J_HTTP_PORT,
+ help='http port for neo4j (default: %(default)s)',
+ type=str)
+
+ p.add_argument('--boltport', "-B",
+ default=constantsModule.NEO4J_BOLT_PORT,
+ help='bolt port for neo4j (default: %(default)s)',
+ type=str)
+
+
+
+ args= vars(p.parse_args())
+
+ seed_url = args["seedurl"]
+ webpage = args["webpage"]
+ webapp_folder_name = utilityModule.getDirectoryNameFromURL(seed_url)
+
+ # overwrite the neo4j config for this process
+ neo4j_http_port = args["httpport"]
+ neo4j_bolt_port = args["boltport"]
+
+ constantsModule.NEO4J_HTTP_PORT = neo4j_http_port
+ constantsModule.NEO4J_CONN_HTTP_STRING = "http://127.0.0.1:%s"%str(constantsModule.NEO4J_HTTP_PORT)
+
+ constantsModule.NEO4J_BOLT_PORT = neo4j_bolt_port
+ constantsModule.NEO4J_CONN_STRING = "bolt://127.0.0.1:%s"%str(constantsModule.NEO4J_BOLT_PORT)
+ constantsModule.NEOMODEL_NEO4J_CONN_STRING = "bolt://%s:%s@127.0.0.1:%s"%(constantsModule.NEO4J_USER, constantsModule.NEO4J_PASS, constantsModule.NEO4J_BOLT_PORT)
+
+ webpage_folder = os.path.join(constantsModule.DATA_DIR, os.path.join(webapp_folder_name, webpage))
+
+ # requirement: the database name must have a length between 3 and 63 characters
+ # must always import into the default neo4j database
+ neo4j_database_name = 'neo4j'
+ database_name = '{0}_{1}'.format(webapp_folder_name, webpage)
+
+ nodes_file = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME)
+ rels_file = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME)
+ rels_dynamic_file = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME)
+
+ nodes_file_gz = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME +'.gz')
+ rels_file_gz = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME +'.gz')
+ rels_dynamic_file_gz = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME +'.gz')
+
+ if os.path.exists(nodes_file) and os.path.exists(rels_file) and os.path.exists(rels_dynamic_file):
+ LOGGER.info('[TR] hpg files exist in decompressed format, skipping de-compression.')
+
+ elif os.path.exists(nodes_file_gz) and os.path.exists(rels_file_gz) and os.path.exists(rels_dynamic_file_gz):
+ LOGGER.info('[TR] de-compressing hpg.')
+ # de-compress the hpg
+ IOModule.decompress_graph(webpage_folder)
+ else:
+ LOGGER.error('[TR] The nodes/rels.csv files do not exist in %s, skipping.'%webpage_folder)
+ return False
+
+ LOGGER.warning('[TR] removing any previous neo4j instance for %s'%str(database_name))
+ DU.ineo_remove_db_instance(database_name)
+
+ LOGGER.info('[TR] creating db %s with http port %s'%(database_name, neo4j_http_port))
+ DU.ineo_create_db_instance(database_name, neo4j_http_port)
+
+ # check if the bolt port requested by the config.yaml is not the default one
+ if not ( int(neo4j_http_port) + 2 == int(neo4j_bolt_port) ):
+ LOGGER.info('[TR] setting the requested bolt port %s for db %s'%(neo4j_bolt_port, database_name))
+ DU.ineo_set_bolt_port_for_db_instance(database_name, neo4j_bolt_port)
+
+ LOGGER.info('[TR] importing the database with neo4j-admin.')
+ DU.neoadmin_import_db_instance(database_name, neo4j_database_name, nodes_file, rels_file, rels_dynamic_file)
+
+ LOGGER.info('[TR] changing the default neo4j password to enable programmatic access.')
+ DU.ineo_set_initial_password_and_restart(database_name, password=constantsModule.NEO4J_PASS)
+
+ # compress the hpg after the model import
+ IOModule.compress_graph(webpage_folder)
+
+ LOGGER.info('[TR] waiting for the neo4j connection to be ready...')
+ time.sleep(10)
+ LOGGER.info('[TR] connection: %s'%constantsModule.NEO4J_CONN_HTTP_STRING)
+ connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150, conn=constantsModule.NEO4J_CONN_HTTP_STRING)
+ if not connection_success:
+ try:
+ LOGGER.info('[TR] stopping neo4j for %s'%str(database_name))
+ DU.ineo_stop_db_instance(database_name)
+
+ ## remove db after analysis
+ DU.ineo_remove_db_instance(database_name)
+		except Exception:
+ LOGGER.info('[TR] ran into exception while prematurely stopping neo4j for %s'%str(database_name))
+ return connection_success
+
+ LOGGER.info('[TR] starting to run the queries.')
+ webpage_url = get_url_for_webpage(webpage_folder)
+ try:
+ DU.exec_fn_within_transaction(traversals_cypher.run_traversals, webpage_url, webpage_folder, webpage, conn=constantsModule.NEO4J_CONN_STRING)
+ except Exception as e:
+ LOGGER.error(e)
+ LOGGER.error('[TR] neo4j connection error.')
+ outfile = os.path.join(webpage_folder, "sinks.flows.out")
+ if not os.path.exists(outfile):
+ with open(outfile, 'w+') as fd:
+ error_json = {"error": str(e)}
+ json.dump(error_json, fd, ensure_ascii=False, indent=4)
+
+
+
+	## note: these cleanup steps are also performed in the top-level module, as a timeout may kill this process here
+ LOGGER.info('[TR] stopping neo4j for %s'%str(database_name))
+ DU.ineo_stop_db_instance(database_name)
+
+ ## remove db after analysis
+ LOGGER.info('[TR] removing neo4j for %s'%str(database_name))
+ DU.ineo_remove_db_instance(database_name)
+
+ return connection_success
+
+if __name__ == "__main__":
+ main()
diff --git a/analyses/open_redirect/globals.js b/analyses/open_redirect/globals.js
new file mode 100644
index 0000000..6cb5bf6
--- /dev/null
+++ b/analyses/open_redirect/globals.js
@@ -0,0 +1,464 @@
+/**
+ * List of Window object properties
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window
+ */
+const window_properties = ["caches", "closed", "console", "controllers", "crossOriginIsolated", "crypto", "customElements", "defaultStatus", "devicePixelRatio", "dialogArguments", "directories", "document", "event", "frameElement", "frames", "fullScreen", "history", "indexedDB", "innerHeight", "innerWidth", "isSecureContext", "isSecureContext", "length", "localStorage", "location", "locationbar", "menubar", "mozAnimationStartTime", "mozInnerScreenX", "mozInnerScreenY", "name", "navigator", "onabort", "onafterprint", "onanimationcancel", "onanimationend", "onanimationiteration", "onappinstalled", "onauxclick", "onbeforeinstallprompt", "onbeforeprint", "onbeforeunload", "onblur", "oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick", "onclose", "oncontextmenu", "oncuechange", "ondblclick", "ondevicemotion", "ondeviceorientation", "ondeviceorientationabsolute", "ondragdrop", "ondurationchange", "onended", "onerror", "onfocus", "onformdata", "ongamepadconnected", "ongamepaddisconnected", "ongotpointercapture", "onhashchange", "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup", "onlanguagechange", "onload", "onloadeddata", "onloadedmetadata", "onloadend", "onloadstart", "onlostpointercapture", "onmessage", "onmessageerror", "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onpaint", "onpause", "onplay", "onplaying", "onpointercancel", "onpointerdown", "onpointerenter", "onpointerleave", "onpointermove", "onpointerout", "onpointerover", "onpointerup", "onpopstate", "onrejectionhandled", "onreset", "onresize", "onscroll", "onselect", "onselectionchange", "onselectstart", "onstorage", "onsubmit", "ontouchcancel", "ontouchstart", "ontransitioncancel", "ontransitionend", "onunhandledrejection", "onunload", "onvrdisplayactivate", "onvrdisplayblur", "onvrdisplayconnect", "onvrdisplaydeactivate", "onvrdisplaydisconnect", "onvrdisplayfocus", "onvrdisplaypointerrestricted", "onvrdisplaypointerunrestricted", "onvrdisplaypresentchange", "onwheel", "opener", "origin", "outerHeight", "outerWidth", "pageXOffset", "pageYOffset", "parent", "performance", "personalbar", "pkcs11", "screen", "screenLeft", "screenTop", "screenX", "screenY", "scrollbars", "scrollMaxX", "scrollMaxY", "scrollX", "scrollY", "self", "sessionStorage", "sidebar", "speechSynthesis", "status", "statusbar", "toolbar", "top", "visualViewport"];
+
+/**
+ * List of Window object methods
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window
+ */
+const window_methods = ["alert", "atob", "blur", "btoa", "cancelAnimationFrame", "cancelIdleCallback", "captureEvents", "clearImmediate", "clearInterval", "clearTimeout", "close", "confirm", "convertPointFromNodeToPage", "convertPointFromPageToNode", "createImageBitmap", "dump", "fetch", "find", "focus", "getComputedStyle", "getDefaultComputedStyle", "getSelection", "home", "matchMedia", "minimize", "moveBy", "moveTo", "open", "openDialog", "postMessage", "print", "prompt", "queueMicrotask", "releaseEvents", "requestAnimationFrame", "requestIdleCallback", "resizeBy", "resizeTo", "routeEvent", "scroll", "scrollBy", "scrollByLines", "scrollByPages", "scrollTo", "setCursor", "setImmediate", "setInterval", "setTimeout", "showDirectoryPicker", "showModalDialog", "showOpenFilePicker", "showSaveFilePicker", "sizeToContent", "stop", "updateCommands", "addEventListener"];
+
+/**
+ * List of Window object event properties
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window
+ */
+const window_events = ["event", "afterprint", "animationcancel", "animationend", "animationiteration", "beforeprint", "beforeunload", "blur", "copy", "cut", "DOMContentLoaded", "error", "focus", "hashchange", "languagechange", "load", "message", "messageerror", "offline", "online", "orientationchange", "pagehide", "pageshow", "paste", "popstate", "rejectionhandled", "storage", "transitioncancel", "unhandledrejection", "unload", "vrdisplayconnect", "vrdisplaydisconnect", "vrdisplaypresentchange"];
+
+
+
+/**
+ * List of Window object properties
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document
+ */
+const document_properties = ["cookie", "activeElement", "alinkColor", "all", "anchors", "applets", "bgColor", "body", "characterSet", "childElementCount", "children", "compatMode", "contentType", "currentScript", "defaultView", "designMode", "dir", "doctype", "documentElement", "documentURI", "documentURIObject", "domain", "embeds", "fgColor", "firstElementChild", "forms", "fullscreen", "fullscreenElement", "fullscreenEnabled", "head", "height", "hidden", "images", "implementation", "lastElementChild", "lastModified", "lastStyleSheetSet", "linkColor", "links", "location", "mozSyntheticDocument", "onabort", "onafterscriptexecute", "onanimationcancel", "onanimationend", "onanimationiteration", "onauxclick", "onbeforescriptexecute", "onblur", "oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick", "onclose", "oncontextmenu", "oncuechange", "ondblclick", "ondurationchange", "onended", "onerror", "onfocus", "onformdata", "onfullscreenchange", "onfullscreenerror", "ongotpointercapture", "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup", "onload", "onloadeddata", "onloadedmetadata", "onloadend", "onloadstart", "onlostpointercapture", "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onoffline", "ononline", "onpause", "onplay", "onplaying", "onpointercancel", "onpointerdown", "onpointerenter", "onpointerleave", "onpointermove", "onpointerout", "onpointerover", "onpointerup", "onreset", "onresize", "onscroll", "onselect", "onselectionchange", "onselectstart", "onsubmit", "ontouchcancel", "ontouchstart", "ontransitioncancel", "ontransitionend", "onvisibilitychange", "onwheel", "pictureInPictureElement", "pictureInPictureEnabled", "plugins", "pointerLockElement", "popupNode", "preferredStyleSheetSet", "readyState", "referrer", "rootElement", "scripts", "scrollingElement", "selectedStyleSheetSet", "styleSheets", "styleSheetSets", "timeline", "title", "tooltipNode", "URL", "visibilityState", "vlinkColor", "width", "xmlEncoding", "xmlVersion"];
+
+/**
+ * List of document object methods
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document
+ */
+const document_methods = ["adoptNode", "append", "caretPositionFromPoint", "caretRangeFromPoint", "clear", "close", "createAttribute", "createCDATASection", "createComment", "createDocumentFragment", "createElement", "createElementNS", "createEntityReference", "createEvent", "createExpression", "createExpression", "createNodeIterator", "createNSResolver", "createNSResolver", "createProcessingInstruction", "createRange", "createTextNode", "createTouch", "createTouchList", "createTreeWalker", "elementFromPoint", "elementsFromPoint", "enableStyleSheetsForSet", "evaluate", "evaluate", "execCommand", "exitFullscreen", "exitPictureInPicture", "exitPointerLock", "getAnimations", "getBoxObjectFor", "getElementById", "getElementsByClassName", "getElementsByName", "getElementsByTagName", "getElementsByTagNameNS", "getSelection", "hasFocus", "hasStorageAccess", "importNode", "mozSetImageElement", "open", "prepend", "queryCommandEnabled", "queryCommandSupported", "querySelector", "querySelectorAll", "registerElement", "releaseCapture", "replaceChildren", "requestStorageAccess", "write", "writeln", "addEventListener"];
+
+/**
+ * List of document object event properties
+ * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document
+ */
+const document_events = ["animationcancel", "animationend", "animationiteration", "animationstart", "copy", "cut", "DOMContentLoaded", "drag", "dragend", "dragenter", "dragleave", "dragover", "dragstart", "drop", "fullscreenchange", "fullscreenerror", "gotpointercapture", "keydown", "keypress", "keyup", "lostpointercapture", "paste", "pointercancel", "pointerdown", "pointerenter", "pointerleave", "pointerlockchange", "pointerlockerror", "pointermove", "pointerout", "pointerover", "pointerup", "readystatechange", "scroll", "selectionchange", "selectstart", "touchcancel", "touchend", "touchmove", "touchstart", "transitioncancel", "transitionend", "transitionrun", "transitionstart", "visibilitychange", "wheel"];
+
+
+const _2nd_level_method_calls = ["appendChild", "console", "log"];
+
+
+
+const all_global_props = window_properties.concat(document_properties,document_methods,window_methods);
+
+
+// List curated through aggregation of other lists
+// - https://www.w3schools.com/jsref/jsref_obj_array.asp
+var js_builtin_methods = new Set([
+
+ // Functions
+ "apply",
+ "call",
+ "bind",
+
+ // Arrays
+ "concat",
+ "copyWithin",
+ "entries",
+ "every",
+ "fill",
+ "filter",
+ "find",
+ "findIndex",
+ "forEach",
+ "from",
+ "includes",
+ "indexOf",
+ "isArray",
+ "join",
+ "keys",
+ "lastIndexOf",
+ "map",
+ "pop",
+ "push",
+ "reduce",
+ "reduceRight",
+ "reverse",
+ "shift",
+ "slice",
+ "some",
+ "sort",
+ "splice",
+ "toString",
+ "unshift",
+ "valueOf",
+
+ // Boolean
+ "toString",
+ "valueOf",
+
+ // Classes
+ "extends",
+ "super",
+ "static",
+
+ // Date
+ "getDate",
+ "getDay",
+ "getFullYear",
+ "getHours",
+ "getMilliseconds",
+ "getMinutes",
+ "getMonth",
+ "getSeconds",
+ "getTime",
+ "getTimezoneOffset",
+ "getUTCDate",
+ "getUTCDay",
+ "getUTCFullYear",
+ "getUTCHours",
+ "getUTCMilliseconds",
+ "getUTCMinutes",
+ "getUTCMonth",
+ "getUTCSeconds",
+ "getYear",
+ "now",
+ "parse",
+ "setDate",
+ "setFullYear",
+ "setHours",
+ "setMilliseconds",
+ "setMinutes",
+ "setMonth",
+ "setSeconds",
+ "setTime",
+ "setUTCDate",
+ "setUTCFullYear",
+ "setUTCHours",
+ "setUTCMilliseconds",
+ "setUTCMinutes",
+ "setUTCMonth",
+ "setUTCSeconds",
+ "setYear",
+ "toDateString",
+ "toGMTString",
+ "toISOString",
+ "toJSON",
+ "toLocaleDateString",
+ "toLocaleTimeString",
+ "toLocaleString",
+ "toString",
+ "toTimeString",
+ "toUTCString",
+ "UTC",
+ "valueOf",
+
+ // JSON
+ "parse",
+ "stringify",
+
+ // MATH
+ "abs",
+ "acos",
+ "acosh",
+ "asin",
+ "asinh",
+ "atan",
+ "atan2",
+ "atanh",
+ "cbrt",
+ "ceil",
+ "clz32",
+	"cos",
+ "cosh",
+ "exp",
+ "expm1",
+ "floor",
+ "fround",
+ "log",
+ "log10",
+ "log1p",
+ "log2",
+ "max",
+ "min",
+ "pow",
+ "random",
+ "round",
+	"sign",
+	"sin",
+ "sinh",
+ "sqrt",
+ "tan",
+ "tanh",
+ "trunc",
+
+ // Number
+ "isFinite",
+ "isInteger",
+ "isNaN",
+ "isSafeInteger",
+ "toExponential",
+ "toFixed",
+ "toLocaleString",
+ "toPrecision",
+ "toString",
+ "valueOf",
+
+ // RegExp
+ "compile",
+
+ // String
+ "charAt",
+ "charCodeAt",
+ "concat",
+ "endsWith",
+ "fromCharCode",
+ "includes",
+ "indexOf",
+ "lastIndexOf",
+ "localeCompare",
+ "match",
+ "repeat",
+ "replace",
+ "search",
+ "slice",
+ "split",
+ "startsWith",
+ "substr",
+ "substring",
+ "toLocaleLowerCase",
+ "toLocaleUpperCase",
+ "toLowerCase",
+ "toString",
+ "toUpperCase",
+ "trim",
+ "valueOf"
+]);
+
+
+js_builtin_methods = Array.from(js_builtin_methods);
+const builtin_dom_api = [
+	"window",
+	"document"
+].concat(all_global_props);
+
+const js_builtin = js_builtin_methods.concat(builtin_dom_api);
+
+
+
+var lib_content_heuristics = [
+ // jquery
+ "*! jQuery v",
+ "(c) OpenJS Foundation and other contributors | jquery.org/license",
+ "jQuery Foundation, Inc. | jquery.org/license *",
+ // bootstrap
+ "* Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)",
+ "* Bootstrap v",
+ // prototype
+ "* Prototype JavaScript framework, version",
+ // angular js
+ "@license AngularJS v",
+ "Google LLC. http://angularjs.org",
+ "AngularJS v",
+ // react
+ "* @license React",
+ // d3
+ "https://d3js.org v",
+ // require js
+ "* @license r.js ",
+ "* @license RequireJS ",
+ // ext js
+ "This file is part of Ext JS ",
+ "Contact: http://www.sencha.com/contact",
+ // leaflet
+ "* Leaflet "
+];
+
+
+// one-to-one mapping between the name of the library and the heuristic
+// as in `lib_content_heuristics` list
+var lib_content_heuristics_names = [
+ 'jquery',
+ 'jquery',
+ 'jquery',
+ 'bootstrap',
+ 'bootstrap',
+ 'prototype',
+ 'angularjs',
+ 'angularjs',
+ 'angularjs',
+ 'reactjs',
+ 'd3js',
+ 'requirejs',
+ 'requirejs',
+ 'extjs',
+ 'extjs',
+ 'leaflet',
+];
+
+var lib_src_heuristics = [
+ // common cdns
+ "unpkg.com/",
+ "ajax.googleapis.com/ajax/libs/",
+ "cdnjs.cloudflare.com/ajax/libs/",
+ // custom
+ "lib/",
+ "libs/",
+ "/libraries/",
+ // library names
+ "gajs", // google analytics
+ "google-analytics-js",
+ "analytics.js",
+ "gwt",
+ "ink",
+ "vaadin",
+ "bootstrap",
+ "zurb",
+ "polymer",
+ "highcharts",
+ "infovis",
+ "flotcharts",
+ "createjs",
+ "googlemaps",
+ "google-maps",
+ "jquery",
+ "jqueryui",
+ "dojo",
+ "prototype",
+ "scriptaculous",
+ "mootools",
+ "spry",
+ "yui",
+ "yui2",
+ "yui3",
+ "qooxdoo",
+ "extjs",
+ "ext.js",
+ "ext-all.js",
+ "base2",
+ "closurelibrary",
+	"raphaël",
+ "react",
+ "reactjs",
+ "nextjs",
+ "next.js",
+ "preact",
+ "preactjs",
+ "modernizr",
+ "processingjs",
+ "backbone",
+ "leaflet",
+ "mapbox",
+ "lo-dash",
+ "underscore",
+ "sammy",
+ "rico",
+ "mochikit",
+	"graphaël",
+ "glow",
+ "socketio",
+ "socket.io",
+ "mustache",
+ "fabricjs",
+ "fabric.js",
+ "fusejs",
+ "fuse.js",
+ "tweenjs",
+ "sproutcore",
+ "zeptojs",
+ "threejs",
+ "three",
+ "three.js",
+ "philogl",
+ "camanjs",
+ "yepnope",
+ "labjs",
+ "headjs",
+ "controljs",
+ "requirejs",
+ "require.js",
+ "rightjs",
+ "jquerytools",
+ "pusher",
+ "paperjs",
+ "swiffy",
+ "movejs",
+ "amplifyjs",
+ "popcornjs",
+ "d3js",
+ "d3.",
+ "handlebars",
+ "knockout",
+ "spine",
+ "jquerymobile",
+ "webfontloader",
+ "angular",
+ "angularjs",
+ "angular.js",
+ "emberjs",
+ "ember.js",
+ "hammerjs",
+ "visibilityjs",
+ "velocityjs",
+ "ifvisiblejs",
+ "pixijs",
+ "dcjs",
+ "greensockjs",
+ "fastclick",
+ "isotope",
+ "marionette",
+ "canjs",
+ "vuejs",
+ "vue.cjs",
+ "vue.global.js",
+ "vue",
+ "nuxtjs",
+ "twojs",
+ "two.js",
+ "brewser",
+ "materialdesignlite",
+ "material-design-lite",
+ "kendoui",
+ "matterjs",
+ "riotjs",
+ "seajs",
+ "momentjs",
+ "momenttimezone",
+ "scrollmagic",
+ "swfobject",
+ "flexslider",
+ "spfjs",
+ "numeraljs",
+ "boomerangjs",
+ "boomerang.js",
+ "framerjs",
+ "marko",
+ "ampjs",
+ "gatsby",
+ "shopify",
+ "magentojs",
+ "wordpress",
+ "wix",
+ "workbox",
+ "bpmn-js",
+ "googletagmanager",
+ "gtm.js"
+];
+
+
+
+module.exports = {
+ js_builtin: js_builtin,
+ lib_src_heuristics: lib_src_heuristics,
+ lib_content_heuristics: lib_content_heuristics
+};
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/analyses/open_redirect/package.json b/analyses/open_redirect/package.json
new file mode 100644
index 0000000..388bfca
--- /dev/null
+++ b/analyses/open_redirect/package.json
@@ -0,0 +1,47 @@
+{
+  "name": "open_redirect",
+ "version": "1.0.0",
+ "description": "",
+ "main": "static_analysis.js",
+ "dependencies": {
+ "crypto": "^1.0.1",
+ "elapsed-time-logger": "^1.1.7",
+ "fs": "0.0.1-security",
+ "js-beautify": "^1.15.1",
+ "path": "^0.12.7",
+ "process": "^0.11.10",
+ "puppeteer": "^23.9.0",
+ "core-js": "^3.39.0",
+ "csv-writer": "^1.6.0",
+ "domino": "^2.1.6",
+ "escodegen": "^2.1.0",
+ "esgraph": "*",
+ "espree": "^10.3.0",
+ "esprima": "^4.0.1",
+ "filbert": "^0.1.20",
+ "latest": "^0.2.0",
+ "open": "10.1.0",
+ "php-parser": "^3.2.1",
+ "process.argv": "^0.6.1",
+ "util": "^0.12.5",
+ "walkes": "*"
+ },
+ "devDependencies": {
+ "async": "^3.2.6",
+ "blanket": "^1.2.3",
+ "grunt": "^1.6.1",
+ "grunt-jsdoc": "^2.4.1",
+ "grunt-mocha-istanbul": "^5.0.2",
+ "grunt-mocha-test": "^0.13.3",
+ "grunt-strip-code": "^1.0.12",
+ "istanbul": "*",
+ "jsdoc": "^4.0.4",
+ "mocha": "*",
+ "should": "*"
+ },
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1"
+ },
+ "author": "Soheil Khodayari",
+ "license": "AGPL3"
+}
diff --git a/analyses/open_redirect/semantic_types.js b/analyses/open_redirect/semantic_types.js
new file mode 100644
index 0000000..0fcfba4
--- /dev/null
+++ b/analyses/open_redirect/semantic_types.js
@@ -0,0 +1,29 @@
+/*
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+	Semantic Types for open redirect vulnerabilities
+*/
+
+
+
+var semanticTypes = {};
+
+// write
+semanticTypes.WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL";
+semanticTypes.WR_WIN_LOC_URL = "WR_WIN_LOC_URL";
+semanticTypes.WR_FRAME_URL = "WR_FRAME_URL";
+
+module.exports = semanticTypes;
\ No newline at end of file
diff --git a/analyses/open_redirect/semantic_types.py b/analyses/open_redirect/semantic_types.py
new file mode 100644
index 0000000..7d1a2e5
--- /dev/null
+++ b/analyses/open_redirect/semantic_types.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+"""
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+ Semantic Types for open redirect vulnerabilities
+
+ Usage:
+ -----------
+ > import analyses.open_redirect.semantic_types as SemTypeDefinitions
+
+"""
+
+# write
+WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL"
+WR_WIN_LOC_URL = "WR_WIN_LOC_URL"
+
+# non reachable
+NON_REACHABLE = "NON_REACH"
+
+# read
+RD_WIN_LOC = "RD_WIN_LOC"
+RD_WIN_NAME = "RD_WIN_NAME"
+RD_DOC_REF = "RD_DOC_REF"
+RD_PM = "RD_PM"
+RD_WEB_STORAGE = "RD_WEB_STORAGE"
+RD_DOM_TREE = "RD_DOM"
+RD_COOKIE = "RD_COOKIE"
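+
+# example: a value read from a RD_WIN_LOC source (e.g., location.hash) that reaches a
+# WR_WIN_LOC_URL sink (e.g., an assignment to window.location) is a potential open redirect flow.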
+
diff --git a/analyses/open_redirect/static_analysis.js b/analyses/open_redirect/static_analysis.js
new file mode 100644
index 0000000..a2c8dbb
--- /dev/null
+++ b/analyses/open_redirect/static_analysis.js
@@ -0,0 +1,443 @@
+/*
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+ Static analysis main
+*/
+
+
+/**
+ * ------------------------------------------------
+ * third-party imports
+ * ------------------------------------------------
+**/
+const fs = require('fs');
+const pathModule = require('path');
+const crypto = require('crypto')
+const argv = require("process.argv");
+const elapsed = require("elapsed-time-logger");
+
+/**
+ * ------------------------------------------------
+ * module imports
+ * ------------------------------------------------
+**/
+const constantsModule = require('./../../engine/lib/jaw/constants');
+const globalsModule = require('./globals.js');
+const SourceSinkAnalyzerModule = require('./traversals.js');
+const SourceSinkAnalyzer = SourceSinkAnalyzerModule.OpenRedirectSourceSinkAnalyzer;
+
+const GraphExporter = require('./../../engine/core/io/graphexporter');
+
+/**
+ * ------------------------------------------------
+ * constants and globals
+ * ------------------------------------------------
+**/
+
+// directory where the data of the crawling will be saved
+const BASE_DIR = pathModule.resolve(__dirname, '../..')
+const dataStorageDirectory = pathModule.join(BASE_DIR, 'data');
+
+
+// when true, nodejs will log the current step for each webpage to the console
+const DEBUG = true;
+
+const do_ast_preprocessing_passes = false;
+var do_compress_graphs = true;
+var overwrite_hpg = false;
+var iterative_output = false;
+
+const FOXHOUND_EDGES = true;
+
+/**
+ * ------------------------------------------------
+ * utility functions
+ * ------------------------------------------------
+**/
+
+const withTimeout = (millis, promise) => {
+ const timeout = new Promise((resolve, reject) =>
+ setTimeout(
+ () => reject(`Timed out after ${millis} ms.`),
+ millis));
+ return Promise.race([
+ promise,
+ timeout
+ ]);
+};
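+// usage sketch (hypothetical): bound a per-page analysis with a timeout
+// withTimeout(60 * 1000, staticallyAnalyzeWebpage(url, webpageFolder))
+//	.catch(err => console.log('[timeout]', err));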
+
+
+/**
+ * @function readFile
+ * @param file_path_name: absolute path of a file.
+ * @return the text content of the given file if it exists, otherwise -1.
+**/
+function readFile(file_path_name){
+ try {
+ const data = fs.readFileSync(file_path_name, 'utf8')
+ return data;
+ } catch (err) {
+ // console.error(err);
+ return -1;
+ }
+}
+
+
+/**
+ * @function getNameFromURL
+ * @param url: eTLD+1 domain name
+ * @return converts the url to a string name suitable for a directory by replacing colons with dashes and removing slashes
+**/
+function getNameFromURL(url){
+ return url.replace(/\:/g, '-').replace(/\//g, '');
+}
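+// example: getNameFromURL('http://example.com') === 'http-example.com'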
+
+
+/**
+ * @function hashURL
+ * @param url: string
+ * @return the SHA256 hash of the given input in hexadecimal format
+**/
+function hashURL(url){
+ const hash = crypto.createHash('sha256').update(url, 'utf8').digest('hex');
+ return hash;
+}
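+// example: hashURL('http://example.com/') returns a 64-character hex digest,
+// which is also used as the name of the webpage data folder.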
+
+
+/**
+ * @function getOrCreateDataDirectoryForWebsite
+ * @param url: string
+ * @return creates a directory to store the data of the input url and returns the directory name.
+**/
+function getOrCreateDataDirectoryForWebsite(url){
+ const folderName = getNameFromURL(url);
+ const folderPath = pathModule.join(dataStorageDirectory, folderName);
+ if(!fs.existsSync(folderPath)){
+ fs.mkdirSync(folderPath);
+ }
+ return folderPath;
+}
+
+
+
+/**
+ * @function isLibraryScript
+ * @param {string} script: script src (when `mode: src`) or script content (when `mode: content`)
+ * @param {string} options: determines the type of the `script` param (format `{mode: type}` with types being `src` or `content`)
+ * @return {boolean} whether or not the input is a library script
+**/
+function isLibraryScript(script, options){
+
+ let return_flag = false;
+
+ if(options.mode === 'src'){
+
+ let script_src = script.toLowerCase();
+ for(let h of globalsModule.lib_src_heuristics){
+ if(script_src.includes(h)){ // check script src
+ return_flag = true;
+ break;
+ }
+ }
+
+ }else{ // [options.mode === 'content']
+
+ let script_content = script.toLowerCase();
+ for(let h of globalsModule.lib_content_heuristics){
+ if(script_content.includes(h)){ // check script content
+ return_flag = true;
+ break;
+ }
+ }
+ }
+
+ return return_flag;
+}
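+// example: isLibraryScript('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.1/jquery.min.js', {mode: 'src'})
+// returns true, since the src matches the "cdnjs.cloudflare.com/ajax/libs/" and "jquery" heuristics.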
+
+/**
+ * ------------------------------------------------
+ * Main Static Analysis Thread
+ * ------------------------------------------------
+**/
+
+
+async function staticallyAnalyzeWebpage(url, webpageFolder){
+
+ let results_timing_file = pathModule.join(webpageFolder, "time.static_analysis.out");
+ if(!overwrite_hpg && fs.existsSync(results_timing_file)){
+		DEBUG && console.log('[skipping] results already exist for: '+ webpageFolder)
+ return 1;
+ }
+
+ // read the crawled scripts from disk
+ let scripts = [];
+ var sourcemaps = {};
+ let dirContent = fs.readdirSync( webpageFolder );
+
+
+ let scripts_mapping = {};
+ let scripts_mapping_content = await readFile(pathModule.join(webpageFolder, 'scripts_mapping.json'));
+ if(scripts_mapping_content != -1){
+ try{
+ scripts_mapping = JSON.parse(scripts_mapping_content);
+ }
+ catch{
+ // PASS
+ }
+ }
+
+
+ var library_scripts = [];
+ let scriptFiles = dirContent.filter(function( elm ) {return elm.match(/.*\.(js$)/ig);});
+	for(let i=0; i<scriptFiles.length; i++){
+
+	if(single_folder && single_folder.length > 10){
+
+ if(fs.existsSync(single_folder)){
+ var urlContent = readFile(pathModule.join(single_folder, "url.out"));
+ if(urlContent != -1){
+ var webpageUrl = urlContent.trim();
+ await staticallyAnalyzeWebpage(webpageUrl, single_folder);
+ }
+ }else{
+			console.log('[Warning] the following directory does not exist, but was marked for static analysis: '+ single_folder);
+ }
+
+ }else{
+
+ const dataDirectory = getOrCreateDataDirectoryForWebsite(seedurl);
+ const urlsFile = pathModule.join(dataDirectory, "urls.out");
+ const urlsFileContent = readFile(urlsFile);
+
+ if(urlsFileContent != -1){
+
+ const globalTimer = elapsed.start('global_static_timer');
+
+ const urls = new Set(urlsFileContent.split("\n")); // do not consider duplicate urls
+
+ for(let webpageUrl of urls.values()){
+
+ if(webpageUrl.trim().length > 1 ){ // eliminate empty strings
+ let _hash = hashURL(webpageUrl);
+ let webpageFolder = pathModule.join(dataDirectory, _hash);
+ if(fs.existsSync(webpageFolder)){
+ await staticallyAnalyzeWebpage(webpageUrl, webpageFolder);
+
+ }else{
+				}else{
+					console.log('[Warning] the following directory does not exist, but was marked for static analysis: '+ webpageFolder +'\n url is: '+ webpageUrl);
+				}
+ }
+ }
+ }
+
+ const globalTime = globalTimer.get();
+ globalTimer.end();
+ fs.writeFileSync(pathModule.join(dataDirectory, "time.static_analysis.out"), JSON.stringify({
+ "total_static_timer": globalTime,
+ }));
+
+ }
+ else{
+		console.log('[Warning] urls.out is missing or empty for website: '+ seedurl +', thus exiting the static-analysis pass.')
+ }
+ }
+
+})();
+
+
+
+
+
+
diff --git a/analyses/open_redirect/static_analysis_api.py b/analyses/open_redirect/static_analysis_api.py
new file mode 100644
index 0000000..1722317
--- /dev/null
+++ b/analyses/open_redirect/static_analysis_api.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+
+"""
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+ API for running the open redirect preliminary analyses (i.e., property graph construction and identifying sinks)
+
+
+ Usage:
+ ------------
+	> start_model_construction(website_url, memory, timeout)
+
+"""
+
+
+
+import os, sys, json
+import utils.io as IOModule
+import constants as constantsModule
+import utils.utility as utilityModule
+from utils.logging import logger as LOGGER
+
+
+
+def start_model_construction(website_url, iterative_output='false', memory=None, timeout=None, compress_hpg='true', overwrite_hpg='false', specific_webpage=None):
+
+ # setup defaults
+ if memory is None:
+ static_analysis_memory = '32000'
+ else:
+ static_analysis_memory = memory
+
+ if timeout is None:
+ static_analysis_per_webpage_timeout = 600 # seconds
+ else:
+ static_analysis_per_webpage_timeout = timeout
+
+
+ open_redirect_analyses_command_cwd = os.path.join(constantsModule.BASE_DIR, "analyses/open_redirect")
+ open_redirect_static_analysis_driver_program = os.path.join(open_redirect_analyses_command_cwd, "static_analysis.js")
+
+ open_redirect_static_analysis_command = "node --max-old-space-size=%s DRIVER_ENTRY --singlefolder=SINGLE_FOLDER --compresshpg=%s --overwritehpg=%s --iterativeoutput=%s"%(static_analysis_memory, compress_hpg, overwrite_hpg, iterative_output)
+ open_redirect_static_analysis_command = open_redirect_static_analysis_command.replace("DRIVER_ENTRY", open_redirect_static_analysis_driver_program)
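+	# the resulting command has the form (paths are illustrative):
+	# node --max-old-space-size=32000 <BASE_DIR>/analyses/open_redirect/static_analysis.js \
+	#	--singlefolder=<webpage_folder> --compresshpg=true --overwritehpg=false --iterativeoutput=false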
+
+
+ website_folder_name = utilityModule.getDirectoryNameFromURL(website_url)
+ website_folder = os.path.join(constantsModule.DATA_DIR, website_folder_name)
+
+ webpages_json_file = os.path.join(website_folder, 'webpages.json')
+ urls_file = os.path.join(website_folder, 'urls.out')
+
+
+ if specific_webpage is not None:
+ webpage_folder = os.path.join(constantsModule.DATA_DIR, specific_webpage)
+ if os.path.exists(webpage_folder):
+ node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder)
+ IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True)
+
+ elif os.path.exists(webpages_json_file):
+
+ fd = open(webpages_json_file, 'r')
+ webpages = json.load(fd)
+ fd.close()
+
+ for webpage in webpages:
+ webpage_folder = os.path.join(website_folder, webpage)
+ if os.path.exists(webpage_folder):
+
+ node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder)
+ IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True)
+
+
+
+ elif os.path.exists(urls_file):
+ message = 'webpages.json file does not exist, falling back to urls.out'
+ LOGGER.warning(message)
+
+ # read the urls from the webpage data
+ fd = open(urls_file, 'r')
+ urls = fd.readlines()
+ fd.close()
+
+	# make sure that the list of urls is unique;
+	# this eliminates the duplicate entries added to the urls.out file when the crawler
+	# is executed multiple times for the same site without deleting the old crawl data.
+ urls = list(set(urls))
+
+ for url in urls:
+		url = url.strip()
+ webpage_folder_name = utilityModule.sha256(url)
+ webpage_folder = os.path.join(website_folder, webpage_folder_name)
+ if os.path.exists(webpage_folder):
+ node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder)
+ IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True)
+
+ else:
+ message = 'no webpages.json or urls.out file exists in the webapp directory; skipping analysis...'
+ LOGGER.warning(message)
+
diff --git a/analyses/open_redirect/static_analysis_py_api.py b/analyses/open_redirect/static_analysis_py_api.py
new file mode 100644
index 0000000..4597a2f
--- /dev/null
+++ b/analyses/open_redirect/static_analysis_py_api.py
@@ -0,0 +1,357 @@
+# -*- coding: utf-8 -*-
+
+"""
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+ Detecting open redirect vulnerabilities
+
+
+ Usage:
+ ------------
+ > import analyses.open_redirect.static_analysis_py_api as open_redirect_py_api
+
+"""
+
+
+import os
+import sys
+import time
+import json
+import constants as constantsModule
+import utils.io as IOModule
+import docker.neo4j.manage_container as dockerModule
+import hpg_neo4j.db_utility as DU
+import hpg_neo4j.query_utility as QU
+import analyses.open_redirect.traversals_cypher as open_redirect_py_traversals
+from utils.logging import logger as LOGGER
+
+
+
+def get_url_for_webpage(webpage_directory):
+	# read the webpage url stored by the crawler in `url.out`
+	content = None
+	with open(os.path.join(webpage_directory, "url.out"), "r") as fd:
+		content = fd.read()
+	return content
+
+
+def get_name_from_url(url):
+
+ """
+ @param url: eTLD+1 domain name
+	@return converts the url to a string name suitable for a directory by replacing colons with dashes and removing slashes
+
+ """
+ return url.replace(':', '-').replace('/', '')
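+# example: get_name_from_url('http://example.com') -> 'http-example.com'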
+
+
+
+# ------------------------------------------------------------------------------------ #
+# Interface
+# ------------------------------------------------------------------------------------ #
+def build_and_analyze_hpg(seed_url, timeout=1800, overwrite=False, compress_hpg=True):
+
+ """
+ @param {string} seed_url
+ @param {integer} timeout: per page static analysis timeout
+ @description: imports an HPG inside a neo4j graph database and runs traversals over it.
+ """
+
+ if str(constantsModule.NEO4J_USE_DOCKER).lower() == 'true':
+ build_and_analyze_hpg_docker(seed_url, conn_timeout=timeout)
+ else:
+ build_and_analyze_hpg_local(seed_url, overwrite=overwrite, conn_timeout=timeout, compress_hpg=compress_hpg)
+
+ # if timeout is not None:
+ # build_and_analyze_hpg_local_with_timeout(seed_url, timeout=timeout, overwrite=overwrite)
+ # else:
+ # if constantsModule.NEO4J_USE_DOCKER:
+ # build_and_analyze_hpg_docker(seed_url)
+ # else:
+ # build_and_analyze_hpg_local(seed_url, overwrite=overwrite)
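+
+	# example (with defaults):
+	# build_and_analyze_hpg('http://example.com', timeout=1800, overwrite=False, compress_hpg=True)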
+
+
+
+def build_and_analyze_hpg_local_with_timeout(seed_url, timeout=None, overwrite=False):
+
+ if timeout is None:
+ static_analysis_per_webpage_timeout = 1800 # 30 mins (default)
+ else:
+ static_analysis_per_webpage_timeout = timeout
+
+ webapp_folder_name = get_name_from_url(seed_url)
+ webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name)
+ if not os.path.exists(webapp_data_directory):
+		LOGGER.error("[TR] could not find the directory for HPG analysis: "+str(webapp_data_directory))
+ return -1
+
+ webpages_json_file = os.path.join(webapp_data_directory, "webpages.json")
+
+ if os.path.exists(webpages_json_file):
+ LOGGER.info('[TR] reading webpages.json')
+ fd = open(webpages_json_file, 'r')
+ webapp_pages = json.load(fd)
+ fd.close()
+
+ else:
+ LOGGER.info('[TR] webpages.json does not exist; falling back to filesystem.')
+ # fall back to analyzing all pages if the `webpages.json` file is missing
+ webapp_pages = os.listdir(webapp_data_directory)
+ # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler)
+ webapp_pages = [item for item in webapp_pages if len(item) == 64]
+
+
+ for webpage in webapp_pages:
+ webpage_folder = os.path.join(webapp_data_directory, webpage)
+ if os.path.exists(webpage_folder):
+
+			LOGGER.warning('[TR] HPG analysis for: %s'%(webpage_folder))
+
+ # do NOT re-analyze webpages
+ if str(overwrite).lower() == 'false':
+ OUTPUT_FILE = os.path.join(webpage_folder, "sinks.flows.out")
+ if os.path.exists(OUTPUT_FILE):
+					LOGGER.info('[TR] analysis results already exist for webpage: %s'%webpage_folder)
+ continue
+
+ # note: the neo4j ports as in the config.yaml must be passed to the new process
+ command = "python3 -m analyses.open_redirect.analyze_hpg_api --seedurl={0} --webpage={1} --httpport={2} --boltport={3}".format(seed_url, webpage, constantsModule.NEO4J_HTTP_PORT, constantsModule.NEO4J_BOLT_PORT)
+ cwd = constantsModule.BASE_DIR
+ ret = IOModule.run_os_command(command, cwd= cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True)
+
+ if ret < 0:
+ ## safely finish the analysis, even when timeout occurs
+ database_name = '{0}_{1}'.format(webapp_folder_name, webpage)
+ LOGGER.info('[TR] stopping neo4j for %s'%str(database_name))
+ DU.ineo_stop_db_instance(database_name)
+
+ ## remove db after analysis
+ LOGGER.info('[TR] removing neo4j for %s'%str(database_name))
+ DU.ineo_remove_db_instance(database_name)
+
+		LOGGER.info('[TR] finished HPG analysis for: %s'%(webpage_folder))
+
+
+def build_and_analyze_hpg_local(seed_url, overwrite=False, conn_timeout=None, compress_hpg=True):
+
+ webapp_folder_name = get_name_from_url(seed_url)
+ webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name)
+ if not os.path.exists(webapp_data_directory):
+		LOGGER.error("[TR] could not find the directory for HPG analysis: "+str(webapp_data_directory))
+ return -1
+
+ webpages_json_file = os.path.join(webapp_data_directory, "webpages.json")
+
+ if os.path.exists(webpages_json_file):
+ LOGGER.info('[TR] reading webpages.json')
+ fd = open(webpages_json_file, 'r')
+ webapp_pages = json.load(fd)
+ fd.close()
+
+ else:
+ LOGGER.info('[TR] webpages.json does not exist; falling back to filesystem.')
+ # fall back to analyzing all pages if the `webpages.json` file is missing
+ webapp_pages = os.listdir(webapp_data_directory)
+ # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler)
+ webapp_pages = [item for item in webapp_pages if len(item) == 64]
+
+
+ for webpage in webapp_pages:
+ webpage_folder = os.path.join(webapp_data_directory, webpage)
+ if os.path.exists(webpage_folder):
+
+			LOGGER.warning('[TR] HPG analysis for: %s'%(webpage_folder))
+
+ if str(overwrite).lower() == 'false':
+ # do NOT re-analyze webpages
+ OUTPUT_FILE = os.path.join(webpage_folder, "sinks.flows.out")
+ if os.path.exists(OUTPUT_FILE):
+					LOGGER.info('[TR] analysis results already exist for webpage: %s'%webpage_folder)
+ continue
+
+ # requirement: the database name must have a length between 3 and 63 characters
+ # must always import into the default neo4j database
+ neo4j_database_name = 'neo4j'
+
+ database_name = '{0}_{1}'.format(webapp_folder_name, webpage)
+
+ nodes_file = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME)
+ rels_file = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME)
+ rels_dynamic_file = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME)
+
+ nodes_file_gz = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME +'.gz')
+ rels_file_gz = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME +'.gz')
+ rels_dynamic_file_gz = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME +'.gz')
+
+ if os.path.exists(nodes_file) and os.path.exists(rels_file) and os.path.exists(rels_dynamic_file):
+ LOGGER.info('[TR] hpg files exist in decompressed format, skipping de-compression.')
+
+ elif os.path.exists(nodes_file_gz) and os.path.exists(rels_file_gz) and os.path.exists(rels_dynamic_file_gz):
+ LOGGER.info('[TR] de-compressing hpg.')
+ # de-compress the hpg
+ IOModule.decompress_graph(webpage_folder)
+ else:
+ LOGGER.error('[TR] The nodes/rels.csv files do not exist in %s, skipping.'%webpage_folder)
+ continue
+
+ neo4j_http_port = constantsModule.NEO4J_HTTP_PORT
+ neo4j_bolt_port = constantsModule.NEO4J_BOLT_PORT
+
+ LOGGER.warning('[TR] removing any previous neo4j instance for %s'%str(database_name))
+ DU.ineo_remove_db_instance(database_name)
+
+ LOGGER.info('[TR] creating db %s with http port %s'%(database_name, neo4j_http_port))
+ DU.ineo_create_db_instance(database_name, neo4j_http_port)
+
+ # check if the bolt port requested by the config.yaml is not the default one
+ if not ( int(neo4j_http_port) + 2 == int(neo4j_bolt_port) ):
+ LOGGER.info('[TR] setting the requested bolt port %s for db %s'%(neo4j_bolt_port, database_name))
+ DU.ineo_set_bolt_port_for_db_instance(database_name, neo4j_bolt_port)
+
+ LOGGER.info('[TR] importing the database with neo4j-admin.')
+ DU.neoadmin_import_db_instance(database_name, neo4j_database_name, nodes_file, rels_file, rels_dynamic_file)
+
+ LOGGER.info('[TR] changing the default neo4j password to enable programmatic access.')
+ DU.ineo_set_initial_password_and_restart(database_name, password=constantsModule.NEO4J_PASS)
+
+ if str(compress_hpg).lower() == 'true':
+ # compress the hpg after the model import
+ IOModule.compress_graph(webpage_folder)
+
+ LOGGER.info('[TR] waiting for the neo4j connection to be ready...')
+ time.sleep(10)
+ LOGGER.info('[TR] connection: %s'%constantsModule.NEO4J_CONN_HTTP_STRING)
+ connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150, conn=constantsModule.NEO4J_CONN_HTTP_STRING)
+ if not connection_success:
+ try:
+ LOGGER.info('[TR] stopping neo4j for %s'%str(database_name))
+ DU.ineo_stop_db_instance(database_name)
+
+ ## remove db after analysis
+ DU.ineo_remove_db_instance(database_name)
+				except Exception:
+ LOGGER.info('[TR] ran into exception while prematurely stopping neo4j for %s'%str(database_name))
+ continue
+
+ LOGGER.info('[TR] starting to run the queries.')
+ webpage_url = get_url_for_webpage(webpage_folder)
+ try:
+ DU.exec_fn_within_transaction(open_redirect_py_traversals.run_traversals, webpage_url, webpage_folder, webpage, conn=constantsModule.NEO4J_CONN_STRING, conn_timeout=conn_timeout)
+ except Exception as e:
+ LOGGER.error(e)
+ LOGGER.error('[TR] neo4j connection error.')
+ outfile = os.path.join(webpage_folder, "sinks.flows.out")
+ if not os.path.exists(outfile):
+ with open(outfile, 'w+') as fd:
+ error_json = {"error": str(e)}
+ json.dump(error_json, fd, ensure_ascii=False, indent=4)
+
+ LOGGER.info('[TR] stopping neo4j for %s'%str(database_name))
+ DU.ineo_stop_db_instance(database_name)
+
+ ## remove db after analysis
+ LOGGER.info('[TR] removing neo4j for %s'%str(database_name))
+ DU.ineo_remove_db_instance(database_name)
+
+
+def build_and_analyze_hpg_docker(seed_url, conn_timeout=None):
+
+ """
+ @param {string} seed_url
+ @description: imports an HPG inside a neo4j docker instance and runs traversals over it.
+
+ """
+ webapp_folder_name = get_name_from_url(seed_url)
+ webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name)
+ if not os.path.exists(webapp_data_directory):
+		LOGGER.error("[Traversals] could not find the directory for HPG analysis: "+str(webapp_data_directory))
+ return -1
+
+ webapp_pages = os.listdir(webapp_data_directory)
+ # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler)
+ webapp_pages = [item for item in webapp_pages if len(item) == 64]
+
+
+ # neo4j config
+ build = True
+ build_container = True
+ query = True
+ stop_container = True
+
+ # must use the default docker container db name which is the only active db in docker
+ database_name = 'neo4j'
+ container_name = 'neo4j_container_'
+
+ for each_webpage in webapp_pages:
+
+ relative_import_path = os.path.join(webapp_folder_name, each_webpage)
+		container_name = 'neo4j_container_' + each_webpage
+ webpage = os.path.join(webapp_data_directory, each_webpage)
+ LOGGER.warning('HPG for: %s'%(webpage))
+
+ # de-compress the hpg
+ IOModule.decompress_graph(webpage)
+
+ # import the CSV files into an active neo4j database inside a docker container
+ if build:
+ nodes_file = os.path.join(webpage, constantsModule.NODE_INPUT_FILE_NAME)
+ rels_file = os.path.join(webpage, constantsModule.RELS_INPUT_FILE_NAME)
+ if not (os.path.exists(nodes_file) and os.path.exists(rels_file)):
+ LOGGER.error('The HPG nodes.csv / rels.csv files do not exist in the provided folder, skipping...')
+ continue
+
+ # must build a container only once
+ if build_container:
+
+ # remove the old container & database if it exists
+ dockerModule.stop_neo4j_container(container_name)
+ dockerModule.remove_neo4j_container(container_name)
+ dockerModule.remove_neo4j_database(database_name, container_name)
+ time.sleep(5)
+
+ dockerModule.create_neo4j_container(container_name)
+ LOGGER.info('waiting 5 seconds for the neo4j container to be ready.')
+ time.sleep(5)
+
+ LOGGER.info('importing data inside container.')
+ dockerModule.import_data_inside_container(container_name, database_name, relative_import_path, 'CSV')
+ LOGGER.info('waiting for the tcp port 7474 of the neo4j container to be ready...')
+ connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150)
+ if not connection_success:
+ sys.exit(1)
+ else:
+ dockerModule.start_neo4j_container(container_name)
+ LOGGER.info('waiting for the tcp port 7474 of the neo4j container to be ready...')
+ connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150)
+ if not connection_success:
+ sys.exit(1)
+
+ # compress the hpg after the model import
+ IOModule.compress_graph(webpage)
+
+ # step3: run the vulnerability detection queries
+ if query:
+ webpage_url = get_url_for_webpage(webpage)
+ DU.exec_fn_within_transaction(open_redirect_py_traversals.run_traversals, webpage_url, webpage, each_webpage, conn_timeout=conn_timeout)
+
+
+ # stop the neo4j docker container
+ if stop_container:
+ dockerModule.stop_neo4j_container(container_name)
diff --git a/analyses/open_redirect/traversals.js b/analyses/open_redirect/traversals.js
new file mode 100644
index 0000000..ac4a1c4
--- /dev/null
+++ b/analyses/open_redirect/traversals.js
@@ -0,0 +1,373 @@
+/*
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+	Lightweight module for identifying open redirect sources/sinks
+*/
+
+const constantsModule = require('./../../engine/lib/jaw/constants');
+const esprimaParser = require('./../../engine/lib/jaw/parser/jsparser');
+const globalsModule = require('./globals.js');
+const walkes = require('walkes');
+const escodgen = require('escodegen');
+var Set = require('./../../engine/lib/analyses/set');
+const DEBUG = false;
+
+
+// -------------------------------------------------------------------------------- //
+// SemanticTypes
+// -------------------------------------------------------------------------------- //
+
+
+const WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL";
+const WR_WIN_LOC_URL = "WR_WIN_LOC_URL";
+const WR_FRAME_URL = "WR_FRAME_URL";
+
+// -------------------------------------------------------------------------------- //
+// CLS
+// -------------------------------------------------------------------------------- //
+
+function requireUncached(module) {
+ delete require.cache[require.resolve(module)];
+ return require(module);
+}
+
+/**
+ * OpenRedirectSourceSinkAnalyzer
+ * @constructor
+ */
+function OpenRedirectSourceSinkAnalyzer() {
+ "use strict";
+ // re-instantiate every time
+ this.api = require('./../../engine/model_builder');
+ this.scopeCtrl = require('./../../engine/lib/jaw/scope/scopectrl');
+ this.modelCtrl = require('./../../engine/lib/jaw/model/modelctrl');
+ this.modelBuilder = require('./../../engine/lib/jaw/model/modelbuilder');
+ this.scopeCtrl.clear();
+ this.modelCtrl.clear();
+}
+
+
+// -------------------------------------------------------------------------------- //
+// Utility
+// -------------------------------------------------------------------------------- //
+
+function hasIdentifierChildren(node){
+ var flag = false;
+ if(!node) return flag;
+ if(node.type === "Identifier"){
+ flag = true;
+ }else{
+ walkes(node, {
+ Identifier: function(node, recurse){
+ if(node.type === "Identifier"){
+ flag = true;
+ }
+ }
+ });
+ }
+ return flag;
+}
+
+
+function getIdentifierChildren(node){
+
+ if(!node) return [];
+
+ if(node.type === "Identifier"){
+ return [node.name];
+ }else{
+
+ let identifier_names = new Set();
+ walkes(node, {
+
+ // CallExpression: function(node, recurse){
+ // recurse(node.arguments);
+ // },
+ FunctionExpression: function(node,recurse){
+ // we do not want function expression arguments
+ // thus, do not recurse here
+ },
+ CallExpression: function(node, recurse){
+ // we want the call expression arguments, e.g., JSON.stringify(x)
+ // here, recurse only on the arguments
+ for(let arg of node.arguments){
+ recurse(arg);
+ }
+ },
+ MemberExpression: function(node, recurse){
+ // we only care about the member expression base objects
+ // except when we have a `this.property` expression
+ // where we are interested in the property part of the member expression
+ let member_expression = escodgen.generate(node);
+ if(member_expression.startsWith("this.")){ // handle ThisExpression
+ member_expression = member_expression.replace('this.', '')
+				let identifier_name = member_expression.includes('.') ? member_expression.substr(0, member_expression.indexOf('.')) : member_expression;
+ if(!globalsModule.js_builtin.includes(identifier_name)){
+ identifier_names.add(identifier_name);
+ }
+ }else{
+ recurse(node.object);
+ }
+ },
+ ObjectExpression: function(node, recurse){
+ // recurse on object expression values only
+ // as keys cannot be tainted
+ node.properties.forEach(prop=>{
+ recurse(prop.value);
+ })
+ },
+ Identifier: function(node, recurse){
+ if(node.type === "Identifier"){
+ if(!globalsModule.js_builtin.includes(node.name)){
+ identifier_names.add(node.name);
+ }
+ }
+ }
+ });
+
+		return [].concat(identifier_names.values()); // convert the Set() to a plain array
+ }
+}
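+// example: for the expression `JSON.stringify(x) + this.y.z`, getIdentifierChildren returns
+// ['x', 'y']: call arguments are recursed, a `this.<prop>` expression keeps the property base,
+// and built-in names are filtered out via globalsModule.js_builtin.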
+
+// -------------------------------------------------------------------------------- //
+// API
+// -------------------------------------------------------------------------------- //
+
+OpenRedirectSourceSinkAnalyzer.prototype.build_static_model = async function(code){
+
+ let theSourceSinkAnalyzer = this;
+ let language = constantsModule.LANG.js;
+ await theSourceSinkAnalyzer.api.initializeModelsFromSource(code, language);
+ await theSourceSinkAnalyzer.api.buildInitializedModels();
+}
+
+
+OpenRedirectSourceSinkAnalyzer.prototype.get_sources = async function(){
+	// intentionally empty: source detection for open redirects is done on the
+	// graph side (see _get_semantic_types in analyses/open_redirect/traversals_cypher.py)
+}
+
+
+OpenRedirectSourceSinkAnalyzer.prototype.get_sinks = async function(){
+
+ /*
+ ====================
+ Sinks
+ ====================
+
+ window.location = TAINT;
+ window.location.href = TAINT;
+ window.location.replace(TAINT);
+ window.location.assign(TAINT);
+	window.open(TAINT);
+ frame.src = TAINT;
+
+ */
+
+ var outputs = [];
+ function appendSinkOutput(node, location, id, script_id, semantic_types, sink_code, sink_type, taint_possibility, sink_identifier_names){
+
+		// Array.prototype.concat returns a new array, so the result must be re-assigned
+		if(node.semanticType){
+			node.semanticType = node.semanticType.concat(['sink', sink_type], semantic_types);
+		}else{
+			node.semanticType = ['sink', sink_type].concat(semantic_types);
+		}
+
+ outputs.push({
+ "location": location,
+ "id": id,
+ "script": script_id,
+ "semantic_types": semantic_types,
+ "sink_code": sink_code,
+ "sink_type": sink_type,
+ "taint_possibility": taint_possibility, // true if the sink has at least one Identifier (i.e., not just literals)
+ "sink_identifiers": sink_identifier_names,
+ });
+ }
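+
+	// Shape of each record pushed to `outputs` (values illustrative):
+	// {
+	//   "location": 12, "id": 57, "script": "<script id>",
+	//   "semantic_types": ["WR_WIN_LOC_URL"],
+	//   "sink_code": "window.location = target",
+	//   "sink_type": "window.location",
+	//   "taint_possibility": { "WR_WIN_LOC_URL": true },
+	//   "sink_identifiers": { "WR_WIN_LOC_URL": ["target"] }
+	// }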
+
+ let engine = this;
+ let pageScopeTrees = engine.scopeCtrl.pageScopeTrees;
+ if(!pageScopeTrees){
+ return [];
+ }
+ for await (let scopeTree of pageScopeTrees){
+ const pageModels = engine.modelCtrl.getPageModels(scopeTree);
+ const intraProceduralModels = pageModels.intraProceduralModels;
+ const ast = scopeTree.scopes[0].ast;
+ const script_id = ast.value;
+
+ walkes(ast, {
+
+ AssignmentExpression: function(node, recurse){
+ // CASE:
+ // window.location = TAINT;
+ // window.location.href = TAINT;
+ // location.href = TAINT;
+ if(node && node.left && node.left.type==="MemberExpression" && (
+ (node.left.object.type==="Identifier" && (node.left.object.name==="window" || node.left.object.name==="win" || node.left.object.name==="w") && node.left.property.type==="Identifier" && node.left.property.name==="location")
+ ||
+ (node.left.object.type==="Identifier" && (node.left.object.name==="location") && node.left.property.type==="Identifier" && node.left.property.name==="href")
+ ||
+ (node.left.object.type==="MemberExpression" && node.left.object.object.type==="Identifier" && (node.left.object.object.name==="window" || node.left.object.object.name==="win" || node.left.object.object.name==="w") && node.left.object.property.type==="Identifier" && node.left.object.property.name==="location" && node.left.property.type==="Identifier" && node.left.property.name==="href")
+ )
+ ){
+
+					let taint_argument = node.right;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+					var identifiers_object = {
+						WR_WIN_LOC_URL: identifier_names
+					}
+ var taint_possibility_object = {
+ WR_WIN_LOC_URL: taint_possibility
+ }
+ appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodgen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+ }
+ else if (node && node.left && node.left.type==="MemberExpression" && (
+ (node.left.object.type==="Identifier" && (node.left.object.name==="frame" || node.left.object.name==="iframe") && node.left.property.type==="Identifier" && node.left.property.name==="src"))
+ ){
+
+					let taint_argument = node.right;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+					var identifiers_object = {
+						WR_FRAME_URL: identifier_names
+					}
+ var taint_possibility_object = {
+ WR_FRAME_URL: taint_possibility
+ }
+ appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_FRAME_URL], escodgen.generate(node), "frame.src", taint_possibility_object, identifiers_object);
+ }
+
+ if(node && node.right && node.right.type === "FunctionExpression"){
+ recurse(node.right)
+ }
+ },
+
+ CallExpression: function(node, recurse){
+
+ // CASE: window.open(URL)
+ if(node.callee.type === "MemberExpression" && node.callee.object.type === "Identifier" &&
+ (node.callee.object.name === "window" || node.callee.object.name === "win" || node.callee.object.name === "w") &&
+ node.callee.property.type === "Identifier" && node.callee.property.name === "open"){
+
+
+ var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+ var taint_possibility = false;
+ var identifier_names = [];
+ if(taint_argument){
+ identifier_names = getIdentifierChildren(taint_argument);
+ if(identifier_names.length > 0){
+ taint_possibility = true;
+ }
+ }
+
+ var identifiers_object = {
+ WR_WIN_OPEN_URL: identifier_names
+ }
+ var taint_possibility_object = {
+ WR_WIN_OPEN_URL: taint_possibility
+ }
+ appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_OPEN_URL], escodgen.generate(node), "window.open()", taint_possibility_object, identifiers_object);
+
+ }
+ // CASE:
+ // location.replace(TAINT);
+ // location.assign(TAINT);
+ else if(node.callee.type === "MemberExpression" && node.callee.object.type === "Identifier" &&
+ (node.callee.object.name === "location") &&
+ node.callee.property.type === "Identifier" && (node.callee.property.name === "replace" || node.callee.property.name === "assign")){
+
+ var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+ var taint_possibility = false;
+ var identifier_names = [];
+ if(taint_argument){
+ identifier_names = getIdentifierChildren(taint_argument);
+ if(identifier_names.length > 0){
+ taint_possibility = true;
+ }
+ }
+
+ var identifiers_object = {
+ WR_WIN_LOC_URL: identifier_names
+ }
+ var taint_possibility_object = {
+ WR_WIN_LOC_URL: taint_possibility
+ }
+ appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodgen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+
+ }
+ // CASE:
+ // window.location.replace(TAINT);
+ // window.location.assign(TAINT);
+ else if(node.callee.type === "MemberExpression" && node.callee.property.type==="Identifier" && (node.callee.property.name==="replace" || node.callee.property.name==="assign") &&
+ node.callee.object.type === "MemberExpression" && node.callee.object.object.type === "Identifier" && (node.callee.object.object.name === "window" || node.callee.object.object.name === "win" || node.callee.object.object.name === "w") &&
+ node.callee.object.property.type === "Identifier" && node.callee.object.property.name === "location"
+ ){
+
+ var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+ var taint_possibility = false;
+ var identifier_names = [];
+ if(taint_argument){
+ identifier_names = getIdentifierChildren(taint_argument);
+ if(identifier_names.length > 0){
+ taint_possibility = true;
+ }
+ }
+
+ var identifiers_object = {
+ WR_WIN_LOC_URL: identifier_names
+ }
+ var taint_possibility_object = {
+ WR_WIN_LOC_URL: taint_possibility
+ }
+ appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodgen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+
+ }
+				// handle nested or chained call expressions in a single statement,
+				// e.g., f(g(taint)) or promise chains like fetch(url).then(resp => ...)
+ else{
+ recurse(node.callee);
+ for(let arg of node.arguments){
+ recurse(arg);
+ }
+
+ }
+ }
+
+
+ });
+
+ }
+
+ return outputs;
+
+}
+
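+// usage sketch (illustrative; assumes this module is analyses/open_redirect/traversals.js):
+//   const { OpenRedirectSourceSinkAnalyzer } = require('./traversals');
+//   const analyzer = new OpenRedirectSourceSinkAnalyzer();
+//   await analyzer.build_static_model(scriptCode);
+//   const sinks = await analyzer.get_sinks(); // list of sink records (see appendSinkOutput)
+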
+module.exports = {
+ OpenRedirectSourceSinkAnalyzer: OpenRedirectSourceSinkAnalyzer,
+};
\ No newline at end of file
diff --git a/analyses/open_redirect/traversals_cypher.py b/analyses/open_redirect/traversals_cypher.py
new file mode 100644
index 0000000..f2b5085
--- /dev/null
+++ b/analyses/open_redirect/traversals_cypher.py
@@ -0,0 +1,570 @@
+# -*- coding: utf-8 -*-
+
+"""
+ Copyright (C) 2024 Soheil Khodayari, CISPA
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+
+ Description:
+ ------------
+ traversals for detecting open redirect vulnerabilities
+
+ Usage:
+ -----------
+ > import analyses.open_redirect.traversals_cypher
+
+"""
+
+import subprocess
+import hashlib
+import urllib.parse
+import os
+import time
+import re
+import sys
+import jsbeautifier
+import json
+
+import constants as constantsModule
+import utils.utility as utilityModule
+import hpg_neo4j.db_utility as DU
+import hpg_neo4j.query_utility as QU
+import analyses.general.data_flow as DF
+import analyses.open_redirect.semantic_types as SemTypeDefinitions
+from utils.logging import logger as LOGGER
+from neo4j import GraphDatabase
+from datetime import datetime
+
+
+# ----------------------------------------------------------------------- #
+# Globals
+# ----------------------------------------------------------------------- #
+
+
+DEBUG = False
+
+
+# ----------------------------------------------------------------------- #
+# Utility Functions
+# ----------------------------------------------------------------------- #
+
+
+def _unquote_url(url):
+
+ """
+ @param {string} url
+ @return {string} decoded url
+ """
+	out = urllib.parse.unquote(url)
+	out = out.replace('&amp;', '&') # also decode HTML-encoded ampersands
+
+ return out
+
+def _get_all_occurences(needle, haystack):
+
+	"""
+	@param {string} needle
+	@param {string} haystack
+	@description finds all occurrences of needle in haystack
+	@return {array} a list of start indices of the occurrences of needle in haystack
+	"""
+	# note: needle is treated as a regular expression; use re.escape(needle) for literal matches
+	out = [m.start() for m in re.finditer(needle, haystack)]
+ return out
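+
+# example (illustrative): _get_all_occurences('ab', 'abcab') == [0, 3]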
+
+
+def _get_current_timestamp():
+
+ """
+ @return {string} current date and time string
+ """
+ now = datetime.now()
+ dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
+ return dt_string
+
+def _get_unique_list(lst):
+
+ """
+ @param {list} lst
+ @return remove duplicates from list and return the resulting array
+ """
+ return list(set(lst))
+
+
+def _get_orderd_unique_list(lst):
+
+ """
+ @param {list} lst
+ @return remove duplicates from list and return the resulting array maintaining the original list order
+ """
+ final_list = []
+ for item in lst:
+ if item not in final_list:
+ final_list.append(item)
+ return final_list
+
+def _get_line_of_location(esprima_location_str):
+
+	"""
+	@param esprima_location_str
+	@return start line number of the esprima location object
+	"""
+ start_index = esprima_location_str.index('line:') + len('line:')
+ end_index = esprima_location_str.index(',')
+ out = esprima_location_str[start_index:end_index]
+ return out
+
+def _get_location_part(nid_string):
+
+ """
+ @param {string} nid_string: string containing node id and location
+	@return {string} the location part of nid_string
+ """
+ start_index = nid_string.index('__Loc=') + len('__Loc=')
+ return nid_string[start_index:]
+
+def _get_node_id_part(nid_string):
+
+ """
+ @param {string} nid_string: string containing node id and location
+	@return {string} the node id part of nid_string
+ """
+ start_index = nid_string.find('__nid=')
+ if start_index != -1:
+ start_index = start_index + len('__nid=')
+ else:
+		start_index = 0 # handle the case where the function name is not stored at the beginning
+
+ end_index = nid_string.index('__Loc=')
+ return nid_string[start_index:end_index]
+
+
+def _get_function_name_part(nid_string):
+
+ """
+ @param {string} nid_string: string containing node id and location
+ @return {string} function_name string
+ """
+ end_index = nid_string.index('__nid=')
+ return nid_string[:end_index]
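+
+# example (illustrative): for the nid string built in run_traversals() below,
+#   nid = 'window.location__nid=57__Loc=12'
+# _get_function_name_part(nid) == 'window.location'
+# _get_node_id_part(nid) == '57'
+# _get_location_part(nid) == '12'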
+
+
+
+def _get_value_of_identifer_or_literal(node):
+	"""
+	@param {PGNode} node
+	@return {list} the pair [value, type] of an identifier or literal node
+	"""
+ if node['Type'] == 'Identifier':
+ return [node['Code'], node['Type']]
+ elif node['Type'] == 'Literal':
+ value = node['Value']
+ raw = node['Raw']
+ if value == '{}' and (raw.strip('\'').strip("\"").strip() != value):
+ return [node['Raw'], node['Type']]
+ else:
+ return [node['Value'], node['Type']]
+
+ return ['', '']
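+
+# example (illustrative): for a Literal node {'Type': 'Literal', 'Value': 'x', 'Raw': "'x'"},
+# _get_value_of_identifer_or_literal returns ['x', 'Literal']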
+
+
+
+# ----------------------------------------------------------------------- #
+# Semantic Type Association to Program Slices
+# ----------------------------------------------------------------------- #
+
+def _get_semantic_types(program_slices, num_slices):
+
+ """
+ @param {list} program_slices: slices of JS program
+ @param {int} num_slices: length of program_slices list
+ @return {list} the semantic types associated with the given program slices.
+ """
+
+
+ semantic_type = SemTypeDefinitions.NON_REACHABLE
+ semantic_types = []
+
+
+ # sources
+ WEB_STORAGE_STRINGS = [
+ 'localStorage',
+ 'sessionStorage'
+ ]
+
+ WIN_LOC_STRINGS = [
+ 'window.location',
+ 'win.location',
+ 'w.location',
+ 'location.href',
+ 'location.hash',
+ 'loc.href',
+ 'loc.hash',
+ 'History.getBookmarkedState',
+ ]
+
+ WIN_NAME_STRINGS = [
+ 'window.name',
+ 'win.name'
+ ]
+
+ DOM_READ_STRINGS = [
+ 'document.getElement',
+ 'document.querySelector',
+ 'doc.getElement',
+ 'doc.querySelector',
+ '.getElementBy',
+ '.getElementsBy',
+ '.querySelector',
+ '$(',
+ 'jQuery(',
+ '.attr(',
+ '.getAttribute(',
+ '.readAttribute('
+ ]
+
+ DOM_READ_COOKIE_STRINGS = [
+ 'document.cookie',
+ 'doc.cookie',
+ ]
+
+ PM_STRINGS = [
+ 'event.data',
+ 'evt.data'
+ ]
+
+ DOC_REF_STRINGS = [
+ 'document.referrer',
+ 'doc.referrer',
+ 'd.referrer',
+ ]
+
+
+	# map each list of source patterns to its semantic type
+	PATTERN_TYPE_MAP = [
+		(WIN_LOC_STRINGS, SemTypeDefinitions.RD_WIN_LOC),
+		(WIN_NAME_STRINGS, SemTypeDefinitions.RD_WIN_NAME),
+		(DOC_REF_STRINGS, SemTypeDefinitions.RD_DOC_REF),
+		(PM_STRINGS, SemTypeDefinitions.RD_PM),
+		(DOM_READ_STRINGS, SemTypeDefinitions.RD_DOM_TREE),
+		(WEB_STORAGE_STRINGS, SemTypeDefinitions.RD_WEB_STORAGE),
+		(DOM_READ_COOKIE_STRINGS, SemTypeDefinitions.RD_COOKIE),
+	]
+
+	for i in range(num_slices):
+		program_slice = program_slices[i]
+		code = program_slice[0]
+		idents = program_slice[2]
+
+		# match the slice code and each of its identifiers against every source pattern
+		for text in [code] + list(idents):
+			for patterns, semantic_type in PATTERN_TYPE_MAP:
+				if any(pattern in text for pattern in patterns):
+					semantic_types.append(semantic_type)
+
+
+ if len(semantic_types):
+ return list(set(semantic_types))
+
+ return [SemTypeDefinitions.NON_REACHABLE]
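+
+# example (illustrative): a slice whose code contains `window.location.hash`
+# matches WIN_LOC_STRINGS, so _get_semantic_types returns [SemTypeDefinitions.RD_WIN_LOC]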
+
+
+def _get_semantic_type_set(semantic_type_list):
+
+ """
+ @param {list} semantic_type_list: list of types that may include duplicate semantic types
+ @return {list} a unique semantic type list
+ """
+
+ semantic_type_list = _get_unique_list(semantic_type_list)
+ if len(semantic_type_list) > 1:
+ if SemTypeDefinitions.NON_REACHABLE in semantic_type_list:
+ semantic_type_list.remove(SemTypeDefinitions.NON_REACHABLE)
+ return semantic_type_list
+
+ elif len(semantic_type_list) == 1:
+ return semantic_type_list
+
+ else:
+ return [SemTypeDefinitions.NON_REACHABLE]
+
+
+
+# ----------------------------------------------------------------------- #
+# Main: Taint Analysis
+# ----------------------------------------------------------------------- #
+
+
+def run_traversals(tx, webpage_url, webpage_directory, webpage_directory_hash='xxx', named_properties=[]):
+	"""
+	@param {neo4j-transaction} tx: active neo4j database transaction
+	@param {string} webpage_url
+	@param {string} webpage_directory
+	@param {string} webpage_directory_hash
+	@param {list} named_properties: `id` and `name` attributes in HTML that can be accessed through the `document` API
+	@return writes the detected open redirect flows to `sinks.flows.out` and
+	`sinks.flows.out.json` in webpage_directory; returns -1 if the sinks file is missing
+	"""
+
+
+	sinks_file = os.path.join(webpage_directory, "sinks.out.json")
+	if not os.path.exists(sinks_file):
+		LOGGER.error('[TR] sinks.out.json file does not exist in %s'%webpage_directory)
+		return -1
+
+	with open(sinks_file, 'r') as fd:
+		sinks_json = json.load(fd)
+ sinks_list = sinks_json['sinks']
+
+ storage = {}
+
+
+ for sink_node in sinks_list:
+
+ taintable_sink_identifiers = []
+
+ sink_identifiers_dict = sink_node["sink_identifiers"]
+ sink_taintable_semantic_types = []
+		sink_taint_possibility_vector = sink_node["taint_possibility"]
+
+		for semantic_type in sink_taint_possibility_vector:
+			if sink_taint_possibility_vector[semantic_type] == True:
+				sink_taintable_semantic_types.append(semantic_type)
+				taintable_sink_identifiers.extend(sink_identifiers_dict[semantic_type])
+
+
+ sink_id = str(sink_node["id"])
+ sink_location = str(sink_node["location"])
+ sink_type = sink_node["sink_type"]
+ sink_cfg_node = QU.get_ast_topmost(tx, {"Id": "%s"%sink_id})
+
+
+ nid = sink_type + '__nid=' + sink_id + '__Loc=' + sink_location
+
+ sink_node["taintable_semantic_types"] = sink_taintable_semantic_types
+ sink_node["cfg_node_id"] = sink_cfg_node["Id"]
+
+ storage[nid] = {
+ "sink": sink_node,
+ "variables": {}
+ }
+
+
+
+ for varname in taintable_sink_identifiers:
+ slice_values = DF._get_varname_value_from_context(tx, varname, sink_cfg_node)
+
+ if DEBUG: print(varname, slice_values)
+
+ semantic_types = _get_semantic_types(slice_values,len(slice_values))
+ storage[nid]["variables"][varname]= {
+ "slices": slice_values,
+ "semantic_types": semantic_types
+ }
+
+ lst = storage[nid]["sink"]["taintable_semantic_types"]
+ lst.extend(semantic_types)
+ storage[nid]["sink"]["taintable_semantic_types"] = lst
+
+
+
+
+ print_buffer = []
+ json_buffer = {}
+
+ timestamp = _get_current_timestamp()
+ sep = utilityModule.get_output_header_sep()
+ sep_sub = utilityModule.get_output_subheader_sep()
+ print_buffer.append(sep)
+ print_buffer.append('[timestamp] generated on %s\n'%timestamp)
+ print_buffer.append(sep+'\n')
+ print_buffer.append('[*] webpage URL: %s\n\n'%webpage_url)
+ print_buffer.append(sep_sub+'\n')
+
+ json_buffer["url"] = webpage_url
+ json_buffer["flows"] = []
+ for sink_nid in storage:
+
+ sink_node = storage[sink_nid]["sink"]
+
+ print_buffer.append('[*] webpage: %s\n'%webpage_directory_hash)
+ script_name = sink_node["script"].split('/')[-1]
+ print_buffer.append('[*] script: %s\n'%script_name)
+ semantic_types_for_sink = _get_unique_list(sink_node["taintable_semantic_types"])
+ print_buffer.append('[*] semantic_types: {0}\n'.format(semantic_types_for_sink))
+ print_buffer.append('[*] node_id: %s\n'%str(sink_node["id"]))
+ print_buffer.append('[*] cfg_node_id: %s\n'%str(sink_node["cfg_node_id"]))
+ print_buffer.append('[*] loc: %s\n'%sink_node["location"])
+ print_buffer.append('[*] sink_type: %s\n'%(sink_node["sink_type"]))
+ print_buffer.append('[*] sink_code: %s\n'%sink_node["sink_code"])
+
+ json_flow_object = {
+ "webpage": webpage_directory_hash,
+ "script": script_name,
+ "semantic_types": semantic_types_for_sink,
+ "node_id": str(sink_node["id"]),
+ "cfg_node_id": str(sink_node["cfg_node_id"]),
+ "loc": sink_node["location"],
+ "sink_type": sink_node["sink_type"],
+ "sink_code": sink_node["sink_code"],
+ "program_slices": {},
+ }
+
+ program_slices_dict = storage[sink_nid]["variables"]
+ varnames = program_slices_dict.keys()
+ counter = 1
+
+
+ for varname in varnames:
+
+ program_slices = program_slices_dict[varname]["slices"]
+ num_slices = len(program_slices)
+ varname_semantic_types = program_slices_dict[varname]["semantic_types"]
+
+ idx = 0
+ for i in range(num_slices):
+ idx +=1
+ program_slice = program_slices[i]
+ loc = _get_line_of_location(program_slice[3])
+ code = program_slice[0]
+
+ if 'function(' in code:
+ code = jsbeautifier.beautify(code) # pretty print function calls
+
+
+ current_slice = {
+ "index": str(idx),
+ "loc": loc,
+ "code": code,
+ }
+
+				if i == 0 and varname in code:
+					a = '\n%d:%s variable=%s\n'%(counter, str(varname_semantic_types), varname)
+					counter += 1
+					b = """\t%s (loc:%s)- %s\n"""%(str(idx), loc, code)
+					print_buffer += [a, b]
+				else:
+					a = """\t%s (loc:%s)- %s\n"""%(str(idx), loc, code)
+					print_buffer += [a]
+
+				# the JSON record is identical in both branches above
+				if varname not in json_flow_object["program_slices"]:
+					json_flow_object["program_slices"][varname] = {
+						"semantic_types": varname_semantic_types,
+						"slices": [current_slice],
+					}
+				else:
+					json_flow_object["program_slices"][varname]["slices"].append(current_slice)
+
+ json_buffer["flows"].append(json_flow_object)
+ print_buffer.append('\n\n')
+ print_buffer.append(sep_sub)
+
+ output_file = os.path.join(webpage_directory, "sinks.flows.out")
+ with open(output_file, "w+") as fd:
+ for line in print_buffer:
+ fd.write(line)
+
+ output_file_json = os.path.join(webpage_directory, "sinks.flows.out.json")
+ with open(output_file_json, "w+") as fd:
+ json.dump(json_buffer, fd, ensure_ascii=False, indent=4)
+
+
+ LOGGER.info('[TR] finished running the queries.')
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
index 18fb717..037d964 100644
--- a/config.yaml
+++ b/config.yaml
@@ -94,6 +94,12 @@ cs_csrf:
static: false
static_neo4j: false
+open_redirect:
+ enabled: false
+ passes:
+ crawling: false
+ static: false
+ static_neo4j: false
request_hijacking:
enabled: true
diff --git a/install.sh b/install.sh
index 678e0cd..1f22a58 100755
--- a/install.sh
+++ b/install.sh
@@ -14,6 +14,7 @@ sudo apt install -y chromium-browser
(cd analyses/domclobbering && npm install)
(cd analyses/cs_csrf && npm install)
(cd analyses/request_hijacking && npm install)
+(cd analyses/open_redirect && npm install)
(cd engine && npm install)
(cd engine/lib/jaw/dom-points-to && npm install)
diff --git a/run_pipeline.py b/run_pipeline.py
index f6d2c12..625fd46 100644
--- a/run_pipeline.py
+++ b/run_pipeline.py
@@ -44,6 +44,10 @@
import analyses.request_hijacking.static_analysis_py_api as request_hijacking_neo4j_analysis_api
import analyses.request_hijacking.verification_api as request_hijacking_verification_api
+import analyses.open_redirect.static_analysis_api as or_sast_model_construction_api
+import analyses.open_redirect.static_analysis_py_api as or_neo4j_analysis_api
+
+
def is_website_up(uri):
try:
response = requests.head(uri, timeout=20)
@@ -246,6 +250,7 @@ def main():
# crawling
if (config['domclobbering']['enabled'] and config['domclobbering']["passes"]["crawling"]) or \
(config['cs_csrf']['enabled'] and config['cs_csrf']["passes"]["crawling"]) or \
+ (config['open_redirect']['enabled'] and config['open_redirect']["passes"]["crawling"]) or \
(config['request_hijacking']['enabled'] and config['request_hijacking']["passes"]["crawling"]):
@@ -308,6 +313,19 @@ def main():
CSRFTraversalsModule.build_and_analyze_hpg(website_url)
LOGGER.info("finished HPG construction and analysis over neo4j for site %s."%(website_url))
+ # open redirects
+ if config['open_redirect']['enabled']:
+ # static analysis
+ if config['open_redirect']["passes"]["static"]:
+ LOGGER.info("static analysis for site %s."%(website_url))
+ or_sast_model_construction_api.start_model_construction(website_url, iterative_output=iterative_output, memory=static_analysis_memory, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite_hpg=static_analysis_overwrite_hpg)
+ LOGGER.info("successfully finished static analysis for site %s."%(website_url))
+
+ # static analysis over neo4j
+ if config['open_redirect']["passes"]["static_neo4j"]:
+ LOGGER.info("HPG construction and analysis over neo4j for site %s."%(website_url))
+ or_neo4j_analysis_api.build_and_analyze_hpg(website_url, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite=static_analysis_overwrite_hpg)
+ LOGGER.info("finished HPG construction and analysis over neo4j for site %s."%(website_url))
# request hijacking
if config['request_hijacking']['enabled']:
@@ -375,6 +393,7 @@ def main():
# crawling
if (config['domclobbering']['enabled'] and config['domclobbering']["passes"]["crawling"]) or \
(config['cs_csrf']['enabled'] and config['cs_csrf']["passes"]["crawling"]) or \
+ (config['open_redirect']['enabled'] and config['open_redirect']["passes"]["crawling"]) or \
(config['request_hijacking']['enabled'] and config['request_hijacking']["passes"]["crawling"]):
LOGGER.info("crawling site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
@@ -420,6 +439,18 @@ def main():
CSRFTraversalsModule.build_and_analyze_hpg(website_url)
LOGGER.info("finished HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
+ # open redirect
+ if config['open_redirect']['enabled']:
+ # static analysis
+ if config['open_redirect']["passes"]["static"]:
+ LOGGER.info("static analysis for site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
+ or_sast_model_construction_api.start_model_construction(website_url, iterative_output=iterative_output, memory=static_analysis_memory, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite_hpg=static_analysis_overwrite_hpg)
+ LOGGER.info("successfully finished static analysis for site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
+
+ if config['open_redirect']["passes"]["static_neo4j"]:
+ LOGGER.info("HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
+ or_neo4j_analysis_api.build_and_analyze_hpg(website_url, timeout=static_analysis_per_webpage_timeout, overwrite=static_analysis_overwrite_hpg, compress_hpg=static_analysis_compress_hpg)
+ LOGGER.info("finished HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
# request hijacking