diff --git a/README.md b/README.md index f185dd9..1526c36 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ JAW has a Github pages website available at . + + + Description: + ------------ + Creates a neo4j graph db for a given webpage and runs the analysis queries + + + Usage: + ------------ + > python3 -m analyses.open_redirect.analyze_hpg_api --seedurl=http://example.com --webpage=xyz + +""" + +import os +import sys +import time +import argparse +import json +import constants as constantsModule +import utils.io as IOModule +import utils.utility as utilityModule +import docker.neo4j.manage_container as dockerModule +import hpg_neo4j.db_utility as DU +import hpg_neo4j.query_utility as QU +import analyses.open_redirect.traversals_cypher +from utils.logging import logger as LOGGER + + +def get_url_for_webpage(webpage_directory): + content = None + fd = open(os.path.join(webpage_directory, "url.out"), "r") + content = fd.read() + fd.close() + return content + +def main(): + + + BASE_DIR= constantsModule.BASE_DIR + + p = argparse.ArgumentParser(description='This script runs the tool pipeline.') + + + + p.add_argument('--seedurl', "-U", + default='http://example.com', + help='the seed URL of the app to analyze (default: %(default)s)', + type=str) + + p.add_argument('--webpage', "-W", + default='xyz', + help='webpage folder name (default: %(default)s)', + type=str) + + p.add_argument('--httpport', "-H", + default=constantsModule.NEO4J_HTTP_PORT, + help='http port for neo4j (default: %(default)s)', + type=str) + + p.add_argument('--boltport', "-B", + default=constantsModule.NEO4J_BOLT_PORT, + help='bolt port for neo4j (default: %(default)s)', + type=str) + + + + args= vars(p.parse_args()) + + seed_url = args["seedurl"] + webpage = args["webpage"] + webapp_folder_name = utilityModule.getDirectoryNameFromURL(seed_url) + + # overwrite the neo4j config for this process + neo4j_http_port = args["httpport"] + neo4j_bolt_port = args["boltport"] + + constantsModule.NEO4J_HTTP_PORT = neo4j_http_port + constantsModule.NEO4J_CONN_HTTP_STRING = "http://127.0.0.1:%s"%str(constantsModule.NEO4J_HTTP_PORT) + + constantsModule.NEO4J_BOLT_PORT = neo4j_bolt_port + constantsModule.NEO4J_CONN_STRING = "bolt://127.0.0.1:%s"%str(constantsModule.NEO4J_BOLT_PORT) + constantsModule.NEOMODEL_NEO4J_CONN_STRING = "bolt://%s:%s@127.0.0.1:%s"%(constantsModule.NEO4J_USER, constantsModule.NEO4J_PASS, constantsModule.NEO4J_BOLT_PORT) + + webpage_folder = os.path.join(constantsModule.DATA_DIR, os.path.join(webapp_folder_name, webpage)) + + # requirement: the database name must have a length between 3 and 63 characters + # must always import into the default neo4j database + neo4j_database_name = 'neo4j' + database_name = '{0}_{1}'.format(webapp_folder_name, webpage) + + nodes_file = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME) + rels_file = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME) + rels_dynamic_file = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME) + + nodes_file_gz = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME +'.gz') + rels_file_gz = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME +'.gz') + rels_dynamic_file_gz = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME +'.gz') + + if os.path.exists(nodes_file) and os.path.exists(rels_file) and os.path.exists(rels_dynamic_file): + LOGGER.info('[TR] hpg files exist in decompressed format, skipping de-compression.') + + elif 
os.path.exists(nodes_file_gz) and os.path.exists(rels_file_gz) and os.path.exists(rels_dynamic_file_gz): + LOGGER.info('[TR] de-compressing hpg.') + # de-compress the hpg + IOModule.decompress_graph(webpage_folder) + else: + LOGGER.error('[TR] The nodes/rels.csv files do not exist in %s, skipping.'%webpage_folder) + return False + + LOGGER.warning('[TR] removing any previous neo4j instance for %s'%str(database_name)) + DU.ineo_remove_db_instance(database_name) + + LOGGER.info('[TR] creating db %s with http port %s'%(database_name, neo4j_http_port)) + DU.ineo_create_db_instance(database_name, neo4j_http_port) + + # check if the bolt port requested by the config.yaml is not the default one + if not ( int(neo4j_http_port) + 2 == int(neo4j_bolt_port) ): + LOGGER.info('[TR] setting the requested bolt port %s for db %s'%(neo4j_bolt_port, database_name)) + DU.ineo_set_bolt_port_for_db_instance(database_name, neo4j_bolt_port) + + LOGGER.info('[TR] importing the database with neo4j-admin.') + DU.neoadmin_import_db_instance(database_name, neo4j_database_name, nodes_file, rels_file, rels_dynamic_file) + + LOGGER.info('[TR] changing the default neo4j password to enable programmatic access.') + DU.ineo_set_initial_password_and_restart(database_name, password=constantsModule.NEO4J_PASS) + + # compress the hpg after the model import + IOModule.compress_graph(webpage_folder) + + LOGGER.info('[TR] waiting for the neo4j connection to be ready...') + time.sleep(10) + LOGGER.info('[TR] connection: %s'%constantsModule.NEO4J_CONN_HTTP_STRING) + connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150, conn=constantsModule.NEO4J_CONN_HTTP_STRING) + if not connection_success: + try: + LOGGER.info('[TR] stopping neo4j for %s'%str(database_name)) + DU.ineo_stop_db_instance(database_name) + + ## remove db after analysis + DU.ineo_remove_db_instance(database_name) + except: + LOGGER.info('[TR] ran into exception while prematurely stopping neo4j for %s'%str(database_name)) + return connection_success + + LOGGER.info('[TR] starting to run the queries.') + webpage_url = get_url_for_webpage(webpage_folder) + try: + DU.exec_fn_within_transaction(traversals_cypher.run_traversals, webpage_url, webpage_folder, webpage, conn=constantsModule.NEO4J_CONN_STRING) + except Exception as e: + LOGGER.error(e) + LOGGER.error('[TR] neo4j connection error.') + outfile = os.path.join(webpage_folder, "sinks.flows.out") + if not os.path.exists(outfile): + with open(outfile, 'w+') as fd: + error_json = {"error": str(e)} + json.dump(error_json, fd, ensure_ascii=False, indent=4) + + + + ## note: these steps are done in the top level module, as timeout may occur here + LOGGER.info('[TR] stopping neo4j for %s'%str(database_name)) + DU.ineo_stop_db_instance(database_name) + + ## remove db after analysis + LOGGER.info('[TR] removing neo4j for %s'%str(database_name)) + DU.ineo_remove_db_instance(database_name) + + return connection_success + +if __name__ == "__main__": + main() diff --git a/analyses/open_redirect/globals.js b/analyses/open_redirect/globals.js new file mode 100644 index 0000000..6cb5bf6 --- /dev/null +++ b/analyses/open_redirect/globals.js @@ -0,0 +1,464 @@ +/** + * List of Window object properties + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window + */ +const window_properties = ["caches", "closed", "console", "controllers", "crossOriginIsolated", "crypto", "customElements", "defaultStatus", "devicePixelRatio", "dialogArguments", "directories", "document", "event", "frameElement", "frames", 
"fullScreen", "history", "indexedDB", "innerHeight", "innerWidth", "isSecureContext", "isSecureContext", "length", "localStorage", "location", "locationbar", "menubar", "mozAnimationStartTime", "mozInnerScreenX", "mozInnerScreenY", "name", "navigator", "onabort", "onafterprint", "onanimationcancel", "onanimationend", "onanimationiteration", "onappinstalled", "onauxclick", "onbeforeinstallprompt", "onbeforeprint", "onbeforeunload", "onblur", "oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick", "onclose", "oncontextmenu", "oncuechange", "ondblclick", "ondevicemotion", "ondeviceorientation", "ondeviceorientationabsolute", "ondragdrop", "ondurationchange", "onended", "onerror", "onfocus", "onformdata", "ongamepadconnected", "ongamepaddisconnected", "ongotpointercapture", "onhashchange", "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup", "onlanguagechange", "onload", "onloadeddata", "onloadedmetadata", "onloadend", "onloadstart", "onlostpointercapture", "onmessage", "onmessageerror", "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onpaint", "onpause", "onplay", "onplaying", "onpointercancel", "onpointerdown", "onpointerenter", "onpointerleave", "onpointermove", "onpointerout", "onpointerover", "onpointerup", "onpopstate", "onrejectionhandled", "onreset", "onresize", "onscroll", "onselect", "onselectionchange", "onselectstart", "onstorage", "onsubmit", "ontouchcancel", "ontouchstart", "ontransitioncancel", "ontransitionend", "onunhandledrejection", "onunload", "onvrdisplayactivate", "onvrdisplayblur", "onvrdisplayconnect", "onvrdisplaydeactivate", "onvrdisplaydisconnect", "onvrdisplayfocus", "onvrdisplaypointerrestricted", "onvrdisplaypointerunrestricted", "onvrdisplaypresentchange", "onwheel", "opener", "origin", "outerHeight", "outerWidth", "pageXOffset", "pageYOffset", "parent", "performance", "personalbar", "pkcs11", "screen", "screenLeft", "screenTop", "screenX", "screenY", "scrollbars", "scrollMaxX", "scrollMaxY", "scrollX", "scrollY", "self", "sessionStorage", "sidebar", "speechSynthesis", "status", "statusbar", "toolbar", "top", "visualViewport"]; + +/** + * List of Window object methods + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window + */ +const window_methods = ["alert", "atob", "blur", "btoa", "cancelAnimationFrame", "cancelIdleCallback", "captureEvents", "clearImmediate", "clearInterval", "clearTimeout", "close", "confirm", "convertPointFromNodeToPage", "convertPointFromPageToNode", "createImageBitmap", "dump", "fetch", "find", "focus", "getComputedStyle", "getDefaultComputedStyle", "getSelection", "home", "matchMedia", "minimize", "moveBy", "moveTo", "open", "openDialog", "postMessage", "print", "prompt", "queueMicrotask", "releaseEvents", "requestAnimationFrame", "requestIdleCallback", "resizeBy", "resizeTo", "routeEvent", "scroll", "scrollBy", "scrollByLines", "scrollByPages", "scrollTo", "setCursor", "setImmediate", "setInterval", "setTimeout", "showDirectoryPicker", "showModalDialog", "showOpenFilePicker", "showSaveFilePicker", "sizeToContent", "stop", "updateCommands", "addEventListener"]; + +/** + * List of Window object event properties + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Window + */ +const window_events = ["event", "afterprint", "animationcancel", "animationend", "animationiteration", "beforeprint", "beforeunload", "blur", "copy", "cut", "DOMContentLoaded", "error", "focus", "hashchange", "languagechange", "load", "message", "messageerror", 
"offline", "online", "orientationchange", "pagehide", "pageshow", "paste", "popstate", "rejectionhandled", "storage", "transitioncancel", "unhandledrejection", "unload", "vrdisplayconnect", "vrdisplaydisconnect", "vrdisplaypresentchange"]; + + + +/** + * List of Window object properties + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document + */ +const document_properties = ["cookie", "activeElement", "alinkColor", "all", "anchors", "applets", "bgColor", "body", "characterSet", "childElementCount", "children", "compatMode", "contentType", "currentScript", "defaultView", "designMode", "dir", "doctype", "documentElement", "documentURI", "documentURIObject", "domain", "embeds", "fgColor", "firstElementChild", "forms", "fullscreen", "fullscreenElement", "fullscreenEnabled", "head", "height", "hidden", "images", "implementation", "lastElementChild", "lastModified", "lastStyleSheetSet", "linkColor", "links", "location", "mozSyntheticDocument", "onabort", "onafterscriptexecute", "onanimationcancel", "onanimationend", "onanimationiteration", "onauxclick", "onbeforescriptexecute", "onblur", "oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick", "onclose", "oncontextmenu", "oncuechange", "ondblclick", "ondurationchange", "onended", "onerror", "onfocus", "onformdata", "onfullscreenchange", "onfullscreenerror", "ongotpointercapture", "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup", "onload", "onloadeddata", "onloadedmetadata", "onloadend", "onloadstart", "onlostpointercapture", "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onoffline", "ononline", "onpause", "onplay", "onplaying", "onpointercancel", "onpointerdown", "onpointerenter", "onpointerleave", "onpointermove", "onpointerout", "onpointerover", "onpointerup", "onreset", "onresize", "onscroll", "onselect", "onselectionchange", "onselectstart", "onsubmit", "ontouchcancel", "ontouchstart", "ontransitioncancel", "ontransitionend", "onvisibilitychange", "onwheel", "pictureInPictureElement", "pictureInPictureEnabled", "plugins", "pointerLockElement", "popupNode", "preferredStyleSheetSet", "readyState", "referrer", "rootElement", "scripts", "scrollingElement", "selectedStyleSheetSet", "styleSheets", "styleSheetSets", "timeline", "title", "tooltipNode", "URL", "visibilityState", "vlinkColor", "width", "xmlEncoding", "xmlVersion"]; + +/** + * List of document object methods + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document + */ +const document_methods = ["adoptNode", "append", "caretPositionFromPoint", "caretRangeFromPoint", "clear", "close", "createAttribute", "createCDATASection", "createComment", "createDocumentFragment", "createElement", "createElementNS", "createEntityReference", "createEvent", "createExpression", "createExpression", "createNodeIterator", "createNSResolver", "createNSResolver", "createProcessingInstruction", "createRange", "createTextNode", "createTouch", "createTouchList", "createTreeWalker", "elementFromPoint", "elementsFromPoint", "enableStyleSheetsForSet", "evaluate", "evaluate", "execCommand", "exitFullscreen", "exitPictureInPicture", "exitPointerLock", "getAnimations", "getBoxObjectFor", "getElementById", "getElementsByClassName", "getElementsByName", "getElementsByTagName", "getElementsByTagNameNS", "getSelection", "hasFocus", "hasStorageAccess", "importNode", "mozSetImageElement", "open", "prepend", "queryCommandEnabled", "queryCommandSupported", "querySelector", "querySelectorAll", 
"registerElement", "releaseCapture", "replaceChildren", "requestStorageAccess", "write", "writeln", "addEventListener"]; + +/** + * List of document object event properties + * Fetch from: https://developer.mozilla.org/en-US/docs/Web/API/Document + */ +const document_events = ["animationcancel", "animationend", "animationiteration", "animationstart", "copy", "cut", "DOMContentLoaded", "drag", "dragend", "dragenter", "dragleave", "dragover", "dragstart", "drop", "fullscreenchange", "fullscreenerror", "gotpointercapture", "keydown", "keypress", "keyup", "lostpointercapture", "paste", "pointercancel", "pointerdown", "pointerenter", "pointerleave", "pointerlockchange", "pointerlockerror", "pointermove", "pointerout", "pointerover", "pointerup", "readystatechange", "scroll", "selectionchange", "selectstart", "touchcancel", "touchend", "touchmove", "touchstart", "transitioncancel", "transitionend", "transitionrun", "transitionstart", "visibilitychange", "wheel"]; + + +const _2nd_level_method_calls = ["appendChild", "console", "log"]; + + + +const all_global_props = window_properties.concat(document_properties,document_methods,window_methods); + + +// List curated through aggregation of other lists +// - https://www.w3schools.com/jsref/jsref_obj_array.asp +var js_builtin_methods = new Set([ + + // Functions + "apply", + "call", + "bind", + + // Arrays + "concat", + "copyWithin", + "entries", + "every", + "fill", + "filter", + "find", + "findIndex", + "forEach", + "from", + "includes", + "indexOf", + "isArray", + "join", + "keys", + "lastIndexOf", + "map", + "pop", + "push", + "reduce", + "reduceRight", + "reverse", + "shift", + "slice", + "some", + "sort", + "splice", + "toString", + "unshift", + "valueOf", + + // Boolean + "toString", + "valueOf", + + // Classes + "extends", + "super", + "static", + + // Date + "getDate", + "getDay", + "getFullYear", + "getHours", + "getMilliseconds", + "getMinutes", + "getMonth", + "getSeconds", + "getTime", + "getTimezoneOffset", + "getUTCDate", + "getUTCDay", + "getUTCFullYear", + "getUTCHours", + "getUTCMilliseconds", + "getUTCMinutes", + "getUTCMonth", + "getUTCSeconds", + "getYear", + "now", + "parse", + "setDate", + "setFullYear", + "setHours", + "setMilliseconds", + "setMinutes", + "setMonth", + "setSeconds", + "setTime", + "setUTCDate", + "setUTCFullYear", + "setUTCHours", + "setUTCMilliseconds", + "setUTCMinutes", + "setUTCMonth", + "setUTCSeconds", + "setYear", + "toDateString", + "toGMTString", + "toISOString", + "toJSON", + "toLocaleDateString", + "toLocaleTimeString", + "toLocaleString", + "toString", + "toTimeString", + "toUTCString", + "UTC", + "valueOf", + + // JSON + "parse", + "stringify", + + // MATH + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "cbrt", + "ceil", + "clz32", + "cos ", + "cosh", + "exp", + "expm1", + "floor", + "fround", + "log", + "log10", + "log1p", + "log2", + "max", + "min", + "pow", + "random", + "round", + "sign ", + "sin ", + "sinh", + "sqrt", + "tan", + "tanh", + "trunc", + + // Number + "isFinite", + "isInteger", + "isNaN", + "isSafeInteger", + "toExponential", + "toFixed", + "toLocaleString", + "toPrecision", + "toString", + "valueOf", + + // RegExp + "compile", + + // String + "charAt", + "charCodeAt", + "concat", + "endsWith", + "fromCharCode", + "includes", + "indexOf", + "lastIndexOf", + "localeCompare", + "match", + "repeat", + "replace", + "search", + "slice", + "split", + "startsWith", + "substr", + "substring", + "toLocaleLowerCase", + "toLocaleUpperCase", + 
"toLowerCase", + "toString", + "toUpperCase", + "trim", + "valueOf" +]); + + +js_builtin_methods = Array.from(js_builtin_methods) +const buildint_dom_api = [ + "window", + "document" +].concat(all_global_props) + +const js_builtin = js_builtin_methods.concat(buildint_dom_api); + + + +var lib_content_heuristics = [ + // jquery + "*! jQuery v", + "(c) OpenJS Foundation and other contributors | jquery.org/license", + "jQuery Foundation, Inc. | jquery.org/license *", + // bootstrap + "* Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)", + "* Bootstrap v", + // prototype + "* Prototype JavaScript framework, version", + // angular js + "@license AngularJS v", + "Google LLC. http://angularjs.org", + "AngularJS v", + // react + "* @license React", + // d3 + "https://d3js.org v", + // require js + "* @license r.js ", + "* @license RequireJS ", + // ext js + "This file is part of Ext JS ", + "Contact: http://www.sencha.com/contact", + // leaflet + "* Leaflet " +]; + + +// one-to-one mapping between the name of the library and the heuristic +// as in `lib_content_heuristics` list +var lib_content_heuristics_names = [ + 'jquery', + 'jquery', + 'jquery', + 'bootstrap', + 'bootstrap', + 'prototype', + 'angularjs', + 'angularjs', + 'angularjs', + 'reactjs', + 'd3js', + 'requirejs', + 'requirejs', + 'extjs', + 'extjs', + 'leaflet', +]; + +var lib_src_heuristics = [ + // common cdns + "unpkg.com/", + "ajax.googleapis.com/ajax/libs/", + "cdnjs.cloudflare.com/ajax/libs/", + // custom + "lib/", + "libs/", + "/libraries/", + // library names + "gajs", // google analytics + "google-analytics-js", + "analytics.js", + "gwt", + "ink", + "vaadin", + "bootstrap", + "zurb", + "polymer", + "highcharts", + "infovis", + "flotcharts", + "createjs", + "googlemaps", + "google-maps", + "jquery", + "jqueryui", + "dojo", + "prototype", + "scriptaculous", + "mootools", + "spry", + "yui", + "yui2", + "yui3", + "qooxdoo", + "extjs", + "ext.js", + "ext-all.js", + "base2", + "closurelibrary", + "raphaë", + "react", + "reactjs", + "nextjs", + "next.js", + "preact", + "preactjs", + "modernizr", + "processingjs", + "backbone", + "leaflet", + "mapbox", + "lo-dash", + "underscore", + "sammy", + "rico", + "mochikit", + "graphaë", + "glow", + "socketio", + "socket.io", + "mustache", + "fabricjs", + "fabric.js", + "fusejs", + "fuse.js", + "tweenjs", + "sproutcore", + "zeptojs", + "threejs", + "three", + "three.js", + "philogl", + "camanjs", + "yepnope", + "labjs", + "headjs", + "controljs", + "requirejs", + "require.js", + "rightjs", + "jquerytools", + "pusher", + "paperjs", + "swiffy", + "movejs", + "amplifyjs", + "popcornjs", + "d3js", + "d3.", + "handlebars", + "knockout", + "spine", + "jquerymobile", + "webfontloader", + "angular", + "angularjs", + "angular.js", + "emberjs", + "ember.js", + "hammerjs", + "visibilityjs", + "velocityjs", + "ifvisiblejs", + "pixijs", + "dcjs", + "greensockjs", + "fastclick", + "isotope", + "marionette", + "canjs", + "vuejs", + "vue.cjs", + "vue.global.js", + "vue", + "nuxtjs", + "twojs", + "two.js", + "brewser", + "materialdesignlite", + "material-design-lite", + "kendoui", + "matterjs", + "riotjs", + "seajs", + "momentjs", + "momenttimezone", + "scrollmagic", + "swfobject", + "flexslider", + "spfjs", + "numeraljs", + "boomerangjs", + "boomerang.js", + "framerjs", + "marko", + "ampjs", + "gatsby", + "shopify", + "magentojs", + "wordpress", + "wix", + "workbox", + "bpmn-js", + "googletagmanager", + "gtm.js" +]; + + + +module.exports = { + js_builtin: js_builtin, + 
lib_src_heuristics: lib_src_heuristics, + lib_content_heuristics: lib_content_heuristics +}; + + + + + + + + + + + + + + + + + + diff --git a/analyses/open_redirect/package.json b/analyses/open_redirect/package.json new file mode 100644 index 0000000..388bfca --- /dev/null +++ b/analyses/open_redirect/package.json @@ -0,0 +1,47 @@ +{ + "name": "open_redirect", + "version": "1.0.0", + "description": "", + "main": "static_analysis.js", + "dependencies": { + "crypto": "^1.0.1", + "elapsed-time-logger": "^1.1.7", + "fs": "0.0.1-security", + "js-beautify": "^1.15.1", + "path": "^0.12.7", + "process": "^0.11.10", + "puppeteer": "^23.9.0", + "core-js": "^3.39.0", + "csv-writer": "^1.6.0", + "domino": "^2.1.6", + "escodegen": "^2.1.0", + "esgraph": "*", + "espree": "^10.3.0", + "esprima": "^4.0.1", + "filbert": "^0.1.20", + "latest": "^0.2.0", + "open": "10.1.0", + "php-parser": "^3.2.1", + "process.argv": "^0.6.1", + "util": "^0.12.5", + "walkes": "*" + }, + "devDependencies": { + "async": "^3.2.6", + "blanket": "^1.2.3", + "grunt": "^1.6.1", + "grunt-jsdoc": "^2.4.1", + "grunt-mocha-istanbul": "^5.0.2", + "grunt-mocha-test": "^0.13.3", + "grunt-strip-code": "^1.0.12", + "istanbul": "*", + "jsdoc": "^4.0.4", + "mocha": "*", + "should": "*" + }, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "Soheil Khodayari", + "license": "AGPL3" +} diff --git a/analyses/open_redirect/semantic_types.js b/analyses/open_redirect/semantic_types.js new file mode 100644 index 0000000..0fcfba4 --- /dev/null +++ b/analyses/open_redirect/semantic_types.js @@ -0,0 +1,29 @@ +/* + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + + Description: + ------------ + Semantic Types for open redirect vulnerabilities +*/ + + + +var semanticTypes = {}; + +// write +semanticTypes.WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL"; +semanticTypes.WR_WIN_LOC_URL = "WR_WIN_LOC_URL"; +semanticTypes.WR_FRAME_URL = "WR_FRAME_URL"; + +module.exports = semanticTypes; \ No newline at end of file diff --git a/analyses/open_redirect/semantic_types.py b/analyses/open_redirect/semantic_types.py new file mode 100644 index 0000000..7d1a2e5 --- /dev/null +++ b/analyses/open_redirect/semantic_types.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see .
+ + + Description: + ------------ + Semantic Types for open redirect vulnerabilities + + Usage: + ----------- + > import analyses.open_redirect.semantic_types as SemTypeDefinitions + +""" + +# write +WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL"; +WR_WIN_LOC_URL = "WR_WIN_LOC_URL"; + +# non reachable +NON_REACHABLE = "NON_REACH" + +# read +RD_WIN_LOC = "RD_WIN_LOC" +RD_WIN_NAME = "RD_WIN_NAME" +RD_DOC_REF = "RD_DOC_REF" +RD_PM = "RD_PM" +RD_WEB_STORAGE = "RD_WEB_STORAGE" +RD_DOM_TREE = "RD_DOM" +RD_COOKIE = "RD_COOKIE" + diff --git a/analyses/open_redirect/static_analysis.js b/analyses/open_redirect/static_analysis.js new file mode 100644 index 0000000..a2c8dbb --- /dev/null +++ b/analyses/open_redirect/static_analysis.js @@ -0,0 +1,443 @@ +/* + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + + Description: + ------------ + Static analysis main +*/ + + +/** + * ------------------------------------------------ + * third-party imports + * ------------------------------------------------ +**/ +const fs = require('fs'); +const pathModule = require('path'); +const crypto = require('crypto') +const argv = require("process.argv"); +const elapsed = require("elapsed-time-logger"); + +/** + * ------------------------------------------------ + * module imports + * ------------------------------------------------ +**/ +const constantsModule = require('./../../engine/lib/jaw/constants'); +const globalsModule = require('./globals.js'); +const SourceSinkAnalyzerModule = require('./traversals.js'); +const SourceSinkAnalyzer = SourceSinkAnalyzerModule.OpenRedirectSourceSinkAnalyzer; + +const GraphExporter = require('./../../engine/core/io/graphexporter'); + +/** + * ------------------------------------------------ + * constants and globals + * ------------------------------------------------ +**/ + +// directory where the data of the crawling will be saved +const BASE_DIR = pathModule.resolve(__dirname, '../..') +const dataStorageDirectory = pathModule.join(BASE_DIR, 'data'); + + +// when true, nodejs will log the current step for each webpage to the console +const DEBUG = true; + +const do_ast_preprocessing_passes = false; +var do_compress_graphs = true; +var overwrite_hpg = false; +var iterative_output = false; + +const FOXHOUND_EDGES = true; + +/** + * ------------------------------------------------ + * utility functions + * ------------------------------------------------ +**/ + +const withTimeout = (millis, promise) => { + const timeout = new Promise((resolve, reject) => + setTimeout( + () => reject(`Timed out after ${millis} ms.`), + millis)); + return Promise.race([ + promise, + timeout + ]); +}; + + +/** + * @function readFile + * @param file_path_name: absolute path of a file. + * @return the text content of the given file if it exists, otherwise -1. 
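+ * Example (illustrative usage; parse() stands in for any hypothetical caller and is not part of this module): + *   const src = readFile('/absolute/path/to/script.js'); + *   if(src !== -1){ parse(src); } + * Note the -1 sentinel: callers compare against -1 instead of catching an exception.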
+**/ +function readFile(file_path_name){ + try { + const data = fs.readFileSync(file_path_name, 'utf8') + return data; + } catch (err) { + // console.error(err); + return -1; + } +} + + +/** + * @function getNameFromURL + * @param url: eTLD+1 domain name + * @return converts the url to a string name suitable for a directory by removing the colon and slash symbols +**/ +function getNameFromURL(url){ + return url.replace(/\:/g, '-').replace(/\//g, ''); +} + + +/** + * @function hashURL + * @param url: string + * @return returns the SHA256 hash of the given input in hexadecimal format +**/ +function hashURL(url){ + const hash = crypto.createHash('sha256').update(url, 'utf8').digest('hex'); + return hash; +} + + +/** + * @function getOrCreateDataDirectoryForWebsite + * @param url: string + * @return creates a directory to store the data of the input url and returns the directory name. +**/ +function getOrCreateDataDirectoryForWebsite(url){ + const folderName = getNameFromURL(url); + const folderPath = pathModule.join(dataStorageDirectory, folderName); + if(!fs.existsSync(folderPath)){ + fs.mkdirSync(folderPath); + } + return folderPath; +} + + + +/** + * @function isLibraryScript + * @param {string} script: script src (when `mode: src`) or script content (when `mode: content`) + * @param {object} options: determines the type of the `script` param (format `{mode: type}` with types being `src` or `content`) + * @return {boolean} whether or not the input is a library script +**/ +function isLibraryScript(script, options){ + + let return_flag = false; + + if(options.mode === 'src'){ + + let script_src = script.toLowerCase(); + for(let h of globalsModule.lib_src_heuristics){ + if(script_src.includes(h)){ // check script src + return_flag = true; + break; + } + } + + }else{ // [options.mode === 'content'] + + let script_content = script.toLowerCase(); + for(let h of globalsModule.lib_content_heuristics){ + if(script_content.includes(h)){ // check script content + return_flag = true; + break; + } + } + } + + return return_flag; +} + +/** + * ------------------------------------------------ + * Main Static Analysis Thread + * ------------------------------------------------ +**/ + + +async function staticallyAnalyzeWebpage(url, webpageFolder){ + + let results_timing_file = pathModule.join(webpageFolder, "time.static_analysis.out"); + if(!overwrite_hpg && fs.existsSync(results_timing_file)){ + DEBUG && console.log('[skipping] results already exist for: '+ webpageFolder) + return 1; + } + + // read the crawled scripts from disk + let scripts = []; + var sourcemaps = {}; + let dirContent = fs.readdirSync( webpageFolder ); + + + let scripts_mapping = {}; + let scripts_mapping_content = await readFile(pathModule.join(webpageFolder, 'scripts_mapping.json')); + if(scripts_mapping_content != -1){ + try{ + scripts_mapping = JSON.parse(scripts_mapping_content); + } + catch{ + // PASS + } + } + + + var library_scripts = []; + let scriptFiles = dirContent.filter(function( elm ) {return elm.match(/.*\.(js$)/ig);}); + for(let i=0; i 10){ + + if(fs.existsSync(single_folder)){ + var urlContent = readFile(pathModule.join(single_folder, "url.out")); + if(urlContent != -1){ + var webpageUrl = urlContent.trim(); + await staticallyAnalyzeWebpage(webpageUrl, single_folder); + } + }else{ + console.log('[Warning] the following directory does not exist, but was marked for static analysis: '+ single_folder); + } + + }else{ + + const dataDirectory = getOrCreateDataDirectoryForWebsite(seedurl); +
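// Note: urls.out is expected to hold one crawled URL per line; each URL maps to the folder + // data/<site-folder>/<sha256(url)> (hex digest via hashURL above), where <site-folder> comes + // from getNameFromURL, e.g., 'https://example.com' becomes 'https-example.com'. +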
const urlsFile = pathModule.join(dataDirectory, "urls.out"); + const urlsFileContent = readFile(urlsFile); + + if(urlsFileContent != -1){ + + const globalTimer = elapsed.start('global_static_timer'); + + const urls = new Set(urlsFileContent.split("\n")); // do not consider duplicate urls + + for(let webpageUrl of urls.values()){ + + if(webpageUrl.trim().length > 1 ){ // eliminate empty strings + let _hash = hashURL(webpageUrl); + let webpageFolder = pathModule.join(dataDirectory, _hash); + if(fs.existsSync(webpageFolder)){ + await staticallyAnalyzeWebpage(webpageUrl, webpageFolder); + + }else{ + console.log('[Warning] the following directory does not exist, but was marked for static analysis: '+ webpageFolder +'\n url is: '+ webpageUrl); + } + } + } + + const globalTime = globalTimer.get(); + globalTimer.end(); + fs.writeFileSync(pathModule.join(dataDirectory, "time.static_analysis.out"), JSON.stringify({ + "total_static_timer": globalTime, + })); + + } + else{ + console.log('[Warning] could not read urls.out for website: '+ seedurl +', thus exiting the static-analysis pass.') + } + } + +})(); + + + + + + diff --git a/analyses/open_redirect/static_analysis_api.py b/analyses/open_redirect/static_analysis_api.py new file mode 100644 index 0000000..1722317 --- /dev/null +++ b/analyses/open_redirect/static_analysis_api.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see .
+ + + Description: + ------------ + API for running the open redirect preliminary analyses (i.e., property graph construction and identifying sinks) + + + Usage: + ------------ + $ start_model_construction(website_url, memory, timeout) + +""" + + + +import os, sys, json +import utils.io as IOModule +import constants as constantsModule +import utils.utility as utilityModule +from utils.logging import logger as LOGGER + + + +def start_model_construction(website_url, iterative_output='false', memory=None, timeout=None, compress_hpg='true', overwrite_hpg='false', specific_webpage=None): + + # setup defaults + if memory is None: + static_analysis_memory = '32000' + else: + static_analysis_memory = memory + + if timeout is None: + static_analysis_per_webpage_timeout = 600 # seconds + else: + static_analysis_per_webpage_timeout = timeout + + + open_redirect_analyses_command_cwd = os.path.join(constantsModule.BASE_DIR, "analyses/open_redirect") + open_redirect_static_analysis_driver_program = os.path.join(open_redirect_analyses_command_cwd, "static_analysis.js") + + open_redirect_static_analysis_command = "node --max-old-space-size=%s DRIVER_ENTRY --singlefolder=SINGLE_FOLDER --compresshpg=%s --overwritehpg=%s --iterativeoutput=%s"%(static_analysis_memory, compress_hpg, overwrite_hpg, iterative_output) + open_redirect_static_analysis_command = open_redirect_static_analysis_command.replace("DRIVER_ENTRY", open_redirect_static_analysis_driver_program) + + + website_folder_name = utilityModule.getDirectoryNameFromURL(website_url) + website_folder = os.path.join(constantsModule.DATA_DIR, website_folder_name) + + webpages_json_file = os.path.join(website_folder, 'webpages.json') + urls_file = os.path.join(website_folder, 'urls.out') + + + if specific_webpage is not None: + webpage_folder = os.path.join(constantsModule.DATA_DIR, specific_webpage) + if os.path.exists(webpage_folder): + node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder) + IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True) + + elif os.path.exists(webpages_json_file): + + fd = open(webpages_json_file, 'r') + webpages = json.load(fd) + fd.close() + + for webpage in webpages: + webpage_folder = os.path.join(website_folder, webpage) + if os.path.exists(webpage_folder): + + node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder) + IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True) + + + + elif os.path.exists(urls_file): + message = 'webpages.json file does not exist, falling back to urls.out' + LOGGER.warning(message) + + # read the urls from the webpage data + fd = open(urls_file, 'r') + urls = fd.readlines() + fd.close() + + # make sure that the list of urls is unique + # this would eliminate the cases where the crawler is executed multiple times for the same site + # without deleting the data of the old crawl and thus adds duplicate urls to urls.out file. 
+ urls = list(set(urls)) + + for url in urls: + url = url.strip().rstrip('\n').strip() + webpage_folder_name = utilityModule.sha256(url) + webpage_folder = os.path.join(website_folder, webpage_folder_name) + if os.path.exists(webpage_folder): + node_command= open_redirect_static_analysis_command.replace('SINGLE_FOLDER', webpage_folder) + IOModule.run_os_command(node_command, cwd=open_redirect_analyses_command_cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True) + + else: + message = 'no webpages.json or urls.out file exists in the webapp directory; skipping analysis...' + LOGGER.warning(message) + diff --git a/analyses/open_redirect/static_analysis_py_api.py b/analyses/open_redirect/static_analysis_py_api.py new file mode 100644 index 0000000..4597a2f --- /dev/null +++ b/analyses/open_redirect/static_analysis_py_api.py @@ -0,0 +1,353 @@ +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + + Description: + ------------ + Detecting open redirect vulnerabilities + + + Usage: + ------------ + > import analyses.open_redirect.static_analysis_py_api as open_redirect_py_api + +""" + + +import os +import sys +import time +import json +import constants as constantsModule +import utils.io as IOModule +import docker.neo4j.manage_container as dockerModule +import hpg_neo4j.db_utility as DU +import hpg_neo4j.query_utility as QU +import analyses.open_redirect.traversals_cypher as open_redirect_py_traversals +from utils.logging import logger as LOGGER + + + +def get_url_for_webpage(webpage_directory): + content = None + fd = open(os.path.join(webpage_directory, "url.out"), "r") + content = fd.read() + fd.close() + return content + + +def get_name_from_url(url): + + """ + @param url: eTLD+1 domain name + @return converts the url to a string name suitable for a directory by removing the colon and slash symbols + + """ + return url.replace(':', '-').replace('/', '') + + + +# ------------------------------------------------------------------------------------ # +# Interface +# ------------------------------------------------------------------------------------ # +def build_and_analyze_hpg(seed_url, timeout=1800, overwrite=False, compress_hpg=True): + + """ + @param {string} seed_url + @param {integer} timeout: per page static analysis timeout + @description: imports an HPG inside a neo4j graph database and runs traversals over it. 
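+ + Example (illustrative; assumes the seed URL was previously crawled into the data directory): + > build_and_analyze_hpg('https://example.com', timeout=1800, overwrite=False, compress_hpg=True)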
+ """ + + if str(constantsModule.NEO4J_USE_DOCKER).lower() == 'true': + build_and_analyze_hpg_docker(seed_url, conn_timeout=timeout) + else: + build_and_analyze_hpg_local(seed_url, overwrite=overwrite, conn_timeout=timeout, compress_hpg=compress_hpg) + + # if timeout is not None: + # build_and_analyze_hpg_local_with_timeout(seed_url, timeout=timeout, overwrite=overwrite) + # else: + # if constantsModule.NEO4J_USE_DOCKER: + # build_and_analyze_hpg_docker(seed_url) + # else: + # build_and_analyze_hpg_local(seed_url, overwrite=overwrite) + + + +def build_and_analyze_hpg_local_with_timeout(seed_url, timeout=None, overwrite=False): + + if timeout is None: + static_analysis_per_webpage_timeout = 1800 # 30 mins (default) + else: + static_analysis_per_webpage_timeout = timeout + + webapp_folder_name = get_name_from_url(seed_url) + webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name) + if not os.path.exists(webapp_data_directory): + LOGGER.error("[TR] did not found the directory for HPG analysis: "+str(webapp_data_directory)) + return -1 + + webpages_json_file = os.path.join(webapp_data_directory, "webpages.json") + + if os.path.exists(webpages_json_file): + LOGGER.info('[TR] reading webpages.json') + fd = open(webpages_json_file, 'r') + webapp_pages = json.load(fd) + fd.close() + + else: + LOGGER.info('[TR] webpages.json does not exist; falling back to filesystem.') + # fall back to analyzing all pages if the `webpages.json` file is missing + webapp_pages = os.listdir(webapp_data_directory) + # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler) + webapp_pages = [item for item in webapp_pages if len(item) == 64] + + + for webpage in webapp_pages: + webpage_folder = os.path.join(webapp_data_directory, webpage) + if os.path.exists(webpage_folder): + + LOGGER.warning('[TR] HPG analyis for: %s'%(webpage_folder)) + + # do NOT re-analyze webpages + if str(overwrite).lower() == 'false': + OUTPUT_FILE = os.path.join(webpage_folder, "sinks.flows.out") + if os.path.exists(OUTPUT_FILE): + LOGGER.info('[TR] analyis results already exists for webpage: %s'%webpage_folder) + continue + + # note: the neo4j ports as in the config.yaml must be passed to the new process + command = "python3 -m analyses.open_redirect.analyze_hpg_api --seedurl={0} --webpage={1} --httpport={2} --boltport={3}".format(seed_url, webpage, constantsModule.NEO4J_HTTP_PORT, constantsModule.NEO4J_BOLT_PORT) + cwd = constantsModule.BASE_DIR + ret = IOModule.run_os_command(command, cwd= cwd, timeout=static_analysis_per_webpage_timeout, print_stdout=True, log_command=True) + + if ret < 0: + ## safely finish the analysis, even when timeout occurs + database_name = '{0}_{1}'.format(webapp_folder_name, webpage) + LOGGER.info('[TR] stopping neo4j for %s'%str(database_name)) + DU.ineo_stop_db_instance(database_name) + + ## remove db after analysis + LOGGER.info('[TR] removing neo4j for %s'%str(database_name)) + DU.ineo_remove_db_instance(database_name) + + LOGGER.info('[TR] finished HPG analyis for: %s'%(webpage_folder)) + + +def build_and_analyze_hpg_local(seed_url, overwrite=False, conn_timeout=None, compress_hpg=True): + + webapp_folder_name = get_name_from_url(seed_url) + webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name) + if not os.path.exists(webapp_data_directory): + LOGGER.error("[TR] did not found the directory for HPG analysis: "+str(webapp_data_directory)) + return -1 + + webpages_json_file = os.path.join(webapp_data_directory, 
"webpages.json") + + if os.path.exists(webpages_json_file): + LOGGER.info('[TR] reading webpages.json') + fd = open(webpages_json_file, 'r') + webapp_pages = json.load(fd) + fd.close() + + else: + LOGGER.info('[TR] webpages.json does not exist; falling back to filesystem.') + # fall back to analyzing all pages if the `webpages.json` file is missing + webapp_pages = os.listdir(webapp_data_directory) + # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler) + webapp_pages = [item for item in webapp_pages if len(item) == 64] + + + for webpage in webapp_pages: + webpage_folder = os.path.join(webapp_data_directory, webpage) + if os.path.exists(webpage_folder): + + LOGGER.warning('[TR] HPG analyis for: %s'%(webpage_folder)) + + if str(overwrite).lower() == 'false': + # do NOT re-analyze webpages + OUTPUT_FILE = os.path.join(webpage_folder, "sinks.flows.out") + if os.path.exists(OUTPUT_FILE): + LOGGER.info('[TR] analyis results already exists for webpage: %s'%webpage_folder) + continue + + # requirement: the database name must have a length between 3 and 63 characters + # must always import into the default neo4j database + neo4j_database_name = 'neo4j' + + database_name = '{0}_{1}'.format(webapp_folder_name, webpage) + + nodes_file = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME) + rels_file = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME) + rels_dynamic_file = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME) + + nodes_file_gz = os.path.join(webpage_folder, constantsModule.NODE_INPUT_FILE_NAME +'.gz') + rels_file_gz = os.path.join(webpage_folder, constantsModule.RELS_INPUT_FILE_NAME +'.gz') + rels_dynamic_file_gz = os.path.join(webpage_folder, constantsModule.RELS_DYNAMIC_INPUT_FILE_NAME +'.gz') + + if os.path.exists(nodes_file) and os.path.exists(rels_file) and os.path.exists(rels_dynamic_file): + LOGGER.info('[TR] hpg files exist in decompressed format, skipping de-compression.') + + elif os.path.exists(nodes_file_gz) and os.path.exists(rels_file_gz) and os.path.exists(rels_dynamic_file_gz): + LOGGER.info('[TR] de-compressing hpg.') + # de-compress the hpg + IOModule.decompress_graph(webpage_folder) + else: + LOGGER.error('[TR] The nodes/rels.csv files do not exist in %s, skipping.'%webpage_folder) + continue + + neo4j_http_port = constantsModule.NEO4J_HTTP_PORT + neo4j_bolt_port = constantsModule.NEO4J_BOLT_PORT + + LOGGER.warning('[TR] removing any previous neo4j instance for %s'%str(database_name)) + DU.ineo_remove_db_instance(database_name) + + LOGGER.info('[TR] creating db %s with http port %s'%(database_name, neo4j_http_port)) + DU.ineo_create_db_instance(database_name, neo4j_http_port) + + # check if the bolt port requested by the config.yaml is not the default one + if not ( int(neo4j_http_port) + 2 == int(neo4j_bolt_port) ): + LOGGER.info('[TR] setting the requested bolt port %s for db %s'%(neo4j_bolt_port, database_name)) + DU.ineo_set_bolt_port_for_db_instance(database_name, neo4j_bolt_port) + + LOGGER.info('[TR] importing the database with neo4j-admin.') + DU.neoadmin_import_db_instance(database_name, neo4j_database_name, nodes_file, rels_file, rels_dynamic_file) + + LOGGER.info('[TR] changing the default neo4j password to enable programmatic access.') + DU.ineo_set_initial_password_and_restart(database_name, password=constantsModule.NEO4J_PASS) + + if str(compress_hpg).lower() == 'true': + # compress the hpg after the model import + IOModule.compress_graph(webpage_folder) + + 
LOGGER.info('[TR] waiting for the neo4j connection to be ready...') + time.sleep(10) + LOGGER.info('[TR] connection: %s'%constantsModule.NEO4J_CONN_HTTP_STRING) + connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150, conn=constantsModule.NEO4J_CONN_HTTP_STRING) + if not connection_success: + try: + LOGGER.info('[TR] stopping neo4j for %s'%str(database_name)) + DU.ineo_stop_db_instance(database_name) + + ## remove db after analysis + DU.ineo_remove_db_instance(database_name) + except: + LOGGER.info('[TR] ran into exception while prematurely stopping neo4j for %s'%str(database_name)) + continue + + LOGGER.info('[TR] starting to run the queries.') + webpage_url = get_url_for_webpage(webpage_folder) + try: + DU.exec_fn_within_transaction(open_redirect_py_traversals.run_traversals, webpage_url, webpage_folder, webpage, conn=constantsModule.NEO4J_CONN_STRING, conn_timeout=conn_timeout) + except Exception as e: + LOGGER.error(e) + LOGGER.error('[TR] neo4j connection error.') + outfile = os.path.join(webpage_folder, "sinks.flows.out") + if not os.path.exists(outfile): + with open(outfile, 'w+') as fd: + error_json = {"error": str(e)} + json.dump(error_json, fd, ensure_ascii=False, indent=4) + + LOGGER.info('[TR] stopping neo4j for %s'%str(database_name)) + DU.ineo_stop_db_instance(database_name) + + ## remove db after analysis + LOGGER.info('[TR] removing neo4j for %s'%str(database_name)) + DU.ineo_remove_db_instance(database_name) + + +def build_and_analyze_hpg_docker(seed_url, conn_timeout=None): + + """ + @param {string} seed_url + @description: imports an HPG inside a neo4j docker instance and runs traversals over it. + + """ + webapp_folder_name = get_name_from_url(seed_url) + webapp_data_directory = os.path.join(constantsModule.DATA_DIR, webapp_folder_name) + if not os.path.exists(webapp_data_directory): + LOGGER.error("[Traversals] could not find the directory for HPG analysis: "+str(webapp_data_directory)) + return -1 + + webapp_pages = os.listdir(webapp_data_directory) + # the name of each webpage folder is a hex digest of a SHA256 hash (as stored by the crawler) + webapp_pages = [item for item in webapp_pages if len(item) == 64] + + + # neo4j config + build = True + build_container = True + query = True + stop_container = True + + # must use the default docker container db name which is the only active db in docker + database_name = 'neo4j' + container_name = 'neo4j_container_' + + for each_webpage in webapp_pages: + + relative_import_path = os.path.join(webapp_folder_name, each_webpage) + container_name = container_name + each_webpage + webpage = os.path.join(webapp_data_directory, each_webpage) + LOGGER.warning('HPG for: %s'%(webpage)) + + # de-compress the hpg + IOModule.decompress_graph(webpage) + + # import the CSV files into an active neo4j database inside a docker container + if build: + nodes_file = os.path.join(webpage, constantsModule.NODE_INPUT_FILE_NAME) + rels_file = os.path.join(webpage, constantsModule.RELS_INPUT_FILE_NAME) + if not (os.path.exists(nodes_file) and os.path.exists(rels_file)): + LOGGER.error('The HPG nodes.csv / rels.csv files do not exist in the provided folder, skipping...') + continue + + # must build a container only once + if build_container: + + # remove the old container & database if it exists + dockerModule.stop_neo4j_container(container_name) + dockerModule.remove_neo4j_container(container_name) + dockerModule.remove_neo4j_database(database_name, container_name) + time.sleep(5) + + dockerModule.create_neo4j_container(container_name) +
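# Note: the freshly created container is assumed to need a brief warm-up before the CSV + # import; readiness is then verified below by polling the database endpoint via + # DU.wait_for_neo4j_bolt_connection before any queries are executed. +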
LOGGER.info('waiting 5 seconds for the neo4j container to be ready.') + time.sleep(5) + + LOGGER.info('importing data inside container.') + dockerModule.import_data_inside_container(container_name, database_name, relative_import_path, 'CSV') + LOGGER.info('waiting for the tcp port 7474 of the neo4j container to be ready...') + connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150) + if not connection_success: + sys.exit(1) + else: + dockerModule.start_neo4j_container(container_name) + LOGGER.info('waiting for the tcp port 7474 of the neo4j container to be ready...') + connection_success = DU.wait_for_neo4j_bolt_connection(timeout=150) + if not connection_success: + sys.exit(1) + + # compress the hpg after the model import + IOModule.compress_graph(webpage) + + # step3: run the vulnerability detection queries + if query: + webpage_url = get_url_for_webpage(webpage) + DU.exec_fn_within_transaction(open_redirect_py_traversals.run_traversals, webpage_url, webpage, each_webpage, conn_timeout=conn_timeout) + + + # stop the neo4j docker container + if stop_container: + dockerModule.stop_neo4j_container(container_name) diff --git a/analyses/open_redirect/traversals.js b/analyses/open_redirect/traversals.js new file mode 100644 index 0000000..ac4a1c4 --- /dev/null +++ b/analyses/open_redirect/traversals.js @@ -0,0 +1,373 @@ +/* + Copyright (C) 2024 Soheil Khodayari, CISPA + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
+ + + Description: + ------------ + Lightweight module for identifying open redirect sources/sinks +*/ + +const constantsModule = require('./../../engine/lib/jaw/constants'); +const esprimaParser = require('./../../engine/lib/jaw/parser/jsparser'); +const globalsModule = require('./globals.js'); +const walkes = require('walkes'); +const escodgen = require('escodegen'); +var Set = require('./../../engine/lib/analyses/set'); +const DEBUG = false; + + +// -------------------------------------------------------------------------------- // +// SemanticTypes +// -------------------------------------------------------------------------------- // + + +const WR_WIN_OPEN_URL = "WR_WIN_OPEN_URL"; +const WR_WIN_LOC_URL = "WR_WIN_LOC_URL"; +const WR_FRAME_URL = "WR_FRAME_URL"; + +// -------------------------------------------------------------------------------- // +// CLS +// -------------------------------------------------------------------------------- // + +function requireUncached(module) { + delete require.cache[require.resolve(module)]; + return require(module); +} + +/** + * OpenRedirectSourceSinkAnalyzer + * @constructor + */ +function OpenRedirectSourceSinkAnalyzer() { + "use strict"; + // re-instantiate every time + this.api = require('./../../engine/model_builder'); + this.scopeCtrl = require('./../../engine/lib/jaw/scope/scopectrl'); + this.modelCtrl = require('./../../engine/lib/jaw/model/modelctrl'); + this.modelBuilder = require('./../../engine/lib/jaw/model/modelbuilder'); + this.scopeCtrl.clear(); + this.modelCtrl.clear(); +} + + +// -------------------------------------------------------------------------------- // +// Utility +// -------------------------------------------------------------------------------- // + +function hasIdentifierChildren(node){ + var flag = false; + if(!node) return flag; + if(node.type === "Identifier"){ + flag = true; + }else{ + walkes(node, { + Identifier: function(node, recurse){ + if(node.type === "Identifier"){ + flag = true; + } + } + }); + } + return flag; +} + + +function getIdentifierChildren(node){ + + if(!node) return []; + + if(node.type === "Identifier"){ + return [node.name]; + }else{ + + let identifier_names = new Set(); + walkes(node, { + + // CallExpression: function(node, recurse){ + // recurse(node.arguments); + // }, + FunctionExpression: function(node,recurse){ + // we do not want function expression arguments + // thus, do not recurse here + }, + CallExpression: function(node, recurse){ + // we want the call expression arguments, e.g., JSON.stringify(x) + // here, recurse only on the arguments + for(let arg of node.arguments){ + recurse(arg); + } + }, + MemberExpression: function(node, recurse){ + // we only care about the member expression base objects + // except when we have a `this.property` expression + // where we are interested in the property part of the member expression + let member_expression = escodgen.generate(node); + if(member_expression.startsWith("this.")){ // handle ThisExpression + member_expression = member_expression.replace('this.', '') + let identifier_name = member_expression.substr(0, member_expression.indexOf('.')); + if(!globalsModule.js_builtin.includes(identifier_name)){ + identifier_names.add(identifier_name); + } + }else{ + recurse(node.object); + } + }, + ObjectExpression: function(node, recurse){ + // recurse on object expression values only + // as keys cannot be tainted + node.properties.forEach(prop=>{ + recurse(prop.value); + }) + }, + Identifier: function(node, recurse){ + if(node.type === 
"Identifier"){ + if(!globalsModule.js_builtin.includes(node.name)){ + identifier_names.add(node.name); + } + } + } + }); + + return [].concat(identifier_names.values()); // convert Set() to list with the spread operator + } +} + +// -------------------------------------------------------------------------------- // +// API +// -------------------------------------------------------------------------------- // + +OpenRedirectSourceSinkAnalyzer.prototype.build_static_model = async function(code){ + + let theSourceSinkAnalyzer = this; + let language = constantsModule.LANG.js; + await theSourceSinkAnalyzer.api.initializeModelsFromSource(code, language); + await theSourceSinkAnalyzer.api.buildInitializedModels(); +} + + +OpenRedirectSourceSinkAnalyzer.prototype.get_sources = async function(){ + +} + + +OpenRedirectSourceSinkAnalyzer.prototype.get_sinks = async function(){ + + /* + ==================== + Sinks + ==================== + + window.location = TAINT; + window.location.href = TAINT; + window.location.replace(TAINT); + window.location.assign(TAINT); + window.open(url) = TAINT; + frame.src = TAINT; + + */ + + var outputs = []; + function appendSinkOutput(node, location, id, script_id, semantic_types, sink_code, sink_type, taint_possibility, sink_identifier_names){ + + if(node.semanticType){ + node.semanticType.concat(['sink', sink_type]); + node.semanticType.concat(semantic_types); + }else{ + node.semanticType = ['sink', sink_type]; + node.semanticType.concat(semantic_types); + } + + outputs.push({ + "location": location, + "id": id, + "script": script_id, + "semantic_types": semantic_types, + "sink_code": sink_code, + "sink_type": sink_type, + "taint_possibility": taint_possibility, // true if the sink has at least one Identifier (i.e., not just literals) + "sink_identifiers": sink_identifier_names, + }); + } + + let engine = this; + let pageScopeTrees = engine.scopeCtrl.pageScopeTrees; + if(!pageScopeTrees){ + return []; + } + for await (let scopeTree of pageScopeTrees){ + const pageModels = engine.modelCtrl.getPageModels(scopeTree); + const intraProceduralModels = pageModels.intraProceduralModels; + const ast = scopeTree.scopes[0].ast; + const script_id = ast.value; + + walkes(ast, { + + AssignmentExpression: function(node, recurse){ + // CASE: + // window.location = TAINT; + // window.location.href = TAINT; + // location.href = TAINT; + if(node && node.left && node.left.type==="MemberExpression" && ( + (node.left.object.type==="Identifier" && (node.left.object.name==="window" || node.left.object.name==="win" || node.left.object.name==="w") && node.left.property.type==="Identifier" && node.left.property.name==="location") + || + (node.left.object.type==="Identifier" && (node.left.object.name==="location") && node.left.property.type==="Identifier" && node.left.property.name==="href") + || + (node.left.object.type==="MemberExpression" && node.left.object.object.type==="Identifier" && (node.left.object.object.name==="window" || node.left.object.object.name==="win" || node.left.object.object.name==="w") && node.left.object.property.type==="Identifier" && node.left.object.property.name==="location" && node.left.property.type==="Identifier" && node.left.property.name==="href") + ) + ){ + + let taint_argument = node.right; + var taint_possibility = false; + if(taint_argument){ + identifier_names = getIdentifierChildren(taint_argument); + if(identifier_names.length > 0){ + taint_possibility = true; + } + } + var identifiers_object = { + WR_WIN_LOC_URL : identifier_names + } + var 
+
+	var outputs = [];
+	function appendSinkOutput(node, location, id, script_id, semantic_types, sink_code, sink_type, taint_possibility, sink_identifier_names){
+
+		// `concat` returns a new array, so the result must be assigned back
+		if(node.semanticType){
+			node.semanticType = node.semanticType.concat(['sink', sink_type], semantic_types);
+		}else{
+			node.semanticType = ['sink', sink_type].concat(semantic_types);
+		}
+
+		outputs.push({
+			"location": location,
+			"id": id,
+			"script": script_id,
+			"semantic_types": semantic_types,
+			"sink_code": sink_code,
+			"sink_type": sink_type,
+			"taint_possibility": taint_possibility, // true if the sink has at least one Identifier (i.e., not just literals)
+			"sink_identifiers": sink_identifier_names,
+		});
+	}
+
+	let engine = this;
+	let pageScopeTrees = engine.scopeCtrl.pageScopeTrees;
+	if(!pageScopeTrees){
+		return [];
+	}
+	for await (let scopeTree of pageScopeTrees){
+		const pageModels = engine.modelCtrl.getPageModels(scopeTree);
+		const intraProceduralModels = pageModels.intraProceduralModels;
+		const ast = scopeTree.scopes[0].ast;
+		const script_id = ast.value;
+
+		walkes(ast, {
+
+			AssignmentExpression: function(node, recurse){
+				// CASE:
+				// window.location = TAINT;
+				// window.location.href = TAINT;
+				// location.href = TAINT;
+				if(node && node.left && node.left.type==="MemberExpression" && (
+					(node.left.object.type==="Identifier" && (node.left.object.name==="window" || node.left.object.name==="win" || node.left.object.name==="w") && node.left.property.type==="Identifier" && node.left.property.name==="location")
+					||
+					(node.left.object.type==="Identifier" && (node.left.object.name==="location") && node.left.property.type==="Identifier" && node.left.property.name==="href")
+					||
+					(node.left.object.type==="MemberExpression" && node.left.object.object.type==="Identifier" && (node.left.object.object.name==="window" || node.left.object.object.name==="win" || node.left.object.object.name==="w") && node.left.object.property.type==="Identifier" && node.left.object.property.name==="location" && node.left.property.type==="Identifier" && node.left.property.name==="href")
+					)
+				){
+
+					let taint_argument = node.right;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+					var identifiers_object = {
+						WR_WIN_LOC_URL: identifier_names
+					}
+					var taint_possibility_object = {
+						WR_WIN_LOC_URL: taint_possibility
+					}
+					appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodegen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+				}
+				else if (node && node.left && node.left.type==="MemberExpression" && (
+					(node.left.object.type==="Identifier" && (node.left.object.name==="frame" || node.left.object.name==="iframe") && node.left.property.type==="Identifier" && node.left.property.name==="src"))
+				){
+
+					let taint_argument = node.right;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+					var identifiers_object = {
+						WR_FRAME_URL: identifier_names
+					}
+					var taint_possibility_object = {
+						WR_FRAME_URL: taint_possibility
+					}
+					appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_FRAME_URL], escodegen.generate(node), "frame.src", taint_possibility_object, identifiers_object);
+				}
+
+				if(node && node.right && node.right.type === "FunctionExpression"){
+					recurse(node.right);
+				}
+			},
+			CallExpression: function(node, recurse){
+
+				// CASE: window.open(URL)
+				if(node.callee.type === "MemberExpression" && node.callee.object.type === "Identifier" &&
+					(node.callee.object.name === "window" || node.callee.object.name === "win" || node.callee.object.name === "w") &&
+					node.callee.property.type === "Identifier" && node.callee.property.name === "open"){
+
+					var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+
+					var identifiers_object = {
+						WR_WIN_OPEN_URL: identifier_names
+					}
+					var taint_possibility_object = {
+						WR_WIN_OPEN_URL: taint_possibility
+					}
+					appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_OPEN_URL], escodegen.generate(node), "window.open()", taint_possibility_object, identifiers_object);
+
+				}
+				// CASE:
+				// location.replace(TAINT);
+				// location.assign(TAINT);
+				else if(node.callee.type === "MemberExpression" && node.callee.object.type === "Identifier" &&
+					(node.callee.object.name === "location") &&
+					node.callee.property.type === "Identifier" && (node.callee.property.name === "replace" || node.callee.property.name === "assign")){
+
+					var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+
+					var identifiers_object = {
+						WR_WIN_LOC_URL: identifier_names
+					}
+					var taint_possibility_object = {
+						WR_WIN_LOC_URL: taint_possibility
+					}
+					appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodegen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+
+				}
+				// CASE:
+				// window.location.replace(TAINT);
+				// window.location.assign(TAINT);
+				else if(node.callee.type === "MemberExpression" && node.callee.property.type==="Identifier" && (node.callee.property.name==="replace" || node.callee.property.name==="assign") &&
+					node.callee.object.type === "MemberExpression" && node.callee.object.object.type === "Identifier" && (node.callee.object.object.name === "window" || node.callee.object.object.name === "win" || node.callee.object.object.name === "w") &&
+					node.callee.object.property.type === "Identifier" && node.callee.object.property.name === "location"
+				){
+
+					var taint_argument = (node.arguments && node.arguments.length > 0)? node.arguments[0]: null;
+					var taint_possibility = false;
+					var identifier_names = [];
+					if(taint_argument){
+						identifier_names = getIdentifierChildren(taint_argument);
+						if(identifier_names.length > 0){
+							taint_possibility = true;
+						}
+					}
+
+					var identifiers_object = {
+						WR_WIN_LOC_URL: identifier_names
+					}
+					var taint_possibility_object = {
+						WR_WIN_LOC_URL: taint_possibility
+					}
+					appendSinkOutput(node, node.loc.start.line, node._id, script_id, [WR_WIN_LOC_URL], escodegen.generate(node), "window.location", taint_possibility_object, identifiers_object);
+
+				}
+				// handle cases where there are multiple call expressions in a single statement
+				// e.g., window.open(source).then(resp => console.log(resp))
+				else{
+					recurse(node.callee);
+					for(let arg of node.arguments){
+						recurse(arg);
+					}
+				}
+			}
+
+		});
+
+	}
+
+	return outputs;
+
+}
+
+module.exports = {
+	OpenRedirectSourceSinkAnalyzer: OpenRedirectSourceSinkAnalyzer,
+};
\ No newline at end of file
diff --git a/analyses/open_redirect/traversals_cypher.py b/analyses/open_redirect/traversals_cypher.py
new file mode 100644
index 0000000..f2b5085
--- /dev/null
+++ b/analyses/open_redirect/traversals_cypher.py
@@ -0,0 +1,570 @@
+# -*- coding: utf-8 -*-
+
+"""
+	Copyright (C) 2024  Soheil Khodayari, CISPA
+	This program is free software: you can redistribute it and/or modify
+	it under the terms of the GNU Affero General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU Affero General Public License for more details.
+	You should have received a copy of the GNU Affero General Public License
+	along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+	Description:
+	------------
+	Traversals for detecting open redirect vulnerabilities
+
+
+	Usage:
+	-----------
+	> import analyses.open_redirect.traversals_cypher
+
+"""
+
+import subprocess
+import hashlib
+import urllib.parse
+import os
+import time
+import re
+import sys
+import jsbeautifier
+import json
+
+import constants as constantsModule
+import utils.utility as utilityModule
+import hpg_neo4j.db_utility as DU
+import hpg_neo4j.query_utility as QU
+import analyses.general.data_flow as DF
+import analyses.open_redirect.semantic_types as SemTypeDefinitions
+from utils.logging import logger as LOGGER
+from neo4j import GraphDatabase
+from datetime import datetime
+
+
+# ----------------------------------------------------------------------- #
+#			Globals
+# ----------------------------------------------------------------------- #
+
+
+DEBUG = False
+
+
+# ----------------------------------------------------------------------- #
+#			Utility Functions
+# ----------------------------------------------------------------------- #
+
+
+def _unquote_url(url):
+
+	"""
+	@param {string} url
+	@return {string} decoded url
+	"""
+	out = urllib.parse.unquote(url)
+	out = out.replace('&amp;', '&')
+
+	return out
+
+
+def _get_all_occurences(needle, haystack):
+
+	"""
+	@param {string} needle
+	@param {string} haystack
+	@description finds all occurrences of needle in haystack
+	@return {array} a list of start indices of needle in haystack
+	"""
+	out = [m.start() for m in re.finditer(needle, haystack)]
+	return out
+
+
+def _get_current_timestamp():
+
+	"""
+	@return {string} current date and time string
+	"""
+	now = datetime.now()
+	dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
+	return dt_string
+
+
+def _get_unique_list(lst):
+
+	"""
+	@param {list} lst
+	@return {list} the input list with duplicates removed
+	"""
+	return list(set(lst))
+
+
+def _get_orderd_unique_list(lst):
+
+	"""
+	@param {list} lst
+	@return {list} the input list with duplicates removed, preserving the original order
+	"""
+	final_list = []
+	for item in lst:
+		if item not in final_list:
+			final_list.append(item)
+	return final_list
+
+
+def _get_line_of_location(esprima_location_str):
+
+	"""
+	@param {string} esprima_location_str
+	@return {string} start line number of the esprima location object
+	"""
+	start_index = esprima_location_str.index('line:') + len('line:')
+	end_index = esprima_location_str.index(',')
+	out = esprima_location_str[start_index:end_index]
+	return out
+
+
+def _get_location_part(nid_string):
+
+	"""
+	@param {string} nid_string: string containing the node id and location
+	@return {string} the location part
+	"""
+	start_index = nid_string.index('__Loc=') + len('__Loc=')
+	return nid_string[start_index:]
+
+
+def _get_node_id_part(nid_string):
+
+	"""
+	@param {string} nid_string: string containing the node id and location
+	@return {string} the node id part
+	"""
+	start_index = nid_string.find('__nid=')
+	if start_index != -1:
+		start_index = start_index + len('__nid=')
+	else:
+		start_index = 0 # handle the case where the function name is not stored at the beginning
+
+	end_index = nid_string.index('__Loc=')
+	return nid_string[start_index:end_index]
+
+
+def _get_function_name_part(nid_string):
+
+	"""
+	@param {string} nid_string: string containing the node id and location
+	@return {string} the function name part
+	"""
+	end_index = nid_string.index('__nid=')
+	return nid_string[:end_index]
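+
+# Illustrative sketch of the `nid` format that the helpers above parse
+# (hypothetical values); run_traversals() below builds nids of this shape:
+#   nid_string = "window.location__nid=42__Loc=12"
+#   _get_function_name_part(nid_string)  ->  "window.location"
+#   _get_node_id_part(nid_string)        ->  "42"
+#   _get_location_part(nid_string)       ->  "12"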
+
+
+def _get_value_of_identifer_or_literal(node):
+	"""
+	@param {PGNode} node
+	@return {list} the pair of the value of the node and the node type (identifier or literal)
+	"""
+	if node['Type'] == 'Identifier':
+		return [node['Code'], node['Type']]
+	elif node['Type'] == 'Literal':
+		value = node['Value']
+		raw = node['Raw']
+		if value == '{}' and (raw.strip('\'').strip("\"").strip() != value):
+			return [node['Raw'], node['Type']]
+		else:
+			return [node['Value'], node['Type']]
+
+	return ['', '']
+
+
+# ----------------------------------------------------------------------- #
+#		Semantic Type Association to Program Slices
+# ----------------------------------------------------------------------- #
+
+def _get_semantic_types(program_slices, num_slices):
+
+	"""
+	@param {list} program_slices: slices of the JS program
+	@param {int} num_slices: length of the program_slices list
+	@return {list} the semantic types associated with the given program slices
+	"""
+
+	semantic_types = []
+
+	# sources
+	WEB_STORAGE_STRINGS = [
+		'localStorage',
+		'sessionStorage'
+	]
+
+	WIN_LOC_STRINGS = [
+		'window.location',
+		'win.location',
+		'w.location',
+		'location.href',
+		'location.hash',
+		'loc.href',
+		'loc.hash',
+		'History.getBookmarkedState',
+	]
+
+	WIN_NAME_STRINGS = [
+		'window.name',
+		'win.name'
+	]
+
+	DOM_READ_STRINGS = [
+		'document.getElement',
+		'document.querySelector',
+		'doc.getElement',
+		'doc.querySelector',
+		'.getElementBy',
+		'.getElementsBy',
+		'.querySelector',
+		'$(',
+		'jQuery(',
+		'.attr(',
+		'.getAttribute(',
+		'.readAttribute('
+	]
+
+	DOM_READ_COOKIE_STRINGS = [
+		'document.cookie',
+		'doc.cookie',
+	]
+
+	PM_STRINGS = [
+		'event.data',
+		'evt.data'
+	]
+
+	DOC_REF_STRINGS = [
+		'document.referrer',
+		'doc.referrer',
+		'd.referrer',
+	]
+
+	for i in range(num_slices):
+		program_slice = program_slices[i]
+		code = program_slice[0]
+		idents = program_slice[2]
+
+		for item in WIN_LOC_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_WIN_LOC)
+
+		for item in WIN_NAME_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_WIN_NAME)
+
+		for item in DOC_REF_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_DOC_REF)
+
+		for item in PM_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_PM)
+
+		for item in DOM_READ_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_DOM_TREE)
+
+		for item in WEB_STORAGE_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_WEB_STORAGE)
+
+		for item in DOM_READ_COOKIE_STRINGS:
+			if item in code:
+				semantic_types.append(SemTypeDefinitions.RD_COOKIE)
+
+		for identifier in idents:
+
+			for item in WIN_LOC_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_WIN_LOC)
+
+			for item in WIN_NAME_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_WIN_NAME)
+
+			for item in DOC_REF_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_DOC_REF)
+
+			for item in PM_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_PM)
+
+			for item in DOM_READ_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_DOM_TREE)
+
+			for item in WEB_STORAGE_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_WEB_STORAGE)
+
+			for item in DOM_READ_COOKIE_STRINGS:
+				if item in identifier:
+					semantic_types.append(SemTypeDefinitions.RD_COOKIE)
+
+	if len(semantic_types):
+		return list(set(semantic_types))
+
+	return [SemTypeDefinitions.NON_REACHABLE]
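+
+# For example (hypothetical slice): a program slice whose code is
+#   "var target = window.location.hash.slice(1)"
+# contains the source string 'location.hash' and is thus tagged with
+# SemTypeDefinitions.RD_WIN_LOC; a slice matching no source string at all
+# yields [SemTypeDefinitions.NON_REACHABLE].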
+
+
+def _get_semantic_type_set(semantic_type_list):
+
+	"""
+	@param {list} semantic_type_list: list of types that may include duplicate semantic types
+	@return {list} a unique semantic type list
+	"""
+
+	semantic_type_list = _get_unique_list(semantic_type_list)
+	if len(semantic_type_list) > 1:
+		if SemTypeDefinitions.NON_REACHABLE in semantic_type_list:
+			semantic_type_list.remove(SemTypeDefinitions.NON_REACHABLE)
+		return semantic_type_list
+
+	elif len(semantic_type_list) == 1:
+		return semantic_type_list
+
+	else:
+		return [SemTypeDefinitions.NON_REACHABLE]
+
+
+# ----------------------------------------------------------------------- #
+#			Main: Taint Analysis
+# ----------------------------------------------------------------------- #
+
+
+def run_traversals(tx, webpage_url, webpage_directory, webpage_directory_hash='xxx', named_properties=[]):
+	"""
+	@param {string} webpage_url
+	@param {string} webpage_directory
+	@param {list} named_properties: `id` and `name` attributes in HTML that can be accessed through the `document` API
+	@description runs the open redirect traversals and writes the candidate source-to-sink flows to disk
+	"""
+
+	sinks_file = os.path.join(webpage_directory, "sinks.out.json")
+	if not os.path.exists(sinks_file):
+		LOGGER.error('[TR] sinks.out.json file does not exist in %s'%webpage_directory)
+		return -1
+
+	fd = open(sinks_file, 'r')
+	sinks_json = json.load(fd)
+	fd.close()
+	sinks_list = sinks_json['sinks']
+
+	storage = {}
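+	# Each entry of `sinks_list` is emitted by the get_sinks() API of
+	# sources_sinks.js; an illustrative record (hypothetical values) is:
+	#   {
+	#     "location": 12, "id": 42, "script": "1.js",
+	#     "semantic_types": ["WR_WIN_LOC_URL"],
+	#     "sink_code": "window.location.href = target",
+	#     "sink_type": "window.location",
+	#     "taint_possibility": {"WR_WIN_LOC_URL": true},
+	#     "sink_identifiers": {"WR_WIN_LOC_URL": ["target"]}
+	#   }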
+
+	for sink_node in sinks_list:
+
+		taintable_sink_identifiers = []
+
+		sink_identifiers_dict = sink_node["sink_identifiers"]
+		sink_taintable_semantic_types = []
+		sink_taint_possibility_vector = sink_node["taint_possibility"]
+
+		for semantic_type in sink_taint_possibility_vector:
+			if sink_taint_possibility_vector[semantic_type] == True:
+				sink_taintable_semantic_types.append(semantic_type)
+				taintable_sink_identifiers.extend(sink_identifiers_dict[semantic_type])
+
+		sink_id = str(sink_node["id"])
+		sink_location = str(sink_node["location"])
+		sink_type = sink_node["sink_type"]
+		sink_cfg_node = QU.get_ast_topmost(tx, {"Id": "%s"%sink_id})
+
+		nid = sink_type + '__nid=' + sink_id + '__Loc=' + sink_location
+
+		sink_node["taintable_semantic_types"] = sink_taintable_semantic_types
+		sink_node["cfg_node_id"] = sink_cfg_node["Id"]
+
+		storage[nid] = {
+			"sink": sink_node,
+			"variables": {}
+		}
+
+		for varname in taintable_sink_identifiers:
+			slice_values = DF._get_varname_value_from_context(tx, varname, sink_cfg_node)
+
+			if DEBUG: print(varname, slice_values)
+
+			semantic_types = _get_semantic_types(slice_values, len(slice_values))
+			storage[nid]["variables"][varname] = {
+				"slices": slice_values,
+				"semantic_types": semantic_types
+			}
+
+			lst = storage[nid]["sink"]["taintable_semantic_types"]
+			lst.extend(semantic_types)
+			storage[nid]["sink"]["taintable_semantic_types"] = lst
+
+
+	print_buffer = []
+	json_buffer = {}
+
+	timestamp = _get_current_timestamp()
+	sep = utilityModule.get_output_header_sep()
+	sep_sub = utilityModule.get_output_subheader_sep()
+	print_buffer.append(sep)
+	print_buffer.append('[timestamp] generated on %s\n'%timestamp)
+	print_buffer.append(sep+'\n')
+	print_buffer.append('[*] webpage URL: %s\n\n'%webpage_url)
+	print_buffer.append(sep_sub+'\n')
+
+	json_buffer["url"] = webpage_url
+	json_buffer["flows"] = []
+	for sink_nid in storage:
+
+		sink_node = storage[sink_nid]["sink"]
+
+		print_buffer.append('[*] webpage: %s\n'%webpage_directory_hash)
+		script_name = sink_node["script"].split('/')[-1]
+		print_buffer.append('[*] script: %s\n'%script_name)
+		semantic_types_for_sink = _get_unique_list(sink_node["taintable_semantic_types"])
+		print_buffer.append('[*] semantic_types: {0}\n'.format(semantic_types_for_sink))
+		print_buffer.append('[*] node_id: %s\n'%str(sink_node["id"]))
+		print_buffer.append('[*] cfg_node_id: %s\n'%str(sink_node["cfg_node_id"]))
+		print_buffer.append('[*] loc: %s\n'%sink_node["location"])
+		print_buffer.append('[*] sink_type: %s\n'%(sink_node["sink_type"]))
+		print_buffer.append('[*] sink_code: %s\n'%sink_node["sink_code"])
+
+		json_flow_object = {
+			"webpage": webpage_directory_hash,
+			"script": script_name,
+			"semantic_types": semantic_types_for_sink,
+			"node_id": str(sink_node["id"]),
+			"cfg_node_id": str(sink_node["cfg_node_id"]),
+			"loc": sink_node["location"],
+			"sink_type": sink_node["sink_type"],
+			"sink_code": sink_node["sink_code"],
+			"program_slices": {},
+		}
+
+		program_slices_dict = storage[sink_nid]["variables"]
+		varnames = program_slices_dict.keys()
+		counter = 1
+
+		for varname in varnames:
+
+			program_slices = program_slices_dict[varname]["slices"]
+			num_slices = len(program_slices)
+			varname_semantic_types = program_slices_dict[varname]["semantic_types"]
+
+			idx = 0
+			for i in range(num_slices):
+				idx += 1
+				program_slice = program_slices[i]
+				loc = _get_line_of_location(program_slice[3])
+				code = program_slice[0]
+
+				if 'function(' in code:
+					code = jsbeautifier.beautify(code) # pretty print function calls
+
+				current_slice = {
+					"index": str(idx),
+					"loc": loc,
+					"code": code,
+				}
+
+				if i == 0 and varname in code:
+					a = '\n%d:%s variable=%s\n'%(counter, str(varname_semantic_types), varname)
+					counter += 1
+					b = """\t%s (loc:%s)- %s\n"""%(str(idx), loc, code)
+					print_buffer += [a, b]
+				else:
+					a = """\t%s (loc:%s)- %s\n"""%(str(idx), loc, code)
+					print_buffer += [a]
+
+				# the json entry is identical in both branches, so build it once here
+				if varname not in json_flow_object["program_slices"]:
+					json_flow_object["program_slices"][varname] = {
+						"semantic_types": varname_semantic_types,
+						"slices": [current_slice],
+					}
+				else:
+					json_flow_object["program_slices"][varname]["slices"].append(current_slice)
+
+		json_buffer["flows"].append(json_flow_object)
+		print_buffer.append('\n\n')
+		print_buffer.append(sep_sub)
+
+	output_file = os.path.join(webpage_directory, "sinks.flows.out")
+	with open(output_file, "w+") as fd:
+		for line in print_buffer:
+			fd.write(line)
+
+	output_file_json = os.path.join(webpage_directory, "sinks.flows.out.json")
+	with open(output_file_json, "w+") as fd:
+		json.dump(json_buffer, fd, ensure_ascii=False, indent=4)
+
+	LOGGER.info('[TR] finished running the queries.')
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
index 18fb717..037d964 100644
--- a/config.yaml
+++ b/config.yaml
@@ -94,6 +94,12 @@ cs_csrf:
     static: false
     static_neo4j: false
 
+open_redirect:
+  enabled: false
+  passes:
+    crawling: false
+    static: false
+    static_neo4j: false
 
 request_hijacking:
   enabled: true
diff --git a/install.sh b/install.sh
index 678e0cd..1f22a58 100755
--- a/install.sh
+++ b/install.sh
@@ -14,6 +14,7 @@ sudo apt install -y chromium-browser
 (cd analyses/domclobbering && npm install)
 (cd analyses/cs_csrf && npm install)
 (cd analyses/request_hijacking && npm install)
+(cd analyses/open_redirect && npm install)
 
 (cd engine && npm install)
 (cd engine/lib/jaw/dom-points-to && npm install)
diff --git a/run_pipeline.py b/run_pipeline.py
index f6d2c12..625fd46 100644
--- a/run_pipeline.py
+++ b/run_pipeline.py
@@ -44,6 +44,10 @@
 import analyses.request_hijacking.static_analysis_py_api as request_hijacking_neo4j_analysis_api
 import analyses.request_hijacking.verification_api as request_hijacking_verification_api
 
+import analyses.open_redirect.static_analysis_api as or_sast_model_construction_api
+import analyses.open_redirect.static_analysis_py_api as or_neo4j_analysis_api
+
+
 def is_website_up(uri):
 	try:
 		response = requests.head(uri, timeout=20)
@@ -246,6 +250,7 @@ def main():
 		# crawling
 		if (config['domclobbering']['enabled'] and config['domclobbering']["passes"]["crawling"]) or \
 		   (config['cs_csrf']['enabled'] and config['cs_csrf']["passes"]["crawling"]) or \
+		   (config['open_redirect']['enabled'] and config['open_redirect']["passes"]["crawling"]) or \
 		   (config['request_hijacking']['enabled'] and config['request_hijacking']["passes"]["crawling"]):
 
 
@@ -308,6 +313,19 @@ def main():
 			CSRFTraversalsModule.build_and_analyze_hpg(website_url)
 			LOGGER.info("finished HPG construction and analysis over neo4j for site %s."%(website_url))
 
+		# open redirects
+		if config['open_redirect']['enabled']:
+			# static analysis
+			if config['open_redirect']["passes"]["static"]:
+				LOGGER.info("static analysis for site %s."%(website_url))
+				or_sast_model_construction_api.start_model_construction(website_url, iterative_output=iterative_output, memory=static_analysis_memory, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite_hpg=static_analysis_overwrite_hpg)
+				LOGGER.info("successfully finished static analysis for site %s."%(website_url))
+
+			# static analysis over neo4j
+			if config['open_redirect']["passes"]["static_neo4j"]:
+				LOGGER.info("HPG construction and analysis over neo4j for site %s."%(website_url))
+				or_neo4j_analysis_api.build_and_analyze_hpg(website_url, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite=static_analysis_overwrite_hpg)
+				LOGGER.info("finished HPG construction and analysis over neo4j for site %s."%(website_url))
 
 		# request hijacking
 		if config['request_hijacking']['enabled']:
@@ -375,6 +393,7 @@ def main():
 		# crawling
 		if (config['domclobbering']['enabled'] and config['domclobbering']["passes"]["crawling"]) or \
 		   (config['cs_csrf']['enabled'] and config['cs_csrf']["passes"]["crawling"]) or \
+		   (config['open_redirect']['enabled'] and config['open_redirect']["passes"]["crawling"]) or \
 		   (config['request_hijacking']['enabled'] and config['request_hijacking']["passes"]["crawling"]):
 
 			LOGGER.info("crawling site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
@@ -420,6 +439,18 @@ def main():
 			CSRFTraversalsModule.build_and_analyze_hpg(website_url)
 			LOGGER.info("finished HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
 
+		# open redirect
+		if config['open_redirect']['enabled']:
+			# static analysis
+			if config['open_redirect']["passes"]["static"]:
+				LOGGER.info("static analysis for site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
+				or_sast_model_construction_api.start_model_construction(website_url, iterative_output=iterative_output, memory=static_analysis_memory, timeout=static_analysis_per_webpage_timeout, compress_hpg=static_analysis_compress_hpg, overwrite_hpg=static_analysis_overwrite_hpg)
+				LOGGER.info("successfully finished static analysis for site at row %s - rank %s - %s"%(g_index, website_rank, website_url))
+
+			if config['open_redirect']["passes"]["static_neo4j"]:
+				LOGGER.info("HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
+				or_neo4j_analysis_api.build_and_analyze_hpg(website_url, timeout=static_analysis_per_webpage_timeout, overwrite=static_analysis_overwrite_hpg, compress_hpg=static_analysis_compress_hpg)
+				LOGGER.info("finished HPG construction and analysis over neo4j for site %s - %s"%(website_rank, website_url))
 
 		# request hijacking