diff --git a/cobra/__version__.py b/cobra/__version__.py index 32f4ede2..e1b989d1 100644 --- a/cobra/__version__.py +++ b/cobra/__version__.py @@ -7,7 +7,7 @@ __issue_page__ = 'https://github.com/LoRexxar/Cobra-W/issues/new' __python_version__ = sys.version.split()[0] __platform__ = platform.platform() -__version__ = '1.5.0' +__version__ = '1.6.0' __author__ = 'LoRexxar' __author_email__ = 'LoRexxar@gmail.com' __license__ = 'MIT License' diff --git a/cobra/cast.py b/cobra/cast.py index 48b062ad..e941a52a 100644 --- a/cobra/cast.py +++ b/cobra/cast.py @@ -25,7 +25,10 @@ class CAST(object): - languages = ['php', 'java', 'sol'] + languages = {'php': "php", + 'java': "java", + 'sol': "sol", + 'js': "javascript"} def __init__(self, rule, target_directory, file_path, line, code, files=None, rule_class=None, repair_functions=[], controlled_params=[]): self.target_directory = target_directory @@ -44,7 +47,7 @@ def __init__(self, rule, target_directory, file_path, line, code, files=None, ru for language in self.languages: if self.file_path[-len(language):].lower() == language: - self.language = language + self.language = self.languages[language] os.chdir(self.target_directory) # Parse rule @@ -227,20 +230,8 @@ def is_controllable_param(self): logger.debug("[AST] String have variables: `Yes`") # variable - if param_name[:1] == '$': + if self.language == 'php': logger.debug("[AST] Is variable: `Yes`") - - # Get assign code block - # param_block_code = self.block_code(0) - # fi = codecs.open(self.file_path, "r", encoding='utf-8', errors='ignore') - # param_content = fi.read() - - # param_content = ast_object.get_nodes(self.file_path) - # - # if param_content is False: - # logger.debug("[AST] Can't get assign code block") - # return True, self.data - logger.debug("[Deep AST] Start AST for param {param_name}".format(param_name=param_name)) _is_co, _cp, expr_lineno, chain = anlysis_params(param_name, self.file_path, self.line, self.sr.vul_function, self.repair_functions, 
self.controlled_list, isexternal=True) @@ -257,32 +248,34 @@ def is_controllable_param(self): else: continue - else: - if self.language == 'java': - # Java variable didn't have `$` - param_block_code = self.block_code(0) - if param_block_code is False: - logger.debug("Can't get block code") - return True, self.data - logger.debug("[AST] Block code: ```{language}\r\n{code}```".format(language=self.language, - code=param_block_code)) - regex_assign_string = self.regex[self.language]['assign_string'].format(re.escape(param_name)) - string = re.findall(regex_assign_string, param_block_code) - if len(string) >= 1 and string[0] != '': - logger.debug("[AST] Is assign string: `Yes`") - continue - # return False, self.data - logger.debug("[AST] Is assign string: `No`") - - # Is assign out data - regex_get_param = r'String\s{0}\s=\s\w+\.getParameter(.*)'.format(re.escape(param_name)) - get_param = re.findall(regex_get_param, param_block_code) - if len(get_param) >= 1 and get_param[0] != '': - logger.debug("[AST] Is assign out data: `Yes`") - continue - # False, self.data - logger.debug("[AST] Is assign out data: `No`") + # else: + elif self.language == 'java': + # Java variable didn't have `$` + param_block_code = self.block_code(0) + if param_block_code is False: + logger.debug("Can't get block code") return True, self.data + logger.debug("[AST] Block code: ```{language}\r\n{code}```".format(language=self.language, + code=param_block_code)) + regex_assign_string = self.regex[self.language]['assign_string'].format(re.escape(param_name)) + string = re.findall(regex_assign_string, param_block_code) + if len(string) >= 1 and string[0] != '': + logger.debug("[AST] Is assign string: `Yes`") + continue + # return False, self.data + logger.debug("[AST] Is assign string: `No`") + + # Is assign out data + regex_get_param = r'String\s{0}\s=\s\w+\.getParameter(.*)'.format(re.escape(param_name)) + get_param = re.findall(regex_get_param, param_block_code) + if len(get_param) >= 1 and 
get_param[0] != '': + logger.debug("[AST] Is assign out data: `Yes`") + continue + # False, self.data + logger.debug("[AST] Is assign out data: `No`") + return True, self.data + + else: logger.debug("[AST] Not Java/PHP, can't parse ({l})".format(l=self.language)) continue # return False, self.data diff --git a/cobra/cli.py b/cobra/cli.py index 0c94953a..8bf94f3f 100644 --- a/cobra/cli.py +++ b/cobra/cli.py @@ -39,7 +39,9 @@ def get_sid(target, is_a_sid=False): def start(target, formatter, output, special_rules, a_sid=None, language=None, secret_name=None, black_path=None): """ Start CLI - :param secret_id: secret id or name? + :param black_path: + :param secret_name: + :param language: :param target: File, FOLDER, GIT :param formatter: :param output: @@ -61,7 +63,7 @@ def start(target, formatter, output, special_rules, a_sid=None, language=None, s r.status(d) # parse target mode and output mode - pa = ParseArgs(target, formatter, output, special_rules, black_path, a_sid=None) + pa = ParseArgs(target, formatter, output, special_rules, language, black_path, a_sid=None) target_mode = pa.target_mode output_mode = pa.output_mode black_path_list = pa.black_path_list @@ -81,10 +83,10 @@ def start(target, formatter, output, special_rules, a_sid=None, language=None, s main_language = dt.language main_framework = dt.framework else: - main_language = language - main_framework = language + main_language = pa.language + main_framework = pa.language - logger.info('[CLI] [STATISTIC] Language: {l} Framework: {f}'.format(l=main_language, f=main_framework)) + logger.info('[CLI] [STATISTIC] Language: {l} Framework: {f}'.format(l=",".join(main_language), f=main_framework)) logger.info('[CLI] [STATISTIC] Files: {fc}, Extensions:{ec}, Consume: {tc}'.format(fc=file_count, ec=len(files), tc=time_consume)) diff --git a/cobra/detection.py b/cobra/detection.py index 0c64ad4a..ee85259f 100644 --- a/cobra/detection.py +++ b/cobra/detection.py @@ -21,7 +21,7 @@ try: # for pip >= 10 from 
pip._internal.req import parse_requirements -except ImportError: # for pip <= 9.0.3 +except ImportError: # for pip <= 9.0.3 from pip.req import parse_requirements file_type = [] @@ -37,7 +37,7 @@ def __init__(self, target_directory, files): """ self.target_directory = target_directory self.files = files - self.lang = None + self.lang = [] self.requirements = None self.frame_data = {} self.language_data = {} @@ -57,6 +57,7 @@ def language(self): l_chiefly = 'false' if language.get('chiefly') is not None: l_chiefly = language.get('chiefly') + language_extensions[l_name] = { 'chiefly': l_chiefly, 'extensions': [] @@ -73,18 +74,21 @@ def language(self): for language, language_info in languages.items(): if ext in language_info['extensions']: if 'chiefly' in language_info and language_info['chiefly'].lower() == 'true': - logger.debug('[DETECTION] [LANGUAGE] found the chiefly language({language}), maybe have largest, continue...'.format( - language=language)) - self.lang = language + logger.debug( + '[DETECTION] [LANGUAGE] found the chiefly language({language}), maybe have largest, continue...'.format( + language=language)) + self.lang.append(language) else: logger.debug('[DETECTION] [LANGUAGE] not chiefly, continue...'.format(language=language)) tmp_language = language - if self.lang is None: - logger.debug('[DETECTION] [LANGUAGE] not found chiefly language, use the largest language(language) replace'.format( - language=tmp_language)) - self.lang = tmp_language - logger.debug('[DETECTION] [LANGUAGE] main language({main_language}), tmp language({tmp_language})'.format(tmp_language=tmp_language, - main_language=self.lang)) + if not self.lang: + logger.debug( + '[DETECTION] [LANGUAGE] not found chiefly language, use the largest language({language}) replace'.format( + language=tmp_language)) + self.lang.append(tmp_language) + logger.debug('[DETECTION] [LANGUAGE] main languages ({main_language}), tmp language({tmp_language})'.format( + tmp_language=tmp_language, +
main_language=",".join(self.lang))) return self.lang @property diff --git a/cobra/engine.py b/cobra/engine.py index 8a762ba9..1b352d10 100644 --- a/cobra/engine.py +++ b/cobra/engine.py @@ -26,6 +26,7 @@ from .cast import CAST from .parser import scan_parser from .file import FileParseAll +from .file import ext_dict from rules.autorule import autorule from prettytable import PrettyTable from phply import phpast as php @@ -139,9 +140,9 @@ def score2level(score): return '{l}-{s}: {ast}'.format(l=level[:1], s=score_full, ast=a) -def scan_single(target_directory, single_rule, files=None, secret_name=None): +def scan_single(target_directory, single_rule, files=None, language=None, secret_name=None): try: - return SingleRule(target_directory, single_rule, files, secret_name).process() + return SingleRule(target_directory, single_rule, files, language, secret_name).process() except Exception: raise @@ -183,7 +184,7 @@ def store(result): vulnerability=rule.vulnerability, language=rule.language )) - result = scan_single(target_directory, rule, files, secret_name) + result = scan_single(target_directory, rule, files, language, secret_name) store(result) # print @@ -249,7 +250,7 @@ def store(result): 'msg': 'scan finished', 'result': { 'vulnerabilities': [x.__dict__ for x in find_vulnerabilities], - 'language': language, + 'language': ",".join(language), 'framework': framework, 'extension': extension_count, 'file': file_count, @@ -262,12 +263,13 @@ def store(result): class SingleRule(object): - def __init__(self, target_directory, single_rule, files, secret_name=None): + def __init__(self, target_directory, single_rule, files, language=None, secret_name=None): self.target_directory = target_directory self.find = Tool().find self.grep = Tool().grep self.sr = single_rule self.files = files + self.languages = language self.secret_name = secret_name # Single Rule Vulnerabilities """ @@ -419,7 +421,7 @@ def process(self): try: datas = Core(self.target_directory, vulnerability, 
self.sr, 'project name', ['whitelist1', 'whitelist2'], test=is_test, index=index, - files=self.files, secret_name=self.secret_name).scan() + files=self.files, languages=self.languages, secret_name=self.secret_name).scan() data = "" if len(datas) == 3: @@ -443,7 +445,7 @@ def process(self): else: if reason == 'New Core': # 新的规则 logger.debug('[CVI-{cvi}] [NEW-VUL] New Rules init') - new_rule_vulnerabilities = NewCore(self.sr, self.target_directory, data, self.files, 0, secret_name=self.secret_name) + new_rule_vulnerabilities = NewCore(self.sr, self.target_directory, data, self.files, 0, languages=self.languages, secret_name=self.secret_name) if len(new_rule_vulnerabilities) > 0: self.rule_vulnerabilities.extend(new_rule_vulnerabilities) @@ -484,7 +486,7 @@ def parse_match(self, single_match): class Core(object): def __init__(self, target_directory, vulnerability_result, single_rule, project_name, white_list, test=False, - index=0, files=None, secret_name=None): + index=0, files=None, languages=None, secret_name=None): """ Initialize :param: target_directory: @@ -509,13 +511,14 @@ def __init__(self, target_directory, vulnerability_result, single_rule, project_ # self.code_content = vulnerability_result.code_content.strip() self.code_content = vulnerability_result.code_content self.files = files + self.languages = languages self.secret_name = secret_name self.rule_match = single_rule.match self.rule_match_mode = single_rule.match_mode self.vul_function = single_rule.vul_function self.cvi = single_rule.svid - self.lan = single_rule.language + self.lan = single_rule.language.lower() self.single_rule = single_rule self.project_name = project_name @@ -629,6 +632,20 @@ def is_can_parse(self): return True return False + def is_target(self): + """ + Check whether the target file's extension belongs to the rule language (self.lan) + :return: True when the extension is listed in ext_dict for self.lan, otherwise False + """ + # ext_dict stores extensions with their leading dot, so rebuild the extension in that form + fileext = "." + self.file_path.split(".")[-1].lower() + + if self.lan in ext_dict and fileext is not None: + if fileext in
ext_dict[self.lan]: + return True + + return False + def init_php_repair(self): """ 初始化修复函数规则 @@ -655,7 +672,7 @@ def init_php_repair(self): self.controlled_list += b except ImportError: - logger.warning('[AST][INIT] Secret_name init error... No nodule named {}'.format(self.secret_name)) + logger.warning('[AST][INIT] Secret_name init error... No module named {}'.format(self.secret_name)) # init for key in self.repair_dict: @@ -694,6 +711,10 @@ def scan(self): logger.debug("[RET] Annotation") return False, 'Annotation(注释)' + if not self.is_target(): + logger.error("[SCAN] file {} ext is not support, something error...".format(self.file_path)) + return False, 'Unsupport File' + # # function-param-regex # Match(function) -> Param-Controllable -> Repair -> Done @@ -704,7 +725,8 @@ def scan(self): # Match(function) -> vustomize-match() -> Param-Controllable -> Repair -> Done # logger.debug('[CVI-{cvi}] match-mode {mm}'.format(cvi=self.cvi, mm=self.rule_match_mode)) - if self.file_path[-3:].lower() == 'php': + # if self.file_path[-3:].lower() == 'php': + if self.lan == "php": try: self.init_php_repair() ast = CAST(self.rule_match, self.target_directory, self.file_path, self.line_number, @@ -777,7 +799,34 @@ def scan(self): logger.debug(traceback.format_exc()) return False, 'Exception' - elif self.file_path[-3:].lower() == 'sol': + # elif self.file_path[-3:].lower() == 'sol': + elif self.lan == "solidity": + try: + ast = CAST(self.rule_match, self.target_directory, self.file_path, self.line_number, + self.code_content, files=self.files, rule_class=self.single_rule, + repair_functions=self.repair_functions) + + # only match + if self.rule_match_mode == const.mm_regex_only_match: + # + # Regex-Only-Match + # Match(regex) -> Repair -> Done + # + logger.debug("[CVI-{cvi}] [ONLY-MATCH]".format(cvi=self.cvi)) + return True, 'Regex-only-match' + elif self.rule_match_mode == const.mm_regex_return_regex: + logger.debug("[CVI-{cvi}] [REGEX-RETURN-REGEX]".format(cvi=self.cvi)) + 
return True, 'Regex-return-regex' + else: + logger.warn("[CVI-{cvi} [OTHER-MATCH]] sol ruls only support for Regex-only-match and Regex-return-regex...".format(cvi=self.cvi)) + return False, 'Unsupport Match' + + except Exception as e: + logger.debug(traceback.format_exc()) + return False, 'Exception' + + # elif self.file_path[-3:].lower() == '.js': + elif self.lan == "javascript": try: ast = CAST(self.rule_match, self.target_directory, self.file_path, self.line_number, self.code_content, files=self.files, rule_class=self.single_rule, @@ -912,9 +961,10 @@ def auto_parse_match(single_match, svid, language): return mr -def NewCore(old_single_rule, target_directory, new_rules, files, count=0, secret_name=None): +def NewCore(old_single_rule, target_directory, new_rules, files, count=0, languages=None, secret_name=None): """ 处理新的规则生成 + :param languages: :param old_single_rule: :param secret_name: :param target_directory: diff --git a/cobra/file.py b/cobra/file.py index 35ef3abc..8c34b160 100644 --- a/cobra/file.py +++ b/cobra/file.py @@ -23,8 +23,15 @@ except ImportError: from urllib.parse import quote +ext_dict = { + "php": ['.php', '.php3', '.php4', '.php5', '.php7', '.pht', '.phs', '.phtml'], + "solidity": ['.sol'], + "javascript": ['.js'], +} -ext_list = ['.php', '.php3', '.php4', '.php5', '.php7', '.pht', '.phs', '.phtml', '.sol'] +ext_list = [] +for e in ext_dict: + ext_list += ext_dict[e] def file_list_parse(filelist): @@ -33,10 +40,9 @@ def file_list_parse(filelist): if not filelist: return result - for ext in ext_list: - for file in filelist: - if file[0] == ext: - result.append(file[1]['list']) + for file in filelist: + if file[0] in ext_list: + result.append(file[1]['list']) return result diff --git a/cobra/parser.py b/cobra/parser.py index 1d1e8c32..7aafda5e 100644 --- a/cobra/parser.py +++ b/cobra/parser.py @@ -622,7 +622,7 @@ def parameters_back(param, nodes, function_params=None, lineno=0, isback=isback) return is_co, cp, expr_lineno - if len(nodes) != 0 
and is_co != 1 and is_co != -1: + if len(nodes) != 0 and is_co not in [-1, 1, 2]: node = nodes[len(nodes) - 1] if isinstance(node, php.Assignment) and param_name == get_node_name(node.node): # 回溯的过程中,对出现赋值情况的节点进行跟踪 diff --git a/cobra/pretreatment.py b/cobra/pretreatment.py index a9301d01..4b271745 100644 --- a/cobra/pretreatment.py +++ b/cobra/pretreatment.py @@ -42,7 +42,7 @@ def pre_ast(self): for filepath in fileext[1]['list']: all_nodes = [] - filepath = os.path.join(self.target_directory, filepath) + filepath = os.path.join(self.target_directory, filepath) self.pre_result[filepath] = {} self.pre_result[filepath]['language'] = 'php' self.pre_result[filepath]['ast_nodes'] = [] diff --git a/cobra/rule.py b/cobra/rule.py index 3775509f..71a646ca 100644 --- a/cobra/rule.py +++ b/cobra/rule.py @@ -48,22 +48,26 @@ def block(index): class Rule(object): - def __init__(self, lan="php"): - if not lan: - lan = "php" - self.rules_path = rules_path + "/" + lan - if not os.path.exists(self.rules_path): - logger.error("[INIT][RULE] language {} can't found rules".format(self.rules_path)) - os.mkdir(self.rules_path) + def __init__(self, lans=None): + if not lans: + lans = ["php"] - self.rule_list = self.list_parse() - - # import function from rule self.rule_dict = {} - for rule in self.rule_list: - rulename = rule.split('.')[0] - rulefile = "rules." + lan + "." + rulename - self.rule_dict[rulename] = __import__(rulefile, fromlist=rulename) + + # handle each requested language in turn + for lan in lans: + self.rules_path = rules_path + "/" + lan + if not os.path.exists(self.rules_path): + logger.error("[INIT][RULE] language {} can't found rules".format(self.rules_path)) + os.mkdir(self.rules_path) + + self.rule_list = self.list_parse() + + # import function from rule + for rule in self.rule_list: + rulename = rule.split('.')[0] + rulefile = "rules." + lan + "."
+ rulename + self.rule_dict[rulename] = __import__(rulefile, fromlist=rulename) self.vulnerabilities = self.vul_init() diff --git a/cobra/utils.py b/cobra/utils.py index 14160625..b43beb50 100644 --- a/cobra/utils.py +++ b/cobra/utils.py @@ -35,7 +35,7 @@ class ParseArgs(object): - def __init__(self, target, formatter, output, special_rules=None, black_path=None, a_sid=None): + def __init__(self, target, formatter, output, special_rules=None, language=None, black_path=None, a_sid=None): self.target = target self.formatter = formatter self.output = output @@ -74,7 +74,7 @@ def __init__(self, target, formatter, output, special_rules=None, black_path=Non self.black_path_list = [] if ',' in black_path: - self.black_path_list = [x for x in black_path.split(',') if x != " "] + self.black_path_list = [x.strip() for x in black_path.split(',') if x.strip()] logger.info("[INIT][PARSE_ARGS] Black Path list is {}".format(self.black_path_list)) else: self.black_path_list = None @@ -83,6 +83,17 @@ def __init__(self, target, formatter, output, special_rules=None, black_path=Non else: self.black_path_list = None + # check and normalize the language argument (compare by value, not identity) + if language is not None and language != "": + self.language = [] + + if ',' in language: + self.language = [x.strip() for x in language.split(',') if x.strip()] + logger.info("[INIT][PARSE_ARGS] Language is {}".format(self.language)) + else: + self.language = [language.strip()] + logger.warning("[INIT][PARSE_ARGS] Language parse error.") + self.sid = a_sid @staticmethod diff --git a/docs/changelog.md b/docs/changelog.md index 37eff0d7..6f85f5b2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -111,4 +111,9 @@ - 2019-05-16 - Cobra-W 1.5.0 - 添加了-b参数以设置扫描时的黑名单,可以用来避免扫描第三方模块,造成无意义的搜索 - - 修复了tamper中无法设置函数名为输入的问题 \ No newline at end of file + - 修复了tamper中无法设置函数名为输入的问题 +- 2019-06-10 + - Cobra-W 1.6.0 + - 修复了部分在is_repair的判断错误问题 + - 重构了关于语言设置的问题,现在可以同时对多种语言扫描,并留下了各种语言的拓展位 + - 添加了关于简单的js的支持,现在可以进行正则匹配扫描 \ No newline at end of file diff --git
a/docs/dev.md b/docs/dev.md index c2529791..0be013c3 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -8,42 +8,51 @@ ``` python .\cobra.py -h -usage: cobra [-h] [-t ] [-f ] [-o ] [-r ] - [-s ] [-d] - - ____ _ __ __ - / ___|___ | |__ _ __ __ _ \ \ / / -| | / _ \| '_ \| '__/ _` | \ \ /\ / / -| |__| (_) | |_) | | | (_| | --- \ V V / - \____\___/|_.__/|_| \__,_| \_/\_/ v0.8.3 - -GitHub: https://github.com/LoRexxar/Cobra-W - -Cobra is a static code analysis system that automates the detecting vulnerabilities and security issue. - -optional arguments: - -h, --help show this help message and exit - -Scan: - -t , --target - file, folder, compress, or repository address - -f , --format - vulnerability output format (formats: html, json, csv, - xml) - -o , --output - vulnerability output STREAM, FILE - -r , --rule - specifies rules e.g: 1000, 1001 - -s , --secret - secret repair function e.g: wordpress - -d, --debug open debug mode - -Usage: - python cobra.py -t tests/vulnerabilities - python cobra.py -t tests/vulnerabilities -r 1000, 1001 - python cobra.py -t tests/vulnerabilities -s wordpress - python cobra.py -t tests/vulnerabilities -f json -o /tmp/report.json - python cobra.py -t tests/vulnerabilities --debug +usage: cobra [-h] [-t ] [-f ] [-o ] [-r ] + [-s ] [-i ] [-l ] [-d] [-lan LANGUAGE] + [-b BLACK_PATH] + + ____ _ __ __ + / ___|___ | |__ _ __ __ _ \ \ / / +| | / _ \| '_ \| '__/ _` | \ \ /\ / / +| |__| (_) | |_) | | | (_| | --- \ V V / + \____\___/|_.__/|_| \__,_| \_/\_/ v1.6.0 + +GitHub: https://github.com/LoRexxar/Cobra-W + +Cobra is a static code analysis system that automates the detecting vulnerabilities and security issue. 
+ +optional arguments: + -h, --help show this help message and exit + +Scan: + -t , --target + file, folder, compress, or repository address + -f , --format + vulnerability output format (formats: html, json, csv, + xml) + -o , --output + vulnerability output STREAM, FILE + -r , --rule + specifies rules e.g: 1000, 1001 + -s , --secret + secret repair function e.g: wordpress + -i , --sid + sid for cobra-wa + -l , --log + log name for cobra-wa + -d, --debug open debug mode + -lan LANGUAGE, --language LANGUAGE + set target language + -b BLACK_PATH, --blackpath BLACK_PATH + black path list + +Usage: + python cobra.py -t tests/vulnerabilities + python cobra.py -t tests/vulnerabilities -r 1000, 1001 + python cobra.py -t tests/vulnerabilities -s wordpress + python cobra.py -t tests/vulnerabilities -f json -o /tmp/report.json + python cobra.py -t tests/vulnerabilities --debug ``` ## 核心代码 diff --git a/rules/languages.xml b/rules/languages.xml index 8aaef816..5e434d0d 100644 --- a/rules/languages.xml +++ b/rules/languages.xml @@ -9,6 +9,9 @@ + + + @@ -19,6 +22,9 @@ + + + @@ -70,9 +76,6 @@ - - -