Skip to content

Commit

Permalink
Merge pull request #33 from LoRexxar/develop
Browse files Browse the repository at this point in the history
update 1.2.0
  • Loading branch information
LoRexxar authored Apr 16, 2019
2 parents a887185 + 08f0129 commit 949fa32
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 62 deletions.
2 changes: 1 addition & 1 deletion cobra/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__issue_page__ = 'https://github.com/LoRexxar/Cobra-W/issues/new'
__python_version__ = sys.version.split()[0]
__platform__ = platform.platform()
__version__ = '1.1.1'
__version__ = '1.2.0'
__author__ = 'LoRexxar'
__author_email__ = 'LoRexxar@gmail.com'
__license__ = 'MIT License'
Expand Down
15 changes: 9 additions & 6 deletions cobra/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .file import FileParseAll
from .parser import is_controllable
from .parser import anlysis_params
from .pretreatment import ast_object


class CAST(object):
Expand Down Expand Up @@ -229,16 +230,18 @@ def is_controllable_param(self):

# Get assign code block
# param_block_code = self.block_code(0)
fi = codecs.open(self.file_path, "r", encoding='utf-8', errors='ignore')
param_content = fi.read()
# fi = codecs.open(self.file_path, "r", encoding='utf-8', errors='ignore')
# param_content = fi.read()

if param_content is False:
logger.debug("[AST] Can't get assign code block")
return True, self.data
# param_content = ast_object.get_nodes(self.file_path)
#
# if param_content is False:
# logger.debug("[AST] Can't get assign code block")
# return True, self.data

logger.debug("[Deep AST] Start AST for param {param_name}".format(param_name=param_name))

_is_co, _cp, expr_lineno = anlysis_params(param_name, param_content, self.file_path, self.line, self.sr.vul_function, self.repair_functions)
_is_co, _cp, expr_lineno = anlysis_params(param_name, self.file_path, self.line, self.sr.vul_function, self.repair_functions)

if _is_co == 1:
logger.debug("[AST] Is assign string: `Yes`")
Expand Down
6 changes: 6 additions & 0 deletions cobra/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .file import Directory
from .utils import ParseArgs
from .utils import md5, random_generator
from .pretreatment import ast_object


def get_sid(target, is_a_sid=False):
Expand All @@ -46,6 +47,7 @@ def start(target, formatter, output, special_rules, a_sid=None, secret_name=None
:param a_sid: all scan id
:return:
"""
global ast_object
# generate single scan id
s_sid = get_sid(target)
r = Running(a_sid)
Expand Down Expand Up @@ -84,6 +86,10 @@ def start(target, formatter, output, special_rules, a_sid=None, secret_name=None
if pa.special_rules is not None:
logger.info('[CLI] [SPECIAL-RULE] only scan used by {r}'.format(r=','.join(pa.special_rules)))

# Pretreatment ast object
ast_object.init_pre(target_directory, files)
ast_object.pre_ast()

# scan
scan(target_directory=target_directory, a_sid=a_sid, s_sid=s_sid, special_rules=pa.special_rules,
language=main_language, framework=main_framework, file_count=file_count, extension_count=len(files),
Expand Down
6 changes: 3 additions & 3 deletions cobra/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,9 +682,9 @@ def scan(self):
logger.debug('[RULE_MATCH] {r}'.format(r=rule_match))
try:
# with open(self.file_path, 'r') as fi:
fi = codecs.open(self.file_path, "r", encoding='utf-8', errors='ignore')
code_contents = fi.read()
result = scan_parser(code_contents, rule_match, self.line_number, self.file_path, repair_functions=self.repair_functions)
# fi = codecs.open(self.file_path, "r", encoding='utf-8', errors='ignore')
# code_contents = fi.read()
result = scan_parser(rule_match, self.line_number, self.file_path, repair_functions=self.repair_functions)
logger.debug('[AST] [RET] {c}'.format(c=result))
if len(result) > 0:
if result[0]['code'] == 1: # 函数参数可控
Expand Down
87 changes: 38 additions & 49 deletions cobra/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from phply.phpparse import make_parser # 语法分析
from phply import phpast as php
from .log import logger
from .pretreatment import ast_object
import re
import codecs
import traceback
Expand Down Expand Up @@ -244,21 +245,7 @@ def get_filename(node, file_path): # 获取filename
constant_node = filenames[i]
constant_node_name = constant_node.name

f = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
file_content = f.read()
parser = make_parser()
all_nodes = parser.parse(file_content, debug=False, lexer=lexer.clone(), tracking=with_line)

for node in all_nodes:
if isinstance(node, php.FunctionCall) and node.name == "define":
define_params = node.params

if len(define_params) == 2 and define_params[0].node == constant_node_name:
filenames[i] = define_params[1].node

if isinstance(filenames[i], php.Constant): # 如果还没找到该常量,暂时退出
logger.warning("[AST] [INCLUDE FOUND] Can't found this constart {}, pass it ".format(filenames[i]))
filenames[i] = "not_found"
filenames[i] = ast_object.get_define(constant_node_name)

return filenames

Expand Down Expand Up @@ -858,20 +845,22 @@ def deep_parameters_back(param, back_node, function_params, count, file_path, li
try:
logger.debug("[Deep AST] open new file {file_path}".format(file_path=file_path_name))
# f = open(file_path_name, 'r')
f = codecs.open(file_path_name, "r", encoding='utf-8', errors='ignore')
file_content = f.read()
# f = codecs.open(file_path_name, "r", encoding='utf-8', errors='ignore')
# file_content = f.read()
all_nodes = ast_object.get_nodes(file_path_name)

except:
logger.warning("[Deep AST] error to open new file...continue")
continue

try:
# 目标可能语法错误
parser = make_parser()
except SyntaxError:
logger.warning('[AST] target php file exist SyntaxError...')
logger.warning('[AST] [ERROR]:{e}'.format(e=traceback.format_exc()))
# try:
# # 目标可能语法错误
# parser = make_parser()
# except SyntaxError:
# logger.warning('[AST] target php file exist SyntaxError...')
# logger.warning('[AST] [ERROR]:{e}'.format(e=traceback.format_exc()))

all_nodes = parser.parse(file_content, debug=False, lexer=lexer.clone(), tracking=with_line)
# all_nodes = parser.parse(file_content, debug=False, lexer=lexer.clone(), tracking=with_line)
node = cp
# node = php.Variable(cp)

Expand Down Expand Up @@ -917,7 +906,7 @@ def get_function_params(nodes):
return params


def anlysis_params(param, code_content, file_path, lineno, vul_function=None, repair_functions=None):
def anlysis_params(param, file_path, lineno, vul_function=None, repair_functions=None):
"""
在cast调用时做中转数据预处理
:param repair_functions:
Expand All @@ -940,15 +929,16 @@ def anlysis_params(param, code_content, file_path, lineno, vul_function=None, re
param = php.ObjectProperty(param_left, param_right)

param = php.Variable(param)
try:
# 目标可能语法错误
parser = make_parser()
except SyntaxError:
logger.warning('[AST] target php file exist SyntaxError...')
logger.warning('[AST] [ERROR]:{e}'.format(e=traceback.format_exc()))
return -1, "", ""

all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=with_line)
# try:
# # 目标可能语法错误
# parser = make_parser()
# except SyntaxError:
# logger.warning('[AST] target php file exist SyntaxError...')
# logger.warning('[AST] [ERROR]:{e}'.format(e=traceback.format_exc()))
# return -1, "", ""
#
# all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=with_line)
all_nodes = ast_object.get_nodes(file_path)

# 做一次处理,解决Variable(Variable('$id'))的问题
while isinstance(param.name, php.Variable):
Expand Down Expand Up @@ -1052,9 +1042,9 @@ def analysis_binaryop_node(node, back_node, vul_function, vul_lineno, function_p

if file_path is not None:
# with open(file_path, 'r') as fi:
fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
code_content = fi.read()
is_co, cp, expr_lineno = anlysis_params(param, code_content, file_path, param_lineno,
# fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
# code_content = fi.read()
is_co, cp, expr_lineno = anlysis_params(param, file_path, param_lineno,
vul_function=vul_function)
else:
count = 0
Expand Down Expand Up @@ -1083,10 +1073,10 @@ def analysis_objectproperry_node(node, back_node, vul_function, vul_lineno, func
# is_co, cp, expr_lineno = parameters_back(param, back_node, function_params)
if file_path is not None:
# with open(file_path, 'r') as fi:
fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
code_content = fi.read()
# fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
# code_content = fi.read()

is_co, cp, expr_lineno = anlysis_params(param, code_content, file_path, param_lineno, vul_function=vul_function)
is_co, cp, expr_lineno = anlysis_params(param, file_path, param_lineno, vul_function=vul_function)
else:
count = 0
is_co, cp, expr_lineno = deep_parameters_back(node, back_node, function_params, count,
Expand Down Expand Up @@ -1131,10 +1121,10 @@ def analysis_functioncall_node(node, back_node, vul_function, vul_lineno, functi

if file_path is not None:
# with open(file_path, 'r') as fi:
fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
code_content = fi.read()
# fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
# code_content = fi.read()

is_co, cp, expr_lineno = anlysis_params(param, code_content, file_path, param_lineno,
is_co, cp, expr_lineno = anlysis_params(param, file_path, param_lineno,
vul_function=vul_function)
else:
count = 0
Expand All @@ -1161,10 +1151,10 @@ def analysis_variable_node(node, back_node, vul_function, vul_lineno, function_p

if file_path is not None:
# with open(file_path, 'r') as fi:
fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
code_content = fi.read()
# fi = codecs.open(file_path, 'r', encoding='utf-8', errors='ignore')
# code_content = fi.read()

is_co, cp, expr_lineno = anlysis_params(param, code_content, file_path, param_lineno, vul_function=vul_function)
is_co, cp, expr_lineno = anlysis_params(param, file_path, param_lineno, vul_function=vul_function)
else:
count = 0
is_co, cp, expr_lineno = deep_parameters_back(node, back_node, function_params, count, file_path,
Expand Down Expand Up @@ -1485,7 +1475,7 @@ def analysis(nodes, vul_function, back_node, vul_lineo, file_path=None, function
back_node.append(node)


def scan_parser(code_content, sensitive_func, vul_lineno, file_path, repair_functions=[]):
def scan_parser(sensitive_func, vul_lineno, file_path, repair_functions=[]):
"""
开始检测函数
:param repair_functions:
Expand All @@ -1499,8 +1489,7 @@ def scan_parser(code_content, sensitive_func, vul_lineno, file_path, repair_func
global scan_results, is_repair_functions
scan_results = []
is_repair_functions = repair_functions
parser = make_parser()
all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=with_line)
all_nodes = ast_object.get_nodes(file_path)

for func in sensitive_func: # 循环判断代码中是否存在敏感函数,若存在,递归判断参数是否可控;对文件内容循环判断多次
back_node = []
Expand Down
105 changes: 105 additions & 0 deletions cobra/pretreatment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/15 14:51
# @Author : LoRexxar
# @File : Pretreatment.py
# @Contact : lorexxar@gmail.com


from phply.phplex import lexer # 词法分析
from phply.phpparse import make_parser # 语法分析
from phply import phpast as php
from .log import logger

import os
import codecs
import traceback


class Pretreatment:
    """Read and parse the PHP files of a scan target once and cache the results.

    After ``init_pre()`` and ``pre_ast()`` have run, ``pre_result`` maps each
    file path to a dict with the keys ``'language'``, ``'content'`` and, when
    the file could be parsed, ``'ast_nodes'``.  ``define_dict`` collects the
    constants declared through top-level PHP ``define()`` calls.
    """

    def __init__(self):
        self.file_list = []
        self.target_directory = ""

        # file path -> {'language': ..., 'content': ..., 'ast_nodes': ...}
        self.pre_result = {}
        # first define() argument -> second define() argument
        self.define_dict = {}

        # Nothing is parsed here: the file list is still empty, so the real
        # work happens when the caller runs init_pre() followed by pre_ast().

    def init_pre(self, target_directory, files):
        """Register the scan target.

        :param target_directory: root directory of the project being scanned;
            stored in normalized form.
        :param files: iterable of ``(extension, info)`` pairs where
            ``info['list']`` holds file paths relative to *target_directory*.
        """
        self.file_list = files
        self.target_directory = os.path.normpath(target_directory)

    def pre_ast(self):
        """Read and parse every registered ``.php`` file and fill the caches.

        A file that raises ``SyntaxError`` while parsing keeps its
        ``'content'`` entry but gets no ``'ast_nodes'`` entry, and contributes
        no constants to ``define_dict``.
        """
        for fileext in self.file_list:
            # Only PHP files are handled for now.
            if fileext[0] != ".php":
                continue

            for filepath in fileext[1]['list']:
                # Store keys in normalized form because get_nodes() normalizes
                # the path it is asked for before looking it up.
                filepath = os.path.normpath(os.path.join(self.target_directory, filepath))
                entry = {'language': 'php'}
                self.pre_result[filepath] = entry

                # The context manager closes the handle even if read() fails.
                with codecs.open(filepath, "r", encoding='utf-8', errors='ignore') as fi:
                    code_content = fi.read()

                entry['content'] = code_content

                try:
                    parser = make_parser()
                    all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=True)
                except SyntaxError:
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))
                    # Without a tree there is nothing to scan; falling through
                    # would reuse the previous file's nodes (or an unbound name).
                    continue

                entry['ast_nodes'] = all_nodes
                self._collect_defines(all_nodes)

    def _collect_defines(self, nodes):
        """Record every top-level ``define(name, value)`` call found in *nodes*."""
        for node in nodes:
            if not (isinstance(node, php.FunctionCall) and node.name == "define"):
                continue

            define_params = node.params
            # define() needs at least a name and a value; skip malformed calls
            # instead of failing with IndexError.
            if len(define_params) < 2:
                continue

            logger.debug("[AST][Pretreatment] new define {}={}".format(define_params[0].node, define_params[1].node))
            self.define_dict[define_params[0].node] = define_params[1].node

    def _find_entry(self, filepath):
        """Return the cached entry for *filepath*, or ``None`` when there is none.

        The path is tried as given, normalized, appended to the target
        directory by plain concatenation (the historical fallback) and finally
        joined to the target directory.
        """
        normalized = os.path.normpath(filepath)
        candidates = (
            filepath,
            normalized,
            self.target_directory + normalized,
            os.path.normpath(os.path.join(self.target_directory, normalized)),
        )

        for candidate in candidates:
            if candidate in self.pre_result:
                return self.pre_result[candidate]

        return None

    def get_nodes(self, filepath):
        """Return the cached AST nodes of *filepath*.

        :return: the parsed nodes, or ``False`` when the file is unknown or
            could not be parsed.
        """
        entry = self._find_entry(filepath)

        if entry is not None and 'ast_nodes' in entry:
            return entry['ast_nodes']

        logger.warning("[AST] file {} parser not found...".format(os.path.normpath(filepath)))
        return False

    def get_content(self, filepath):
        """Return the cached source text of *filepath*.

        :return: the file content, or ``False`` when the file is unknown.
        """
        entry = self._find_entry(filepath)

        if entry is not None and 'content' in entry:
            return entry['content']

        logger.warning("[AST] file {} parser not found...".format(filepath))
        return False

    def get_object(self):
        """Return this instance."""
        return self

    def get_define(self, define_name):
        """Return the value recorded for the constant *define_name*.

        :return: the second argument of the matching ``define()`` call, or the
            string ``"not_found"`` when the constant was never seen.
        """
        if define_name in self.define_dict:
            return self.define_dict[define_name]

        logger.warning("[AST] [INCLUDE FOUND] Can't found this constart {}, pass it ".format(define_name))
        return "not_found"


# Shared module-level instance. It is created empty; callers fill it by
# calling init_pre(target_directory, files) and then pre_ast().
ast_object = Pretreatment()
6 changes: 5 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,8 @@
- 2018-08-31
- Cobra-W 1.1.0
- 更新了新的regex-return-regex模式

- 2019-04-16
- Cobra-W 1.2.0
- 修复了include节点中出现变量,无法正确回溯的问题
- 花大代价尝试重构关于ast处理部分,把ast处理整体提出
- 解决了之前无法检索define全局变量的问题
10 changes: 8 additions & 2 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
from cobra.parser import scan_parser
from cobra.parser import anlysis_params
from cobra.config import project_directory
from cobra.pretreatment import ast_object


files = [('.php', {'list': ["v_parser.php", "v.php"]})]
ast_object.init_pre(project_directory + '/tests/vulnerabilities/', files)
ast_object.pre_ast()


target_projects = project_directory + '/tests/vulnerabilities/v_parser.php'
Expand All @@ -33,8 +39,8 @@


def test_scan_parser():
assert scan_parser(code_contents, sensitive_func, lineno, target_projects)
assert scan_parser(sensitive_func, lineno, target_projects)


def test_anlysis_params():
assert anlysis_params(param, code_contents2, target_projects2, lineno2)
assert anlysis_params(param, target_projects2, lineno2)

0 comments on commit 949fa32

Please sign in to comment.