Skip to content

Commit f47bfaf

Browse files
committed
[colic] Support scancli
This code provides support to use scancli, a faster version of scancode. Signed-off-by: Valerio Cosentino <valcos@bitergia.com>
1 parent e0bea2a commit f47bfaf

File tree

3 files changed

+105
-13
lines changed

3 files changed

+105
-13
lines changed

graal/backends/core/analyzers/nomos.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,12 @@ def __init__(self, exec_path):
4848
def analyze(self, **kwargs):
4949
"""Add information about license
5050
51-
:param file_path: file path
51+
:param file_paths: path of the file to analyze (a single path, despite the plural name)
5252
5353
:returns result: dict of the results of the analysis
5454
"""
5555
result = {'licenses': []}
56-
file_path = kwargs['file_path']
56+
file_path = kwargs['file_paths']
5757

5858
try:
5959
msg = subprocess.check_output([self.exec_path, file_path]).decode("utf-8")

graal/backends/core/analyzers/scancode.py

+72-3
Original file line numberDiff line numberDiff line change
@@ -28,31 +28,55 @@
2828
from .analyzer import Analyzer
2929

3030

31+
SCANCODE_CLI_EXEC = 'etc/scripts/scancli.py'
32+
CONFIGURE_EXEC = 'configure'
33+
34+
3135
class ScanCode(Analyzer):
3236
"""A wrapper for nexB/scancode-toolkit.
3337
3438
This class calls scancode-toolkit over a file, parses
3539
the result of the analysis and returns it as a dict.
3640
3741
:param exec_path: path of the scancode executable
42+
:param cli: True, if scancode_cli is used
3843
"""
3944
version = '0.1.0'
4045

41-
def __init__(self, exec_path):
46+
def __init__(self, exec_path, cli=False):
    """Initialize the ScanCode wrapper.

    :param exec_path: path of the scancode executable (or of the
        scancli script when `cli` is True)
    :param cli: True, if scancode_cli is used

    :raises GraalError: when `exec_path` does not exist
    """
    self.cli = cli

    if not GraalRepository.exists(exec_path):
        raise GraalError(cause="executable path %s not valid" % exec_path)

    self.exec_path = exec_path

    if not cli:
        return

    # In scancli mode the `configure` script is invoked once with `--help`.
    # Its path is derived from the scancli script path and its (decoded)
    # output is discarded -- presumably this triggers scancode's own setup;
    # TODO confirm against the scancode-toolkit docs.
    configure_path = exec_path.replace(SCANCODE_CLI_EXEC, CONFIGURE_EXEC)
    subprocess.check_output([configure_path, '--help']).decode("utf-8")
56+
4757
def analyze(self, **kwargs):
    """Add information about license

    Delegates to the scancode implementation, or to the scancli one
    when the analyzer was created with `cli=True`.

    :param file_paths: a single file path in scancode mode; the file
        paths to scan together in scancli mode

    :returns result: dict of the results of the analysis
    """
    run_analysis = self.__analyze_scancode_cli if self.cli else self.__analyze_scancode
    return run_analysis(**kwargs)
70+
71+
def __analyze_scancode(self, **kwargs):
72+
"""Add information about license using scancode
73+
74+
:param file_paths: path of the file to analyze (a single path is passed to scancode)
5175
5276
:returns result: dict of the results of the analysis
5377
"""
5478
result = {'licenses': []}
55-
file_path = kwargs['file_path']
79+
file_path = kwargs['file_paths']
5680

5781
try:
5882
msg = subprocess.check_output([self.exec_path, '--json-pp', '-', '--license', file_path]).decode("utf-8")
@@ -68,3 +92,48 @@ def analyze(self, **kwargs):
6892
result['licenses'] = licenses_raw['files'][0]['licenses']
6993

7094
return result
95+
96+
def __analyze_scancode_cli(self, **kwargs):
    """Add information about license using scancode-cli

    The scancli script is run once over all the given files. Its output
    is split into chunks separated by blank lines; each chunk is parsed
    as JSON and the first `files` entry of each chunk is collected.

    :param file_paths: file paths

    :returns result: dict of the results of the analysis, with the
        per-file information listed under the `files` key
    :raises GraalError: when the scancli process exits with an error
    """
    try:
        command = ['python3', self.exec_path] + list(kwargs['file_paths'])
        raw_output = subprocess.check_output(command).decode("utf-8")
    except subprocess.CalledProcessError as e:
        raise GraalError(cause="Scancode failed at %s, %s" % (' '.join(kwargs['file_paths']),
                                                              e.output.decode("utf-8")))
    finally:
        # Private helper of the stdlib subprocess module; it runs whether
        # or not the scancli call succeeded.
        subprocess._cleanup()

    if not raw_output:
        return {'files': []}

    # Consecutive non-empty lines belong to the same chunk (joined without
    # a separator); blank lines only mark the boundary between chunks.
    chunks = []
    pending_lines = []
    for line in raw_output.split('\n'):
        if line:
            pending_lines.append(line)
        elif pending_lines:
            chunks.append(''.join(pending_lines))
            pending_lines = []

    # The output may not end with a blank line: flush the trailing chunk.
    if pending_lines:
        chunks.append(''.join(pending_lines))

    # Parse every chunk before extracting anything. The first element of
    # each parsed chunk is dropped -- presumably a header entry; TODO
    # confirm against the scancli output format.
    parsed_chunks = [json.loads(chunk)[1:] for chunk in chunks]

    return {'files': [entries[0]['files'][0] for entries in parsed_chunks]}

graal/backends/core/colic.py

+31-8
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@
3333

3434
NOMOS = 'nomos'
3535
SCANCODE = 'scancode'
36+
SCANCODE_CLI = 'scancode_cli'
3637

3738
CATEGORY_COLIC_NOMOS = 'code_license_' + NOMOS
3839
CATEGORY_COLIC_SCANCODE = 'code_license_' + SCANCODE
40+
CATEGORY_COLIC_SCANCODE_CLI = 'code_license_' + SCANCODE_CLI
3941

4042
logger = logging.getLogger(__name__)
4143

@@ -44,7 +46,7 @@ class CoLic(Graal):
4446
"""CoLic backend.
4547
4648
This class extends the Graal backend. It gathers license information
47-
using Nomos
49+
using Nomos, Scancode or Scancode-cli
4850
4951
:param uri: URI of the Git repository
5052
:param git_path: path to the repository or to the log file
@@ -59,9 +61,9 @@ class CoLic(Graal):
5961
:raises RepositoryError: raised when there was an error cloning or
6062
updating the repository.
6163
"""
62-
version = '0.4.0'
64+
version = '0.5.0'
6365

64-
CATEGORIES = [CATEGORY_COLIC_NOMOS, CATEGORY_COLIC_SCANCODE]
66+
CATEGORIES = [CATEGORY_COLIC_NOMOS, CATEGORY_COLIC_SCANCODE, CATEGORY_COLIC_SCANCODE_CLI]
6567

6668
def __init__(self, uri, git_path, exec_path, worktreepath=DEFAULT_WORKTREE_PATH,
6769
entrypoint=None, in_paths=None, out_paths=None,
@@ -84,6 +86,8 @@ def fetch(self, category=CATEGORY_COLIC_NOMOS, paths=None,
8486

8587
if category == CATEGORY_COLIC_SCANCODE:
8688
self.analyzer_kind = SCANCODE
89+
elif category == CATEGORY_COLIC_SCANCODE_CLI:
90+
self.analyzer_kind = SCANCODE_CLI
8791
elif category == CATEGORY_COLIC_NOMOS:
8892
self.analyzer_kind = NOMOS
8993
else:
@@ -108,6 +112,8 @@ def metadata_category(item):
108112
return CATEGORY_COLIC_NOMOS
109113
elif item['analyzer'] == SCANCODE:
110114
return CATEGORY_COLIC_SCANCODE
115+
elif item['analyzer'] == SCANCODE_CLI:
116+
return CATEGORY_COLIC_SCANCODE_CLI
111117
else:
112118
raise GraalError(cause="Unknown analyzer %s" % item['analyzer'])
113119

@@ -135,6 +141,7 @@ def _analyze(self, commit):
135141
:param commit: a Perceval commit item
136142
"""
137143
analysis = []
144+
files_to_process = []
138145

139146
for committed_file in commit['files']:
140147

@@ -148,9 +155,23 @@ def _analyze(self, commit):
148155
if not GraalRepository.exists(local_path):
149156
continue
150157

151-
license_info = self.analyzer.analyze(local_path)
152-
license_info.update({'file_path': file_path})
153-
analysis.append(license_info)
158+
if self.analyzer_kind == NOMOS:
159+
license_info = self.analyzer.analyze(local_path)
160+
license_info.update({'file_path': file_path})
161+
analysis.append(license_info)
162+
elif self.analyzer_kind == SCANCODE:
163+
license_info = self.analyzer.analyze(local_path)
164+
license_info.update({'file_path': file_path})
165+
analysis.append(license_info)
166+
else:
167+
files_to_process.append((file_path, local_path))
168+
169+
if files_to_process:
170+
local_paths = [f[1] for f in files_to_process]
171+
analysis = self.analyzer.analyze(local_paths)
172+
173+
for i in range(len(analysis['files'])):
174+
analysis['files'][i]['file_path'] = files_to_process[i][0]
154175

155176
return analysis
156177

@@ -176,11 +197,13 @@ class LicenseAnalyzer:
176197
def __init__(self, exec_path, kind=NOMOS):
    """Create the analyzer that matches `kind`.

    :param exec_path: path of the executable handed to the analyzer
    :param kind: SCANCODE or SCANCODE_CLI to use ScanCode (the latter
        in scancli mode); any other value selects Nomos
    """
    if kind in (SCANCODE, SCANCODE_CLI):
        selected = ScanCode(exec_path, cli=(kind == SCANCODE_CLI))
    else:
        selected = Nomos(exec_path)

    self.analyzer = selected
181204

182205
def analyze(self, file_path):
183-
"""Analyze the content of a file using Nomos
206+
"""Analyze the content of a file using Nomos/Scancode
184207
185208
:param file_path: file path (a list of file paths when Scancode-cli is used)
186209
@@ -189,7 +212,7 @@ def analyze(self, file_path):
189212
'licenses': [..]
190213
}
191214
"""
192-
kwargs = {'file_path': file_path}
215+
kwargs = {'file_paths': file_path}
193216
analysis = self.analyzer.analyze(**kwargs)
194217

195218
return analysis

0 commit comments

Comments
 (0)