Skip to content

Commit a516b7d

Browse files
committed
[integration] Add support of Graal's CoLic Backend to ELK
Signed-off-by: inishchith <inishchith@gmail.com>
1 parent 78e466c commit a516b7d

File tree

6 files changed

+521
-0
lines changed

6 files changed

+521
-0
lines changed

grimoire_elk/enriched/colic.py

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2015-2019 Bitergia
4+
#
5+
# This program is free software; you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation; either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program; if not, write to the Free Software
17+
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18+
#
19+
# Authors:
20+
# Nishchith Shetty <inishchith@gmail.com>
21+
#
22+
23+
import logging
24+
from .enrich import Enrich, metadata
25+
from grimoirelab_toolkit.datetime import str_to_datetime
26+
27+
28+
MAX_SIZE_BULK_ENRICHED_ITEMS = 200
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
class ColicEnrich(Enrich):
    """Enricher for the raw items handled by the CoLic connector.

    A raw item describes one commit together with the per-file analysis
    stored under ``data.analysis``. It is expanded into one enriched item
    per analyzed file, carrying the licenses and copyrights found there.
    """

    def get_identities(self, item):
        """ Return the identities from an item """

        # No identities are extracted from these items.
        return []

    def has_identities(self):
        """ Return whether the enriched items contains identities """

        return False

    def get_field_unique_id(self):
        """ Return the name of the field holding the unique id of an enriched item """

        return "id"

    def extract_modules(self, file_path):
        """ Extracts module path from the given file path

        Every non-empty leading directory prefix of `file_path` is
        returned, shortest first: 'a/b/c.py' gives ['a', 'a/b'].

        :param file_path: '/'-separated path of a file
        :returns: list of directory prefixes; empty when the path has
            no directory part
        """
        path_chunks = file_path.split('/')

        # The prefix of length 0 is always empty, so start at 1. An absolute
        # path still yields one empty prefix (its root), which is filtered out.
        prefixes = ('/'.join(path_chunks[:idx]) for idx in range(1, len(path_chunks)))

        return [prefix for prefix in prefixes if prefix]

    @metadata
    def get_rich_item(self, file_analysis):
        """ Build the enriched item for the analysis of a single file

        NOTE(review): this method is wrapped by the `metadata` decorator
        imported from `.enrich`; its effect on the returned item is not
        visible here.

        :param file_analysis: dict with a mandatory 'file_path' key and
            optional 'licenses' and 'copyrights' lists. Each license must
            provide 'name' and 'matched_rule'['licenses']; each copyright
            must provide 'value'.
        :returns: dict with the file path, its modules, the licenses and
            copyrights found, and 0/1 flags telling whether any license
            or copyright was present
        :raises KeyError: if 'file_path' or one of the license/copyright
            keys listed above is missing
        """
        # TODO: requires adjustments regarding category of backend used

        file_path = file_analysis["file_path"]
        found_licenses = file_analysis.get("licenses") or []
        found_copyrights = file_analysis.get("copyrights") or []

        licenses = []
        license_names = []
        for _license in found_licenses:
            licenses.extend(_license["matched_rule"]["licenses"])
            license_names.append(_license["name"])

        copyrights = [_copyright["value"] for _copyright in found_copyrights]

        return {
            "file_path": file_path,
            "modules": self.extract_modules(file_path),
            "copyrights": copyrights,
            "licenses": licenses,
            "license_name": license_names,
            "has_license": 1 if found_licenses else 0,
            "has_copyright": 1 if found_copyrights else 0,
        }

    def get_rich_items(self, item):
        """ Expand a raw item into one enriched item per analyzed file

        :param item: raw item; its 'data' entry must provide 'analysis',
            'commit', 'Author', 'Commit', 'message', 'AuthorDate' and
            'CommitDate'
        :returns: list of enriched items, empty when there is no analysis
        :raises KeyError: if one of the keys listed above is missing
        """
        # The real data
        entry = item['data']

        analysis = entry["analysis"]
        if not analysis:
            return []

        # Everything below is the same for all the files of the commit, so it
        # is computed once instead of once per file (this includes parsing
        # the two dates).
        raw_fields = {f: item.get(f) for f in self.RAW_FIELDS_COPY}

        # common attributes
        commit_fields = {
            'commit_sha': entry['commit'],
            'author': entry['Author'],
            'committer': entry['Commit'],
            'commit': entry['commit'],
            'message': entry['message'],
            'author_date': self.__fix_field_date(entry['AuthorDate']),
            'commit_date': self.__fix_field_date(entry['CommitDate']),
        }

        enriched_items = []

        for file_analysis in analysis:
            eitem = self.get_rich_item(file_analysis)
            eitem.update(raw_fields)
            eitem.update(commit_fields)

            if self.prjs_map:
                eitem.update(self.get_item_project(eitem))

            # uuid
            eitem['id'] = "{}_{}".format(eitem['commit_sha'], eitem['file_path'])

            eitem.update(self.get_grimoire_fields(entry["AuthorDate"], "file"))

            self.add_repository_labels(eitem)
            self.add_metadata_filter_raw(eitem)

            enriched_items.append(eitem)

        return enriched_items

    def enrich_items(self, ocean_backend, events=False):
        """ Enrich all the items fetched from `ocean_backend` and upload them

        Enriched items are accumulated and uploaded with
        `self.elastic.bulk_upload` whenever at least
        MAX_SIZE_BULK_ENRICHED_ITEMS of them are pending; the remainder is
        uploaded after the last raw item.

        :param ocean_backend: object whose `fetch()` yields the raw items
        :param events: not used by this implementation
        :returns: number of enriched items sent for upload
        """
        unique_id_field = self.get_field_unique_id()
        pending = []
        num_items = 0
        ins_items = 0

        for item in ocean_backend.fetch():
            pending.extend(self.get_rich_items(item))

            if len(pending) < MAX_SIZE_BULK_ENRICHED_ITEMS:
                continue

            num_items += len(pending)
            ins_items += self.elastic.bulk_upload(pending, unique_id_field)
            pending = []

        # Upload whatever is left below the bulk size
        if pending:
            num_items += len(pending)
            ins_items += self.elastic.bulk_upload(pending, unique_id_field)

        if num_items != ins_items:
            missing = num_items - ins_items
            logger.error("%s/%s missing items for CoLic", missing, num_items)
        else:
            logger.info("%s items inserted for CoLic", num_items)

        return num_items

    def __fix_field_date(self, date_value):
        """Fix possible errors in the field date

        :param date_value: date string, parsed with `str_to_datetime`
        :returns: the date in ISO 8601 format; the timezone information is
            dropped when its UTC offset cannot be read as an integer
        """
        field_date = str_to_datetime(date_value)

        try:
            # Only the sign and the hours of the "%z" offset are checked;
            # the value itself is not needed.
            int(field_date.strftime("%z")[0:3])
        except ValueError:
            field_date = field_date.replace(tzinfo=None)

        return field_date.isoformat()

grimoire_elk/raw/colic.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2015-2019 Bitergia
4+
#
5+
# This program is free software; you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation; either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program; if not, write to the Free Software
17+
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18+
#
19+
# Authors:
20+
# Nishchith Shetty <inishchith@gmail.com>
21+
#
22+
23+
from .elastic import ElasticOcean
24+
from ..elastic_mapping import Mapping as BaseMapping
25+
26+
27+
class Mapping(BaseMapping):
    """Elasticsearch mapping used by the CoLic Ocean feeder."""

    @staticmethod
    def get_elastic_mappings(es_major):
        """Get Elasticsearch mapping.

        Ensure data.message is string, since it can be very large

        :param es_major: major version of Elasticsearch, as string
            (not used here: the same mapping is returned for any version)
        :returns: dictionary with a key, 'items', with the mapping
        """
        # data.message is declared explicitly as an indexed text field;
        # every other field is left to dynamic mapping.
        items_mapping = '''
        {
            "dynamic":true,
            "properties": {
                "data": {
                    "properties": {
                        "message": {
                            "type": "text",
                            "index": true
                        }
                    }
                }
            }
        }
        '''

        return {"items": items_mapping}
56+
57+
58+
class ColicOcean(ElasticOcean):
    """CoLic Ocean feeder"""

    mapping = Mapping

    @classmethod
    def get_perceval_params_from_url(cls, url):
        """Return the backend parameters encoded in `url`.

        :param url: string with the URL, optionally followed by a space
            and a filter
        :returns: single-element list holding the text before the first
            space of `url`
        """
        # Just split the URL not the filter: only the part before the
        # first space is kept.
        repo_url, *_ = url.split(' ', 1)

        return [repo_url]

grimoire_elk/utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
from grimoire_elk.elastic import ElasticConnectException
3131
from grimoire_elk.elastic import ElasticSearch
32+
# Connectors for Graal
33+
from graal.backends.core.colic import CoLic, CoLicCommand
3234
# Connectors for Perceval
3335
from grimoire_elk.raw.hyperkitty import HyperKittyOcean
3436
from perceval.backends.core.askbot import Askbot, AskbotCommand
@@ -68,6 +70,7 @@
6870
from perceval.backends.mozilla.remo import ReMo, ReMoCommand
6971
from perceval.backends.opnfv.functest import Functest, FunctestCommand
7072
# Connectors for EnrichOcean
73+
from .enriched.colic import ColicEnrich
7174
from .enriched.askbot import AskbotEnrich
7275
from .enriched.bugzilla import BugzillaEnrich
7376
from .enriched.bugzillarest import BugzillaRESTEnrich
@@ -105,6 +108,7 @@
105108
from .enriched.telegram import TelegramEnrich
106109
from .enriched.twitter import TwitterEnrich
107110
# Connectors for Ocean
111+
from .raw.colic import ColicOcean
108112
from .raw.askbot import AskbotOcean
109113
from .raw.bugzilla import BugzillaOcean
110114
from .raw.bugzillarest import BugzillaRESTOcean
@@ -200,6 +204,7 @@ def get_connectors():
200204
return {"askbot": [Askbot, AskbotOcean, AskbotEnrich, AskbotCommand],
201205
"bugzilla": [Bugzilla, BugzillaOcean, BugzillaEnrich, BugzillaCommand],
202206
"bugzillarest": [BugzillaREST, BugzillaRESTOcean, BugzillaRESTEnrich, BugzillaRESTCommand],
207+
"colic": [CoLic, ColicOcean, ColicEnrich, CoLicCommand],
203208
"confluence": [Confluence, ConfluenceOcean, ConfluenceEnrich, ConfluenceCommand],
204209
"crates": [Crates, CratesOcean, CratesEnrich, CratesCommand],
205210
"discourse": [Discourse, DiscourseOcean, DiscourseEnrich, DiscourseCommand],

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ urllib3==1.24.3
88
-e git+https://github.com/chaoss/grimoirelab-cereslib/#egg=grimoirelab-cereslib
99
-e git+https://github.com/chaoss/grimoirelab-kingarthur/#egg=grimoirelab-kingarthur
1010
-e git+https://github.com/chaoss/grimoirelab-perceval/#egg=grimoirelab-perceval
11+
-e git+https://github.com/chaoss/grimoirelab-graal/#egg=grimoirelab-graal
1112
-e git+https://github.com/chaoss/grimoirelab-perceval-mozilla/#egg=grimoirelab-perceval-mozilla
1213
-e git+https://github.com/chaoss/grimoirelab-perceval-opnfv/#egg=grimoirelab-perceval-opnfv
1314
-e git+https://github.com/chaoss/grimoirelab-perceval-puppet/#egg=grimoirelab-perceval-puppet

0 commit comments

Comments
 (0)