Skip to content

Commit

Permalink
metrics: modify index interface of python kernel (#967)
Browse files Browse the repository at this point in the history
Signed-off-by: frank-zsy <syzhao1988@126.com>

Signed-off-by: frank-zsy <syzhao1988@126.com>
  • Loading branch information
frank-zsy authored Sep 13, 2022
1 parent 49846b8 commit 9ad3af4
Show file tree
Hide file tree
Showing 8 changed files with 502 additions and 289 deletions.
682 changes: 450 additions & 232 deletions notebook/handbook_py.ipynb

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions src/db/clickhouse.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import sys
import os
from easydict import EasyDict
from matplotlib.pyplot import get
from config import getConfig
from clickhouse_driver import Client

_client = None
def getClient():
global _client
if _client == None:
config = EasyDict((getConfig()).get('db').get('clickhouse'))
config = EasyDict(getConfig()).db.clickhouse
_client = Client(config.host, config.port, config.database, config.user, config.password)
return _client

Expand All @@ -19,4 +16,3 @@ def query(q):
def queryDataframe(q):
client = getClient()
return client.query_dataframe(q)

8 changes: 3 additions & 5 deletions src/db/neo4j_driver.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from py2neo import Node, Relationship, Graph
import sys
import os
from py2neo import Graph
from easydict import EasyDict
from config import getConfig

Expand All @@ -9,10 +7,10 @@
def getClient():
global _driver
if _driver == None:
neo4j_config = EasyDict(EasyDict(getConfig()).db.neo4j)
neo4j_config = EasyDict(getConfig()).db.neo4j
_driver = Graph(neo4j_config.host)
return _driver

def query(query_sql):
result = getClient().run(query_sql) # return a cursor object
return result.data() # transform cursor to list
41 changes: 21 additions & 20 deletions src/metrics/activity_openrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@
REVIEW_COMMENT_WEIGHT = 4
PULL_MERGED_WEIGHT = 2

def getRepoActivityOrOpenrank(config, calType='activity'):
def getRepoOpenrank(config):
"""_summary_
Args:
config (QueryConfig): config of query.
calType (str, optional): type of metrics, 'activity' or 'open_rank'.
Returns:
neo4j cursor: query results of neo4j
"""
config = getMergedConfig(config)
calType = 'open_rank'
repoWhereClause = getRepoWhereClauseForNeo4j(config)
timeWhereClause = getTimeRangeWhereClauseForNeo4j(config, 'r')
timeActivityOrOpenrankClause = getTimeRangeSumClauseForNeo4j(config, 'r.{}'.format(calType))
Expand Down Expand Up @@ -66,16 +66,16 @@ def getRepoActivityOrOpenrank(config, calType='activity'):
i[calType] = np.around(i[calType])
return resultArr[0:config.get('limit')]

def getUserActivityOrOpenrank(config, calType='activity'):
def getUserOpenrank(config):
"""_summary_
Args:
config (QueryConfig): config of query.
calType (str, optional): type of metrics, 'activity' or 'open_rank'.
Returns:
neo4j cursor: query results of neo4j
"""
config = getMergedConfig(config)
calType = 'open_rank'
userWhereClause = getUserWhereClauseForNeo4j(config)
timeWhereClause = getTimeRangeWhereClauseForNeo4j(config, 'u')
timeActivityClause = getTimeRangeSumClauseForNeo4j(config, 'u.{}'.format(calType))
Expand All @@ -93,7 +93,7 @@ def getUserActivityOrOpenrank(config, calType='activity'):
sqrt({}*issue_comment + {}*open_issue + {}*open_pull + {}*review_comment + {}*merged_pull) AS activity \
'.format(ISSUE_COMMENT_WEIGHT, OPEN_ISSUE_WEIGHT, OPEN_PULL_WEIGHT, REVIEW_COMMENT_WEIGHT, PULL_MERGED_WEIGHT)

def getRepoActivityWithDetail(config):
def getRepoActivity(config):
config = getMergedConfig(config)
whereClauses = ["type IN ('IssuesEvent', 'IssueCommentEvent', 'PullRequestEvent', 'PullRequestReviewCommentEvent')"] # specify types to reduce memory usage and calculation
repoWhereClause = getRepoWhereClauseForClickhouse(config)
Expand All @@ -107,12 +107,12 @@ def getRepoActivityWithDetail(config):
{}, \
{} \
FROM \
(".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull' }),
(".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull', 'defaultValue': '0' }),
) + \
"SELECT \
{}, \
Expand All @@ -138,14 +138,15 @@ def getRepoActivityWithDetail(config):
FROM github_log.events \
WHERE {} \
GROUP BY repo_id, org_id, actor_id, month \
HAVING activity > 0\
HAVING activity > 0 \
) \
GROUP BY id, time\
{}\
) \
GROUP BY id \
ORDER BY activity[-1] {} \
FORMAT JSONCompact".format(ISSUE_COMMENT_WEIGHT, OPEN_ISSUE_WEIGHT, OPEN_PULL_WEIGHT, REVIEW_COMMENT_WEIGHT,
FORMAT JSONCompact".format(ISSUE_COMMENT_WEIGHT, OPEN_ISSUE_WEIGHT,
OPEN_PULL_WEIGHT, REVIEW_COMMENT_WEIGHT, PULL_MERGED_WEIGHT,
' AND '.join(whereClauses),
'ORDER BY activity DESC LIMIT {} BY time'.format(config.get('limit')) if config.get('limit') > 0 else '',
config.get('order')
Expand All @@ -166,7 +167,7 @@ def return_row(row):
}
return list(map(return_row, result))

def getUserActivityWithDetail(config = QueryConfig, withBot = True):
def getUserActivity(config = QueryConfig, withBot = True):
config = getMergedConfig(config)
whereClauses = ["type IN ('IssuesEvent', 'IssueCommentEvent', 'PullRequestEvent', 'PullRequestReviewCommentEvent')"] # specify types to reduce memory usage and calculation
userWhereClause = getUserWhereClauseForClickhouse(config)
Expand All @@ -180,12 +181,12 @@ def getUserActivityWithDetail(config = QueryConfig, withBot = True):
{}, \
{} \
FROM \
(".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull' })
(".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment', 'defaultValue': '0' }),
getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull', 'defaultValue': '0' })
) + \
"SELECT \
{}, \
Expand Down
19 changes: 16 additions & 3 deletions src/metrics/basic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from itertools import groupby
import db.clickhouse as clickhouse
from numpy import append
from label_data_utils import getGitHubData, getLabelData
import datetime
Expand Down Expand Up @@ -85,9 +86,17 @@ def process(l):
return '({})'.format(' OR '.join(arr))
repoWhereClauseArray = []
if config.get('repoIds'): repoWhereClauseArray.append('repo_id IN {}'.format(config.get('repoIds')))
if config.get('repoNames'): repoWhereClauseArray.append('repo_name IN {}'.format(config.get('repoNames')))
if config.get('repoNames'):
# find id first
sql = 'SELECT DISTINCT(repo_id) FROM github_log.events WHERE repo_name IN {}'.format(config.get('repoNames'))
ids = clickhouse.query(sql)
repoWhereClauseArray.append('repo_id IN {}'.format(list(map(lambda i: i[0], ids))))
if config.get('orgIds'): repoWhereClauseArray.append('org_id IN {}'.format(config.get('orgIds')))
if config.get('orgNames'): repoWhereClauseArray.append('org_name IN {}'.format(config.get('orgNames')))
if config.get('orgNames'):
# find id first
sql = 'SELECT DISTINCT(org_id) FROM github_log.events WHERE org_login IN {}'.format(config.get('orgNames'))
ids = clickhouse.query(sql)
repoWhereClauseArray.append('org_id IN {}'.format(list(map(lambda i: i[0], ids))))
if config.get('labelIntersect'):
return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')'
if config.get('labelUnion'):
Expand Down Expand Up @@ -123,7 +132,11 @@ def process(l):
return None
userWhereClauseArray = []
if config.get('userIds'): userWhereClauseArray.append('actor_id IN {}'.format(config.get('userIds')))
if config.get('userLogins'): userWhereClauseArray.append('actor_login IN {}'.format(config.get('userLogins')))
if config.get('userLogins'):
# get id first
sql = 'SELECT DISTINCT(actor_id) FROM github_log.events WHERE actor_login IN {}'.format(config.get('userLogins'))
ids = clickhouse.query(sql)
userWhereClauseArray.append('actor_id IN {}'.format(list(map(lambda i: i[0], ids))))
if config.get('labelIntersect'):
return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')'
if config.get('labelUnion'):
Expand Down
14 changes: 4 additions & 10 deletions src/metrics/index.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
import sys
import os
import chaoss
import activity_openrank
import attention
import related_users

def getRepoActivity(config):
return activity_openrank.getRepoActivityOrOpenrank(config, 'activity')
return activity_openrank.getRepoActivity(config)
def getUserActivity(config):
return activity_openrank.getUserActivityOrOpenrank(config, 'activity')
def getRepoActivityWithDetail(config):
return activity_openrank.getRepoActivityWithDetail(config)
def getUserActivityWithDetail(config):
return activity_openrank.getUserActivityWithDetail(config)
return activity_openrank.getUserActivity(config)
def getRepoOpenrank(config):
return activity_openrank.getRepoActivityOrOpenrank(config, 'open_rank')
return activity_openrank.getRepoOpenrank(config)
def getUserOpenrank(config):
return activity_openrank.getUserActivityOrOpenrank(config, 'open_rank')
return activity_openrank.getUserOpenrank(config)
def getRelatedUsers(config):
return related_users.getRelatedUsers(config)
def getAttention(config):
Expand Down
2 changes: 1 addition & 1 deletion src/metrics/related_users.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from basic import QueryConfig, getMergedConfig, getRepoWhereClauseForNeo4j, getTimeRangeWhereClauseForNeo4j
from basic import getMergedConfig, getRepoWhereClauseForNeo4j, getTimeRangeWhereClauseForNeo4j
import db.neo4j_driver as neo4j_driver

def getRelatedUsers(config):
Expand Down
19 changes: 6 additions & 13 deletions src/open_digger.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from functools import cmp_to_key
import db.neo4j_driver as neo4j_driver
import db.clickhouse as clickhouse
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots

class openDigger(object):
Expand All @@ -18,25 +16,22 @@ def __init__(self):
self.neo4j = neo4j_driver
class quick():
def showAll(self, repoName, startYear = 2015, endYear = 2021):
query_sql = "MATCH (r:Repo{name:\'"+str(repoName)+"\'}) RETURN r;"
data = openDigger().driver().neo4j.query(query_sql)[0]['r']
activityValues = []
openrankValues = []
config = { 'repoNames': [repoName], 'startYear': startYear, 'endYear': endYear, 'groupTimeRange': 'month' }
activity = func.getRepoActivity(config)
openrank = func.getRepoOpenrank(config)
for year in range(startYear, endYear + 1):
for month in range(1, 13):
k = '{}{}'.format(year, month)
activityValues.append(data.get('activity_{}'.format(k)))
openrankValues.append(data.get('open_rank_{}'.format(k)))
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
openDigger().render.Scatter(
y=activityValues,
y = activity[0].get('activity'),
mode="markers+lines",
name='activity'
name='activity'
))
fig.add_trace(
openDigger().render.Scatter(
y=openrankValues,
y = openrank[0].get('open_rank'),
mode="markers+lines",
name='openrank'
), secondary_y=True)
Expand All @@ -59,8 +54,6 @@ class index():
class activity():
def getRepoActivity(self, config): return func.getRepoActivity(config)
def getUserActivity(self, config): return func.getUserActivity(config)
def getRepoActivityWithDetail(self, config): return func.getRepoActivityWithDetail(config)
def getUserActivityWithDetail(self, config): return func.getUserActivityWithDetail(config)
class openrank():
def getRepoOpenrank(self, config): return func.getRepoOpenrank(config)
def getUserOpenrank(self, config): return func.getUserOpenrank(config)
Expand Down

0 comments on commit 9ad3af4

Please sign in to comment.