From dce383911dfb94e9d0093ea45b6b393e9b7e2c39 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Thu, 6 Jun 2024 10:44:17 +0300 Subject: [PATCH] feat(#7): metrics.sh, metrics.py, install reqs --- steps/install.sh | 1 + steps/{structure.sh => metrics.py} | 30 ++++++++++++++++++++++++++---- steps/metrics.sh | 3 ++- steps/requirements.txt | 1 + 4 files changed, 30 insertions(+), 5 deletions(-) rename steps/{structure.sh => metrics.py} (60%) create mode 100644 steps/requirements.txt diff --git a/steps/install.sh b/steps/install.sh index 3e31ec1..cded1b0 100644 --- a/steps/install.sh +++ b/steps/install.sh @@ -24,4 +24,5 @@ set -e npm install -g ghminer@0.0.3 +pip3 install -r requirements.txt echo "All dependencies are installed and up to date! Now you can run 'make' and build the dataset." diff --git a/steps/structure.sh b/steps/metrics.py similarity index 60% rename from steps/structure.sh rename to steps/metrics.py index 30c8a45..57f436c 100644 --- a/steps/structure.sh +++ b/steps/metrics.py @@ -1,4 +1,3 @@ -#!/usr/bin/env bash # The MIT License (MIT) # # Copyright (c) 2024 Aliaksei Bialiauski @@ -20,7 +19,30 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import pandas as pd -set -e -set -o pipefail - +frame = pd.read_csv('results.csv') +frame["ic"] = frame["issues"] / frame["commits"] +# @todo #7:30min Compute CPD and RC metrics too. +# We need to compute CPD and RC metrics too; however, for now we don't have +# the proper dataset after ghminer execution. Need to collect required data +# first, and then compute CPDs and RCs. Don't forget to remove this puzzle. 
+frame.drop( + columns=[ + "branch", + "createdAt", + "lastCommitDate", + "lastReleaseDate", + "contributors", + "pulls", + "commits", + "issues", + "forks", + "stars", + "license", + "language", + "diskUsage" + ], + inplace=True +) +frame.to_csv("repos.csv", index=False) diff --git a/steps/metrics.sh b/steps/metrics.sh index d086c0e..87bced3 100644 --- a/steps/metrics.sh +++ b/steps/metrics.sh @@ -24,4 +24,5 @@ set -e set -o pipefail -# CPD, RC, IC +# Compute CPD, RC, IC metrics. +python3 metrics.py diff --git a/steps/requirements.txt b/steps/requirements.txt new file mode 100644 index 0000000..4b2e332 --- /dev/null +++ b/steps/requirements.txt @@ -0,0 +1 @@ +pandas==2.2.2