UdayLab · udayRage · Aug 16, 2024 · Aug 15, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/PAMI/AssociationRules/basic/confidence.py b/PAMI/AssociationRules/basic/confidence.py
@@ -153,7 +153,7 @@ class confidence:
     _memoryRSS = float()
     _associationRules = {}
 
-    def __init__(self, iFile, minConf, sep):
+    def __init__(self, iFile, minConf, sep="\t"):
         """
         :param iFile: input file name or path
         :type iFile: str

diff --git a/PAMI/extras/calculateMISValues/usingBeta.py b/PAMI/extras/calculateMISValues/usingBeta.py
@@ -46,7 +46,7 @@ class usingBeta():
                    Name of the Input file to get the patterns as DataFrame
     :param  beta: str :
                    Name of the output file to store complete set of frequent patterns
-    :param  threshold: int :
+    :param  LS: int :
                    The user can specify threshold either in count or proportion of database size. If the program detects the data type of threshold is integer, then it treats threshold is expressed in count.
     :param  sep: str :
                    This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.
@@ -66,13 +66,13 @@ class usingBeta():
     _iFile: str = ' '
     _beta: int = int()
     _sep: str = str()
-    _threshold: int = int()
+    _LS: int = int()
     _finalPatterns: dict = {}
 
-    def __init__(self, iFile: str, beta: int, threshold: int, sep: str):
+    def __init__(self, iFile: str, beta: int, LS: int, sep: str="\t"):
         self._iFile = iFile
         self._beta = beta
-        self._threshold = threshold
+        self._LS = LS
         self._sep = sep
         self._lno = 0
 
@@ -131,9 +131,9 @@ def calculateMIS(self) -> None:
             self._creatingItemSets()
             frequentItems = self._creatingFrequentItems()
             for x, y in frequentItems.items():
-              #self._finalPatterns[x] = min([y, self._threshold])
-                if y < self._threshold:
-                    self._finalPatterns[x] = self._threshold
+                # clamp each item's MIS value from below: values under LS are raised to LS
+                if y < self._LS:
+                    self._finalPatterns[x] = self._LS
                 else:
                     self._finalPatterns[x] = y
 

diff --git a/PAMI/extras/dbStats/TemporalDatabase.py b/PAMI/extras/dbStats/TemporalDatabase.py
@@ -41,7 +41,7 @@
 import numpy as np
 from urllib.request import urlopen
 from typing import Dict, Union
-
+import PAMI.extras.graph.plotLineGraphFromDictionary as plt
 
 class TemporalDatabase:
     """
@@ -442,7 +442,7 @@ def plotGraphs(self) -> None:
                              ['b', 'd', 'g', 'c', 'i'], ['b', 'd', 'g', 'e', 'j']]}
 
     # data = pd.DataFrame.from_dict('temporal_T10I4D100K.csv')
-    import PAMI.extras.graph.plotLineGraphFromDictionary as plt
+
 
     if len(sys.argv) < 3:
         print("Please provide two arguments.")

diff --git a/PAMI/extras/syntheticDataGenerator/TemporalDatabase.py b/PAMI/extras/syntheticDataGenerator/TemporalDatabase.py
@@ -32,7 +32,16 @@
 
 class TemporalDatabase:
     """
-    Creates a temporal database with transactions and timestamps.
+    :Description: - creates a temporal database from the required parameters (e.g., numOfTransactions, avgLenOfTransactions, numItems and outputFile).
+                  - the output can be produced either as a text file or as a dataframe, depending on the requested output type.
+
+    :Attributes:
+
+        :param numOfTransactions: int
+            number of transactions
+
+        :param avgLenOfTransactions: int
+            average length of transactions
 
     This class generates a temporal database based on the given parameters and provides
     options to output the database in either a text file or a DataFrame format.
@@ -49,6 +58,7 @@ class TemporalDatabase:
 
     **Methods to execute code on terminal**
 
+
     Format:
 
         (.venv) $ python3 TemporalDatabase.py <numOfTransactions> <avgLenOfTransactions> <numItems> <outputFile> <percentage> <sep> <typeOfFile> <occurrenceProbabilityAtSameTimestamp> <occurrenceProbabilityToSkipSubsequentTimestamp>
@@ -58,6 +68,7 @@ class TemporalDatabase:
 
         (.venv) $ python3 TemporalDatabase.py 50 10 100 temporal.txt 50 \t database 0.1 0.1
 
+
     :param numOfTransactions: int
         Number of transactions to generate.
 
@@ -106,8 +117,8 @@ def __init__(self, numOfTransactions: int, avgLenOfTransactions: int,
         :param occurrenceProbabilityToSkipSubsequentTimestamp: Probability to skip subsequent timestamp.
         """
 
-        self.numOfTransactions = numOfTransactions
-        self.avgLenOfTransactions = avgLenOfTransactions
+        self.numOfTransactions = self.databaseSize = numOfTransactions  # create() reads numOfTransactions; databaseSize is an alias
+        self.avgLenOfTransactions = self.avgItemsPerTransaction = avgLenOfTransactions  # create() reads avgLenOfTransactions; the new name is an alias
         self.numItems = numItems
         self.outputFile = outputFile
         if percentage > 1:
@@ -176,6 +187,7 @@ def create(self) -> None:
         """
         db = []
         lineSize = []
+
         self.current_timestamp = 0  # Initialize current timestamp
 
         for i in range(self.numOfTransactions):
@@ -193,10 +205,12 @@ def create(self) -> None:
                 lineSize.append([i, 0])
 
         sumRes = self.numOfTransactions * self.avgLenOfTransactions
+
         self.tuning(lineSize, sumRes)
 
         for i in range(len(lineSize)):
             if lineSize[i][1] > self.numItems:
+
                 raise ValueError(
                     "Error: Either increase numItems or decrease avgLenOfTransactions or modify percentage")
             line = np.random.choice(range(1, self.numItems + 1), lineSize[i][1], replace=False)

diff --git a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py
@@ -87,7 +87,7 @@ class TransactionalDatabase:
 
     """
 
-    def __init__(self, databaseSize, avgItemsPerTransaction, numItems,seperator = "\t") -> None:
+    def __init__(self, databaseSize, avgItemsPerTransaction, numItems,sep = "\t") -> None:
         """
         Initialize the transactional database with the given parameters
 
@@ -97,14 +97,14 @@ def __init__(self, databaseSize, avgItemsPerTransaction, numItems,seperator = "\
         :type avgItemsPerTransaction: int
         :param numItems: total number of items
         :type numItems: int
-        :param seperator: separator to distinguish the items in a transaction
-        :type seperator: str
+        :param sep: separator to distinguish the items in a transaction
+        :type sep: str
         """
 
         self.databaseSize = databaseSize
         self.avgItemsPerTransaction = avgItemsPerTransaction
         self.numItems = numItems
-        self.seperator = seperator
+        self.sep = sep
         self.db = []
 
     def _generateArray(self, nums, avg, maxItems) -> list:
@@ -171,7 +171,7 @@ def save(self, filename) -> None:
 
         with open(filename, 'w') as f:
             for line in self.db:
-                f.write(str(self.seperator).join(map(str, line)) + '\n')
+                f.write(str(self.sep).join(map(str, line)) + '\n')
 
     def getTransactions(self, sep = "\t") -> pd.DataFrame:
         """
@@ -197,5 +197,5 @@ def getTransactions(self, sep = "\t") -> pd.DataFrame:
         obj.create()
         obj.save(sys.argv[5])
     else:
-        raise ValueError("Invalid number of arguments. Args: <numLines> <avgItemsPerLine> <numItems> <filename> or Args: <numLines> <avgItemsPerLine> <numItems> <seperator> <filename>")
+        raise ValueError("Invalid number of arguments. Args: <numLines> <avgItemsPerLine> <numItems> <filename> or Args: <numLines> <avgItemsPerLine> <numItems> <sep> <filename>")
 
diff --git a/PAMI/frequentPattern/basic/Apriori.py b/PAMI/frequentPattern/basic/Apriori.py
@@ -353,22 +353,22 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame:
 
         return dataFrame
 
-    def save(self, outFile: str, seperator = "\t" ) -> None:
+    def save(self, oFile: str, seperator = "\t" ) -> None:
         """
 
         Complete set of frequent patterns will be loaded in to an output file
 
-        :param outFile: name of the output file
-        :type outFile: csvfile
+        :param oFile: name of the output file
+        :type oFile: str
         :return: None
         """
 
-        # self._oFile = outFile
+        # self._oFile = oFile
         # writer = open(self._oFile, 'w+')
         # for x, y in self._finalPatterns.items():
         #     patternsAndSupport = x.strip() + ":" + str(y[0])
         #     writer.write("%s \n" % patternsAndSupport)
-        with open(outFile, 'w') as f:
+        with open(oFile, 'w') as f:
             for x, y in self._finalPatterns.items():
                 x = seperator.join(x)
                 f.write(f"{x}:{y}\n")

diff --git a/PAMI/multipleMinimumSupportBasedFrequentPattern/basic/CFPGrowthPlus.py b/PAMI/multipleMinimumSupportBasedFrequentPattern/basic/CFPGrowthPlus.py
@@ -506,7 +506,7 @@ def startMine(self):
         """
         self.mine()
 
-    def Mine(self):
+    def mine(self):
         """
         main program to start the operation
 

diff --git a/PAMI/periodicCorrelatedPattern/basic/EPCPGrowth.py b/PAMI/periodicCorrelatedPattern/basic/EPCPGrowth.py
@@ -645,6 +645,45 @@ def startMine(self) -> None:
         self._memoryRSS = process.memory_info().rss
         print("Correlated Periodic-Frequent patterns were generated successfully using EPCPGrowth algorithm ")
 
+    def mine(self) -> None:
+        """
+        Run the EPCPGrowth mining process and store the resulting patterns in ``self._finalPatterns``.
+        :raises Exception: if the input file or minSup is None, or if the converted minSup exceeds ``len(self._Database)``.
+        """
+        global _minSup, _maxPer, _minAllConf, _maxPerAllConf, _lno  # assigned below, once the thresholds are converted
+        self._startTime = _ab._time.time()
+        if self._iFile is None:
+            raise Exception("Please enter the file path or file name:")
+        if self._minSup is None:
+            raise Exception("Please enter the Minimum Support")
+        self._creatingItemSets()  # presumably populates self._Database, which is read below - confirm
+        self._minSup = self._convert(self._minSup)
+        self._minAllConf = float(self._minAllConf)
+        self._maxPer = self._convert(self._maxPer)
+        self._maxPerAllConf = float(self._maxPerAllConf)
+        _minSup, _minAllConf, _maxPer, _maxPerAllConf, _lno = self._minSup, self._minAllConf,  self._maxPer, self._maxPerAllConf, len(self._Database)
+        # reject a converted minSup that is larger than the number of entries in self._Database
+        if self._minSup > len(self._Database):
+            raise Exception("Please enter the minSup in range between 0 to 1")
+        generatedItems, pfList = self._periodicFrequentOneItem()
+        updatedDatabases = self._updateDatabases(generatedItems)
+        for x, y in self._rank.items():  # build the reverse lookup of self._rank
+            self._rankedUp[y] = x
+        info = {self._rank[k]: v for k, v in generatedItems.items()}  # re-key generatedItems by each key's rank
+        Tree = self._buildTree(updatedDatabases, info)
+        patterns = Tree.generatePatterns([])
+        self._finalPatterns = {}
+        for i in patterns:  # i[0] is passed to _savePeriodic to form the key, i[1] is stored as the value
+            sample = self._savePeriodic(i[0])
+            self._finalPatterns[sample] = i[1]
+        self._endTime = _ab._time.time()
+        process = _ab._psutil.Process(_ab._os.getpid())
+        self._memoryUSS = float()
+        self._memoryRSS = float()
+        self._memoryUSS = process.memory_full_info().uss
+        self._memoryRSS = process.memory_info().rss
+        print("Correlated Periodic-Frequent patterns were generated successfully using EPCPGrowth algorithm ")
+
     def getMemoryUSS(self) -> float:
         """
         Total amount of USS memory consumed by the mining process will be retrieved from this function

diff --git a/PAMI/periodicFrequentPattern/closed/CPFPMiner.py b/PAMI/periodicFrequentPattern/closed/CPFPMiner.py
@@ -499,7 +499,7 @@ def startMine(self):
         self._memoryRSS = process.memory_info().rss
         print("Closed periodic frequent patterns were generated successfully using CPFPMiner algorithm ")
 
-    def Mine(self):
+    def mine(self):
         """
         Mining process will start from here
         """

diff --git a/PAMI/periodicFrequentPattern/maximal/MaxPFGrowth.py b/PAMI/periodicFrequentPattern/maximal/MaxPFGrowth.py
@@ -725,7 +725,7 @@ def startMine(self) -> None:
         self._memoryRSS = _process.memory_info().rss
         print("Maximal Periodic Frequent patterns were generated successfully using MAX-PFPGrowth algorithm ")
 
-    def Mine(self) -> None:
+    def mine(self) -> None:
         """
         Mining process will start from this function
         :return: None

diff --git a/PAMI/periodicFrequentPattern/topk/kPFPMiner/kPFPMiner.py b/PAMI/periodicFrequentPattern/topk/kPFPMiner/kPFPMiner.py
@@ -392,6 +392,40 @@ def startMine(self):
         self._memoryUSS = process.memory_full_info().uss
         self._memoryRSS = process.memory_info().rss
 
+    def mine(self):
+        """
+        Run the kPFPMiner mining process and record its runtime and memory use.
+
+        Every item returned by ``_frequentOneItem`` is paired with the items
+        after it; pairs accepted by the ``_maximum`` check feed ``_Generation``.
+
+        :raises Exception: if the input file or the value of k was not provided
+        """
+        self._startTime = _ab._time.time()
+        if self._iFile is None:
+            raise Exception("Please enter the file path or file name:")
+        if self._k is None:
+            raise Exception("Please enter the value of k")
+        self._creatingItemSets()
+        self._k = self._convert(self._k)
+        plist = self._frequentOneItem()
+        for i, itemI in enumerate(plist):
+            # build the tid set once per prefix item instead of once per pair
+            tidSetI = set(self._tidList[itemI])
+            itemSets = []
+            tidSets = []
+            for itemJ in plist[i + 1:]:
+                y1 = list(tidSetI.intersection(self._tidList[itemJ]))
+                if self.getPer_Sup(y1) <= self._maximum:
+                    itemSets.append(itemJ)
+                    tidSets.append(y1)
+            self._Generation([itemI], itemSets, tidSets)
+        print("kPFPMiner has successfully generated top-k frequent patterns")
+        self._endTime = _ab._time.time()
+        process = _ab._psutil.Process(_ab._os.getpid())
+        self._memoryUSS = process.memory_full_info().uss
+        self._memoryRSS = process.memory_info().rss
+
     def getMemoryUSS(self):
         """Total amount of USS memory consumed by the mining process will be retrieved from this function
 

diff --git a/PAMI/relativeFrequentPattern/basic/RSFPGrowth.py b/PAMI/relativeFrequentPattern/basic/RSFPGrowth.py
@@ -639,7 +639,7 @@ def startMine(self) -> None:
         self.__memoryUSS = process.memory_full_info().uss
         self.__memoryRSS = process.memory_info().rss
 
-    def Mine(self) -> None:
+    def mine(self) -> None:
             """
             Main program to start the operation
             :return: None