Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fix for the book #529

Merged
merged 14 commits into from
Aug 16, 2024
2 changes: 1 addition & 1 deletion PAMI/AssociationRules/basic/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ class confidence:
_memoryRSS = float()
_associationRules = {}

def __init__(self, iFile, minConf, sep):
def __init__(self, iFile, minConf, sep="\t"):
"""
:param iFile: input file name or path
:type iFile: str
Expand Down
14 changes: 7 additions & 7 deletions PAMI/extras/calculateMISValues/usingBeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class usingBeta():
Name of the Input file to get the patterns as DataFrame
:param beta: str :
Name of the output file to store complete set of frequent patterns
:param threshold: int :
:param LS: int :
The user can specify the threshold either as a count or as a proportion of the database size. If the program detects that the data type of the threshold is integer, it treats the threshold as expressed in count.
:param sep: str :
This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.
Expand All @@ -66,13 +66,13 @@ class usingBeta():
_iFile: str = ' '
_beta: int = int()
_sep: str = str()
_threshold: int = int()
_LS: int = int()
_finalPatterns: dict = {}

def __init__(self, iFile: str, beta: int, threshold: int, sep: str):
def __init__(self, iFile: str, beta: int, LS: int, sep: str="\t"):
self._iFile = iFile
self._beta = beta
self._threshold = threshold
self._LS = LS
self._sep = sep
self._lno = 0

Expand Down Expand Up @@ -131,9 +131,9 @@ def calculateMIS(self) -> None:
self._creatingItemSets()
frequentItems = self._creatingFrequentItems()
for x, y in frequentItems.items():
#self._finalPatterns[x] = min([y, self._threshold])
if y < self._threshold:
self._finalPatterns[x] = self._threshold
#self._finalPatterns[x] = min([y, self._LS])
if y < self._LS:
self._finalPatterns[x] = self._LS
else:
self._finalPatterns[x] = y

Expand Down
4 changes: 2 additions & 2 deletions PAMI/extras/dbStats/TemporalDatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import numpy as np
from urllib.request import urlopen
from typing import Dict, Union

import PAMI.extras.graph.plotLineGraphFromDictionary as plt

class TemporalDatabase:
"""
Expand Down Expand Up @@ -442,7 +442,7 @@ def plotGraphs(self) -> None:
['b', 'd', 'g', 'c', 'i'], ['b', 'd', 'g', 'e', 'j']]}

# data = pd.DataFrame.from_dict('temporal_T10I4D100K.csv')
import PAMI.extras.graph.plotLineGraphFromDictionary as plt


if len(sys.argv) < 3:
print("Please provide two arguments.")
Expand Down
20 changes: 17 additions & 3 deletions PAMI/extras/syntheticDataGenerator/TemporalDatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,16 @@

class TemporalDatabase:
"""
Creates a temporal database with transactions and timestamps.
:Description: - creates a temporal database from the required parameters (e.g., databaseSize, avgItemsPerTransaction, numItems and outputFile).
- output can be produced in two ways, either as a text file or as a dataframe, depending on the input type.

:Attributes:

:param databaseSize: int
number of transactions

:param avgItemsPerTransaction: int
average length of transactions

This class generates a temporal database based on the given parameters and provides
options to output the database in either a text file or a DataFrame format.
Expand All @@ -49,6 +58,7 @@ class TemporalDatabase:

**Methods to execute code on terminal**


Format:

(.venv) $ python3 TemporalDatabase.py <numOfTransactions> <avgLenOfTransactions> <numItems> <outputFile> <percentage> <sep> <typeOfFile> <occurrenceProbabilityAtSameTimestamp> <occurrenceProbabilityToSkipSubsequentTimestamp>
Expand All @@ -58,6 +68,7 @@ class TemporalDatabase:

(.venv) $ python3 TemporalDatabase.py 50 10 100 temporal.txt 50 \t database 0.1 0.1


:param numOfTransactions: int
Number of transactions to generate.

Expand Down Expand Up @@ -106,8 +117,8 @@ def __init__(self, numOfTransactions: int, avgLenOfTransactions: int,
:param occurrenceProbabilityToSkipSubsequentTimestamp: Probability to skip subsequent timestamp.
"""

self.numOfTransactions = numOfTransactions
self.avgLenOfTransactions = avgLenOfTransactions
self.databaseSize = databaseSize
self.avgItemsPerTransaction = avgItemsPerTransaction
self.numItems = numItems
self.outputFile = outputFile
if percentage > 1:
Expand Down Expand Up @@ -176,6 +187,7 @@ def create(self) -> None:
"""
db = []
lineSize = []

self.current_timestamp = 0 # Initialize current timestamp

for i in range(self.numOfTransactions):
Expand All @@ -193,10 +205,12 @@ def create(self) -> None:
lineSize.append([i, 0])

sumRes = self.numOfTransactions * self.avgLenOfTransactions

self.tuning(lineSize, sumRes)

for i in range(len(lineSize)):
if lineSize[i][1] > self.numItems:

raise ValueError(
"Error: Either increase numItems or decrease avgLenOfTransactions or modify percentage")
line = np.random.choice(range(1, self.numItems + 1), lineSize[i][1], replace=False)
Expand Down
12 changes: 6 additions & 6 deletions PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class TransactionalDatabase:

"""

def __init__(self, databaseSize, avgItemsPerTransaction, numItems,seperator = "\t") -> None:
def __init__(self, databaseSize, avgItemsPerTransaction, numItems,sep = "\t") -> None:
"""
Initialize the transactional database with the given parameters

Expand All @@ -97,14 +97,14 @@ def __init__(self, databaseSize, avgItemsPerTransaction, numItems,seperator = "\
:type avgItemsPerTransaction: int
:param numItems: total number of items
:type numItems: int
:param seperator: separator to distinguish the items in a transaction
:type seperator: str
:param sep: separator to distinguish the items in a transaction
:type sep: str
"""

self.databaseSize = databaseSize
self.avgItemsPerTransaction = avgItemsPerTransaction
self.numItems = numItems
self.seperator = seperator
self.sep = sep
self.db = []

def _generateArray(self, nums, avg, maxItems) -> list:
Expand Down Expand Up @@ -171,7 +171,7 @@ def save(self, filename) -> None:

with open(filename, 'w') as f:
for line in self.db:
f.write(str(self.seperator).join(map(str, line)) + '\n')
f.write(str(self.sep).join(map(str, line)) + '\n')

def getTransactions(self, sep = "\t") -> pd.DataFrame:
"""
Expand All @@ -197,5 +197,5 @@ def getTransactions(self, sep = "\t") -> pd.DataFrame:
obj.create()
obj.save(sys.argv[5])
else:
raise ValueError("Invalid number of arguments. Args: <numLines> <avgItemsPerLine> <numItems> <filename> or Args: <numLines> <avgItemsPerLine> <numItems> <seperator> <filename>")
raise ValueError("Invalid number of arguments. Args: <numLines> <avgItemsPerLine> <numItems> <filename> or Args: <numLines> <avgItemsPerLine> <numItems> <sep> <filename>")

10 changes: 5 additions & 5 deletions PAMI/frequentPattern/basic/Apriori.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,22 +353,22 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame:

return dataFrame

def save(self, outFile: str, seperator = "\t" ) -> None:
def save(self, oFile: str, seperator = "\t" ) -> None:
"""

Complete set of frequent patterns will be loaded in to an output file

:param outFile: name of the output file
:type outFile: csvfile
:param oFile: name of the output file
:type oFile: csvfile
:return: None
"""

# self._oFile = outFile
# self._oFile = oFile
# writer = open(self._oFile, 'w+')
# for x, y in self._finalPatterns.items():
# patternsAndSupport = x.strip() + ":" + str(y[0])
# writer.write("%s \n" % patternsAndSupport)
with open(outFile, 'w') as f:
with open(oFile, 'w') as f:
for x, y in self._finalPatterns.items():
x = seperator.join(x)
f.write(f"{x}:{y}\n")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def startMine(self):
"""
self.mine()

def Mine(self):
def mine(self):
"""
main program to start the operation

Expand Down
39 changes: 39 additions & 0 deletions PAMI/periodicCorrelatedPattern/basic/EPCPGrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,45 @@ def startMine(self) -> None:
self._memoryRSS = process.memory_info().rss
print("Correlated Periodic-Frequent patterns were generated successfully using EPCPGrowth algorithm ")

def mine(self) -> None:
"""
Run the EPCPGrowth mining process and store the result in ``self._finalPatterns``.

The method validates the inputs, calls ``self._creatingItemSets()``, converts the
thresholds, builds the tree, generates the patterns, and finally records the end
time and the USS/RSS memory of the current process.

:return: None
:raises Exception: if ``self._iFile`` or ``self._minSup`` is None, or if the converted
minSup is greater than ``len(self._Database)``
"""

# NOTE(review): this view shows the statements without indentation; confirm the
# nesting of the ``if``/``for`` bodies against the repository file.
# The thresholds are also published as module-level globals (assigned below).
global _minSup, _maxPer, _minAllConf, _maxPerAllConf, _lno
self._startTime = _ab._time.time()
# Fail early when a required input is missing.
if self._iFile is None:
raise Exception("Please enter the file path or file name:")
if self._minSup is None:
raise Exception("Please enter the Minimum Support")
self._creatingItemSets()
# minSup and maxPer go through self._convert; the two all-confidence values are cast to float.
self._minSup = self._convert(self._minSup)
self._minAllConf = float(self._minAllConf)
self._maxPer = self._convert(self._maxPer)
self._maxPerAllConf = float(self._maxPerAllConf)
# Mirror the converted thresholds and the database length into the globals declared above.
_minSup, _minAllConf, _maxPer, _maxPerAllConf, _lno = self._minSup, self._minAllConf, self._maxPer, self._maxPerAllConf, len(self._Database)
#print(_minSup, _minAllConf, _maxPer, _maxPerAllConf)
# A converted minSup larger than the number of database entries is rejected.
if self._minSup > len(self._Database):
raise Exception("Please enter the minSup in range between 0 to 1")
generatedItems, pfList = self._periodicFrequentOneItem()
updatedDatabases = self._updateDatabases(generatedItems)
# Build the inverse of self._rank (value -> key) in self._rankedUp.
for x, y in self._rank.items():
self._rankedUp[y] = x
# Re-key the generated items by their value in self._rank.
info = {self._rank[k]: v for k, v in generatedItems.items()}
Tree = self._buildTree(updatedDatabases, info)
patterns = Tree.generatePatterns([])
# Start from an empty result so earlier runs do not leak into this one.
self._finalPatterns = {}
for i in patterns:
# i[0] is passed through self._savePeriodic to obtain the key; i[1] is stored as the value.
sample = self._savePeriodic(i[0])
self._finalPatterns[sample] = i[1]
self._endTime = _ab._time.time()
process = _ab._psutil.Process(_ab._os.getpid())
# Reset both memory fields before reading the current process statistics.
self._memoryUSS = float()
self._memoryRSS = float()
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss
print("Correlated Periodic-Frequent patterns were generated successfully using EPCPGrowth algorithm ")

def getMemoryUSS(self) -> float:
"""
Total amount of USS memory consumed by the mining process will be retrieved from this function
Expand Down
2 changes: 1 addition & 1 deletion PAMI/periodicFrequentPattern/closed/CPFPMiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def startMine(self):
self._memoryRSS = process.memory_info().rss
print("Closed periodic frequent patterns were generated successfully using CPFPMiner algorithm ")

def Mine(self):
def mine(self):
"""
Mining process will start from here
"""
Expand Down
2 changes: 1 addition & 1 deletion PAMI/periodicFrequentPattern/maximal/MaxPFGrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ def startMine(self) -> None:
self._memoryRSS = _process.memory_info().rss
print("Maximal Periodic Frequent patterns were generated successfully using MAX-PFPGrowth algorithm ")

def Mine(self) -> None:
def mine(self) -> None:
"""
Mining process will start from this function
:return: None
Expand Down
34 changes: 34 additions & 0 deletions PAMI/periodicFrequentPattern/topk/kPFPMiner/kPFPMiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,40 @@ def startMine(self):
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss

def mine(self):
"""
Run the kPFPMiner mining process.

The method validates the inputs, calls ``self._creatingItemSets()``, converts
``self._k``, and for the items returned by ``self._frequentOneItem()`` intersects
their tid lists pairwise and hands the retained candidates to ``self._Generation``.
Afterwards it records the end time and the USS/RSS memory of the current process.

:raises Exception: if ``self._iFile`` or ``self._k`` is None
"""
# NOTE(review): this view shows the statements without indentation; confirm the
# nesting of the loop and ``if`` bodies against the repository file.
self._startTime = _ab._time.time()
if self._iFile is None:
raise Exception("Please enter the file path or file name:")
# NOTE(review): the message below mentions "Minimum Support" although the check is on
# self._k — confirm the intended wording.
if self._k is None:
raise Exception("Please enter the Minimum Support")
self._creatingItemSets()
self._k = self._convert(self._k)
plist = self._frequentOneItem()
for i in range(len(plist)):
itemI = plist[i]
tidSetI = self._tidList[itemI]
itemSetX = [itemI]
# Candidate items and their tid lists collected for the current itemI.
itemSets = []
tidSets = []
# Only items after position i are paired with itemI, so each pair is visited once.
for j in range(i + 1, len(plist)):
itemJ = plist[j]
tidSetJ = self._tidList[itemJ]
# Entries common to both tid lists; going through a set drops the original order.
y1 = list(set(tidSetI).intersection(tidSetJ))
# Keep the pair only when getPer_Sup of the intersection does not exceed self._maximum.
if self.getPer_Sup(y1) <= self._maximum:
itemSets.append(itemJ)
tidSets.append(y1)
self._Generation(itemSetX, itemSets, tidSets)
print("kPFPMiner has successfully generated top-k frequent patterns")
self._endTime = _ab._time.time()
# Reset both memory fields before reading the current process statistics.
self._memoryUSS = float()
self._memoryRSS = float()
process = _ab._psutil.Process(_ab._os.getpid())
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss
def getMemoryUSS(self):
"""Total amount of USS memory consumed by the mining process will be retrieved from this function

Expand Down
2 changes: 1 addition & 1 deletion PAMI/relativeFrequentPattern/basic/RSFPGrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ def startMine(self) -> None:
self.__memoryUSS = process.memory_full_info().uss
self.__memoryRSS = process.memory_info().rss

def Mine(self) -> None:
def mine(self) -> None:
"""
Main program to start the operation
:return: None
Expand Down