diff --git a/tests/highUtilityFrequentPattern/basic/test_HUFIM.py b/tests/highUtilityFrequentPattern/basic/test_HUFIM.py new file mode 100644 index 00000000..f3d6b0fe --- /dev/null +++ b/tests/highUtilityFrequentPattern/basic/test_HUFIM.py @@ -0,0 +1,993 @@ +# HUFIM (High Utility Frequent Itemset Miner) algorithm helps us to mine High Utility Frequent ItemSets (HUFIs) from transactional databases. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# +# from PAMI.highUtilityFrequentPattern.basic import HUFIM as alg +# +# obj =alg.HUFIM("input.txt", 35, 20) +# +# obj.mine() +# +# Patterns = obj.getPatterns() +# +# print("Total number of high utility frequent Patterns:", len(Patterns)) +# +# obj.save("output") +# +# memUSS = obj.getMemoryUSS() +# +# print("Total Memory in USS:", memUSS) +# +# memRSS = obj.getMemoryRSS() +# +# print("Total Memory in RSS", memRSS) +# +# run = obj.getRuntime() +# +# print("Total ExecutionTime in seconds:", run) +# + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
class _Transaction:
    """
    A class to store one transaction of a utility database

    :Attributes:

        items: list
            A list of items in transaction
        utilities: list
            A list of utilities of items in transaction (parallel to ``items``)
        transactionUtility: int
            utility total carried by the transaction; it is reduced whenever items are removed or projected away
        prefixUtility: int
            accumulated utility of the items the transaction was projected on (0 for a fresh transaction)
        offset: int
            an offset pointer, used by projected transactions
        support: int
            maintains the support of the transaction (1 for a fresh transaction)

    :Methods:

        projectTransaction(offsetE)
            A method to create new Transaction from existing starting from offsetE until the end
        getItems()
            return items in transaction
        getUtilities()
            return utilities in transaction
        getLastPosition()
            return last position in a transaction
        getSupport()
            returns the support of the transaction
        removeUnpromisingItems(oldNamesToNewNames)
            A method to drop items that are missing from the given map and rename the remaining ones
        insertionSort()
            A method to sort all items in the transaction
    """
    # Class-level defaults are kept for backward compatibility; every instance
    # also receives its own values in __init__.
    offset = 0
    prefixUtility = 0
    support = 1

    def __init__(self, items: List[int], utilities: List[int], transactionUtility: int) -> None:
        self.items = items
        self.utilities = utilities
        self.transactionUtility = transactionUtility
        self.offset = 0
        self.prefixUtility = 0
        self.support = 1

    def projectTransaction(self, offsetE: int) -> '_Transaction':
        """
        A method to create new Transaction from existing transaction starting from offsetE until the end

        The new transaction shares the ``items`` and ``utilities`` lists with this one; only the
        bookkeeping values (offset, prefix utility, transaction utility, support) are new.

        :param offsetE: an offset over the original transaction for projecting the transaction
        :type offsetE: int
        :return: a new transaction starting from offsetE until the end of the transaction
        :rtype: _Transaction
        """
        utilityE = self.utilities[offsetE]
        projected = _Transaction(self.items, self.utilities, self.transactionUtility)
        projected.prefixUtility = self.prefixUtility + utilityE
        # Remove the utility of the projected item and of every item that is
        # skipped between the current offset and the projected item.
        remaining = self.transactionUtility - utilityE
        for position in range(self.offset, offsetE):
            remaining -= self.utilities[position]
        projected.transactionUtility = remaining
        projected.support = self.support
        projected.offset = offsetE + 1
        return projected

    def getItems(self) -> List[int]:
        """
        A method to return items in transaction

        :return: the complete list of items stored in the transaction
        :rtype: list
        """
        return self.items

    def getUtilities(self) -> List[int]:
        """
        A method to return utilities in transaction

        :return: the complete list of utilities stored in the transaction
        :rtype: list
        """
        return self.utilities

    def getLastPosition(self) -> int:
        """
        A method to return last position in a transaction

        :return: the last position in a transaction
        :rtype: int
        """
        return len(self.items) - 1

    def getSupport(self) -> int:
        """
        A method to return support in a transaction

        :return: the support in a transaction
        :rtype: int
        """
        return self.support

    def removeUnpromisingItems(self, oldNamesToNewNames: Dict[int, int]) -> None:
        """
        A method to remove items which are not present in the map passed to the function

        Kept items are renamed through the map, the utility of every removed item is subtracted
        from ``transactionUtility`` and the remaining items are sorted by their new names.

        :param oldNamesToNewNames: A map represent old names to new names
        :type oldNamesToNewNames: map
        :return: None
        """
        keptItems = []
        keptUtilities = []
        for position, item in enumerate(self.items):
            if item in oldNamesToNewNames:
                keptItems.append(oldNamesToNewNames[item])
                keptUtilities.append(self.utilities[position])
            else:
                self.transactionUtility -= self.utilities[position]
        self.items = keptItems
        self.utilities = keptUtilities
        self.insertionSort()

    def insertionSort(self) -> None:
        """
        A method to sort items in ascending order, keeping utilities aligned with their items

        The name is kept for compatibility; the work is done by the built-in sort, which is
        stable like the insertion sort it replaces, so equal items keep their relative order.

        :return: None
        """
        order = sorted(range(len(self.items)), key=self.items.__getitem__)
        # Slice assignment keeps the same list objects, so projected transactions
        # that share these lists still observe the sorted content.
        self.items[:] = [self.items[position] for position in order]
        self.utilities[:] = [self.utilities[position] for position in order]
trans_list[2].strip().split(self.sep) + utilityString = [x for x in utilityString if x] + self.transactions.append(self.createTransaction(itemsString, utilityString, transactionUtility)) + else: + try: + with open(datasetPath, 'r', encoding='utf-8') as f: + for line in f: + trans_list = line.strip().split(':') + transactionUtility = int(trans_list[1]) + itemsString = trans_list[0].strip().split(self.sep) + itemsString = [x for x in itemsString if x] + utilityString = trans_list[2].strip().split(self.sep) + utilityString = [x for x in utilityString if x] + self.transactions.append( + self.createTransaction(itemsString, utilityString, transactionUtility)) + except IOError: + print("File Not Found") + quit() + + def createTransaction(self, items: List[str], utilities: List[str], utilitySum: int) -> _Transaction: + """ + A method to create Transaction from dataset given + + :param items: represent a single line of database + :type items: list + :param utilities: represent the utilities of items + :type utilities: list + :param utilitySum: represent the utilitySum + :type utilitySum: int + :return: a Transaction from given dataset + :rtype: _Transaction + """ + transactionUtility = utilitySum + itemsString = items + utilityString = utilities + items = [] + utilities = [] + for idx, item in enumerate(itemsString): + if self.strToInt.get(item) is None: + self.strToInt[item] = self.cnt + self.intToStr[self.cnt] = item + self.cnt += 1 + item_int = self.strToInt.get(item) + if item_int > self.maxItem: + self.maxItem = item_int + items.append(item_int) + utilities.append(int(utilityString[idx])) + return _Transaction(items, utilities, transactionUtility) + + def getMaxItem(self) -> int: + """ + A method to return name of the largest item + + :return: the name of the largest item in the dataset + :rtype: int + """ + return self.maxItem + + def getTransactions(self) -> List[_Transaction]: + """ + A method to return transactions from database + + :return: the list of 
transactions from database which have the highest utility + :rtype: list + """ + return self.transactions + + +class WrongNumberOfArguments(Exception): + pass + + +class HUFIM(_ab._utilityPatterns): + """ + :Description: HUFIM (High Utility Frequent Itemset Miner) algorithm helps us to mine High Utility Frequent ItemSets (HUFIs) from transactional databases. + + + :Reference: Kiran, R.U., Reddy, T.Y., Fournier-Viger, P., Toyoda, M., Reddy, P.K., & Kitsuregawa, M. (2019). + Efficiently Finding High Utility-Frequent Itemsets Using Cutoff and Suffix Utility. PAKDD 2019. + DOI: 10.1007/978-3-030-16145-3_15 + + + :param iFile: str : + Name of the Input file to mine complete set of Geo-referenced frequent sequence patterns + :param oFile: str : + Name of the output file to store complete set of Geo-referenced frequent sequence patterns + :param minSup: int or float or str : + The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. Otherwise, it will be treated as float. + :param minUtil: int : + The user given minUtil value. + :param candidateCount: int + Number of candidates + :param maxMemory: int + Maximum memory used by this program for running + :param nFile: str : + Name of the input file to mine complete set of Geo-referenced frequent sequence patterns + :param sep: str : + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. 
+ + + :Attributes: + + iFile : file + Name of the input file to mine complete set of patterns + oFile : file + Name of the output file to store complete set of patterns + memoryRSS : float + To store the total amount of RSS memory consumed by the program + startTime:float + To record the start time of the mining process + endTime:float + To record the completion time of the mining process + minUtil : int + The user given minUtil value + minSup : float + The user given minSup value + highUtilityFrequentItemSets: list + set of high utility frequent itemSets + candidateCount: int + Number of candidates + utilityBinArrayLU: map + A map to hold the local utility values of the items in the database + utilityBinArraySU: map + A map to hold the subtree utility values of the items in the database + oldNamesToNewNames: map + A map which contains old names, new names of items as key value pairs + newNamesToOldNames: map + A map which contains new names, old names of items as key value pairs + singleItemSetsSupport: map + A map which maps from single itemsets (items) to their support + singleItemSetsUtility: map + A map which maps from single itemsets (items) to their utilities + maxMemory: float + Maximum memory used by this program for running + patternCount: int + Number of high utility frequent itemsets found + itemsToKeep: list + keep only the promising items i.e. items that can extend other items to form HUFIs + itemsToExplore: list + list of items that need to be explored + + :Methods: + + mine() + Mining process will start from here + getPatterns() + Complete set of patterns will be retrieved with this function + save(oFile) + Complete set of patterns will be written to an output file + getPatternsAsDataFrame() + Complete set of patterns will be returned as a dataframe + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + 
getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function + backTrackingHUFIM(transactionsOfP, itemsToKeep, itemsToExplore, prefixLength) + A method to mine the HUFIs recursively + useUtilityBinArraysToCalculateUpperBounds(transactionsPe, j, itemsToKeep) + A method to calculate the sub-tree utility and local utility of all items that can extend itemSet P and e + output(tempPosition, utility, support) + A method to store a high utility frequent itemSet together with its utility and support in the final patterns + isEqual(transaction1, transaction2) + A method to check if two transactions are identical + useUtilityBinArrayToCalculateSubtreeUtilityFirstTime(dataset) + A method to calculate the sub tree utility values for single items + sortDatabase(self, transactions) + A method to sort the transactions of the database + sortTransaction(self, trans1, trans2) + A comparator used to order two transactions while sorting the database + useUtilityBinArrayToCalculateLocalUtilityFirstTime(self, dataset) + A method to calculate local utility values for single itemSets + + **Executing the code on terminal** + -------------------------------------------- + + .. code-block:: console + + Format: + + (.venv) $ python3 HUFIM.py <inputFile> <minUtil> <minSup> <outputFile> [<sep>] + + Example Usage: + + (.venv) $ python3 HUFIM.py sampleTDB.txt 35 20 output.txt + + (.venv) $ python3 HUFIM.py sampleTDB.txt 35 0.2 output.txt + + .. note:: minSup is treated as a count when it is an integer and as a proportion of the database size when it contains a decimal point + + + **Sample run of importing the code** + ----------------------------------------------- + .. 
def _convert(self, value) -> Union[int, float]:
    """
    To convert the given user specified value

    Integers are treated as absolute counts and returned unchanged. Floats are treated as a
    proportion and multiplied by the number of transactions in ``self._dataset``. Strings are
    parsed first: text that is a valid integer literal becomes a count, any other numeric text
    (for example ``"0.2"`` or ``"1e-1"``) becomes a proportion.

    :param value: user specified value
    :type value: int or float or str
    :return: converted value
    :rtype: int or float
    :raises ValueError: if a string value is not numeric
    """
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            # Not an integer literal: interpret the text as a proportion.
            value = float(value)
    # isinstance (not an exact type check) so that float subclasses are also
    # recognised as proportions instead of being passed through as counts.
    if isinstance(value, float):
        return len(self._dataset.getTransactions()) * value
    return value
def mine(self) -> None:
    """
    High Utility Frequent Pattern mining start here

    Steps: load the database, compute local utility/support of single items, keep and rename the
    promising items, prune and sort the transactions, compute the subtree utilities and start
    the recursive search. Runtime and memory figures are recorded at the end.

    NOTE(review): ``self._minSup`` is overwritten with the converted value, so calling ``mine()``
    twice on the same object with a proportional minSup converts it twice.

    :return: None
    """
    self._startTime = _ab._time.time()
    # Per-run state. The containers below are declared at class level, so
    # without rebinding them here they would be shared by every instance and
    # would still hold the values of a previous run.
    self._finalPatterns = {}
    self._utilityBinArrayLU = {}
    self._utilityBinArraySU = {}
    self._oldNamesToNewNames = {}
    self._newNamesToOldNames = {}
    self._candidateCount = 0
    self._patternCount = 0
    self._dataset = _Dataset(self._iFile, self._sep)
    self._singleItemSetsSupport = _ab._defaultdict(int)
    self._singleItemSetsUtility = _ab._defaultdict(int)
    self._useUtilityBinArrayToCalculateLocalUtilityFirstTime(self._dataset)
    self._minUtil = int(self._minUtil)
    self._minSup = self._convert(self._minSup)

    # Promising items: local utility and support both reach their thresholds.
    promising = [item for item, localUtility in self._utilityBinArrayLU.items()
                 if localUtility >= self._minUtil and self._singleItemSetsSupport[item] >= self._minSup]
    promising.sort(key=lambda item: self._singleItemSetsUtility[item], reverse=True)

    # Rename the promising items 1..n in descending order of their utility.
    itemsToKeep = []
    for newName, oldName in enumerate(promising, start=1):
        self._oldNamesToNewNames[oldName] = newName
        self._newNamesToOldNames[newName] = oldName
        itemsToKeep.append(newName)

    # Own prefix buffer for this run, never smaller than the former fixed size.
    self._temp = [0] * max(5000, len(itemsToKeep) + 1)

    for transaction in self._dataset.getTransactions():
        transaction.removeUnpromisingItems(self._oldNamesToNewNames)
    self._sortDatabase(self._dataset.getTransactions())
    # Drop transactions that lost all their items; the order of the others is kept.
    self._dataset.transactions = [transaction for transaction in self._dataset.getTransactions()
                                  if len(transaction.getItems()) != 0]

    # calculating suffix utility values
    totalUtility = sum(self._singleItemSetsUtility[self._newNamesToOldNames[item]] for item in itemsToKeep)
    # piItems: leading items whose suffix utility still reaches minUtil
    piItems = []
    for item in itemsToKeep:
        if totalUtility < self._minUtil:
            break
        piItems.append(item)
        totalUtility -= self._singleItemSetsUtility[self._newNamesToOldNames[item]]

    self._useUtilityBinArrayToCalculateSubtreeUtilityFirstTime(self._dataset)
    itemsToExplore = [item for item in piItems if self._utilityBinArraySU[item] >= self._minUtil]
    self._backTrackingHUFIM(self._dataset.getTransactions(), itemsToKeep, itemsToExplore, 0)

    self._endTime = _ab._time.time()
    process = _ab._psutil.Process(_ab._os.getpid())
    self._memoryUSS = process.memory_full_info().uss
    self._memoryRSS = process.memory_info().rss
    print("High Utility Frequent patterns were generated successfully using HUFIM algorithm")
positionE = items.index(e) + if transaction.getLastPosition() == positionE: + utilityPe += transaction.getUtilities()[positionE] + transaction.prefixUtility + supportPe += transaction.getSupport() + else: + projectedTransaction = transaction.projectTransaction(positionE) + utilityPe += projectedTransaction.prefixUtility + if previousTransaction == []: + previousTransaction = projectedTransaction + elif self._isEqual(projectedTransaction, previousTransaction): + if consecutiveMergeCount == 0: + items = previousTransaction.items[previousTransaction.offset:] + utilities = previousTransaction.utilities[previousTransaction.offset:] + support = previousTransaction.getSupport() + itemsCount = len(items) + positionPrevious = 0 + positionProjection = projectedTransaction.offset + while positionPrevious < itemsCount: + utilities[positionPrevious] += projectedTransaction.utilities[positionProjection] + positionPrevious += 1 + positionProjection += 1 + previousTransaction.prefixUtility += projectedTransaction.prefixUtility + sumUtilities = previousTransaction.prefixUtility + previousTransaction = _Transaction(items, utilities, + previousTransaction.transactionUtility + projectedTransaction.transactionUtility) + previousTransaction.prefixUtility = sumUtilities + previousTransaction.support = support + previousTransaction.support += projectedTransaction.getSupport() + else: + positionPrevious = 0 + positionProjected = projectedTransaction.offset + itemsCount = len(previousTransaction.items) + while positionPrevious < itemsCount: + previousTransaction.utilities[positionPrevious] += projectedTransaction.utilities[ + positionProjected] + positionPrevious += 1 + positionProjected += 1 + previousTransaction.transactionUtility += projectedTransaction.transactionUtility + previousTransaction.prefixUtility += projectedTransaction.prefixUtility + previousTransaction.support += projectedTransaction.getSupport() + consecutiveMergeCount += 1 + else: + 
transactionsPe.append(previousTransaction) + supportPe += previousTransaction.getSupport() + previousTransaction = projectedTransaction + consecutiveMergeCount = 0 + transaction.offset = positionE + if previousTransaction != []: + transactionsPe.append(previousTransaction) + supportPe += previousTransaction.getSupport() + # print("support is", supportPe) + self._temp[prefixLength] = self._newNamesToOldNames[e] + if (utilityPe >= self._minUtil) and (supportPe >= self._minSup): + self._output(prefixLength, utilityPe, supportPe) + if supportPe >= self._minSup: + self._useUtilityBinArraysToCalculateUpperBounds(transactionsPe, idx, itemsToKeep) + newItemsToKeep = [] + newItemsToExplore = [] + for l in range(idx + 1, len(itemsToKeep)): + itemK = itemsToKeep[l] + if self._utilityBinArraySU[itemK] >= self._minUtil: + newItemsToExplore.append(itemK) + newItemsToKeep.append(itemK) + elif self._utilityBinArrayLU[itemK] >= self._minUtil: + newItemsToKeep.append(itemK) + if len(transactionsPe) != 0: + self._backTrackingHUFIM(transactionsPe, newItemsToKeep, newItemsToExplore, prefixLength + 1) + + def _useUtilityBinArraysToCalculateUpperBounds(self, transactionsPe: List[_Transaction], j: int, + itemsToKeep: List[int]) -> None: + """ + A method to calculate the subtree utility and local utility of all items that can extend itemSet P U {e} + + :Attributes: + + :param transactionsPe: transactions the projected database for P U {e} + :type transactionsPe: list or Dataset + :param j:the position of j in the list of promising items + :type j:int + :param itemsToKeep :the list of promising items + :type itemsToKeep: list or Dataset + :return: None + """ + for i in range(j + 1, len(itemsToKeep)): + item = itemsToKeep[i] + self._utilityBinArrayLU[item] = 0 + self._utilityBinArraySU[item] = 0 + for transaction in transactionsPe: + sumRemainingUtility = 0 + i = len(transaction.getItems()) - 1 + while i >= transaction.offset: + item = transaction.getItems()[i] + if item in itemsToKeep: + 
def _output(self, tempPosition: int, utility: int, support: int) -> None:
    """
    Method to record an itemSet in the final patterns

    The itemSet is read from ``self._temp[0..tempPosition]``, every item is translated back to its
    original name through ``self._dataset.intToStr`` and the names are joined with tab characters.
    The resulting string is used as key of ``self._finalPatterns``.

    :param tempPosition: position of last item
    :type tempPosition: int
    :param utility: total utility of itemSet
    :type utility: int
    :param support: support of an itemSet
    :type support: int
    :return: None
    """
    self._patternCount += 1
    names = [self._dataset.intToStr.get(self._temp[position]) for position in range(tempPosition + 1)]
    self._finalPatterns["\t".join(names)] = [utility, support]
def _sortTransaction(self, trans1: '_Transaction', trans2: '_Transaction') -> int:
    """
    A comparator to order two transactions

    The item lists are compared from their last item backwards. At the first position where the
    items differ, the difference ``item of trans2 - item of trans1`` is returned. When one list
    is a suffix of the other, the shorter transaction comes first.

    :param trans1: the first transaction
    :type trans1: Trans
    :param trans2: the second transaction
    :type trans2: Trans
    :return: a negative number if trans1 is ordered first, a positive number if trans2 is
        ordered first, 0 if both have the same items
    :rtype: int
    """
    itemsX = trans1.getItems()
    itemsY = trans2.getItems()
    # zip stops at the shorter list, i.e. after the common number of trailing items.
    for itemX, itemY in zip(reversed(itemsX), reversed(itemsY)):
        if itemX != itemY:
            return itemY - itemX
    # Common suffix is identical: decide on the lengths (-1, 0 or 1).
    return (len(itemsX) > len(itemsY)) - (len(itemsX) < len(itemsY))
def save(self, outFile: str) -> None:
    """
    Complete set of frequent patterns will be written to an output file

    Every pattern is written on its own line as ``pattern:utility:support``, followed by a
    space and a newline.

    :param outFile: name of the output file
    :type outFile: csv file
    :return: None
    """
    self._oFile = outFile
    # The context manager closes the file even if writing fails. UTF-8 matches
    # the encoding used when the input database is read.
    with open(self._oFile, 'w', encoding='utf-8') as writer:
        writer.writelines("%s:%s:%s \n" % (pattern.strip(), values[0], values[1])
                          for pattern, values in self._finalPatterns.items())
if __name__ == "__main__":
    # Command line: HUFIM.py iFile minUtil minSup oFile [sep]
    _argv = _ab._sys.argv
    print("Number of arguments:", len(_argv))
    print("Arguments:", _argv)

    if len(_argv) not in (5, 6):
        raise WrongNumberOfArguments(
            "Please provide four arguments: iFile, minUtil, minSup and oFile \n"
            "or five arguments: iFile, minUtil, minSup, oFile and sep")

    iFile, minUtil, minSup, oFile = _argv[1:5]
    print("Input File:", iFile)
    print("Minimum Utility:", minUtil)
    print("Minimum Support:", minSup)
    print("Output File:", oFile)

    if len(_argv) == 6:
        sep = _argv[5]
        # A shell passes the two characters backslash + t; turn them into a real tab.
        if sep == "\\t":
            sep = "\t"
        print("Separator:", sep)
        _ap = HUFIM(iFile=iFile, minUtil=minUtil, minSup=minSup, sep=sep)
    else:
        _ap = HUFIM(iFile=iFile, minUtil=minUtil, minSup=minSup)

    _ap.mine()
    _ap.save(oFile)
    print("Total number of Frequent Patterns:", len(_ap.getPatterns()))
    print("Total Memory in USS:", _ap.getMemoryUSS())
    print("Total Memory in RSS:", _ap.getMemoryRSS())
    # getRuntime() is the same value printResults() labels as seconds.
    print("Total ExecutionTime in seconds:", _ap.getRuntime())
index 00000000..c9a38166 --- /dev/null +++ b/tests/highUtilityFrequentPattern/basic/test_HUFIM_terminal.ipynb @@ -0,0 +1,255 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Step1: Install PAMI library and upload your respective dataset" + ], + "metadata": { + "id": "WV5Dalojg4yq" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -U pami" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A_zajHFibvtZ", + "outputId": "64f04d82-ba22-40d6-b382-547d830199dc" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting pami\n", + " Downloading pami-2024.7.2-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from pami) (5.9.5)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from pami) (2.0.3)\n", + "Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (from pami) (5.15.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from pami) (3.7.1)\n", + "Collecting resource (from pami)\n", + " Downloading Resource-0.2.1-py2.py3-none-any.whl (25 kB)\n", + "Collecting validators (from pami)\n", + " Downloading validators-0.32.0-py3-none-any.whl (42 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.7/42.7 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: urllib3 in 
/usr/local/lib/python3.10/dist-packages (from pami) (2.0.7)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from pami) (9.4.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pami) (1.25.2)\n", + "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from pami) (5.0.2)\n", + "Collecting sphinx-rtd-theme (from pami)\n", + " Downloading sphinx_rtd_theme-2.0.0-py2.py3-none-any.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting discord.py (from pami)\n", + " Downloading discord.py-2.4.0-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from pami) (3.3)\n", + "Collecting deprecated (from pami)\n", + " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated->pami) (1.14.1)\n", + "Requirement already satisfied: aiohttp<4,>=3.7.4 in /usr/local/lib/python3.10/dist-packages (from discord.py->pami) (3.9.5)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (4.53.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (1.4.5)\n", + "Requirement already satisfied: 
packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (24.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pami) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pami) (2024.1)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly->pami) (8.4.2)\n", + "Collecting JsonForm>=0.0.2 (from resource->pami)\n", + " Downloading JsonForm-0.0.2.tar.gz (2.4 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting JsonSir>=0.0.2 (from resource->pami)\n", + " Downloading JsonSir-0.0.2.tar.gz (2.2 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting python-easyconfig>=0.1.0 (from resource->pami)\n", + " Downloading Python_EasyConfig-0.1.7-py2.py3-none-any.whl (5.4 kB)\n", + "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.8)\n", + "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.6)\n", + "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.1)\n", + "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.0.5)\n", + "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.1.10)\n", + "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.7)\n", + "Requirement already satisfied: Jinja2>=2.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (3.1.4)\n", + "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.16.1)\n", + "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (0.18.1)\n", + "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.2.0)\n", + "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.15.0)\n", + "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (0.7.16)\n", + "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.4.1)\n", + "Requirement already satisfied: requests>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.31.0)\n", + "Collecting 
sphinxcontrib-jquery<5,>=4 (from sphinx-rtd-theme->pami)\n", + " Downloading sphinxcontrib_jquery-4.1-py2.py3-none-any.whl (121 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.1/121.1 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (4.0.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2>=2.3->sphinx->pami) (2.1.5)\n", + "Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (from JsonForm>=0.0.2->resource->pami) (4.19.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->pami) (1.16.0)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from python-easyconfig>=0.1.0->resource->pami) (6.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in 
/usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (2024.6.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.18.1)\n", + "Building wheels for collected packages: JsonForm, JsonSir\n", + " Building wheel for JsonForm (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for JsonForm: filename=JsonForm-0.0.2-py3-none-any.whl size=3314 sha256=855f9176b21b508aff442bfcf5d2c600c6dd225662a9443d4398230c7bbc0381\n", + " Stored in directory: /root/.cache/pip/wheels/b6/e5/87/11026246d3bd4ad67c0615682d2d6748bbd9a40ac0490882bd\n", + " Building wheel for JsonSir (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for JsonSir: filename=JsonSir-0.0.2-py3-none-any.whl size=4752 sha256=1cd129231b056fba3de282f393e6d65d959513cc261ad9be0bf02161533643e3\n", + " Stored in directory: /root/.cache/pip/wheels/1d/4c/d3/4d9757425983b43eb709be1043d82cd03fb863ce5f56f117e6\n", + "Successfully built JsonForm JsonSir\n", + "Installing collected packages: JsonSir, validators, python-easyconfig, deprecated, sphinxcontrib-jquery, discord.py, sphinx-rtd-theme, JsonForm, resource, pami\n", + "Successfully installed JsonForm-0.0.2 JsonSir-0.0.2 deprecated-1.2.14 discord.py-2.4.0 pami-2024.7.2 python-easyconfig-0.1.7 resource-0.2.1 sphinx-rtd-theme-2.0.0 sphinxcontrib-jquery-4.1 validators-0.32.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://u-aizu.ac.jp/~udayrage/datasets/utilityDatabases/Utility_T10I4D100K.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AoOlU8vrb8vp", + "outputId": "4a11b701-efa0-46e1-ba8f-7a34455066ce" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2024-07-10 15:06:16-- https://u-aizu.ac.jp/~udayrage/datasets/utilityDatabases/Utility_T10I4D100K.csv\n", + "Resolving u-aizu.ac.jp (u-aizu.ac.jp)... 150.95.161.176, 150.31.244.160\n", + "Connecting to u-aizu.ac.jp (u-aizu.ac.jp)|150.95.161.176|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 7356594 (7.0M) [text/csv]\n", + "Saving to: ‘Utility_T10I4D100K.csv’\n", + "\n", + "Utility_T10I4D100K. 
100%[===================>] 7.02M 1.32MB/s in 5.6s \n", + "\n", + "2024-07-10 15:06:23 (1.25 MB/s) - ‘Utility_T10I4D100K.csv’ saved [7356594/7356594]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Step2: Download and Upload abstract.py into your Google Colab notebook" + ], + "metadata": { + "id": "YoqFr_YihKTx" + } + }, + { + "cell_type": "code", + "source": [ + "!python /content/test_HUFIM.py /content/Utility_T10I4D100K.csv 30000 1000 output.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c6XOEDzqcQ2o", + "outputId": "e337ec59-f16c-4688-ac29-dfce09c86cd0" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of arguments: 5\n", + "Arguments: ['/content/test_HUFIM.py', '/content/Utility_T10I4D100K.csv', '30000', '1000', 'output.csv']\n", + "Input File: /content/Utility_T10I4D100K.csv\n", + "Minimum Utility: 30000\n", + "Minimum Support: 1000\n", + "Output File: output.csv\n", + "High Utility Frequent patterns were generated successfully using HUFIM algorithm\n", + "Total number of Frequent Patterns: 385\n", + "Total Memory in USS: 168005632\n", + "Total Memory in RSS: 186916864\n", + "Total ExecutionTime in ms: 66.807133436203\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python /content/test_HUFIM.py /content/Utility_T10I4D100K.csv 30000 1000 output.csv \"\\t\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZixyAb_ScrcK", + "outputId": "709cf75b-b4da-423e-a31a-c91420263ed4" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of arguments: 6\n", + "Arguments: ['/content/test_HUFIM.py', '/content/Utility_T10I4D100K.csv', '30000', '1000', 'output.csv', '\\\\t']\n", + "Input File: /content/Utility_T10I4D100K.csv\n", + "Minimum Utility: 30000\n", + "Minimum Support: 1000\n", + "Output File: output.csv\n", +
"Separator: \t\n", + "High Utility Frequent patterns were generated successfully using HUFIM algorithm\n", + "Total number of Frequent Patterns: 385\n", + "Total Memory in USS: 167170048\n", + "Total Memory in RSS: 185995264\n", + "Total ExecutionTime in ms: 66.29919171333313\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "2PUkzuDIubjP" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM.py b/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM.py new file mode 100644 index 00000000..9d71e037 --- /dev/null +++ b/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM.py @@ -0,0 +1,1053 @@ +# Spatial High Utility Frequent ItemSet Mining (SHUFIM) aims to discover all itemSets in a spatioTemporal database +# that satisfy the user-specified minimum utility, minimum support and maximum distance constraints +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.highUtilityGeoreferencedFrequentPattern.basic import SHUFIM as alg +# +# obj=alg.SHUFIM("input.txt","Neighbours.txt",35,20) +# +# obj.mine() +# +# patterns = obj.getPatterns() +# +# print("Total number of Spatial high utility frequent Patterns:", len(patterns)) +# +# obj.save("output") +# +# memUSS = obj.getMemoryUSS() +# +# print("Total Memory in USS:", memUSS) +# +# memRSS = obj.getMemoryRSS() +# +# print("Total Memory in RSS", memRSS) +# +# run = obj.getRuntime() +# +# print("Total ExecutionTime in seconds:", run) +# + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
class _Transaction:
    """
    A single transaction of a utility database.

    :Attributes:

        items: list
            item identifiers of the transaction
        utilities: list
            utilities[i] is the utility of items[i]
        transactionUtility: int
            utility value of the transaction as supplied by the caller; it is
            reduced when items are removed or projected away
        pmus: list or None
            the pmu values passed to the constructor, None when none were given
        prefixUtility: int
            utility accumulated for the items that were projected away
        offset: int
            index of the first item that still belongs to a projected transaction
        support: int
            support carried by this transaction

    :Methods:

        projectTransaction(offsetE):
            create the projection of this transaction that starts right after offsetE
        getItems():
            return items in transaction
        getUtilities():
            return utilities in transaction
        getPmus():
            return pmus in transaction (None when not supplied)
        getLastPosition():
            return last position in a transaction
        removeUnpromisingItems(oldNamesToNewNames):
            keep only the items present in the map, rename them and sort them
        insertionSort():
            sort the items (and their utilities) in ascending item order
        getSupport():
            returns the support of the transaction
    """
    offset = 0
    prefixUtility = 0
    support = 1
    # Class-level default: getPmus() used to raise AttributeError whenever no
    # pmus were given, which includes every projected transaction.
    pmus = None

    def __init__(self, items, utilities, transactionUtility, pmus=None):
        self.items = items
        self.utilities = utilities
        self.transactionUtility = transactionUtility
        self.pmus = pmus
        self.support = 1

    def projectTransaction(self, offsetE):
        """
        Create the projection of this transaction on the item at offsetE.

        The new transaction shares the item and utility lists with this one
        (they are not copied) and starts at position offsetE + 1. The pmus are
        not carried over to the projection.

        :param offsetE: position of the projecting item in this transaction
        :type offsetE: int
        :return: the projected transaction
        :rtype: _Transaction
        """
        utilityE = self.utilities[offsetE]
        projected = _Transaction(self.items, self.utilities, self.transactionUtility)
        projected.prefixUtility = self.prefixUtility + utilityE
        projected.support = self.support
        # Remove the utility of the projecting item and of every item that is
        # skipped between the current offset and offsetE.
        skippedUtility = sum(self.utilities[self.offset:offsetE])
        projected.transactionUtility = self.transactionUtility - utilityE - skippedUtility
        projected.offset = offsetE + 1
        return projected

    def getItems(self):
        """
        A method to return items in transaction
        """
        return self.items

    def getPmus(self):
        """
        A method to return pmus in transaction (None when none were supplied)
        """
        return self.pmus

    def getUtilities(self):
        """
        A method to return utilities in transaction
        """
        return self.utilities

    def getLastPosition(self):
        """
        A method to return last position in a transaction
        """
        return len(self.items) - 1

    def getSupport(self):
        """
        A method to return support of a transaction
        """
        return self.support

    def removeUnpromisingItems(self, oldNamesToNewNames):
        """
        Keep only the items that appear in the map, rename them and sort them.

        The utility of every dropped item is subtracted from transactionUtility.

        :param oldNamesToNewNames: A map represent old names to new names
        :type oldNamesToNewNames: map
        """
        keptItems = []
        keptUtilities = []
        for item, utility in zip(self.items, self.utilities):
            if item in oldNamesToNewNames:
                keptItems.append(oldNamesToNewNames[item])
                keptUtilities.append(utility)
            else:
                self.transactionUtility -= utility
        self.items = keptItems
        self.utilities = keptUtilities
        self.insertionSort()

    def insertionSort(self):
        """
        Sort the items in ascending order, moving each utility with its item.

        The lists are updated in place. The built-in sort is stable, exactly
        like the insertion sort it replaces, so equal items keep their order.
        """
        if len(self.items) < 2:
            return
        pairs = sorted(zip(self.items, self.utilities), key=lambda pair: pair[0])
        self.items[:] = [item for item, _ in pairs]
        self.utilities[:] = [utility for _, utility in pairs]
class _Dataset:
    """
    The list of transactions of a utility database.

    :Attributes:

        transactions :
            the list of transactions in this dataset
        maxItem:
            the largest internal (integer) item name handed out so far
        strToInt / intToStr:
            maps between the item names of the input and the internal integers
        cnt:
            the next internal integer name to hand out
        sep:
            the separator between items inside one field of an input line

    :methods:

        createItemSets(datasetPath):
            load the transactions from a DataFrame, a URL or a file
        createTransaction(items, utilities, utilitySum, pmustring):
            create a transaction object from already split fields
        getMaxItem():
            return Maximum Item
        getTransactions():
            return transactions in database

    """
    transactions = []
    maxItem = 0

    def __init__(self, datasetPath, sep):
        self.strToInt = {}
        self.intToStr = {}
        self.cnt = 1
        self.sep = sep
        self.transactions = []
        self.createItemSets(datasetPath)

    def _splitField(self, field):
        """Split one field of an input line on the separator and drop empty tokens."""
        return [token for token in field.strip().split(self.sep) if token]

    def _addLine(self, line):
        """Parse one 'items:transactionUtility:utilities[:pmus]' line and store the transaction."""
        line = line.strip()
        if not line:
            # A blank line (typically the trailing one) used to raise IndexError.
            return
        fields = line.split(':')
        transactionUtility = int(fields[1])
        itemsString = self._splitField(fields[0])
        utilityString = self._splitField(fields[2])
        # Decided per line: previously the pmus of an earlier line leaked into
        # every following line that had no fourth field.
        pmuString = self._splitField(fields[3]) if len(fields) == 4 else None
        self.transactions.append(
            self.createTransaction(itemsString, utilityString, transactionUtility, pmuString))

    def _loadDataFrame(self, dataFrame):
        """Read the columns 'Transactions', 'Utilities', 'UtilitySum' and (optional) 'pmuString'."""
        if dataFrame.empty:
            print("its empty..")
        columns = dataFrame.columns.values.tolist()
        data = dataFrame['Transactions'].tolist() if 'Transactions' in columns else []
        utilities = dataFrame['Utilities'].tolist() if 'Utilities' in columns else []
        utilitySum = dataFrame['UtilitySum'].tolist() if 'UtilitySum' in columns else []
        # The 'pmuString' column used to be stored into utilitySum, and the empty
        # pmuString list was then indexed, so any non-empty frame failed.
        pmuString = dataFrame['pmuString'].tolist() if 'pmuString' in columns else None
        for k, items in enumerate(data):
            pmus = pmuString[k] if pmuString is not None else None
            self.transactions.append(self.createTransaction(items, utilities[k], utilitySum[k], pmus))

    def createItemSets(self, datasetPath):
        """
        Storing the complete transactions of the database/input file in a database variable

        :param datasetPath: a pandas DataFrame, a URL or the path to the input file
        :type datasetPath: pd.DataFrame or str
        """
        if isinstance(datasetPath, _ab._pd.DataFrame):
            self._loadDataFrame(datasetPath)
        if isinstance(datasetPath, str):
            if _ab._validators.url(datasetPath):
                for rawLine in _ab._urlopen(datasetPath):
                    self._addLine(rawLine.decode("utf-8"))
            else:
                try:
                    with open(datasetPath, 'r', encoding='utf-8') as f:
                        for line in f:
                            self._addLine(line)
                except IOError:
                    # Kept from the original: report and stop the interpreter.
                    print("File Not Found")
                    quit()

    def createTransaction(self, items, utilities, utilitySum, pmustring):
        """
        A method to create Transaction from dataset given

        Item names that were not seen before get the next free integer name.

        :param items: the item names of the transaction
        :type items: list
        :param utilities: utilities[i] is the utility of items[i]
        :type utilities: list
        :param utilitySum: represent utility sum of transaction
        :type utilitySum: int
        :param pmustring: the pmu values of the items, or None when there are none
        :type pmustring: list or None
        :return: the transaction built from the given fields
        :rtype: _Transaction
        """
        itemInts = []
        utilityInts = []
        pmus = [] if pmustring is not None else None
        for idx, item in enumerate(items):
            if self.strToInt.get(item) is None:
                self.strToInt[item] = self.cnt
                self.intToStr[self.cnt] = item
                self.cnt += 1
            itemInt = self.strToInt.get(item)
            if itemInt > self.maxItem:
                self.maxItem = itemInt
            itemInts.append(itemInt)
            utilityInts.append(int(utilities[idx]))
            if pmustring is not None:
                pmus.append(int(pmustring[idx]))
        return _Transaction(itemInts, utilityInts, utilitySum, pmus)

    def getMaxItem(self):
        """
        A method to return name of the largest item
        """
        return self.maxItem

    def getTransactions(self):
        """
        A method to return transactions from database
        """
        return self.transactions


class WrongNumberOfArguments(Exception):
    """Error type for a wrong number of arguments (presumably used by the command-line entry point)."""
    pass
+ + + :Attributes: + + iFile : file + Name of the input file to mine complete set of frequent patterns + nFile : file + Name of the Neighbours file that contain neighbours of items + oFile : file + Name of the output file to store complete set of frequent patterns + memoryRSS : float + To store the total amount of RSS memory consumed by the program + startTime:float + To record the start time of the mining process + endTime:float + To record the completion time of the mining process + minUtil : int + The user given minUtil + minSup : float + The user given minSup value + highUtilityFrequentSpatialItemSets: map + set of high utility itemSets + candidateCount: int + Number of candidates + utilityBinArrayLU: list + A map to hold the pmu values of the items in database + utilityBinArraySU: list + A map to hold the subtree utility values of the items is database + oldNamesToNewNames: list + A map to hold the subtree utility values of the items is database + newNamesToOldNames: list + A map to store the old name corresponding to new name + Neighbours : map + A dictionary to store the neighbours of a item + maxMemory: float + Maximum memory used by this program for running + patternCount: int + Number of SHUFI's (Spatial High Utility Frequent Itemsets) + itemsToKeep: list + keep only the promising items ie items whose supersets can be required patterns + itemsToExplore: list + keep items that subtreeUtility grater than minUtil + + :Methods : + + mine() + Mining process will start from here + getPatterns() + Complete set of patterns will be retrieved with this function + save(oFile) + Complete set of frequent patterns will be loaded in to a output file + getPatternsAsDataFrame() + Complete set of frequent patterns will be loaded in to a dataframe + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + 
getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function + calculateNeighbourIntersection(self, prefixLength) + A method to return common Neighbours of items + backtrackingEFIM(transactionsOfP, itemsToKeep, itemsToExplore, prefixLength) + A method to mine the SHUIs Recursively + useUtilityBinArraysToCalculateUpperBounds(transactionsPe, j, itemsToKeep, neighbourhoodList) + A method to calculate the sub-tree utility and local utility of all items that can extend itemSet P and e + output(tempPosition, utility) + A method ave a high-utility itemSet to file or memory depending on what the user chose + isEqual(transaction1, transaction2) + A method to Check if two transaction are identical + intersection(lst1, lst2) + A method that return the intersection of 2 list + useUtilityBinArrayToCalculateSubtreeUtilityFirstTime(dataset) + Scan the initial database to calculate the subtree utility of each items using a utility-bin array + sortDatabase(self, transactions) + A Method to sort transaction in the order of PMU + sortTransaction(self, trans1, trans2) + A Method to sort transaction in the order of PMU + useUtilityBinArrayToCalculateLocalUtilityFirstTime(self, dataset) + A method to scan the database using utility bin array to calculate the pmus + + **Executing the code on terminal :** + ----------------------------------------- + + .. code-block:: console + + Format: + + (.venv) $ python3 SHUFIM.py + + Example Usage: + + (.venv) $ python3 SHUFIM.py sampleTDB.txt output.txt sampleN.txt 35 20 + + .. note:: minSup will be considered in percentage of database transactions + + + **Sample run of importing the code:** + ----------------------------------------- + .. 
def __init__(self, iFile, nFile, minUtil, minSup, sep="\t"):
    """
    Hand all arguments over, unchanged and in the same order, to the parent initialiser.
    """
    super().__init__(iFile, nFile, minUtil, minSup, sep)


def _convert(self, value):
    """
    Turn the user-specified minSup into the value used for comparisons.

    An int is kept as a count. A float is multiplied by the number of
    transactions in the dataset. A str is read as a float (and scaled the same
    way) when it contains a '.', otherwise as an int. Any other type is
    returned untouched.

    :param value: user specified minSup value
    :type value: int or float or str
    :return: converted value
    :rtype: int or float
    """
    kind = type(value)
    if kind is int:
        return int(value)
    if kind is float:
        return len(self._dataset.getTransactions()) * value
    if kind is str:
        if '.' in value:
            return len(self._dataset.getTransactions()) * float(value)
        return int(value)
    return value
@deprecated(
    "It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
def startMine(self):
    """
    High Utility Frequent Pattern mining start here (deprecated alias of mine())
    """
    self.mine()


def mine(self):
    """
    High Utility Frequent Pattern mining start here

    Loads the dataset and the neighbour file, renames the promising items,
    prunes and sorts the transactions, then starts the recursive search.
    Runtime and memory figures are recorded for the getter methods.
    """
    self._startTime = _ab._time.time()
    self._patternCount = 0
    self._candidateCount = 0
    self._finalPatterns = {}
    # These containers are declared as class-level dicts. Writing into them
    # without rebinding leaked state between instances and between runs, so
    # every run now starts with fresh per-instance containers.
    self._Neighbours = {}
    self._oldNamesToNewNames = {}
    self._newNamesToOldNames = {}
    self._utilityBinArrayLU = {}
    self._utilityBinArraySU = {}
    self._dataset = _Dataset(self._iFile, self._sep)
    self._singleItemSetsSupport = _ab._defaultdict(int)
    self._singleItemSetsUtility = _ab._defaultdict(int)
    self._minUtil = int(self._minUtil)
    self._minSup = self._convert(self._minSup)

    # Neighbour file: first token is the item, the rest are its neighbours.
    # Names that do not occur in the dataset map to None, as before.
    strToInt = self._dataset.strToInt
    with open(self._nFile, 'r') as neighbourFile:
        for line in neighbourFile:
            fields = line.split("\n")[0].split(self._sep)
            self._Neighbours[strToInt.get(fields[0])] = [strToInt.get(name) for name in fields[1:]]

    InitialMemory = _ab._psutil.virtual_memory()[3]
    self._useUtilityBinArrayToCalculateLocalUtilityFirstTime(self._dataset)

    # Keep the items that satisfy both thresholds.
    _itemsToKeep = [key for key in self._utilityBinArrayLU
                    if self._utilityBinArrayLU[key] >= self._minUtil
                    and self._singleItemSetsSupport[key] >= self._minSup]
    # sorting items in decreasing order of their utilities
    _itemsToKeep.sort(key=lambda x: self._singleItemSetsUtility[x], reverse=True)
    # Rename the kept items to 1..n following that order.
    for _newName, _oldName in enumerate(_itemsToKeep, start=1):
        self._oldNamesToNewNames[_oldName] = _newName
        self._newNamesToOldNames[_newName] = _oldName
    _itemsToKeep = list(range(1, len(_itemsToKeep) + 1))

    for transaction in self._dataset.getTransactions():
        transaction.removeUnpromisingItems(self._oldNamesToNewNames)
    self._sortDatabase(self._dataset.getTransactions())
    # NOTE(review): dropping the first N transactions assumes _sortDatabase
    # puts the empty transactions first - confirm against _sortDatabase.
    _emptyTransactionCount = sum(
        1 for transaction in self._dataset.getTransactions() if len(transaction.getItems()) == 0)
    self._dataset.transactions = self._dataset.transactions[_emptyTransactionCount:]

    # calculating neighborhood suffix utility values
    _secondary = []
    for idx, item in enumerate(_itemsToKeep):
        _oldName = self._newNamesToOldNames[item]
        _cumulativeUtility = self._singleItemSetsUtility[_oldName]
        if _oldName in self._Neighbours:
            neighbors = {self._oldNamesToNewNames[y] for y in self._Neighbours[_oldName]
                         if y in self._oldNamesToNewNames}
            for _nextItem in _itemsToKeep[idx + 1:]:
                if _nextItem in neighbors:
                    _cumulativeUtility += self._singleItemSetsUtility[self._newNamesToOldNames[_nextItem]]
        if _cumulativeUtility >= self._minUtil:
            _secondary.append(item)

    self._useUtilityBinArrayToCalculateSubtreeUtilityFirstTime(self._dataset)
    _itemsToExplore = [item for item in _secondary if self._utilityBinArraySU[item] >= self._minUtil]
    self._backtrackingEFIM(self._dataset.getTransactions(), _itemsToKeep, _itemsToExplore, 0)

    _finalMemory = _ab._psutil.virtual_memory()[3]
    memory = (_finalMemory - InitialMemory) / 10000
    if memory > self._maxMemory:
        self._maxMemory = memory
    self._endTime = _ab._time.time()
    process = _ab._psutil.Process(_ab._os.getpid())
    self._memoryUSS = process.memory_full_info().uss
    self._memoryRSS = process.memory_info().rss
    print('Spatial High Utility Frequent Itemsets generated successfully using SHUFIM algorithm')
p-projected database + :type itemsToExplore: list + :param prefixLength: current prefixLength + :type prefixLength: int + """ + self._candidateCount += len(itemsToExplore) + for idx, e in enumerate(itemsToExplore): + initialMemory = _ab._psutil.virtual_memory()[3] + transactionsPe = [] + utilityPe = 0 + supportPe = 0 + previousTransaction = [] + consecutiveMergeCount = 0 + for transaction in transactionsOfP: + items = transaction.getItems() + if e in items: + positionE = items.index(e) + if transaction.getLastPosition() == positionE: + utilityPe += transaction.getUtilities()[positionE] + transaction.prefixUtility + supportPe += transaction.getSupport() + else: + projectedTransaction = transaction.projectTransaction(positionE) + utilityPe += projectedTransaction.prefixUtility + if previousTransaction == []: + previousTransaction = projectedTransaction + elif self._isEqual(projectedTransaction, previousTransaction): + if consecutiveMergeCount == 0: + items = previousTransaction.items[previousTransaction.offset:] + utilities = previousTransaction.utilities[previousTransaction.offset:] + support = previousTransaction.getSupport() + itemsCount = len(items) + positionPrevious = 0 + positionProjection = projectedTransaction.offset + while positionPrevious < itemsCount: + utilities[positionPrevious] += projectedTransaction.utilities[positionProjection] + positionPrevious += 1 + positionProjection += 1 + previousTransaction.prefixUtility += projectedTransaction.prefixUtility + sumUtilities = previousTransaction.prefixUtility + previousTransaction = _Transaction(items, utilities, + previousTransaction.transactionUtility + projectedTransaction.transactionUtility) + previousTransaction.prefixUtility = sumUtilities + previousTransaction.support = support + previousTransaction.support += projectedTransaction.getSupport() + else: + positionPrevious = 0 + positionProjected = projectedTransaction.offset + itemsCount = len(previousTransaction.items) + while positionPrevious < 
itemsCount: + previousTransaction.utilities[positionPrevious] += projectedTransaction.utilities[ + positionProjected] + positionPrevious += 1 + positionProjected += 1 + previousTransaction.transactionUtility += projectedTransaction.transactionUtility + previousTransaction.prefixUtility += projectedTransaction.prefixUtility + previousTransaction.support += projectedTransaction.getSupport() + consecutiveMergeCount += 1 + else: + transactionsPe.append(previousTransaction) + supportPe += previousTransaction.getSupport() + previousTransaction = projectedTransaction + consecutiveMergeCount = 0 + transaction.offset = positionE + if previousTransaction != []: + transactionsPe.append(previousTransaction) + supportPe += previousTransaction.getSupport() + self._temp[prefixLength] = self._newNamesToOldNames[e] + if utilityPe >= self._minUtil and supportPe >= self._minSup: + self._output(prefixLength, utilityPe, supportPe) + if supportPe >= self._minSup: + neighbourhoodList = self._calculateNeighbourIntersection(prefixLength) + # print(neighbourhoodList) + self._useUtilityBinArraysToCalculateUpperBounds(transactionsPe, idx, itemsToKeep, neighbourhoodList) + newItemsToKeep = [] + newItemsToExplore = [] + for l in range(idx + 1, len(itemsToKeep)): + itemK = itemsToKeep[l] + if self._utilityBinArraySU[itemK] >= self._minUtil: + if itemK in neighbourhoodList: + newItemsToExplore.append(itemK) + newItemsToKeep.append(itemK) + elif self._utilityBinArrayLU[itemK] >= self._minUtil: + if itemK in neighbourhoodList: + newItemsToKeep.append(itemK) + self._backtrackingEFIM(transactionsPe, newItemsToKeep, newItemsToExplore, prefixLength + 1) + finalMemory = _ab._psutil.virtual_memory()[3] + memory = (finalMemory - initialMemory) / 10000 + if self._maxMemory < memory: + self._maxMemory = memory + + def _useUtilityBinArraysToCalculateUpperBounds(self, transactionsPe, j, itemsToKeep, neighbourhoodList): + """ + A method to calculate the subtree utility and local utility of all items that can 
extend itemSet P U {e} + + :Attributes: + + :param transactionsPe: transactions the projected database for P U {e} + :type transactionsPe: list + :param j:the position of j in the list of promising items + :type j:int + :param itemsToKeep :the list of promising items + :type itemsToKeep: list + :param neighbourhoodList : the list of promising items that can extend itemSet P U {e} + :type neighbourhoodList: list + + """ + for i in range(j + 1, len(itemsToKeep)): + item = itemsToKeep[i] + self._utilityBinArrayLU[item] = 0 + self._utilityBinArraySU[item] = 0 + for transaction in transactionsPe: + length = len(transaction.getItems()) + i = length - 1 + while i >= transaction.offset: + item = transaction.getItems()[i] + if item in itemsToKeep: + remainingUtility = 0 + if self._newNamesToOldNames[item] in self._Neighbours: + itemNeighbours = self._Neighbours[self._newNamesToOldNames[item]] + for k in range(i, length): + transaction_item = transaction.getItems()[k] + if self._newNamesToOldNames[ + transaction_item] in itemNeighbours and transaction_item in neighbourhoodList: + remainingUtility += transaction.getUtilities()[k] + + remainingUtility += transaction.getUtilities()[i] + self._utilityBinArraySU[item] += remainingUtility + transaction.prefixUtility + self._utilityBinArrayLU[item] += transaction.transactionUtility + transaction.prefixUtility + i -= 1 + + def _calculateNeighbourIntersection(self, prefixLength): + """ + A method to find common Neighbours + :param prefixLength: the prefix itemSet + :type prefixLength:int + """ + intersectionList = self._Neighbours.get(self._temp[0]) + for i in range(1, prefixLength + 1): + intersectionList = self._intersection(self._Neighbours[self._temp[i]], intersectionList) + finalIntersectionList = [] + if intersectionList is None: + return finalIntersectionList + for item in intersectionList: + if item in self._oldNamesToNewNames: + finalIntersectionList.append(self._oldNamesToNewNames[item]) + return finalIntersectionList + + 
def _output(self, tempPosition, utility, support): + """ + A method save all high-utility itemSet to file or memory depending on what the user chose + :param tempPosition: position of last item + :type tempPosition : int + :param utility: total utility of itemSet + :type utility: int + :param support: support of an itemSet + :type support: int + """ + self._patternCount += 1 + s1 = str() + for i in range(0, tempPosition + 1): + s1 += self._dataset.intToStr.get((self._temp[i])) + if i != tempPosition: + s1 += "\t" + self._finalPatterns[s1] = [utility, support] + + def _isEqual(self, transaction1, transaction2): + """ + A method to Check if two transaction are identical + :param transaction1: the first transaction + :type transaction1: Trans + :param transaction2: the second transaction + :type transaction2: Trans + :return : whether both are identical or not + :rtype: bool + """ + + length1 = len(transaction1.items) - transaction1.offset + length2 = len(transaction2.items) - transaction2.offset + if length1 != length2: + return False + position1 = transaction1.offset + position2 = transaction2.offset + while position1 < len(transaction1.items): + if transaction1.items[position1] != transaction2.items[position2]: + return False + position1 += 1 + position2 += 1 + return True + + def _intersection(self, lst1, lst2): + """ + A method that return the intersection of 2 list + + :param lst1: items neighbour to item1 + :type lst1: list + :param lst2: items neighbour to item2 + :type lst2: list + :return :intersection of two lists + :rtype : list + """ + temp = set(lst2) + lst3 = [value for value in lst1 if value in temp] + return lst3 + + def _useUtilityBinArrayToCalculateSubtreeUtilityFirstTime(self, dataset): + """ + Scan the initial database to calculate the subtree utility of each item using a utility-bin array + :param dataset: the transaction database + :type dataset: Dataset + """ + for transaction in dataset.getTransactions(): + items = transaction.getItems() + 
utilities = transaction.getUtilities() + for idx, item in enumerate(items): + if item not in self._utilityBinArraySU: + self._utilityBinArraySU[item] = 0 + if self._newNamesToOldNames[item] not in self._Neighbours: + self._utilityBinArraySU[item] += utilities[idx] + continue + i = idx + 1 + sumSu = utilities[idx] + while i < len(items): + if self._newNamesToOldNames[items[i]] in self._Neighbours[self._newNamesToOldNames[item]]: + sumSu += utilities[i] + i += 1 + self._utilityBinArraySU[item] += sumSu + + def _sortDatabase(self, transactions): + """ + A Method to sort transaction in the order of PMU + :param transactions: transaction of items + :type transactions: Transaction + :return: sorted transaction + :rtype: Trans + """ + cmp_items = _comToKey(self._sortTransaction) + transactions.sort(key=cmp_items) + + def _sortTransaction(self, trans1, trans2): + """ + A Method to sort transaction in the order of PMU + :param trans1: the first transaction + :type trans1: Trans + :param trans2:the second transaction + :type trans2: Trans + :return: sorted transaction + :rtype: Trans + """ + trans1_items = trans1.getItems() + trans2_items = trans2.getItems() + pos1 = len(trans1_items) - 1 + pos2 = len(trans2_items) - 1 + if len(trans1_items) < len(trans2_items): + while pos1 >= 0: + sub = trans2_items[pos2] - trans1_items[pos1] + if sub != 0: + return sub + pos1 -= 1 + pos2 -= 1 + return -1 + elif len(trans1_items) > len(trans2_items): + while pos2 >= 0: + sub = trans2_items[pos2] - trans1_items[pos1] + if sub != 0: + return sub + pos1 -= 1 + pos2 -= 1 + return 1 + else: + while pos2 >= 0: + sub = trans2_items[pos2] - trans1_items[pos1] + if sub != 0: + return sub + pos1 -= 1 + pos2 -= 1 + return 0 + + def _useUtilityBinArrayToCalculateLocalUtilityFirstTime(self, dataset): + """ + A method to scan the database using utility bin array to calculate the pmus + :param dataset: the transaction database + :type dataset: dataset + """ + for transaction in dataset.getTransactions(): 
+ for idx, item in enumerate(transaction.getItems()): + self._singleItemSetsSupport[item] += 1 + self._singleItemSetsUtility[item] += transaction.getUtilities()[idx] + pmu = transaction.getUtilities()[idx] + if item in self._Neighbours: + neighbors = self._Neighbours[item] + for idx, item in enumerate(transaction.getItems()): + if item in neighbors: + pmu += transaction.getUtilities()[idx] + if item in self._utilityBinArrayLU: + # self._utilityBinArrayLU[item] += transaction.getPmus()[idx] + self._utilityBinArrayLU[item] += pmu + else: + # self._utilityBinArrayLU[item] = transaction.getPmus()[idx] + self._utilityBinArrayLU[item] = pmu + + def getPatternsAsDataFrame(self): + """ + Storing final patterns in a dataframe + :return: returning patterns in a dataframe + :rtype: pd.DataFrame + """ + dataFrame = {} + data = [] + for a, b in self._finalPatterns.items(): + data.append([a.replace('\t', ' '), b[0], b[1]]) + dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Utility', 'Support']) + + return dataFrame + + def getPatterns(self): + """ + Function to send the set of patterns after completion of the mining process + + :return: returning patterns + :rtype: dict + """ + return self._finalPatterns + + def save(self, outFile): + """ + Complete set of patterns will be loaded in to an output file + + :param outFile: name of the output file + :type outFile: csv file + """ + self.oFile = outFile + writer = open(self.oFile, 'w+') + for x, y in self._finalPatterns.items(): + patternsAndSupport = x.strip() + ":" + str(y[0]) + ":" + str(y[1]) + writer.write("%s \n" % patternsAndSupport) + + def getMemoryUSS(self): + """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + :rtype: float + """ + + return self._memoryUSS + + def getMemoryRSS(self): + """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: 
returning RSS memory consumed by the mining process + :rtype: float + """ + return self._memoryRSS + + def getRuntime(self): + """ + Calculating the total amount of runtime taken by the mining process + + :return: returning total amount of runtime taken by the mining process + :rtype: float + """ + return self._endTime - self._startTime + + def printResults(self): + """ + This function is used to print the results + """ + print("Total number of Spatial High Utility Frequent Patterns:", len(self.getPatterns())) + print("Total Memory in USS:", self.getMemoryUSS()) + print("Total Memory in RSS", self.getMemoryRSS()) + print("Total ExecutionTime in seconds:", self.getRuntime()) + + +if __name__ == "__main__": + print("Number of arguments:", len(_ab._sys.argv)) + print("Arguments:", _ab._sys.argv) + + if len(_ab._sys.argv) == 7: + iFile = _ab._sys.argv[1] + nFile = _ab._sys.argv[2] + minUtil = _ab._sys.argv[3] + minSup = _ab._sys.argv[4] + oFile = _ab._sys.argv[5] + sep = _ab._sys.argv[6] + if sep == "\\t": + sep = "\t" + print("Input File:", iFile) + print("Neighborhood File:", nFile) + print("Minimum Utility:", minUtil) + print("Minimum Support:", minSup) + print("Output File:", oFile) + print("Separator:", sep) + _ap = SHUFIM(iFile=iFile, nFile=nFile, minUtil=minUtil, minSup=minSup, sep=sep) + _ap.mine() + _ap.save(oFile) + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + elif len(_ab._sys.argv) == 6: + iFile = _ab._sys.argv[1] + nFile = _ab._sys.argv[2] + minUtil = _ab._sys.argv[3] + minSup = _ab._sys.argv[4] + oFile = _ab._sys.argv[5] + + print("Input File:", iFile) + print("Neighborhood File:", nFile) + print("Minimum Utility:", minUtil) + print("Minimum Support:", minSup) + print("Output File:", oFile) + + _ap = SHUFIM(iFile=iFile, nFile=nFile, minUtil=minUtil, minSup=minSup) + 
_ap.mine() + _ap.save(oFile) + + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + + + else: + raise WrongNumberOfArguments( + "Please provide five arguments: iFile, nFile, minUtil, minSup and oFile \n""or Please provide four arguments: iFile, minSup, oFile and sep") diff --git a/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM_terminal.ipynb b/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM_terminal.ipynb new file mode 100644 index 00000000..c1aed34b --- /dev/null +++ b/tests/highUtilityGeoreferencedFrequentPattern/basic/test_SHUFIM_terminal.ipynb @@ -0,0 +1,257 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Step1: Install PAMI library and upload your respective dataset" + ], + "metadata": { + "id": "WV5Dalojg4yq" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -U pami" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A_zajHFibvtZ", + "outputId": "c9b5a123-e6b3-485a-9f67-94d5275ea4c7" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting pami\n", + " Downloading pami-2024.7.2-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from pami) (5.9.5)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from pami) (2.0.3)\n", + 
"Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (from pami) (5.15.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from pami) (3.7.1)\n", + "Collecting resource (from pami)\n", + " Downloading Resource-0.2.1-py2.py3-none-any.whl (25 kB)\n", + "Collecting validators (from pami)\n", + " Downloading validators-0.32.0-py3-none-any.whl (42 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.7/42.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from pami) (2.0.7)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from pami) (9.4.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pami) (1.25.2)\n", + "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from pami) (5.0.2)\n", + "Collecting sphinx-rtd-theme (from pami)\n", + " Downloading sphinx_rtd_theme-2.0.0-py2.py3-none-any.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting discord.py (from pami)\n", + " Downloading discord.py-2.4.0-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m43.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from pami) (3.3)\n", + "Collecting deprecated (from pami)\n", + " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated->pami) (1.14.1)\n", + "Requirement already satisfied: 
aiohttp<4,>=3.7.4 in /usr/local/lib/python3.10/dist-packages (from discord.py->pami) (3.9.5)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (4.53.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (24.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->pami) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pami) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pami) (2024.1)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly->pami) (8.4.2)\n", + "Collecting JsonForm>=0.0.2 (from resource->pami)\n", + " Downloading JsonForm-0.0.2.tar.gz (2.4 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting JsonSir>=0.0.2 (from resource->pami)\n", + " Downloading JsonSir-0.0.2.tar.gz (2.2 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting python-easyconfig>=0.1.0 (from resource->pami)\n", + " Downloading Python_EasyConfig-0.1.7-py2.py3-none-any.whl (5.4 kB)\n", + "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.8)\n", + "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.6)\n", + "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.1)\n", + "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.0.5)\n", + "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.1.10)\n", + "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.0.7)\n", + "Requirement already satisfied: Jinja2>=2.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (3.1.4)\n", + "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.16.1)\n", + "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (0.18.1)\n", + "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.2.0)\n", + "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.15.0)\n", + "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (0.7.16)\n", + "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (1.4.1)\n", + "Requirement already satisfied: requests>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->pami) (2.31.0)\n", + "Collecting 
sphinxcontrib-jquery<5,>=4 (from sphinx-rtd-theme->pami)\n", + " Downloading sphinxcontrib_jquery-4.1-py2.py3-none-any.whl (121 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.1/121.1 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (4.0.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2>=2.3->sphinx->pami) (2.1.5)\n", + "Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (from JsonForm>=0.0.2->resource->pami) (4.19.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->pami) (1.16.0)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from python-easyconfig>=0.1.0->resource->pami) (6.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in 
/usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.5.0->sphinx->pami) (2024.6.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.18.1)\n", + "Building wheels for collected packages: JsonForm, JsonSir\n", + " Building wheel for JsonForm (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for JsonForm: filename=JsonForm-0.0.2-py3-none-any.whl size=3314 sha256=3256c589a0b230dab3ef1fbf68dacb89d4cb9820739ca73061a14b353a3e134e\n", + " Stored in directory: /root/.cache/pip/wheels/b6/e5/87/11026246d3bd4ad67c0615682d2d6748bbd9a40ac0490882bd\n", + " Building wheel for JsonSir (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for JsonSir: filename=JsonSir-0.0.2-py3-none-any.whl size=4752 sha256=2412b2648e60d861eb1362a784e4a2979950061313d484c4c0dfcce4123e339d\n", + " Stored in directory: /root/.cache/pip/wheels/1d/4c/d3/4d9757425983b43eb709be1043d82cd03fb863ce5f56f117e6\n", + "Successfully built JsonForm JsonSir\n", + "Installing collected packages: JsonSir, validators, python-easyconfig, deprecated, sphinxcontrib-jquery, discord.py, sphinx-rtd-theme, JsonForm, resource, pami\n", + "Successfully installed JsonForm-0.0.2 JsonSir-0.0.2 deprecated-1.2.14 discord.py-2.4.0 pami-2024.7.2 python-easyconfig-0.1.7 resource-0.2.1 sphinx-rtd-theme-2.0.0 sphinxcontrib-jquery-4.1 validators-0.32.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://u-aizu.ac.jp/~udayrage/datasets/utilityDatabases/Utility_T10I4D100K.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AoOlU8vrb8vp", + "outputId": "e03cf910-6110-4ce7-9ddf-093eae8ae454" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2024-07-10 14:52:22-- https://u-aizu.ac.jp/~udayrage/datasets/utilityDatabases/Utility_T10I4D100K.csv\n", + "Resolving u-aizu.ac.jp (u-aizu.ac.jp)... 150.31.244.160, 150.95.161.176\n", + "Connecting to u-aizu.ac.jp (u-aizu.ac.jp)|150.31.244.160|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 7356594 (7.0M) [text/csv]\n", + "Saving to: ‘Utility_T10I4D100K.csv’\n", + "\n", + "Utility_T10I4D100K. 
100%[===================>] 7.02M 1.50MB/s in 8.3s \n", + "\n", + "2024-07-10 14:52:32 (865 KB/s) - ‘Utility_T10I4D100K.csv’ saved [7356594/7356594]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Step2: Download and upload abstract.py into your Google Colab notebook" + ], + "metadata": { + "id": "YoqFr_YihKTx" + } + }, + { + "cell_type": "code", + "source": [ + "!python /content/test_SHUFIM.py /content/Utility_T10I4D100K.csv /content/mxt.txt 30000 1000 output.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c6XOEDzqcQ2o", + "outputId": "639264ab-93ea-45e3-9fb7-256b9d50a0ff" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of arguments: 6\n", + "Arguments: ['/content/test_SHUFIM.py', '/content/Utility_T10I4D100K.csv', '/content/mxt.txt', '30000', '1000', 'output.csv']\n", + "Input File:
/content/Utility_T10I4D100K.csv\n", + "Neighborhood File: /content/mxt.txt\n", + "Minimum Utility: 30000\n", + "Minimum Support: 1000\n", + "Output File: output.csv\n", + "Separator: \t\n", + "Spatial High Utility Frequent Itemsets generated successfully using SHUFIM algorithm\n", + "Total number of Frequent Patterns: 119\n", + "Total Memory in USS: 150495232\n", + "Total Memory in RSS: 169615360\n", + "Total ExecutionTime in ms: 18.100584983825684\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "2PUkzuDIubjP" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file