diff --git a/src/main/java/de/unijena/cheminf/clustering/art2a/Art2aEuclideanClusteringTask.java b/src/main/java/de/unijena/cheminf/clustering/art2a/Art2aEuclideanClusteringTask.java new file mode 100644 index 0000000..edc5194 --- /dev/null +++ b/src/main/java/de/unijena/cheminf/clustering/art2a/Art2aEuclideanClusteringTask.java @@ -0,0 +1,251 @@ +/* + * ART2a Clustering for Java + * Copyright (C) 2023 Betuel Sevindik, Felix Baensch, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package de.unijena.cheminf.clustering.art2a; + +import de.unijena.cheminf.clustering.art2a.clustering.euclideanClustering.Art2aEuclideanDoubleClustering; +import de.unijena.cheminf.clustering.art2a.exceptions.ConvergenceFailedException; +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClustering; +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClusteringResult; + +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Callable task that clusters input vectors (fingerprints) with the Euclidean ART-2a clustering. + * The clustering itself is delegated to an Art2aEuclideanDoubleClustering instance that is created in the constructor. + * NOTE(review): the members used by this class (art2aClustering, isClusteringResultExported, isSeedSet, seed, + * DEFAULT_SEED_VALUE_TO_RANDOMIZE_INPUT_VECTORS, LOGGER) have no declaration in the visible part of the class; + * confirm that they are declared. + * + * @author Betuel Sevindik + * @version 1.0.0.0 + */ +public class Art2aEuclideanClusteringTask implements Callable { + // + /** + * Float clustering task constructor. + * NOTE: the constructor documented here is currently commented out. + * Creates a new Art2aClusteringTask instance with the specified parameters. + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. + * In addition, all inputs must have the same length. Each column of the matrix contains one component of the input. + * @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. + * @param anIsClusteringResultExported if the parameter is set to true, the cluster results + * are exported to text files. + * @param aRequiredSimilarity parameter indicating the minimum similarity between the current + * cluster vectors and the previous cluster vectors. The parameter is crucial + * for the convergence of the system. If the parameter is set too high, a much + * more accurate similarity is expected and the convergence may take longer, + * while a small parameter expects a lower similarity between the cluster + * vectors and thus the system may converge faster. 
+ * @param aLearningParameter parameter to define the intensity of keeping the old cluster vector in mind + * before the system adapts it to the new sample vector. + * @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments + * is done in the constructor of Art2aFloatClustering. + * @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is + * done in the constructor of Art2aFloatClustering. + * + */ + /** public Art2aEuclideanClusteringTask(float aVigilanceParameter, float[][] aDataMatrix, int aMaximumEpochsNumber, + boolean anIsClusteringResultExported, float aRequiredSimilarity, float aLearningParameter) + throws IllegalArgumentException, NullPointerException { + this.isClusteringResultExported = anIsClusteringResultExported; + this.isSeedSet = false; + this.art2aEuclideanClustering = new Art2aFloatClustering(aDataMatrix, aMaximumEpochsNumber, aVigilanceParameter, + aRequiredSimilarity, aLearningParameter); + }**/ + // + /** + * Float clustering task constructor. + * NOTE: the constructor documented here is currently commented out. + * Creates a new Art2aClusteringTask instance with the specified parameters. + * For the required similarity and learning parameter default values are used. + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. + * In addition, all inputs must have the same length. + * Each column of the matrix contains one component of the input. + * @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. + * @param anIsClusteringResultExported if the parameter is set to true, the cluster results + * are exported to text files. + * @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments + * is done in the constructor of Art2aFloatClustering. 
+ * @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is + * done in the constructor of Art2aFloatClustering. + * + * @see de.unijena.cheminf.clustering.art2a.Art2aClusteringTask#Art2aClusteringTask(float, float[][], int, + * boolean, float, float) + */ + /**public Art2aClusteringTask(float aVigilanceParameter, float[][] aDataMatrix, int aMaximumEpochsNumber, + boolean anIsClusteringResultExported) + throws IllegalArgumentException, NullPointerException { + this(aVigilanceParameter, aDataMatrix, aMaximumEpochsNumber, anIsClusteringResultExported, + Art2aClusteringTask.REQUIRED_SIMILARITY_FLOAT, Art2aClusteringTask.DEFAULT_LEARNING_PARAMETER_FLOAT); + }**/ + // + /** + * Double clustering task constructor. + * Creates a new Art2aEuclideanClusteringTask that wraps a new Art2aEuclideanDoubleClustering instance + * built from the specified parameters. + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. + * In addition, all inputs must have the same length. + * Each column of the matrix contains one component of the input. + * @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. + * @param anIsClusteringResultExported if the parameter is set to true, the cluster results are + * exported to text files. + * @param aRequiredSimilarity parameter indicating the minimum similarity between the current + * cluster vectors and the previous cluster vectors. + * @param aLearningParameter parameter to define the intensity of keeping the old cluster vector in mind + * before the system adapts it to the new sample vector. + * @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments + * is done in the constructor of Art2aEuclideanDoubleClustering. + * @throws NullPointerException is thrown, if the given aDataMatrix is null. 
The checking of the data matrix is + * done in the constructor of Art2aEuclideanDoubleClustering. + */ + public Art2aEuclideanClusteringTask(double aVigilanceParameter, double[][] aDataMatrix, int aMaximumEpochsNumber, + boolean anIsClusteringResultExported, double aRequiredSimilarity, double aLearningParameter) + throws IllegalArgumentException, NullPointerException { + this.isClusteringResultExported = anIsClusteringResultExported; + this.art2aClustering = new Art2aEuclideanDoubleClustering(aDataMatrix, aMaximumEpochsNumber, aVigilanceParameter, + aRequiredSimilarity, aLearningParameter); + } + // + /** + * Double clustering task constructor. + * Delegates to the six-argument double constructor of this class. + * For the required similarity and the learning parameter the default values + * Art2aClusteringTask.REQUIRED_SIMILARITY_DOUBLE and Art2aClusteringTask.DEFAULT_LEARNING_PARAMETER_DOUBLE are used. + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. + * In addition, all inputs must have the same length. Each column of the matrix contains one component of the input. + * @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. + * @param anIsClusteringResultExported if the parameter is set to true, the cluster results are + * exported to text files. + * @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments + * is done in the constructor of Art2aEuclideanDoubleClustering. + * @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is + * done in the constructor of Art2aEuclideanDoubleClustering. 
+ * + * @see #Art2aEuclideanClusteringTask(double, double[][], int, boolean, double, double) + * + */ + public Art2aEuclideanClusteringTask(double aVigilanceParameter, double[][] aDataMatrix, int aMaximumEpochsNumber, + boolean anIsClusteringResultExported) throws IllegalArgumentException, NullPointerException { + this(aVigilanceParameter, aDataMatrix, aMaximumEpochsNumber, anIsClusteringResultExported, + Art2aClusteringTask.REQUIRED_SIMILARITY_DOUBLE, Art2aClusteringTask.DEFAULT_LEARNING_PARAMETER_DOUBLE); + } + // + // + // + /** + * Executes the clustering. + * The seed passed to setSeed(int) is used if one was set, otherwise + * DEFAULT_SEED_VALUE_TO_RANDOMIZE_INPUT_VECTORS is used. + * A ConvergenceFailedException is logged with level SEVERE and is not rethrown. + * + * @return clustering result, or null if the clustering failed with a ConvergenceFailedException. + */ + @Override + public IArt2aEuclideanClusteringResult call() { + try { + if(this.isSeedSet) { + return this.art2aClustering.getClusterResult(this.isClusteringResultExported, this.seed); + } else { + return this.art2aClustering.getClusterResult(this.isClusteringResultExported, + this.DEFAULT_SEED_VALUE_TO_RANDOMIZE_INPUT_VECTORS); + } + } catch (ConvergenceFailedException anException) { + Art2aEuclideanClusteringTask.LOGGER.log(Level.SEVERE, anException.toString(), anException); + return null; + } + } + // + // + // + /** + * + * Sets a user-defined seed value to randomize input vectors and marks the seed as set, + * so that call() uses this seed instead of the default seed value. + * Different seed values can lead to different clustering results. + * + * @param aSeed seed value + * @return user-defined seed value. 
+ */ + public int setSeed(int aSeed) { + this.seed = aSeed; + this.isSeedSet = true; + return this.seed; + } + // +} diff --git a/src/main/java/de/unijena/cheminf/clustering/art2a/abstractResult/euclideanClusteringAbstractResult/Art2aEuclideanAbstractResult.java b/src/main/java/de/unijena/cheminf/clustering/art2a/abstractResult/euclideanClusteringAbstractResult/Art2aEuclideanAbstractResult.java new file mode 100644 index 0000000..7d8533c --- /dev/null +++ b/src/main/java/de/unijena/cheminf/clustering/art2a/abstractResult/euclideanClusteringAbstractResult/Art2aEuclideanAbstractResult.java @@ -0,0 +1,206 @@ +/* + * ART2a Clustering for Java + * Copyright (C) 2023 Betuel Sevindik, Felix Baensch, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package de.unijena.cheminf.clustering.art2a.abstractResult.euclideanClusteringAbstractResult; + +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClusteringResult; + +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Abstract base class for clustering results. + * This abstract class implements the IArt2aEuclideanClusteringResult interface. + * The interface provides methods to access clustering results. + * This class implements the following parts of the IArt2aEuclideanClusteringResult interface: number of epochs, + * number of detected clusters, indices of the inputs of a cluster and the export of the results to text files. + * + * + * @author Betuel Sevindik + * @version 1.0.0.0 + */ +public abstract class Art2aEuclideanAbstractResult implements IArt2aEuclideanClusteringResult { + /** + * Queue of type String for clustering result (process). + * If it is null, exportClusteringResultsToTextFiles throws a NullPointerException. + */ + private ConcurrentLinkedQueue clusteringProcess; + /** + * Queue of type String for clustering result. + * If it is null, exportClusteringResultsToTextFiles throws a NullPointerException. + */ + private ConcurrentLinkedQueue clusteringResult; + /** + * Maps the cluster number to the number of inputs in the cluster. + * Only cluster numbers that occur in clusterView are contained as keys. + */ + private HashMap clusteringNumberToClusterMemberMap; + // + /** + * Represents the cluster assignment of each input vector. For example clusterView[4] = 0 means that + * the input vector with index 4 has been assigned to cluster 0. + * Entries with the value -1 are not counted for any cluster. + */ + private final int[] clusterView; + /** + * Final number of epochs the system needed to converge. + */ + private final int numberOfEpochs; + /** + * Final number of clusters detected after successful completion of clustering. + */ + private final int numberOfDetectedClusters; + /** + * Factor that is multiplied with the number of detected clusters to get the initial capacity + * of the map created in getClusterSize. + */ + private final double INITIAL_CAPACITY_VALUE = 1.5; + /** + * Logger of this class. 
+ */ + private static final Logger LOGGER = Logger.getLogger(Art2aEuclideanAbstractResult.class.getName()); + // + + /** + * Constructor of this class. + * Stores the given values and determines the size of each cluster from the given cluster view. + * + * @param aNumberOfEpochs final epoch number, must be greater than 0. + * @param aNumberOfDetectedClusters final number of detected clusters, must be at least 1. + * @param aClusterView array for cluster assignments for each input vector. The array is stored without being copied. + * @param aClusteringProcessQueue clustering result (process) queue of type string. + * @param aClusteringResultQueue clustering result queue of type string. + * @throws IllegalArgumentException is thrown, if aNumberOfEpochs is smaller than or equal to 0 or + * if aNumberOfDetectedClusters is smaller than 1. + */ + + public Art2aEuclideanAbstractResult(int aNumberOfEpochs, int aNumberOfDetectedClusters, int[] aClusterView, + ConcurrentLinkedQueue aClusteringProcessQueue, + ConcurrentLinkedQueue aClusteringResultQueue) throws IllegalArgumentException { + if (aNumberOfEpochs <= 0) { + throw new IllegalArgumentException("aNumberOfEpochs is invalid."); + } + if (aNumberOfDetectedClusters < 1) { + throw new IllegalArgumentException("aNumberOfDetectedClusters is invalid"); + } + this.clusterView = aClusterView; + this.numberOfEpochs = aNumberOfEpochs; + this.numberOfDetectedClusters = aNumberOfDetectedClusters; + this.clusteringProcess = aClusteringProcessQueue; + this.clusteringResult = aClusteringResultQueue; + this.clusteringNumberToClusterMemberMap = this.getClusterSize(this.clusterView); + } + // + + /** + * {@inheritDoc} + */ + @Override + public int getNumberOfEpochs() { + return this.numberOfEpochs; + } + // + + /** + * {@inheritDoc} + */ + @Override + public int getNumberOfDetectedClusters() { + return this.numberOfDetectedClusters; + } + // + + /** + * {@inheritDoc} + * This implementation collects, in ascending order, the indices of all entries of the cluster view + * that are equal to the given cluster number. + * NOTE(review): there is no lower bound check. For a negative cluster number or a cluster number that does + * not occur in the cluster view, the lookup of the cluster size yields null; verify that this cannot occur. + * + * @throws IllegalArgumentException is thrown, if aClusterNumber is greater than or equal to + * the number of detected clusters. + */ + @Override + public int[] getClusterIndices(int aClusterNumber) throws IllegalArgumentException { + if (aClusterNumber >= this.numberOfDetectedClusters) { + throw new IllegalArgumentException("The specified cluster number does not exist and exceeds " + + "the maximum number of clusters."); + + } else { + int[]
tmpIndicesInCluster = new int[this.clusteringNumberToClusterMemberMap.get(aClusterNumber)]; + int tmpInputIndices = 0; + int tmpIterator = 0; + for (int tmpClusterMember : this.clusterView) { + if (tmpClusterMember == aClusterNumber) { + tmpIndicesInCluster[tmpIterator] = tmpInputIndices; + tmpIterator++; + } + tmpInputIndices++; + } + return tmpIndicesInCluster; + } + } + // + + /** + * {@inheritDoc} + * This implementation writes every entry of the clustering result queue to aClusteringResultWriter and + * every entry of the clustering process queue to aClusteringProcessWriter, each followed by a line feed. + * An IOException that occurs while writing is logged with level SEVERE and is not rethrown. + * The writers are neither flushed nor closed by this method. + * + * @throws NullPointerException is thrown, if at least one of the writers is null or + * if the clustering result queue or the clustering process queue is null. + */ + @Override + public void exportClusteringResultsToTextFiles(Writer aClusteringResultWriter, Writer aClusteringProcessWriter) + throws NullPointerException { + if (aClusteringResultWriter == null || aClusteringProcessWriter == null) { + throw new NullPointerException("At least one of the writers is null."); + } + if (this.clusteringResult == null || this.clusteringProcess == null) { + throw new NullPointerException("The associated argument that enables the export of the clustering results is " + + "is set to false. \n" + + "Please set the argument for export to true."); + } + try { + for (String tmpClusteringResult : this.clusteringResult) { + aClusteringResultWriter.write(tmpClusteringResult + "\n"); + } + for (String tmpClusteringProcess : this.clusteringProcess) { + aClusteringProcessWriter.write(tmpClusteringProcess + "\n"); + } + } catch (IOException anException) { + Art2aEuclideanAbstractResult.LOGGER.log(Level.SEVERE, "Export to text files failed."); + } + } + // + + /** + * Method for determining the size of the detected clusters. + * Entries of the cluster view with the value -1 are skipped and not counted for any cluster. + * + * @param aClusterView represents the cluster assignments of each input vector. + * @return HashMap that maps the cluster number to the number of inputs in the cluster. 
+ */ + private HashMap getClusterSize(int[] aClusterView) { + HashMap tmpClusterToMemberMap = + new HashMap<>((int) (this.getNumberOfDetectedClusters() * this.INITIAL_CAPACITY_VALUE)); + for (int tmpClusterMembers : aClusterView) { + if (tmpClusterMembers == -1) { + continue; + } + if (!tmpClusterToMemberMap.containsKey(tmpClusterMembers)) { + tmpClusterToMemberMap.put(tmpClusterMembers, 1); + } else { + tmpClusterToMemberMap.put(tmpClusterMembers, tmpClusterToMemberMap.get(tmpClusterMembers) + 1); + } + } + return tmpClusterToMemberMap; + } + +} diff --git a/src/main/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClustering.java b/src/main/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClustering.java index 6efb01f..241009d 100644 --- a/src/main/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClustering.java +++ b/src/main/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClustering.java @@ -26,10 +26,11 @@ package de.unijena.cheminf.clustering.art2a.clustering.euclideanClustering; import de.unijena.cheminf.clustering.art2a.exceptions.ConvergenceFailedException; -import de.unijena.cheminf.clustering.art2a.interfaces.IArt2aClustering; -import de.unijena.cheminf.clustering.art2a.interfaces.IArt2aClusteringResult; -import de.unijena.cheminf.clustering.art2a.results.Art2aDoubleClusteringResult; +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClustering; +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClusteringResult; +import de.unijena.cheminf.clustering.art2a.results.euclideanClusteringResult.Art2aEuclideanDoubleClusteringResult; +import java.util.Arrays; import java.util.Random; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.logging.Logger; @@ -39,14 +40,13 @@ * machine
precision. The clustering is done by comparing the Euclidean distances of the input vectors * and allows a fast, stable and unsupervised clustering for open categorical problems. * This class intends the clustering of fingerprints. - * * Literature: * D. Wienke,"Neural resonance and adaption. Towards nature's principles in artificial pattern recognition",1993 * * @author Zeynep Dagtekin * @version 1.0.0.0 */ -public class Art2aEuclideanDoubleClustering implements IArt2aClustering { +public class Art2aEuclideanDoubleClustering implements IArt2aEuclideanClustering { // /** * Matrix with all fingerprints to be clustered. @@ -94,7 +94,7 @@ public class Art2aEuclideanDoubleClustering implements IArt2aClustering { /** * Number of fingerprints to be clustered. */ - public int numberOfInputVectors; + private final int numberOfInputVectors; /** * Dimensionality of the fingerprint. */ @@ -151,8 +151,6 @@ public class Art2aEuclideanDoubleClustering implements IArt2aClustering { public Art2aEuclideanDoubleClustering(double[][] aDataMatrix, int aMaximumNumberOfEpochs, double aVigilanceParameter, double aRequiredSimilarity, double aLearningParameter) throws IllegalArgumentException, NullPointerException { - // initialization of the network: - // Step 1 if(aDataMatrix == null) { throw new NullPointerException("aDataMatrix is null."); } @@ -163,7 +161,7 @@ public Art2aEuclideanDoubleClustering(double[][] aDataMatrix, int aMaximumNumber throw new IllegalArgumentException("The vigilance parameter must be greater than 0."); } if(aRequiredSimilarity < 0.0 || aRequiredSimilarity > 1.0) { - throw new IllegalArgumentException("The required similarity parameter must be greater than 0."); + throw new IllegalArgumentException("The required similarity parameter must be between 0 and 1."); } if(aLearningParameter < 0.0 || aLearningParameter > 1.0) { throw new IllegalArgumentException("The learning parameter must be greater than 0 and smaller than 1."); @@ -177,10 +175,13 @@ public 
Art2aEuclideanDoubleClustering(double[][] aDataMatrix, int aMaximumNumber this.numberOfComponents = this.dataMatrix[0].length; this.scalingFactor = 1.0 / Math.sqrt(this.numberOfComponents + 1.0); this.thresholdForContrastEnhancement = 1.0 / Math.sqrt(this.numberOfComponents + 1.0); + System.out.println("Data matrix initialized with " + this.numberOfInputVectors + " input vectors."); + System.out.println("Initial cluster matrix: " + Arrays.deepToString(this.clusterMatrix)); + } // // - // + // /** * The input data matrix with the input vectors/fingerprints is checked for correctness. * Accordingly, the input matrix must not contain any vectors that consist of components smaller than 0. @@ -213,6 +214,9 @@ private double[][] getCheckedAndScaledDataMatrix(double[][] aDataMatrix) throws throw new IllegalArgumentException("All vectors are null vectors. Clustering not possible."); } } + System.out.println("Data Matrix: " + Arrays.deepToString(aDataMatrix)); + System.out.println("Number of vectors: " + aDataMatrix.length); + System.out.println("Vector dimensions: " + tmpNumberOfVectorComponents); return aDataMatrix; } // @@ -236,53 +240,65 @@ private boolean isConverged(int aNumberOfDetectedClasses, int aConvergenceEpoch) if(aConvergenceEpoch < this.maximumNumberOfEpochs) { // Check convergence by evaluating the similarity of the cluster vectors of this and the previous epoch. tmpIsConverged = true; - double tmpDistanceOfClassVector = 0.0; - double tmpMaxDistanceOfClassVector = 0.0; + double tmpDistanceOfClassVector; + double tmpSpacialShift; + double tmpSumOfRowComponents; double tmpSumOfClassVector; double[] tmpCurrentRowInClusterMatrix; + double[] tmpPreviousEpochOtherRow; double[] tmpPreviousEpochRow; + double[] tmpEuclideanDistanceArray = new double[aNumberOfDetectedClasses]; // Finding the Maximum Distance. 
for (int i = 0; i < aNumberOfDetectedClasses; i++) { - tmpCurrentRowInClusterMatrix = this.clusterMatrix[i]; - tmpPreviousEpochRow = this.clusterMatrixPreviousEpoch[i]; - for (int j = 0; j < this.numberOfComponents; j++) { - tmpSumOfClassVector = tmpCurrentRowInClusterMatrix[j] - tmpPreviousEpochRow[j]; - tmpDistanceOfClassVector += tmpSumOfClassVector * tmpSumOfClassVector; - } - if (tmpDistanceOfClassVector > tmpMaxDistanceOfClassVector) { - tmpDistanceOfClassVector = tmpMaxDistanceOfClassVector; - } - // Scaling of the Maximum Distance. - double tmpSumOfCurrentRow = 0.0; - double tmpSumOfPreviousEpochRow = 0.0; - double tmpSquaredSumOfCurrentRow = 0.0; - double tmpSquaredSumOfPreviousEpochRow = 0.0; - double tmpNormalizationFactor = 0.0; - double tmpNormalizedDistance = 0.0; - for (int j = 0; j < this.numberOfComponents; j++){ - tmpSumOfCurrentRow += tmpCurrentRowInClusterMatrix[j] + tmpCurrentRowInClusterMatrix[j]; - tmpSumOfPreviousEpochRow += tmpPreviousEpochRow[j] + tmpPreviousEpochRow[j]; - tmpSquaredSumOfCurrentRow += tmpSumOfCurrentRow * tmpSumOfCurrentRow; - tmpSquaredSumOfPreviousEpochRow += tmpSumOfPreviousEpochRow * tmpSumOfPreviousEpochRow; - tmpNormalizationFactor += tmpSquaredSumOfCurrentRow + tmpSquaredSumOfPreviousEpochRow; - tmpNormalizedDistance = tmpDistanceOfClassVector / tmpNormalizationFactor; + for (int j = i + 1; j < aNumberOfDetectedClasses; j++) { + tmpPreviousEpochRow = this.clusterMatrixPreviousEpoch[i]; + tmpPreviousEpochOtherRow = this.clusterMatrixPreviousEpoch[j]; + tmpDistanceOfClassVector = 0.0; + for (int tmpComponentsOfEpochRow = 0; tmpComponentsOfEpochRow < this.numberOfComponents; tmpComponentsOfEpochRow++) { + tmpSumOfClassVector = tmpPreviousEpochRow[tmpComponentsOfEpochRow] - tmpPreviousEpochOtherRow[tmpComponentsOfEpochRow]; + tmpDistanceOfClassVector += tmpSumOfClassVector * tmpSumOfClassVector; + //tmpEuclideanDistanceArray[tmpComponentsOfEpochRow] = tmpDistanceOfClassVector; + } + tmpEuclideanDistanceArray[j] = 
tmpDistanceOfClassVector; } - if (tmpNormalizedDistance < this.requiredSimilarity) { - tmpIsConverged = false; - break; + int tmpGreatestDistanceIndex = 0; + for (int tmpPossibleMaxDistance = 0; tmpPossibleMaxDistance < tmpEuclideanDistanceArray.length; tmpPossibleMaxDistance++) { + if (tmpEuclideanDistanceArray[tmpPossibleMaxDistance] > tmpEuclideanDistanceArray[tmpGreatestDistanceIndex]) { + tmpGreatestDistanceIndex = tmpPossibleMaxDistance;//max distance 100% dissimilarity, therefore min distance 0% dissimilarity + } } + //for instance if two cars are 60% distant, they are not similar, they have to be minimum 40% far away from each other + //Maximum distance that is allowed in order to converge + for (int tmpVectorRow = 0 ; tmpVectorRow < aNumberOfDetectedClasses; tmpVectorRow++ ) { + tmpCurrentRowInClusterMatrix = this.clusterMatrix[tmpVectorRow]; + tmpPreviousEpochRow = this.clusterMatrixPreviousEpoch[tmpVectorRow]; + tmpSpacialShift = 0.0; + for (int tmpVectorComponent = 0; tmpVectorComponent < this.numberOfComponents; tmpVectorComponent++) { + tmpSumOfRowComponents = tmpPreviousEpochRow[tmpVectorComponent] - tmpCurrentRowInClusterMatrix[tmpVectorComponent]; + tmpSpacialShift += tmpSumOfRowComponents * tmpSumOfRowComponents; + } + double tmpThresholdOfConvergence = tmpGreatestDistanceIndex * this.requiredSimilarity; + if (tmpSpacialShift > tmpThresholdOfConvergence) { + tmpIsConverged = false; + break; + } + } } if(!tmpIsConverged) { for(int tmpCurrentClusterMatrixVector = 0; tmpCurrentClusterMatrixVector < this.clusterMatrix.length; tmpCurrentClusterMatrixVector++) { tmpRow = this.clusterMatrix[tmpCurrentClusterMatrixVector]; - this.clusterMatrixPreviousEpoch[tmpCurrentClusterMatrixVector] = tmpRow; + this.clusterMatrixPreviousEpoch[tmpCurrentClusterMatrixVector] = Arrays.copyOf(tmpRow, tmpRow.length); } } } else { throw new ConvergenceFailedException(String.format("Convergence failed for vigilance parameter: %2f" ,this.vigilanceParameter)); } + 
System.out.println("Epoch: " + aConvergenceEpoch + ", Converged: " + tmpIsConverged); + System.out.println("Checking convergence for epoch: " + aConvergenceEpoch); + System.out.println("Current cluster matrix: " + Arrays.deepToString(this.clusterMatrix)); + System.out.println("Previous epoch cluster matrix: " + Arrays.deepToString(this.clusterMatrixPreviousEpoch)); return tmpIsConverged; } // @@ -295,6 +311,8 @@ private boolean isConverged(int aNumberOfDetectedClasses, int aConvergenceEpoch) public void initializeMatrices() { this.clusterMatrix = new double[this.numberOfInputVectors][this.numberOfComponents]; this.clusterMatrixPreviousEpoch = new double[this.numberOfInputVectors][this.numberOfComponents]; + System.out.println("Matrix initialzed."); + System.out.println("Initial cluster matrix: " + Arrays.deepToString(this.clusterMatrix)); } // /** @@ -323,6 +341,8 @@ public int[] getRandomizeVectorIndices() { tmpSampleVectorIndicesInRandomOrder[tmpRandomIndex2]; tmpSampleVectorIndicesInRandomOrder[tmpRandomIndex2] = tmpBuffer; } + System.out.println("Input vectors randomized."); + System.out.println("Randomized vector indices: " + Arrays.toString(tmpSampleVectorIndicesInRandomOrder)); return tmpSampleVectorIndicesInRandomOrder; } // @@ -331,7 +351,7 @@ public int[] getRandomizeVectorIndices() { * Starts the clustering in double machine precision. 
*/ @Override - public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExported, int aSeedValue) throws ConvergenceFailedException { + public IArt2aEuclideanClusteringResult getClusterResult(boolean anIsClusteringResultExported, int aSeedValue) throws ConvergenceFailedException { // this.clusteringProcess = null; this.clusteringResult = null; @@ -363,12 +383,14 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor tmpClusterMatrixRow[tmpCurrentVectorComponentsInClusterMatrixIndex] = tmpInitialClusterVectorWeightValue; tmpClusterMatrixRowOld[tmpCurrentVectorComponentsInClusterMatrixIndex] = tmpInitialClusterVectorWeightValue; } + System.out.println("Cluster matrix after initialization: " + Arrays.deepToString(this.clusterMatrix)); } // // int tmpCurrentNumberOfEpochs = 0; if(anIsClusteringResultExported) { this.clusteringResult.add(String.format("Vigilance parameter: %2f",this.vigilanceParameter)); + System.out.println("Vigilance parameter: %2f"+this.vigilanceParameter); } // // @@ -378,15 +400,33 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor this.clusteringProcess.add(String.format("Art-2a clustering result for vigilance parameter: %2f",this.vigilanceParameter)); this.clusteringProcess.add(String.format("Number of epochs: %d",tmpCurrentNumberOfEpochs)); this.clusteringProcess.add(""); + System.out.println("Art-2a clustering result for vigilance parameter: %2f" + this.vigilanceParameter); } int[] tmpSampleVectorIndicesInRandomOrder = this.getRandomizeVectorIndices(); // // for(int tmpCurrentInput = 0; tmpCurrentInput < this.numberOfInputVectors; tmpCurrentInput++) { double[] tmpInputVector = new double[this.numberOfComponents]; + boolean tmpIsNullVector = true; + for(int tmpCurrentInputVectorComponentsIndex = 0; tmpCurrentInputVectorComponentsIndex < this.numberOfComponents; + tmpCurrentInputVectorComponentsIndex++ ) { + tmpInputVector[tmpCurrentInputVectorComponentsIndex] = + 
this.dataMatrix[tmpSampleVectorIndicesInRandomOrder[tmpCurrentInput]][tmpCurrentInputVectorComponentsIndex]; + if(tmpInputVector[tmpCurrentInputVectorComponentsIndex] !=0.0) { + tmpIsNullVector = false; + } + } if(anIsClusteringResultExported) { this.clusteringProcess.add(String.format("Input: %d / Vector %d", tmpCurrentInput, tmpSampleVectorIndicesInRandomOrder[tmpCurrentInput])); + System.out.println("Input: %d / Vector %d" + tmpCurrentInput + + tmpSampleVectorIndicesInRandomOrder[tmpCurrentInput]); + } + if(tmpIsNullVector) { + tmpClusterOccupation[tmpSampleVectorIndicesInRandomOrder[tmpCurrentInput]] = -1; + if(anIsClusteringResultExported) { + this.clusteringProcess.add("This input is a null vector"); + } } // else { @@ -398,6 +438,7 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor tmpInputVector[tmpManipulateComponentsIndex] = 0.0; } } + LOGGER.info("Input vector after contrast enhancement: "); // // if(tmpNumberOfDetectedClusters == 0) { @@ -408,6 +449,7 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor if(anIsClusteringResultExported) { this.clusteringProcess.add("Cluster number: 0"); this.clusteringProcess.add(String.format("Number of detected clusters: %d",tmpNumberOfDetectedClusters)); + System.out.println("Cluster number: 0"); } } // @@ -415,32 +457,43 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor // //sum of scaling factor and non normalized components! 
tmpRho = 0.0; - double tmpDifferenceOfScalingFactorAndInput; - for (int tmpInputVectorComponent = 0; tmpInputVectorComponent < this.numberOfComponents; tmpInputVectorComponent++){ + double tmpDifferenceOfScalingFactorAndInput = 0.0; + for (int tmpInputVectorComponent = 0; tmpInputVectorComponent < this.numberOfComponents; tmpInputVectorComponent++ ){ tmpDifferenceOfScalingFactorAndInput = this.scalingFactor - tmpInputVector[tmpInputVectorComponent]; tmpRho += tmpDifferenceOfScalingFactorAndInput * tmpDifferenceOfScalingFactorAndInput; + System.out.println("Debug: Inside loop, tmpRho = " + tmpRho); } tmpWinnerClusterIndex = tmpNumberOfDetectedClusters; boolean tmpIsMatchingClusterAvailable = true; // // tmpRho) { tmpRho = tmpRhoForExistingClustersSquared; - tmpWinnerClusterIndex = tmpCurrentClusterMatrixRowIndex; + tmpWinnerClusterIndex = tmpCurrentClusterMatrixRow; tmpIsMatchingClusterAvailable = false; + System.out.println("Debug: Updated tmpRho = " + tmpRho + ", tmpWinnerClusterIndex = " + tmpWinnerClusterIndex); } + LOGGER.info("Calculated tmpRho: " + tmpRho); + LOGGER.info("Calculated tmpRhoForExistingClustersSquared for cluster " + tmpCurrentClusterMatrixRow + ": " + tmpRhoForExistingClustersSquared); } // // @@ -469,15 +523,18 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor for(int tmpAdaptedComponentsIndex = 0; tmpAdaptedComponentsIndex < this.numberOfComponents; tmpAdaptedComponentsIndex++) { tmpInputVector[tmpAdaptedComponentsIndex] = - tmpInputVector[tmpAdaptedComponentsIndex] * this.learningParameter + tmpFactor * - this.clusterMatrix[tmpWinnerClusterIndex][tmpAdaptedComponentsIndex]; + (tmpInputVector[tmpAdaptedComponentsIndex] * this.learningParameter) + (tmpFactor * + this.clusterMatrix[tmpWinnerClusterIndex][tmpAdaptedComponentsIndex]); } this.clusterMatrix[tmpWinnerClusterIndex] = tmpInputVector; + System.out.println("Cluster matrix after updating existing cluster: " + Arrays.deepToString(this.clusterMatrix)); 
tmpClusterOccupation[tmpSampleVectorIndicesInRandomOrder[tmpCurrentInput]] = tmpWinnerClusterIndex; + System.out.println("Updated cluster occupation: " + Arrays.toString(tmpClusterOccupation)); if(anIsClusteringResultExported) { clusteringProcess.add(String.format("Cluster number: %d",tmpWinnerClusterIndex)); clusteringProcess.add(String.format("Number of detected clusters: %d",tmpNumberOfDetectedClusters)); + System.out.println("Cluster number: %d" + tmpWinnerClusterIndex); } // } @@ -495,6 +552,8 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor if(anIsClusteringResultExported) { clusteringProcess.add(String.format("Convergence status: %b",tmpIsSystemConverged)); clusteringProcess.add("---------------------------------------"); + System.out.println("Convergence status: %b"+tmpIsSystemConverged); + } // } @@ -505,14 +564,16 @@ public IArt2aClusteringResult getClusterResult(boolean anIsClusteringResultExpor this.clusteringResult.add(String.format("Number of detected clusters: %d",tmpNumberOfDetectedClusters)); this.clusteringResult.add(String.format("Convergence status: %b",tmpIsSystemConverged)); this.clusteringResult.add("---------------------------------------"); + System.out.println("Number of epochs: %d" + tmpCurrentNumberOfEpochs); + System.out.println("Number of detected clusters: %d"+tmpNumberOfDetectedClusters); } // // + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces; + +import de.unijena.cheminf.clustering.art2a.exceptions.ConvergenceFailedException; +/** + * Interface for implementing float and double Art-2a clustering. + * + * @author Zeynep Dagtekin, inspired by Betuel Sevindik's interface + * @version 1.0.0.0 + */ +public interface IArt2aEuclideanClustering { + /** + * Initialise the cluster matrices. + */ + void initializeMatrices(); + /** + * Since the Art-2a algorithm randomly selects any input vector, the input vectors must first be randomized. + * The input vectors/fingerprints are randomized so that all input vectors can be clustered by random selection. + * + * Here, the Fisher-Yates method is used to randomize the inputs. + * + * @return an array with vector indices in a random order + */ + int[] getRandomizeVectorIndices(); + /** + * Starts the Art-2A clustering algorithm. + * The clustering process begins by randomly selecting an input vector/fingerprint from the data matrix. + * After that the selected vector will be assigned to the first cluster. For all other subsequent + * If there is sufficient similarity to an existing cluster, they are assigned to that cluster. + * Otherwise, a new cluster is formed, and the input is added to it. Null vectors are not clustered. 
+ * + * @param anIsClusteringResultExported If the parameter == true, all information about the + * clustering is exported to 2 text files.The first exported text file is a detailed log of the clustering process + * and the intermediate results and the second file is a rough overview of the final result. + * @param aSeedValue user-defined seed value to randomize input vectors. + * @return IArt2aClusteringResult + * @throws ConvergenceFailedException is thrown, when convergence fails. + */ + IArt2aEuclideanClusteringResult getClusterResult(boolean anIsClusteringResultExported, int aSeedValue) throws ConvergenceFailedException; +} diff --git a/src/main/java/de/unijena/cheminf/clustering/art2a/interfaces/euclideanClusteringInterfaces/IArt2aEuclideanClusteringResult.java b/src/main/java/de/unijena/cheminf/clustering/art2a/interfaces/euclideanClusteringInterfaces/IArt2aEuclideanClusteringResult.java new file mode 100644 index 0000000..9489cd2 --- /dev/null +++ b/src/main/java/de/unijena/cheminf/clustering/art2a/interfaces/euclideanClusteringInterfaces/IArt2aEuclideanClusteringResult.java @@ -0,0 +1,106 @@ +/* + * ART2a Clustering for Java + * Copyright (C) 2023 Betuel Sevindik, Felix Baensch, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
/**
 * Interface for implementing clustering result classes.
 *
 * @param <T> generic parameter. This parameter is either a Double or a Float.
 *            The results of {@link #getVigilanceParameter()} and {@link #getDistanceBetweenClusters(int, int)}
 *            are typed as float or as double, depending on the clustering precision option.
 *
 * @author Zeynep Dagtekin, inspired by Betuel Sevindik's interface.
 * @version 1.0.0.0
 */
public interface IArt2aEuclideanClusteringResult<T> {
    /**
     * Returns the vigilance parameter for the clustering algorithm.
     *
     * @return vigilance parameter
     */
    T getVigilanceParameter();

    /**
     * Returns the number of epochs.
     *
     * @return int epoch number
     */
    int getNumberOfEpochs();

    /**
     * Returns the number of detected clusters.
     *
     * @return int detected cluster number
     */
    int getNumberOfDetectedClusters();

    /**
     * Returns the input indices assigned to the given cluster.
     *
     * @param aClusterNumber number of the cluster whose members are requested
     * @return array with the input indices for a given cluster.
     * @throws IllegalArgumentException is thrown if the given cluster does not exist
     */
    int[] getClusterIndices(int aClusterNumber) throws IllegalArgumentException;

    /**
     * Calculates the cluster representative. This means that the input that is the most similar to the
     * cluster vector is determined.
     *
     * @param aClusterNumber number of the cluster whose representative is requested.
     * @return int input index of the representative input in the cluster.
     * @throws IllegalArgumentException is thrown if the given cluster number is invalid.
     */
    int getClusterRepresentatives(int aClusterNumber) throws IllegalArgumentException;

    /**
     * Exports the result of the clustering to two text files. One of these files is a
     * very detailed representation of the results (clustering process file), while in the other only the
     * most important results are summarized (clustering result file).
     * IMPORTANT: In order to additionally export the clustering results into text files,
     * the folder must be created first.
     * This requires the method call setUpClusteringResultTextFilePrinter(String aPathName, Class)
     * or the user's own Writers and text files. This method call is optional, the folder can also be
     * created by the user.
     *
     * @see de.unijena.cheminf.clustering.art2a.util.FileUtil#setUpClusteringResultTextFilePrinters(String, Class)
     *
     * @param aClusteringResultWriter clustering result writer
     * @param aClusteringProcessWriter clustering result (process) writer
     * @throws NullPointerException is thrown, if the Writers are null.
     */
    void exportClusteringResultsToTextFiles(Writer aClusteringResultWriter, Writer aClusteringProcessWriter)
            throws NullPointerException;

    /**
     * Calculates the Euclidean distance between two clusters. This is the alternative to the angle based
     * calculation of ART-2a: the normalization steps are removed so the length of the vectors can be used
     * for the distance.
     *
     * @param aFirstCluster first cluster
     * @param aSecondCluster second cluster
     * @return distance between the two clusters, as double or float depending on {@code T}.
     * @throws IllegalArgumentException if the given parameters are invalid.
     */
    T getDistanceBetweenClusters(int aFirstCluster, int aSecondCluster) throws IllegalArgumentException;
}
+ */ + +package de.unijena.cheminf.clustering.art2a.results.euclideanClusteringResult; + +import de.unijena.cheminf.clustering.art2a.abstractResult.euclideanClusteringAbstractResult.Art2aEuclideanAbstractResult; + +import java.util.Arrays; +import java.util.Objects; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.logging.Logger; + +/** + * Result class for Euclidean double clustering. + * + * @author Zeynep Dagtekin, based on Betuel Sevindik's result class. + * @version 1.0.0.0 + */ +public class Art2aEuclideanDoubleClusteringResult extends Art2aEuclideanAbstractResult { + /** + * Cache for cluster representatives. + */ + private int[] cacheClusterRepresentativesIndices; + /** + * Cache for cluster distances. + */ + private double[][] cacheDistanceBetweenClusters; + // + // + // + /** + * Matrix contains all cluster vectors. + */ + private final double[][] doubleClusterMatrix; + /** + * Matrix contains all input vector/fingerprints to be clustered. + * Each row in the matrix corresponds to an input vector. + */ + private final double[][] dataMatrix; + /** + * The vigilance parameter is above 0. The parameter influences the sensitivity of the clustering. + * A vigilance parameter close to 0 leads to a coarse clustering (few clusters). + */ + private final double vigilanceParameter; + // + /** + * Logger of this class + */ + private static final Logger LOGGER = Logger.getLogger(Art2aEuclideanDoubleClusteringResult.class.getName()); + /** + * Constructor. + * + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aNumberOfEpochs final epoch number. + * @param aNumberOfDetectedClusters final number of detected clusters. + * @param aClusteringProcessQueue clustering result (process) queue of ty String. + * The queue is required to be able to export the cluster results. If it is not specified, they are set to null and + * export is not possible. 
+ * @param aClusteringResultQueue clustering result queue of typ String. See {@code #aClusteringProcessQueue} + * @param aClusterView array for cluster assignment of each input vector. + * @param aClusterMatrix cluster vector matrix. All cluster vectors created after double ART-2a clustering are + * stored in this matrix. + * @param aDataMatrix matrix with all input vectors/fingerprints. + * Each row in the matrix corresponds to an input vector. + * @throws NullPointerException is thrown, if the specified matrices are null. + * @throws IllegalArgumentException is thrown, if the specified vigilance parameter is invalid. + * + */ + public Art2aEuclideanDoubleClusteringResult(double aVigilanceParameter, int aNumberOfEpochs, + int aNumberOfDetectedClusters, int[] aClusterView, + double[][] aClusterMatrix, double [][] aDataMatrix, + ConcurrentLinkedQueue aClusteringProcessQueue, + ConcurrentLinkedQueue aClusteringResultQueue) + throws NullPointerException, IllegalArgumentException { + super (aNumberOfEpochs, aNumberOfDetectedClusters, aClusterView, aClusteringProcessQueue, aClusteringResultQueue); + Objects.requireNonNull(aClusterMatrix, "aClusterMatrix is null."); + Objects.requireNonNull(aDataMatrix, "aDataMatrix is null."); + if (aVigilanceParameter <= 0.0) { + throw new IllegalArgumentException("The vigilance parameter must be greater than 0."); + } + this.vigilanceParameter = aVigilanceParameter; + this.doubleClusterMatrix = aClusterMatrix; + this.dataMatrix = aDataMatrix; + this.cacheClusterRepresentativesIndices = new int[aNumberOfDetectedClusters]; + Arrays.fill(this.cacheClusterRepresentativesIndices, -2); + this.cacheDistanceBetweenClusters = new double[aNumberOfDetectedClusters][aNumberOfDetectedClusters]; + + } + /** + * Constructor. + * + * @param aVigilanceParameter parameter to influence the number of clusters. + * @param aNumberOfEpochs final epoch number. + * @param aNumberOfDetectedClusters final number of detected clusters. 
+ * @param aClusterView array for cluster assignment of each input vector. + * @param aClusterMatrix double cluster vector matrix. All cluster vectors created after double ART-2a clustering are + * stored in this matrix. + * @param aDataMatrix double matrix with all input vectors/fingerprints. + * Each row in the matrix corresponds to an input vector. + * @throws NullPointerException is thrown, if the specified matrices are null. + * @throws IllegalArgumentException is thrown, if the specified vigilance parameter is invalid. + *

+ * + * @see de.unijena.cheminf.clustering.art2a.results.Art2aDoubleClusteringResult#Art2aDoubleClusteringResult(double, + * int, int, int[], double[][], double[][], ConcurrentLinkedQueue, ConcurrentLinkedQueue) + * + */ + public Art2aEuclideanDoubleClusteringResult(double aVigilanceParameter, int aNumberOfEpochs, int aNumberOfDetectedClusters, + int[] aClusterView, double[][] aClusterMatrix, double [][] aDataMatrix) + throws NullPointerException { + this(aVigilanceParameter, aNumberOfEpochs, aNumberOfDetectedClusters, aClusterView, aClusterMatrix, aDataMatrix, + null, null); + } + /** + * {@inheritDoc} + */ + @Override + public Double getVigilanceParameter() { + return this.vigilanceParameter; + } + /** + * {@inheritDoc} + */ + @Override + public int getClusterRepresentatives(int aClusterNumber) throws IllegalArgumentException { + if(aClusterNumber >= this.getNumberOfDetectedClusters() || aClusterNumber < 0) { + throw new IllegalArgumentException("The given cluster number does not exist or is invalid."); + } + if (this.cacheClusterRepresentativesIndices[aClusterNumber] == -2) { + int[] tmpClusterIndices = this.getClusterIndices(aClusterNumber); + double[] tmpCurrentClusterVector = this.doubleClusterMatrix[aClusterNumber]; + double tmpDifference; + double tmpDistance; + double[] tmpMatrixRow; + double [] tmpEuclideanDistanceArray = new double[tmpClusterIndices.length + 1]; + int tmpIterator = 0; + for (int tmpCurrentClusterVectorIndex = 0; tmpCurrentClusterVectorIndex < aClusterNumber; tmpCurrentClusterVectorIndex++) { + tmpMatrixRow = this.dataMatrix[tmpCurrentClusterVectorIndex]; + tmpDistance = 0.0; + for (int i = 0; i < tmpMatrixRow.length; i++) { + tmpDifference = tmpMatrixRow[i] - tmpCurrentClusterVector[i]; + tmpDistance += tmpDifference * tmpDifference; + } + tmpEuclideanDistanceArray[tmpIterator] = tmpDistance; + tmpIterator++; + } + int tmpIndexOfGreatestDistance = 0; + for (int i = 0; i < tmpEuclideanDistanceArray.length; i++) { + if 
(tmpEuclideanDistanceArray[i] > tmpEuclideanDistanceArray[tmpIndexOfGreatestDistance]) { + tmpIndexOfGreatestDistance = i; + } + } + this.cacheClusterRepresentativesIndices[aClusterNumber] = tmpClusterIndices[tmpIndexOfGreatestDistance]; + return tmpClusterIndices[tmpIndexOfGreatestDistance]; + } else { + return this.cacheClusterRepresentativesIndices[aClusterNumber]; + } + } + // + /** + * {@inheritDoc} + */ + @Override + public Double getDistanceBetweenClusters(int aFirstCluster, int aSecondCluster) throws IllegalArgumentException { + if(aFirstCluster < 0 || aSecondCluster < 0) { + throw new IllegalArgumentException("The given cluster number is less than zero or invalid."); + } + int tmpNumberOfDetectedCluster = this.getNumberOfDetectedClusters(); + if(aFirstCluster == aSecondCluster && (aFirstCluster >= tmpNumberOfDetectedCluster)) { + throw new IllegalArgumentException("The given cluster number(s) do(es) not exist."); + } else if (aFirstCluster == aSecondCluster) { + return 0.0; + } else { + if (aFirstCluster >= tmpNumberOfDetectedCluster || aSecondCluster>= tmpNumberOfDetectedCluster) { + throw new IllegalArgumentException("The given cluster number(s) do(es) not exist."); + } + if(this.cacheDistanceBetweenClusters[aFirstCluster] [aSecondCluster] == 0) { + double[] tmpFirstCluster = this.doubleClusterMatrix[aFirstCluster]; + double[] tmpSecondCluster = this.doubleClusterMatrix[aSecondCluster]; + double tmpDifferenceBetweenClusters; + double tmpDistanceBetweenClusters = 0.0; + for (int tmpFirstClusterVectorIndex = 0; tmpFirstClusterVectorIndex < tmpFirstCluster.length; tmpFirstClusterVectorIndex++) { + + tmpDifferenceBetweenClusters = tmpSecondCluster[tmpFirstClusterVectorIndex] - tmpFirstCluster[tmpFirstClusterVectorIndex]; + tmpDistanceBetweenClusters = tmpDifferenceBetweenClusters * tmpDifferenceBetweenClusters; + } + this.cacheDistanceBetweenClusters[aFirstCluster][aSecondCluster] = tmpDistanceBetweenClusters; + 
this.cacheDistanceBetweenClusters[aSecondCluster][aFirstCluster] = tmpDistanceBetweenClusters; + return tmpDistanceBetweenClusters; + } else { + return this.cacheDistanceBetweenClusters[aFirstCluster][aSecondCluster]; + } + } + + } + // +} diff --git a/src/test/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClusteringTest.java b/src/test/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClusteringTest.java new file mode 100644 index 0000000..ea52d75 --- /dev/null +++ b/src/test/java/de/unijena/cheminf/clustering/art2a/clustering/euclideanClustering/Art2aEuclideanDoubleClusteringTest.java @@ -0,0 +1,97 @@ +/* + * ART2a Clustering for Java + * Copyright (C) 2023 Betuel Sevindik, Felix Baensch, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package de.unijena.cheminf.clustering.art2a.clustering.euclideanClustering; + + +import de.unijena.cheminf.clustering.art2a.Art2aEuclideanClusteringTask; +import de.unijena.cheminf.clustering.art2a.exceptions.ConvergenceFailedException; +import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClusteringResult; +import de.unijena.cheminf.clustering.art2a.util.FileUtil; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.BufferedWriter; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + + +/** + * Test class for double clustering. + * + * @author Betuel Sevindik + * @version 1.0.0.0 + */ +class Art2aEuclideanDoubleClusteringTest { + // + /** + * Clustering result instance + */ + private static IArt2aEuclideanClusteringResult clusteringResult; + /** + * Array for storing number of epochs for all vigilance parameters + */ + private static int[] numberOfEpochsForAllVigilanceParameter; + /** + * Array for storing the number of detected clusters for all vigilance parameters + */ + private static int[] numberOfDetectedClustersForAllVigilanceParameter; + /** + * Matrix for storing the indices in different clusters for certain vigilance parameters + */ + private static int[][] clusterIndicesForAllVigilanceParameter; + /** + * Array for storing the cluster representatives in different clusters for certain vigilance parameters + */ + private static int[] clusterRepresentativesForAllVigilanceParameter; + /** + * Array for storing the angle between different clusters for certain vigilance parameters + */ + private static double[] clusterDistancesForAllVigilanceParameter; + // + /** + * Starts double clustering and stores the results 
in arrays to check for correctness. + * Clustering is performed for vigilance parameters from 0.1 to 0.9 in 0.1 steps. + * The clustering process for the different vigilance parameters is performed in parallel. + * + */ + + @Test + void TestInstance() throws ConvergenceFailedException { + double[][] tmpTestDataMatrix = FileUtil.importDoubleDataMatrixFromTextFile( + "src/test/resources/de/unijena/cheminf/clustering/art2a/Count_Fingerprints.txt", ','); + Art2aEuclideanDoubleClustering tmpInstance = new Art2aEuclideanDoubleClustering(tmpTestDataMatrix, 50, + 70, 0.99,0.1); + tmpInstance.getClusterResult(true, 2 ); + + } +}