diff --git a/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java b/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java index 91456bc96..306cd5467 100644 --- a/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java +++ b/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java @@ -33,156 +33,6 @@ import java.util.ArrayList; public class Service { - public static void readRow(String line, int offset, int nCols, double[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Double.parseDouble(elements[j]); - } - } - - public static void readRow(String line, int offset, int nCols, long[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Long.parseLong(elements[j]); - } - } - - public static void readRow(String line, int offset, int nCols, float[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Float.parseFloat(elements[j]); - } - } - - public static void readSparseData(String dataset, int nVectors, int nNonZeroValues, - long[] rowOffsets, long[] colIndices, double[] data) { - try { - BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); - readRow(bufferedReader.readLine(), 0, nVectors + 1, rowOffsets); - readRow(bufferedReader.readLine(), 0, nNonZeroValues, colIndices); - readRow(bufferedReader.readLine(), 0, nNonZeroValues, data); - bufferedReader.close(); - } catch (IOException e) { - ErrorHandling.printThrowable(e); - } catch (NumberFormatException e) { - ErrorHandling.printThrowable(e); - } - } - - private static int getRowLength(String line) { - String[] elements = line.split(","); - return elements.length; - } - - public static CSRNumericTable createSparseTable(DaalContext context, - String dataset) throws IOException { - BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); - - String rowIndexLine = bufferedReader.readLine(); - int nVectors = getRowLength(rowIndexLine); - long[] rowOffsets = new long[nVectors]; - - readRow(rowIndexLine, 0, nVectors, rowOffsets); - nVectors = nVectors - 1; - - String columnsLine = bufferedReader.readLine(); - int nCols = getRowLength(columnsLine); - - long[] colIndices = new long[nCols]; - readRow(columnsLine, 0, nCols, colIndices); - - String valuesLine = bufferedReader.readLine(); - int nNonZeros = getRowLength(valuesLine); - - float[] data = new float[nNonZeros]; - readRow(valuesLine, 0, nNonZeros, data); - - bufferedReader.close(); - - long maxCol = 0; - for (int i = 0; i < nCols; i++) { - if (colIndices[i] > maxCol) { - maxCol = colIndices[i]; - } - } - int nFeatures = (int) maxCol; - - if (nCols != nNonZeros || nNonZeros != (rowOffsets[nVectors] - 1) - || nFeatures == 0 || nVectors == 0) { - throw new IOException("Unable to read input dataset"); - } - - return new CSRNumericTable(context, data, colIndices, rowOffsets, nFeatures, nVectors); - } - - public static void printClassificationResult(float[] groundTruth, float[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); - - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+f\t\t%+f\n", groundTruth[i], classificationResults[i]); - } - } - - public static void printClassificationResult(NumericTable groundTruth, - NumericTable classificationResults, - String header1, String header2, - String message, int nMaxRows) { - int nCols = (int) groundTruth.getNumberOfColumns(); - int nRows = Math.min((int) groundTruth.getNumberOfRows(), nMaxRows); - - FloatBuffer dataGroundTruth = FloatBuffer.allocate(nCols * nRows); - FloatBuffer dataClassificationResults = FloatBuffer.allocate(nCols * nRows); - try { - dataGroundTruth = groundTruth.getBlockOfRows(0, nRows, dataGroundTruth); - dataClassificationResults = classificationResults.getBlockOfRows(0, nRows, - dataClassificationResults); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - System.out.println(message); - System.out.println(header1 + "\t" + header2); - for (int i = 0; i < nRows; i++) { - for (int j = 0; j < 1; j++) { - System.out.format("%+.0f\t\t%+.0f\n", dataGroundTruth.get(i * nCols + j), - dataClassificationResults.get(i * nCols + j)); - } - } - } - - public static void printClassificationResult(long[] groundTruth, long[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); - - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); - } - } - - public static void printClassificationResult(long[] groundTruth, int[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); - - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); - } - } public static void printMatrix(double[] matrix, int nCols, int nRows, String header) { System.out.println(header); @@ -397,168 +247,6 @@ public static void printNumericTables(NumericTable dataTable1, NumericTable data System.out.println(builder.toString()); } - public static void printAprioriItemsets(HomogenNumericTable largeItemsetsTable, - HomogenNumericTable largeItemsetsSupportTable) { - /* Get sizes of tables to store large item sets */ - int nItemsInLargeItemsets = (int) largeItemsetsTable.getNumberOfRows(); - int largeItemsetCount = (int) largeItemsetsSupportTable.getNumberOfRows(); - int nItemsetToPrint = 20; - - /* Get item sets and their support values */ - IntBuffer bufLargeItemsets = IntBuffer - .allocate(nItemsInLargeItemsets * (int) largeItemsetsTable.getNumberOfColumns()); - try { - bufLargeItemsets = largeItemsetsTable.getBlockOfRows(0, nItemsInLargeItemsets, - bufLargeItemsets); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] largeItemsets = new int[bufLargeItemsets.capacity()]; - bufLargeItemsets.get(largeItemsets); - - IntBuffer bufLargeItemsetsSupportData = IntBuffer - .allocate(largeItemsetCount * (int) largeItemsetsSupportTable.getNumberOfColumns()); - try { - bufLargeItemsetsSupportData = largeItemsetsSupportTable.getBlockOfRows(0, largeItemsetCount, - bufLargeItemsetsSupportData); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] largeItemsetsSupportData = new int[bufLargeItemsetsSupportData.capacity()]; - bufLargeItemsetsSupportData.get(largeItemsetsSupportData); - - ArrayList> largeItemsetsVector - = new ArrayList>(largeItemsetCount); - - for (int i = 0; i < largeItemsetCount; i++) { - largeItemsetsVector.add(new ArrayList()); - } - - for (int i = 0; i < nItemsInLargeItemsets; i++) { - largeItemsetsVector.get(largeItemsets[2 * i]).add(largeItemsets[2 * i + 1]); - } - - ArrayList supportVector = new ArrayList(largeItemsetCount); - for (int i = 0; i < largeItemsetCount; i++) { - supportVector.add(0); - } - - for (int i = 0; i < largeItemsetCount; i++) { - int index = largeItemsetsSupportData[2 * i]; - supportVector.set(index, largeItemsetsSupportData[2 * i + 1]); - } - - System.out.println("\nApriori example program results"); - System.out.println("\nLast " + nItemsetToPrint + " large itemsets: "); - System.out.println("\nItemset\t\t\tSupport"); - - int iMin = ((largeItemsetCount > nItemsetToPrint) ? largeItemsetCount - nItemsetToPrint : 0); - for (int i = iMin; i < largeItemsetCount; i++) { - System.out.print("{"); - for (int l = 0; l < largeItemsetsVector.get(i).size() - 1; l++) { - System.out.print(largeItemsetsVector.get(i).get(l) + ", "); - } - System.out.print(largeItemsetsVector.get(i).get( - largeItemsetsVector.get(i).size() - 1) + "}\t\t"); - - System.out.println(supportVector.get(i)); - } - } - - public static void printAprioriRules(HomogenNumericTable leftItemsTable, - HomogenNumericTable rightItemsTable, - HomogenNumericTable confidenceTable) { - int nRulesToPrint = 20; - /* Get sizes of tables to store association rules */ - int nLeftItems = (int) leftItemsTable.getNumberOfRows(); - int nRightItems = (int) rightItemsTable.getNumberOfRows(); - int nRules = (int) confidenceTable.getNumberOfRows(); - - /* Get association rules data */ - - IntBuffer bufLeftItems = IntBuffer.allocate( - nLeftItems * (int) leftItemsTable.getNumberOfColumns()); - try { - bufLeftItems = leftItemsTable.getBlockOfRows(0, nLeftItems, bufLeftItems); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] leftItems = new int[bufLeftItems.capacity()]; - bufLeftItems.get(leftItems); - - IntBuffer bufRightItems = IntBuffer.allocate( - nRightItems * (int) rightItemsTable.getNumberOfColumns()); - try { - bufRightItems = rightItemsTable.getBlockOfRows(0, nRightItems, bufRightItems); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] rightItems = new int[bufRightItems.capacity()]; - bufRightItems.get(rightItems); - - FloatBuffer bufConfidence = FloatBuffer.allocate( - nRules * (int) confidenceTable.getNumberOfColumns()); - try { - bufConfidence = confidenceTable.getBlockOfRows(0, nRules, bufConfidence); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - float[] confidence = new float[bufConfidence.capacity()]; - bufConfidence.get(confidence); - - ArrayList> leftItemsVector = new ArrayList>(nRules); - for (int i = 0; i < nRules; i++) { - leftItemsVector.add(new ArrayList()); - } - - if (nRules == 0) { - System.out.println("No association rules were found "); - return; - } - - for (int i = 0; i < nLeftItems; i++) { - leftItemsVector.get((leftItems[2 * i])).add(leftItems[2 * i + 1]); - } - - ArrayList> rightItemsVector = new ArrayList>(nRules); - for (int i = 0; i < nRules; i++) { - rightItemsVector.add(new ArrayList()); - } - - for (int i = 0; i < nRightItems; i++) { - rightItemsVector.get((rightItems[2 * i])).add(rightItems[2 * i + 1]); - } - - ArrayList confidenceVector = new ArrayList(nRules); - for (int i = 0; i < nRules; i++) { - confidenceVector.add(confidence[i]); - } - - System.out.println("\nLast " + nRulesToPrint + " association rules: "); - System.out.println("\nRule" + "\t\t\t\tConfidence"); - - int iMin = ((nRules > nRulesToPrint) ? (nRules - nRulesToPrint) : 0); - for (int i = iMin; i < nRules; i++) { - System.out.print("{"); - for (int l = 0; l < leftItemsVector.get(i).size() - 1; l++) { - System.out.print(leftItemsVector.get(i).get(l) + ", "); - } - System.out.print(leftItemsVector.get(i).get(leftItemsVector.get(i).size() - 1) + "} => {"); - - for (int l = 0; l < rightItemsVector.get(i).size() - 1; l++) { - System.out.print(rightItemsVector.get(i).get(l) + ", "); - } - System.out.print(rightItemsVector.get(i).get(rightItemsVector.get(i).size() - 1) + "}\t\t"); - - System.out.println(confidenceVector.get(i)); - } - } - public static void printALSRatings(NumericTable usersOffsetTable, NumericTable itemsOffsetTable, NumericTable ratings) { long nUsers = ratings.getNumberOfRows();