-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
71 lines (71 loc) · 2.79 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Session setup: help lookup, package loading, sourcing the custom DBSCAN
# code, and reading the train/test samples. This is an interactive history,
# so several commands below are repeated or retried.
?mmnorm
library("fpc")
library("data.table")
library("plyr")
# NOTE(review): the installs come after the first library() calls, which
# suggests the packages were missing at that point -- confirm before replaying.
install.packages("plyr")
install.packages("data.table")
library("data.table")
library("plyr")
# modified_dbscan.R is sourced once before and once after the setwd() call.
# NOTE(review): presumably the first attempt failed because the working
# directory was wrong -- confirm. setwd() uses a path relative to wherever the
# session was started.
source("modified_dbscan.R")
setwd("CS283COllab")
source("modified_dbscan.R")
# Three reads of ids10000_0.data from different relative paths; each one
# assigns kddtrain, so only the last successful read determines its value.
# NOTE(review): the first two paths look like failed attempts -- verify.
kddtrain <- read.table("./RandomPieces_10000/ids10000_0.data",header=FALSE,sep=",")
kddtrain <- read.table("../CS283_Mini_proj/RandomPieces_10000/ids10000_0.data",header=FALSE,sep=",")
kddtrain <- read.table("../CS283_MiniProj/RandomPieces_10000/ids10000_0.data",header=FALSE,sep=",")
kddtest <- read.table("../CS283_MiniProj/RandomPieces_10000/ids10000_1.data",header=FALSE,sep=",")
# Drop column 42 from both tables (presumably the class label, given that the
# same column is stored in `classes` just below).
traindata <- kddtrain[, -42]
testdata <- kddtest[, -42]
# Single-bracket indexing keeps column 42 as a one-column data frame.
classes <- kddtrain[42]
# Remove the column name from that one-column data frame.
names(classes) <- NULL
#' Min-max normalise a vector to the range [0, 1]
#'
#' Missing values are ignored when the minimum and maximum are computed and
#' stay missing in the rescaled result.
#'
#' @param x A vector to rescale (numeric in the intended use).
#' @return A vector the same length as `x`: `(x - min) / (max - min)` in the
#'   general case; all zeros when every non-missing value is identical (this
#'   includes the positions that were missing, as before); all `NA` when `x`
#'   has no non-missing values.
minmaxnorm <- function(x) {
  observed <- x[!is.na(x)]

  # With nothing to measure, max()/min() would fall back to -Inf/Inf and emit
  # warnings; return the same all-NA (or empty) result without the noise.
  if (length(observed) == 0L) {
    return(rep(NA_real_, length(x)))
  }

  bounds <- range(observed)
  minimum <- bounds[1]
  maximum <- bounds[2]

  # A constant vector has zero spread; dividing would give NaN, so map to 0.
  if (maximum == minimum) {
    return(rep(0, length(x)))
  }

  (x - minimum) / (maximum - minimum)
}
# eps value handed to mydbscan() by the parameter sweep that follows.
eps <- 0.1
# Apply minmaxnorm to every column of the training features and keep the
# rescaled result as a data frame.
normtraindata <- as.data.frame(apply(traindata, MARGIN = 2, FUN = minmaxnorm))
# Sweep MinPts from 2 to 100 at a fixed eps. For each setting, fit the model,
# print it, and report for every cluster the class with the highest count
# among the training points assigned to that cluster.
for (minpts in 2:100) {
  model <- mydbscan(normtraindata, 9, eps = eps, MinPts = minpts,
                    method = "raw", showplot = 1)
  clusters <- predict(model, normtraindata)
  print(model)

  # Pair each point's cluster with its class label. as.data.frame() ignores
  # `col.names` for matrix/data-frame input, so the columns are named
  # explicitly; the ordering and counting steps below look them up by name.
  # NOTE(review): assumes predict() yields one cluster value per row -- confirm.
  clusterAssignments <- as.data.frame(cbind(clusters, classes))
  names(clusterAssignments) <- c("clusters", "classes")
  cat(dim(clusterAssignments))

  row_order <- with(clusterAssignments, order(clusters, classes))
  clusterAssignments <- clusterAssignments[row_order, ]

  # One row per (cluster, class) pair with its frequency in column `freq`.
  clusterAssignments <- plyr::count(clusterAssignments,
                                    c("clusters", "classes"))

  # Highest class count within each cluster. The original referred to an
  # undefined `clustsum` here; the counted table above is what is needed.
  trainingclusters <- aggregate(freq ~ clusters, clusterAssignments, max)
  oldnames <- colnames(trainingclusters)

  # Merging on the shared columns (clusters, freq) recovers which class
  # reached that count; tied classes yield several rows for one cluster.
  trainingclusters <- merge(trainingclusters, clusterAssignments)
  colnames(trainingclusters) <- c(oldnames, "class")
  trainingclusters <- trainingclusters[, c("clusters", "class", "freq")]

  cat("\n\nPredicted Classes:\n")
  print(trainingclusters)
}
# Re-run of the MinPts sweep (2 to 100 at a fixed eps). For each setting, fit
# the model, print it, and report for every cluster the class with the
# highest count among the training points assigned to that cluster.
for (minpts in 2:100) {
  model <- mydbscan(normtraindata, 9, eps = eps, MinPts = minpts,
                    method = "raw", showplot = 1)
  clusters <- predict(model, normtraindata)
  print(model)

  # Pair each point's cluster with its class label. as.data.frame() ignores
  # `col.names` for matrix/data-frame input, so the columns are named
  # explicitly; the ordering and counting steps below look them up by name.
  # NOTE(review): assumes predict() yields one cluster value per row -- confirm.
  clusterAssignments <- as.data.frame(cbind(clusters, classes))
  names(clusterAssignments) <- c("clusters", "classes")
  cat(dim(clusterAssignments))

  row_order <- with(clusterAssignments, order(clusters, classes))
  clusterAssignments <- clusterAssignments[row_order, ]

  # One row per (cluster, class) pair with its frequency in column `freq`.
  clusterAssignments <- plyr::count(clusterAssignments,
                                    c("clusters", "classes"))

  # Highest class count within each cluster. The original referred to an
  # undefined `clustsum` here; the counted table above is what is needed.
  trainingclusters <- aggregate(freq ~ clusters, clusterAssignments, max)
  oldnames <- colnames(trainingclusters)

  # Merging on the shared columns (clusters, freq) recovers which class
  # reached that count; tied classes yield several rows for one cluster.
  trainingclusters <- merge(trainingclusters, clusterAssignments)
  colnames(trainingclusters) <- c(oldnames, "class")
  trainingclusters <- trainingclusters[, c("clusters", "class", "freq")]

  cat("\n\nPredicted Classes:\n")
  print(trainingclusters)
}