-
Notifications
You must be signed in to change notification settings - Fork 0
/
mainNeuralClassifier.py
39 lines (34 loc) · 1.63 KB
/
mainNeuralClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import calcFeatures
import calcAuthor
import AUTHORS
from classifiers import neural
# ---- Experiment configuration ----

# Flag forwarded to AUTHORS.getSamplesAndAuthors when the unknown texts are loaded.
debug = True

# Candidate authors. An author's position in this list is the integer class
# label assigned to that author's training samples.
authors = [
    "Jane Austen",
    "Walter Scott",
    "Arthur Conan Doyle",
    "Charles Dickens",
    "Mark Twain",
    "Louisa Alcott",
]

# Training-text directories; entry i corresponds to authors[i].
textDirs = [
    "texts/Jane_Austen",
    "texts/Walter_Scott",
    "texts/Arthur_Doyle",
    "texts/Charles_Dickens",
    "texts/Mark_Twain",
    "texts/Louisa_Alcott",
]

# Directory holding the texts whose authorship is to be predicted.
unknownDir = "texts/Unknown"

# Sample length handed to calcAuthor.CalcAuthorBatch and the AUTHORS helpers
# (the unit is defined by those modules -- TODO confirm).
sampleLength = 5000
# Build the training data one author directory at a time.
#
# featuresCalculated: one entry per directory in textDirs, each being the
#                     result of CalcAuthorBatch.calcFeatures() for that directory.
# featuresAuthors:    parallel list of integer label vectors; entry i holds
#                     getNumSamples() copies of i, the author's index in textDirs.
featuresCalculated = []
featuresAuthors = []
for authorNum, directory in enumerate(textDirs):
    # NOTE(review): the meaning of the positional True/False flags is defined
    # by calcAuthor.CalcAuthorBatch and is not visible in this file.
    c = calcAuthor.CalcAuthorBatch(directory, True, sampleLength, False)
    # calcFeatures() is called before getNumSamples(), as in the original order.
    featuresCalculated.append(c.calcFeatures())
    # Builtin int replaces the np.int alias, which was deprecated in NumPy 1.20
    # and removed in 1.24; the resulting dtype is the same.
    featuresAuthors.append(np.full(c.getNumSamples(), authorNum, dtype=int))
# Merge the per-author arrays into a single training set.
# samples: all feature samples; sampleAuthors: the matching author labels.
samples = np.concatenate(featuresCalculated)
sampleAuthors = np.concatenate(featuresAuthors)

# The network is constructed from sum(calcAuthor.LENGTHS) and the number of
# authors (presumably input width and output class count -- confirm against
# classifiers.neural).
classifier = neural.NeuralNetworkClassifier(sum(calcAuthor.LENGTHS), len(textDirs))
classifier.train(samples, sampleAuthors)

# Accuracy on the very samples the classifier was trained on.
trainingAccuracy = classifier.testAccuracy(samples, sampleAuthors)

# Accuracy on the held-out texts. unknownDir is used here (instead of a second
# hard-coded "texts/Unknown") so this call and calcAttributions below always
# read the same directory.
testSamples, testSamplesAuthors = AUTHORS.getSamplesAndAuthors(unknownDir, authors, debug, sampleLength)
unknownAccuracy = classifier.testAccuracy(testSamples, testSamplesAuthors)

# Report. A single parenthesised argument prints identically under the
# Python 2 print statement and also parses as a Python 3 print() call.
# The accuracies are scaled by 100 for display (assumes testAccuracy returns
# a fraction in [0, 1] -- TODO confirm).
print(AUTHORS.calcAttributions(unknownDir, authors, sampleLength, classifier))
print(str(round(trainingAccuracy*100)) + "% Accuracy on short training samples with noise")
print(str(round(unknownAccuracy*100)) + "% Accuracy on short unknown texts with noise")