You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
path = r"C:\Users\mauro\Google Drive\Análise de Dados\Python\Livro_DS_Zero\Spam\20021010_spam"
data = []
# regex for stripping out the leading "Subject:" and any spaces after it
subject_regex = re.compile(r"^Subject:\s+")
# glob.glob returns every filename that matches the wildcarded path
for fn in glob.glob(path):
is_spam = "ham" not in fn
with open(fn,'r') as file:
for line in file:
if line.startswith("Subject:"):
subject = subject_regex.sub("", line).strip()
data.append((subject, is_spam))
return data
data = get_subject_data(path)
random.seed(0) # just so you get the same answers as me
train_data, test_data = split_data(data, 0.75)
classifier = NaiveBayesClassifier()
classifier.train(train_data)
classified = [(subject, is_spam, classifier.classify(subject))
for subject, is_spam in test_data]
counts = Counter((is_spam, spam_probability > 0.5) # (actual, predicted)
for _, is_spam, spam_probability in classified)
print(counts)
`
The text was updated successfully, but these errors were encountered:
I can't get the counts value.
`#TESTANDO O MODELO
def get_subject_data(path):
def p_spam_given_word(word_prob):
word, prob_if_spam, prob_if_not_spam = word_prob
return prob_if_spam / (prob_if_spam + prob_if_not_spam)
def train_and_test_model(path):
`
The text was updated successfully, but these errors were encountered: