forked from JasonKessler/scattertext
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo_cohens_d.py
23 lines (22 loc) · 886 Bytes
/
demo_cohens_d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import scattertext as st
# Load the sample 2012 convention data shipped under st.SampleCorpora.
convention_df = st.SampleCorpora.ConventionData2012.get_data()

# Build a corpus keyed on the 'party' column from the 'text' column, then
# narrow it to its unigram form.
corpus_factory = st.CorpusFromPandas(
    convention_df,
    category_col='party',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
)
corpus = corpus_factory.build().get_unigram_corpus()

# Score terms with Cohen's d, treating 'democrat' as the focus category and
# 'republican' as the contrast, and show the highest-scoring rows.
term_scorer = st.CohensD(corpus).set_categories('democrat', ['republican'])
score_df = term_scorer.get_score_df()
print(score_df.sort_values(by='cohens_d', ascending=False).head())
# Render the interactive frequency explorer for the Democratic vs. Republican
# comparison. A separate CohensD instance (without categories pre-set) is
# handed to the explorer, exactly as before; speaker names are attached as
# per-document metadata.
html = st.produce_frequency_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    term_scorer=st.CohensD(corpus),
    metadata=convention_df['speaker'],
    grey_threshold=0,
)

file_name = 'demo_cohens_d.html'
# Write the page as UTF-8 bytes inside a context manager so the handle is
# flushed and closed deterministically instead of relying on garbage
# collection of an anonymous file object.
with open(file_name, 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
# Build the hint from file_name so the message cannot drift from the path
# actually written.
print('Open ./{} in Chrome or Firefox.'.format(file_name))