Skip to content

Commit

Permalink
Merge pull request #72 from tmu-nlp/naoki
Browse files Browse the repository at this point in the history
Naoki
  • Loading branch information
kiyama-hajime authored May 21, 2024
2 parents 13af179 + 945d999 commit cfd8ecf
Show file tree
Hide file tree
Showing 12 changed files with 10,097 additions and 0 deletions.
24 changes: 24 additions & 0 deletions naoki/chapter04/knock30.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Parse MeCab output into `morphemes`: a list of sentences, where each
# sentence is a list of dicts with the keys "surface", "base", "pos", "pos1".
# NOTE(review): no encoding is passed to open(), so the platform default is
# used -- confirm it matches how neko.txt.mecab was written.
with open("C:/Users/shish_sf301y1/Desktop/pyファイル/neko.txt.mecab", "r") as f:
    morphemes = []
    neko_list = []
    # Iterate the file lazily instead of materialising it with readlines().
    for line in f:
        # Strip the line terminator so it cannot leak into the last feature.
        fields = line.rstrip("\n").split("\t")
        # "EOS" markers and blank lines carry no tab-separated feature column;
        # skipping them here also avoids an IndexError on fields[1].
        if len(fields) < 2:
            continue
        surface = fields[0]
        # fields[1] looks like: 名詞,普通名詞,副詞可能,,,,トキドキ,時々,時々,...
        features = fields[1].split(",")
        # In the sample above index 7 holds 時々, which is used as the base
        # form; entries with too few feature fields fall back to the surface.
        base = features[7] if len(features) > 7 else surface
        neko_list.append({
            "surface": surface,
            "base": base,
            "pos": features[0],
            "pos1": features[1],
        })
        # A full stop closes the current sentence.
        if surface == "。":
            morphemes.append(neko_list)
            neko_list = []
morphemes
24 changes: 24 additions & 0 deletions naoki/chapter04/knock31.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build `morphemes` from the MeCab output file: one list per sentence, each
# holding dicts with the keys "surface", "base", "pos" and "pos1".
# NOTE(review): open() is called without an encoding, so the platform default
# applies -- confirm it matches the encoding of neko.txt.mecab.
with open("C:/Users/shish_sf301y1/Desktop/pyファイル/neko.txt.mecab", "r") as f:
    morphemes = []
    neko_list = []
    # Read line by line; the whole file does not need to be held in memory.
    for line in f:
        # Remove the newline first so the final feature field stays clean.
        columns = line.rstrip("\n").split("\t")
        # Lines without a tab ("EOS", blank lines) have no feature column and
        # would otherwise raise IndexError on columns[1].
        if len(columns) < 2:
            continue
        surface = columns[0]
        # columns[1] looks like: 名詞,普通名詞,副詞可能,,,,トキドキ,時々,時々,...
        features = columns[1].split(",")
        # Index 7 is 時々 in the sample above and is taken as the base form;
        # shorter feature lists reuse the surface form instead.
        if len(features) > 7:
            base = features[7]
        else:
            base = surface
        neko_list.append({
            "surface": surface,
            "base": base,
            "pos": features[0],
            "pos1": features[1],
        })
        # "。" marks the end of a sentence: store it and start a new one.
        if surface == "。":
            morphemes.append(neko_list)
            neko_list = []
morphemes
7 changes: 7 additions & 0 deletions naoki/chapter04/knock32.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Gather the base form of every morpheme whose pos is "動詞", over all
# sentences in `morphemes`.
suf_list = [
    token["base"]
    for sentence in morphemes
    for token in sentence
    if token["pos"] == "動詞"
]
# Collapse duplicates into a set of distinct base forms.
base_verb = set(suf_list)
base_verb
7 changes: 7 additions & 0 deletions naoki/chapter04/knock33.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Collect "noun + の + noun" phrases by sliding a three-morpheme window over
# each sentence.
suf_list = []
for sentence in morphemes:
    # zip stops at the shortest slice, so the window never runs past the
    # first or last morpheme of the sentence.
    for left, middle, right in zip(sentence, sentence[1:], sentence[2:]):
        if middle['base'] != 'の':
            continue
        if left['pos'] != '名詞' or right['pos'] != '名詞':
            continue
        suf_list.append(left['surface'] + middle['surface'] + right['surface'])
suf_list
16 changes: 16 additions & 0 deletions naoki/chapter04/knock34.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Collect concatenations of two or more consecutive nouns
# (e.g. compounds such as 自然言語処理100本ノック).
suf_list = []
for sentence in morphemes:
    run = []  # surfaces of the noun run currently being built
    for token in sentence:
        if token['pos'] == '名詞':
            run.append(token['surface'])
            continue
        # A non-noun ends the run; keep it only if it spans 2+ nouns.
        if len(run) >= 2:
            suf_list.append(''.join(run))
        run = []
    # Flush a run that reaches the end of the sentence; without this a
    # sentence ending in nouns would silently lose its last compound.
    if len(run) >= 2:
        suf_list.append(''.join(run))
# De-duplicate the collected compounds.
suf_list = set(suf_list)
suf_list
9 changes: 9 additions & 0 deletions naoki/chapter04/knock35.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import collections

# Surface forms of every morpheme whose pos is not "補助記号".
word_list = [
    token['surface']
    for sentence in morphemes
    for token in sentence
    if token['pos'] != '補助記号'
]
# Frequency table, then (word, count) pairs ordered from most to least common.
word_list_count = collections.Counter(word_list)
word_list_rank = word_list_count.most_common()
word_list_rank
11 changes: 11 additions & 0 deletions naoki/chapter04/knock36.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import matplotlib.pyplot as plt
#import japanize_matplotlib
import collections

# Bar chart of the ten most frequent words in `word_list_rank`
# (a list of (word, count) pairs sorted by descending count).
# The `%matplotlib inline` IPython magic was dropped: it is a SyntaxError in a
# plain .py file; plt.show() below renders the figure instead.
word_list_top10 = []
word_list_count = []
# Slicing once also copes with rankings that hold fewer than ten entries.
for word, freq in word_list_rank[:10]:
    word_list_top10.append(word)
    word_list_count.append(freq)
plt.bar(x=word_list_top10, height=word_list_count)
plt.show()

18 changes: 18 additions & 0 deletions naoki/chapter04/knock37.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import collections
import itertools
import matplotlib.pyplot as plt

# Bar chart of the ten words that most often directly follow "猫".
# The `%matplotlib inline` IPython magic was dropped: it is a SyntaxError in a
# plain .py file; plt.show() below renders the figure instead.

# Parts of speech that are not counted as a co-occurring word.
excluded_pos = ("補助記号", '助詞', '助動詞')

related_list = []
for sentence in morphemes:
    # Pair every morpheme with its successor.
    for current, following in zip(sentence, sentence[1:]):
        if current['surface'] == '猫' and following["pos"] not in excluded_pos:
            related_list.append(following['surface'])

# Count whole words. Flattening the strings with chain.from_iterable would
# split every surface form into single characters and count those instead.
count_list = collections.Counter(related_list)
print(count_list)

word_list = []
height_list = []
# most_common(10) is computed once and is safe with fewer than ten words.
for word, freq in count_list.most_common(10):
    word_list.append(word)
    height_list.append(freq)
plt.bar(x=word_list, height=height_list)
plt.show()
7 changes: 7 additions & 0 deletions naoki/chapter04/knock38.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import collections  # was used below without being imported
import matplotlib.pyplot as plt

# Histogram of word frequencies: how many distinct words occur n times.
word_list = [
    token['surface']
    for sentence in morphemes
    for token in sentence
]
hist = collections.Counter(word_list)
# range(1, 30) supplies the bin edges 1..29, so frequencies above that range
# are not drawn.
plt.hist(list(hist.values()), range(1, 30))
plt.show()
7 changes: 7 additions & 0 deletions naoki/chapter04/knock39.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import collections  # was used below without being imported
import matplotlib.pyplot as plt

# NOTE(review): this script is identical to knock38.py (a frequency
# histogram) -- confirm that this is the plot intended for knock39.
# Count how often each surface form occurs across all sentences.
word_list = []
for sentence in morphemes:
    word_list.extend(token['surface'] for token in sentence)
hist = collections.Counter(word_list)
# Bin edges run from 1 to 29; higher frequencies fall outside the plot.
plt.hist(list(hist.values()), range(1, 30))
plt.show()
Loading

0 comments on commit cfd8ecf

Please sign in to comment.