Skip to content

Commit

Permalink
Merge pull request #11 from tmu-nlp/ritsu
Browse files Browse the repository at this point in the history
add: chapter01 assignment
  • Loading branch information
kiyama-hajime authored Apr 21, 2024
2 parents 381fcb8 + 77b3cc3 commit c411c69
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ritsu/chapter01/knock00.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
word = "stressed"
# Walk the characters from last to first and glue them back together,
# which yields the string spelled backwards.
reversed_word = "".join(reversed(word))
print(reversed_word)
4 changes: 4 additions & 0 deletions ritsu/chapter01/knock01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
word = "パタトクカシーー"
# Keep the characters at even indices (0, 2, 4, ...), i.e. every other
# character starting from the first one.
selected_word = "".join(word[i] for i in range(0, len(word), 2))
print(selected_word)
9 changes: 9 additions & 0 deletions ritsu/chapter01/knock02.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
word_1 = "パトカー"
word_2 = "タクシー"

# Interleave the two words character by character. zip() pairs the characters
# positionally, so no hard-coded length is needed, and join() builds the result
# in one pass instead of repeated string concatenation.
connected_word = "".join(a + b for a, b in zip(word_1, word_2))
print(connected_word)
9 changes: 9 additions & 0 deletions ritsu/chapter01/knock03.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
sentence = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."

# Remove the commas and the full stop so they are not counted as letters,
# then split the remaining text on whitespace.
without_commas = sentence.replace(',', '')
without_punctuation = without_commas.replace('.', '')
words = without_punctuation.split()

# Length of every word, in order of appearance.
word_lengths = list(map(len, words))

print(word_lengths)
21 changes: 21 additions & 0 deletions ritsu/chapter01/knock04.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
sentence = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."

# Strip the full stops and split into words.
words = sentence.replace(".", "").split()

# 1-based positions of the words that contribute only their first letter;
# every other word contributes its first two letters.
one_letter_positions = {1, 5, 6, 7, 8, 9, 15, 16, 19}

# Map each extracted symbol to the 1-based position of the word it came from.
elements = {
    (word[0] if index in one_letter_positions else word[:2]): index
    for index, word in enumerate(words, start=1)
}

print(elements)
20 changes: 20 additions & 0 deletions ritsu/chapter01/knock05.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# n-gram: every run of n consecutive elements extracted from a sequence.

def n_gram(sequence, n):
    """Return all contiguous length-n slices of a sequence.

    Args:
        sequence: A sliceable sequence with a length (e.g. a str or a list).
        n: Window size; must be at least 1.

    Returns:
        A list of the slices ``sequence[i:i + n]`` in positional order. The
        list is empty when the sequence is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is less than 1. Without this check a
            non-positive ``n`` would silently produce empty slices.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")
    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]

sentence = "I am an NLPer"

# Word bi-grams: split on whitespace, then slide a window of two words.
words = sentence.split()
word_bi_gram = n_gram(words, 2)

# Character bi-grams: drop the spaces first so that no bi-gram contains one.
sentence_without_spaces = "".join(sentence.split(" "))
char_bi_gram = n_gram(sentence_without_spaces, 2)

for label, grams in (("単語bi-gram:", word_bi_gram), ("文字bi-gram:", char_bi_gram)):
    print(label, grams)
35 changes: 35 additions & 0 deletions ritsu/chapter01/knock06.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
def n_gram(sequence, n):
    """Return all contiguous length-n slices of a sequence.

    Args:
        sequence: A sliceable sequence with a length (e.g. a str or a list).
        n: Window size; must be at least 1.

    Returns:
        A list of the slices ``sequence[i:i + n]`` in positional order. The
        list is empty when the sequence is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is less than 1. Without this check a
            non-positive ``n`` would silently produce empty slices.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")
    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]

sentence_X = "paraparaparadise"
sentence_Y = "paragraph"

# Character bi-grams of each string; building a set removes duplicates.
X = set(n_gram(sentence_X, 2))
Y = set(n_gram(sentence_Y, 2))

# Set algebra on the two bi-gram sets.
union = X.union(Y)                # elements in X or Y
intersection = X.intersection(Y)  # elements in both X and Y
difference = X.difference(Y)      # elements in X but not in Y

# Whether the bi-gram 'se' occurs in each set.
se_in_X = {'se'}.issubset(X)
se_in_Y = {'se'}.issubset(Y)

# Report every result, one labelled line each.
report = (
    ("X:", X),
    ("Y:", Y),
    ("和集合:", union),
    ("積集合:", intersection),
    ("差集合:", difference),
    ("'se' in X:", se_in_X),
    ("'se' in Y:", se_in_Y),
)
for label, value in report:
    print(label, value)
10 changes: 10 additions & 0 deletions ritsu/chapter01/knock07.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
def generate_txt(x, y, z):
    """Build the sentence "x時のyはz" from three values.

    Args:
        x: Value placed before "時の".
        y: Value placed before the particle "は".
        z: Value placed at the end of the sentence.

    Returns:
        The formatted string; for example ``generate_txt(12, "気温", 22.4)``
        returns ``"12時の気温は22.4"``.
    """
    # The particle "は" between y and z is part of the template; without it
    # y and z run together (e.g. "気温22.4").
    return f"{x}時の{y}は{z}"

# Example call with the values x=12, y="気温", z=22.4.
result = generate_txt(x=12, y="気温", z=22.4)
print(result)
17 changes: 17 additions & 0 deletions ritsu/chapter01/knock08.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Translation table for cipher(): maps the code point of each ASCII lowercase
# letter c to chr(219 - ord(c)), i.e. 'a' <-> 'z', 'b' <-> 'y', and so on.
# Built once so every call is a single str.translate pass.
_CIPHER_TABLE = {code: chr(219 - code) for code in range(ord("a"), ord("z") + 1)}


def cipher(text):
    """Encrypt or decrypt text by mirroring ASCII lowercase letters.

    Each character in 'a'..'z' is replaced by the character whose code is
    (219 - original code); every other character is left unchanged. Applying
    the function twice returns the original text, so the same function both
    encrypts and decrypts.

    Args:
        text: The string to transform.

    Returns:
        The transformed string, the same length as ``text``.
    """
    return text.translate(_CIPHER_TABLE)

# Usage example: encrypt a sample sentence, then run the result through the
# same function again to decrypt it.
original_text = (
    "Hello, World! This is a test message "
    "with ONLY lowercase and UPPERCASE."
)
encrypted_text = cipher(original_text)
decrypted_text = cipher(encrypted_text)

# Output is disabled; uncomment the lines below to inspect the three strings.
# print("Original:", original_text)
# print("Encrypted:", encrypted_text)
# print("Decrypted:", decrypted_text)
23 changes: 23 additions & 0 deletions ritsu/chapter01/knock09.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import random

def _scramble_word(word, rng):
    """Shuffle the interior characters of one word, keeping both ends fixed."""
    # Words of four characters or fewer are returned untouched.
    if len(word) <= 4:
        return word
    middle_chars = list(word[1:-1])
    rng.shuffle(middle_chars)
    return word[0] + "".join(middle_chars) + word[-1]


def typoglycemia(sentence, *, rng=None):
    """Randomly reorder the interior characters of each word in a sentence.

    The sentence is split on whitespace. For every word longer than four
    characters the first and last characters stay in place and the characters
    between them are shuffled; shorter words are left unchanged. The words are
    then joined back together with single spaces.

    Args:
        sentence: The text to scramble.
        rng: Optional object providing a ``shuffle(list)`` method, such as a
            seeded ``random.Random`` instance, for reproducible output.
            Defaults to the module-level ``random`` generator.

    Returns:
        The scrambled sentence with words separated by single spaces.
    """
    if rng is None:
        rng = random
    return " ".join(_scramble_word(word, rng) for word in sentence.split())

# English sample sentence used to try the function out.
text = (
    "I couldn't believe that I could actually understand what I was reading : "
    "the phenomenal power of the human mind ."
)

result = typoglycemia(text)
for label, shown in (("Original:", text), ("Shuffled:", result)):
    print(label, shown)

0 comments on commit c411c69

Please sign in to comment.