Merge pull request #11 from tmu-nlp/ritsu

add: chapter01 assignment
tmu-nlp · Apr 21, 2024 · c411c69 · c411c69
2 parents 381fcb8 + 77b3cc3
commit c411c69
Show file tree

Hide file tree

Showing 10 changed files with 152 additions and 0 deletions.
diff --git a/ritsu/chapter01/knock00.py b/ritsu/chapter01/knock00.py
@@ -0,0 +1,4 @@
+# Reverse the string by reading its characters from last to first.
+word = "stressed"
+reversed_word = "".join(reversed(word))
+print(reversed_word)
diff --git a/ritsu/chapter01/knock01.py b/ritsu/chapter01/knock01.py
@@ -0,0 +1,4 @@
+# Keep the characters at even 0-based indices (the 1st, 3rd, 5th and 7th).
+word = "パタトクカシーー"
+selected_word = "".join(ch for idx, ch in enumerate(word) if idx % 2 == 0)
+print(selected_word)
diff --git a/ritsu/chapter01/knock02.py b/ritsu/chapter01/knock02.py
@@ -0,0 +1,9 @@
+word_1 = "パトカー"
+word_2 = "タクシー"
+
+# Interleave the two words character by character: word_1[0], word_2[0],
+# word_1[1], word_2[1], ...  zip() pairs the characters up, so no length is
+# hard-coded; it stops at the end of the shorter word.
+connected_word = "".join(
+    char_1 + char_2 for char_1, char_2 in zip(word_1, word_2))
+print(connected_word)
diff --git a/ritsu/chapter01/knock03.py b/ritsu/chapter01/knock03.py
@@ -0,0 +1,9 @@
+sentence = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
+
+# Split on whitespace, then strip the commas and periods attached to words.
+words = [token.strip(",.") for token in sentence.split()]
+
+# Length of every word, in sentence order.
+word_lengths = list(map(len, words))
+
+print(word_lengths)
diff --git a/ritsu/chapter01/knock04.py b/ritsu/chapter01/knock04.py
@@ -0,0 +1,21 @@
+sentence = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
+
+# Drop the periods and split the sentence into words.
+words = sentence.replace(".", "").split()
+
+# 1-based positions of the words that contribute only their first letter.
+one_letter_positions = {1, 5, 6, 7, 8, 9, 15, 16, 19}
+
+
+def symbol_length(position):
+    """Return how many leading letters the word at *position* contributes."""
+    return 1 if position in one_letter_positions else 2
+
+
+# Map each element symbol to the 1-based position of the word it came from.
+elements = {
+    word[:symbol_length(position)]: position
+    for position, word in enumerate(words, start=1)
+}
+
+print(elements)
diff --git a/ritsu/chapter01/knock05.py b/ritsu/chapter01/knock05.py
@@ -0,0 +1,20 @@
+# n-gram: a technique that extracts n consecutive elements from a sequence.
+
+def n_gram(sequence, n):
+    """Return every length-n window of sequence (a str or list), in order."""
+    if n < 1:
+        # A non-positive n would yield empty or wrapped-around slices.
+        raise ValueError("n must be a positive integer")
+    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]
+
+sentence = "I am an NLPer"
+
+# Word bi-grams: adjacent pairs of whitespace-separated tokens.
+words = sentence.split()
+word_bi_gram = n_gram(words, 2)
+
+# Character bi-grams: spaces are removed first so no pair spans a word gap.
+char_bi_gram = n_gram(sentence.replace(" ", ""), 2)
+
+print("単語bi-gram:", word_bi_gram)
+print("文字bi-gram:", char_bi_gram)
diff --git a/ritsu/chapter01/knock06.py b/ritsu/chapter01/knock06.py
@@ -0,0 +1,35 @@
+def n_gram(sequence, n):
+    """Return every length-n window of sequence (a str or list), in order."""
+    if n < 1:
+        # A non-positive n would yield empty or wrapped-around slices.
+        raise ValueError("n must be a positive integer")
+    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]
+
+sentence_X = "paraparaparadise"
+sentence_Y = "paragraph"
+
+# Turn each string's character bi-grams into a set; duplicates collapse.
+X = set(n_gram(sentence_X, 2))
+Y = set(n_gram(sentence_Y, 2))
+
+# Union: bi-grams found in either string.
+union = X | Y
+
+# Intersection: bi-grams found in both strings.
+intersection = X & Y
+
+# Difference: bi-grams found in X but not in Y.
+difference = X - Y
+
+# Whether the bi-gram 'se' belongs to each set.
+se_in_X = 'se' in X
+se_in_Y = 'se' in Y
+
+# Print the results.
+print("X:", X)
+print("Y:", Y)
+print("和集合:", union)
+print("積集合:", intersection)
+print("差集合:", difference)
+print("'se' in X:", se_in_X)
+print("'se' in Y:", se_in_Y)
diff --git a/ritsu/chapter01/knock07.py b/ritsu/chapter01/knock07.py
@@ -0,0 +1,10 @@
+def generate_txt(x, y, z):
+    """Return the sentence "x時のyはz" built from the three arguments.
+
+    Example: generate_txt(12, "気温", 22.4) returns "12時の気温は22.4".
+    """
+    return "{x}時の{y}は{z}".format(x=x, y=y, z=z)
+
+
+result = generate_txt(x=12, y="気温", z=22.4)
+print(result)
diff --git a/ritsu/chapter01/knock08.py b/ritsu/chapter01/knock08.py
@@ -0,0 +1,17 @@
+def cipher(text):
+    """Encrypt or decrypt text by mirroring the lowercase ASCII letters.
+
+    Each of a-z becomes chr(219 - ord(letter)) ('a' <-> 'z', 'b' <-> 'y', ...);
+    other characters are unchanged, so applying cipher twice restores text.
+    """
+    mirror = {code: 219 - code for code in range(ord("a"), ord("z") + 1)}
+    return text.translate(mirror)
+
+# Usage example: a second pass through cipher() restores the original text.
+original_text = "Hello, World! This is a test message with ONLY lowercase and UPPERCASE."
+encrypted_text = cipher(original_text)
+decrypted_text = cipher(encrypted_text)
+# Uncomment the lines below to display the three strings.
+# print("Original:", original_text)
+# print("Encrypted:", encrypted_text)
+# print("Decrypted:", decrypted_text)
diff --git a/ritsu/chapter01/knock09.py b/ritsu/chapter01/knock09.py
@@ -0,0 +1,23 @@
+import random
+
+def typoglycemia(sentence):
+    """Shuffle the interior letters of every word longer than four characters.
+
+    Words are split on whitespace and rejoined with single spaces; each
+    word keeps its first and last character, and short words are untouched.
+    """
+    def scramble(token):
+        if len(token) <= 4:
+            return token
+        head, *interior, tail = token
+        random.shuffle(interior)  # in-place shuffle of the middle letters
+        return "".join([head, *interior, tail])
+
+    return " ".join(map(scramble, sentence.split()))
+
+# English sentence used to try the function out.
+text = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
+
+result = typoglycemia(text)
+print("Original:", text)
+print("Shuffled:", result)