-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from tmu-nlp/ritsu
add: chapter01 assignment
- Loading branch information
Showing
10 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
word = "stressed"
# A slice step of -1 (sequence[start:stop:step]) walks the string backwards,
# which yields the characters in reverse order.
reversed_word = word[::-1]
print(reversed_word)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
word = "パタトクカシーー"
# A slice step of 2 keeps every other character, starting with the first.
selected_word = word[::2]
print(selected_word)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
word_1 = "パトカー"
word_2 = "タクシー"

# Interleave the two words character by character. zip() pairs the characters
# positionally, so the word length is not hard-coded, and str.join builds the
# result in one pass instead of repeated string concatenation.
connected_word = "".join(
    char_1 + char_2 for char_1, char_2 in zip(word_1, word_2)
)
print(connected_word)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
sentence = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."

# Remove commas and periods in a single pass, then split on whitespace.
words = sentence.translate(str.maketrans("", "", ",.")).split()

# Number of characters in each word, in sentence order.
word_lengths = [len(word) for word in words]

print(word_lengths)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
sentence = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."

# Drop the periods and split into words.
words = sentence.replace(".", "").split()

# 1-based positions of the words that contribute only their first letter;
# every other word contributes its first two letters.
one_letter_positions = {1, 5, 6, 7, 8, 9, 15, 16, 19}

# Map each extracted symbol to the 1-based position of its word.
elements = {
    word[: 1 if index in one_letter_positions else 2]: index
    for index, word in enumerate(words, start=1)
}

print(elements)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# n-gram: every run of n consecutive elements taken from a given sequence.


def n_gram(sequence, n: int) -> list:
    """Return all contiguous length-n slices of a sequence, in order.

    Args:
        sequence: The input sequence; it must support len() and slicing
            (for example a str or a list).
        n: Number of consecutive elements in each n-gram; must be >= 1.

    Returns:
        A list of slices of ``sequence``, each of length n. The list is empty
        when ``sequence`` has fewer than n elements.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # Without this guard n == 0 yields len(sequence) + 1 empty slices and
        # a negative n yields slices of inconsistent lengths.
        raise ValueError(f"n must be a positive integer, got {n!r}")
    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]
|
||
sentence = "I am an NLPer"

# Word bi-grams: split on whitespace, then take pairs of adjacent words.
words = sentence.split()
word_bi_gram = n_gram(words, 2)

# Character bi-grams: the spaces are removed first, so pairs are formed over
# the letters only.
char_bi_gram = n_gram(sentence.replace(" ", ""), 2)

print("単語bi-gram:", word_bi_gram)
print("文字bi-gram:", char_bi_gram)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
def n_gram(sequence, n: int) -> list:
    """Return all contiguous length-n slices of a sequence, in order.

    Args:
        sequence: The input sequence; it must support len() and slicing
            (for example a str or a list).
        n: Number of consecutive elements in each n-gram; must be >= 1.

    Returns:
        A list of slices of ``sequence``, each of length n. The list is empty
        when ``sequence`` has fewer than n elements.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # Without this guard n == 0 yields len(sequence) + 1 empty slices and
        # a negative n yields slices of inconsistent lengths.
        raise ValueError(f"n must be a positive integer, got {n!r}")
    return [sequence[i:i + n] for i in range(len(sequence) - n + 1)]
|
||
sentence_X = "paraparaparadise"
sentence_Y = "paragraph"

# Converting the bi-gram lists to sets removes duplicate bi-grams.
X = set(n_gram(sentence_X, 2))
Y = set(n_gram(sentence_Y, 2))

# Union: bi-grams that appear in X or Y.
union = X | Y

# Intersection: bi-grams that appear in both X and Y.
intersection = X & Y

# Difference: bi-grams in X that are not in Y.
difference = X - Y

# Whether the bi-gram 'se' is a member of each set.
se_in_X = 'se' in X
se_in_Y = 'se' in Y

# Report the results.
print("X:", X)
print("Y:", Y)
print("和集合:", union)
print("積集合:", intersection)
print("差集合:", difference)
print("'se' in X:", se_in_X)
print("'se' in Y:", se_in_Y)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
def generate_txt(x, y, z):
    """Return the sentence "<x>時の<y>は<z>" built from the three arguments.

    Each argument is interpolated with its default string formatting.

    Args:
        x: Value placed before "時の" (12 in the file's own example).
        y: Value placed between "時の" and "は" ("気温" in the example).
        z: Value placed after "は" (22.4 in the example).

    Returns:
        The formatted string.
    """
    return f"{x}時の{y}は{z}"
|
||
# Example call: x=12, y="気温", z=22.4.
result = generate_txt(12, "気温", 22.4)
print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
def cipher(text):
    """Encrypt or decrypt text by mirroring the lowercase letters a-z.

    Each character from 'a' to 'z' is replaced by the character whose code
    point is 219 minus its own (ord('a') + ord('z') == 219, so 'a' <-> 'z',
    'b' <-> 'y', ...). Every other character is left unchanged. Applying the
    function twice returns the original text.

    Args:
        text: The string to transform.

    Returns:
        The transformed string, the same length as ``text``.
    """
    # Translation table: code point of each lowercase letter -> its mirror.
    mirror_table = {
        code: 219 - code for code in range(ord("a"), ord("z") + 1)
    }
    return text.translate(mirror_table)
|
||
# Usage example: encrypt a sample text, then apply the cipher again to
# decrypt it.
original_text = "Hello, World! This is a test message with ONLY lowercase and UPPERCASE."
encrypted_text = cipher(original_text)
decrypted_text = cipher(encrypted_text)

# Printing is currently disabled.
# print("Original:", original_text)
# print("Encrypted:", encrypted_text)
# print("Decrypted:", decrypted_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import random | ||
|
||
def typoglycemia(sentence):
    """Randomly reorder the interior letters of each sufficiently long word.

    The text is split on whitespace. Every word longer than four characters
    keeps its first and last character while the characters in between are
    shuffled at random; words of four characters or fewer are left as they
    are. The words are re-joined with single spaces, so the original spacing
    between words is not preserved.

    Args:
        sentence: The text to process.

    Returns:
        The processed text, with words separated by single spaces.
    """
    shuffled_words = []
    for word in sentence.split():
        if len(word) > 4:
            # random.shuffle works in place, so it needs a mutable list.
            middle_chars = list(word[1:-1])
            random.shuffle(middle_chars)
            word = word[0] + "".join(middle_chars) + word[-1]
        shuffled_words.append(word)
    return " ".join(shuffled_words)
|
||
# English sample sentence used to try out the function.
text = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."

result = typoglycemia(text)
print("Original:", text)
print("Shuffled:", result)