Skip to content

Commit

Permalink
Merge pull request #10 from zeeguu-ecosystem/refactoring_the_refactoring
Browse files Browse the repository at this point in the history
Refactoring the refactoring
  • Loading branch information
joelgrondman authored May 23, 2018
2 parents 29c54e9 + bc2a993 commit 7ce271b
Show file tree
Hide file tree
Showing 23 changed files with 278 additions and 278 deletions.
Empty file added portability/__init__.py
Empty file.
File renamed without changes.
4 changes: 2 additions & 2 deletions wordstats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from .word_info import WordInfo
from .language_info import LanguageInfo
from .cognate_info import CognateInfo
from .getchunix import _Getch
from .edit_distance_function_factory import WordDistanceFactory
from portability.getchunix import _Getch
from .edit_distance_function_factory import WordDistance

# Create all tables in the engine. equivalent to "Create Table" in SQL
Base.metadata.create_all(BaseService.engine)
Expand Down
16 changes: 0 additions & 16 deletions wordstats/cognate_automatic_evaluation.py

This file was deleted.

67 changes: 0 additions & 67 deletions wordstats/cognate_files_path.py

This file was deleted.

56 changes: 32 additions & 24 deletions wordstats/cognate_info.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,22 @@
import codecs
import os
from datetime import datetime

from python_translators.translators.glosbe_translator import Translator
from python_translators.translators.glosbe_over_tor_translator import GlosbeOverTorTranslator
from python_translators.translation_query import TranslationQuery

from sqlalchemy import Table
import configparser

from wordstats.loading_from_hermit import load_language_from_hermit
from wordstats.file_handling.file_operations import *
from wordstats.file_handling.loading_from_hermit import *
from wordstats.file_handling.cognate_files_path import *

from .word_info import WordInfo, UnknownWordInfo
from .utils.mem_footprint import total_size
from .base_service import BaseService, Base
from .config import MAX_WORDS, SEPARATOR_PRIMARY, SEPARATOR_SECONDARY
from .metrics_computers import *
from .cognate_files_path import *
from .base_service import BaseService
from .config import SEPARATOR_PRIMARY, SEPARATOR_SECONDARY

from .cognate_db import *
from .getchunix import read_single_keypress
from .edit_distance_function_factory import WordDistanceFactory
from .edit_distance_function_factory import WordDistance
from collections import defaultdict

from time import sleep


class CognateInfo(object):
"""
Expand All @@ -37,7 +30,7 @@ class CognateInfo(object):
"""

def __init__(self, primary, secondary, distance_computer_class: WordDistanceFactory, author:str = ""):
def __init__(self, primary, secondary, distance_computer_class: WordDistance, author:str = ""):
"""
either load from file, or compute if needed
Expand All @@ -62,10 +55,15 @@ def best_guess(self):
return best_dict

# generates candidates based on distance function func and word lists
# def apply_distance_metric(self, wordlist1, wordlist2, func):
def generate_candidates(self):
"""
Generates candidates by comparing each possible pair of words in two
language lists
def compute(self):
...
:return:
"""
wordlist1 = list(load_language_from_hermit(self.primary).word_info_dict.keys())
wordlist2 = list(load_language_from_hermit(self.secondary).word_info_dict.keys())

Expand All @@ -76,9 +74,20 @@ def compute(self):
if self.distance_computer.is_candidate(w1, w2):
self.candidates[w1].append(w2)


def compute_translator(self, translator:Translator, save:Boolean = False):

# generate candidates and automatically evaluates candidates to be cognates
# optionally save candidates to database as they are found
def generate_candidates_translator(self, translator:Translator, save:Boolean = False):
"""
Generates candidates by translating words through an API
the translations are stored in candidates
automatically evaluates candidates to be cognates
optionally save candidates to database as they are found
...
:param: A translator. See python_translators repository for options
:param: Boolean for toggling saving candidate/evaluation after each translation
:return:
"""
wordlist = set(load_language_from_hermit(self.primary).word_info_dict.keys())

translator = translator(source_language=self.primary, target_language=self.secondary)
Expand Down Expand Up @@ -134,7 +143,7 @@ def add_to_blacklist(self, primaryWord, secondaryWord):
# ========================

@classmethod
def load_cached(cls, primary, secondary, distance_computer_class: WordDistanceFactory, author:str = ""):
def load_cached(cls, primary, secondary, distance_computer_class: WordDistance, author:str = ""):

new_registry = cls.load_from_db(primary, secondary,
distance_computer_class, author)
Expand All @@ -151,7 +160,6 @@ def load_cached(cls, primary, secondary, distance_computer_class: WordDistanceFa

new_registry = cls(primary, secondary,
distance_computer_class, author)
new_registry.compute() # compute candidates

return new_registry

Expand Down Expand Up @@ -326,7 +334,7 @@ def add_candidate_to_db(self, primaryWord, secondaryWord):
Try to add one cognate pair to candidate db.
:param primaryWord: word from primary language
:param secondaryWord: word from secundary language
:param secondaryWord: word from secondary language
:return: None
"""
try:
Expand All @@ -343,7 +351,7 @@ def add_to_db(self, primaryWord, secondaryWord, whitelist: Boolean):
Try to add one cognate pair to evaluation db.
:param primaryWord: word from primary language
:param secondaryWord: word from secundary language
:param secondaryWord: word from secondary language
:param whitelist: True to add cognate pair to whitelist otherwise blacklist
:return: None
"""
Expand Down
10 changes: 5 additions & 5 deletions wordstats/edit_distance.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .edit_distance_function_factory import WordDistanceFactory
from .edit_distance_function_factory import WordDistance
from nltk.metrics.distance import edit_distance

class LanguageAwareEditDistance(WordDistanceFactory):
class EditDistance(WordDistance):
def __init__(self, primary, secondary, author:str = ""):
super().__init__(primary, secondary, author)
self.method_name = "edit_distance"
Expand All @@ -10,12 +10,12 @@ def __init__(self, primary, secondary, author:str = ""):

def _initialize_distances(self):

# these might change based on the primary secondayr
# these might change based on the primary secondary
self.replace_distance = 1
self.add_distance = 1

def edit_distance_function(self, word1: str, word2: str):

lengthLongest = max(len(word1),len(word2))
length_longest = max(len(word1),len(word2))

return edit_distance(word1, word2)/lengthLongest
return edit_distance(word1, word2)/length_longest
7 changes: 2 additions & 5 deletions wordstats/edit_distance_absolute.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import codecs
import configparser
from .cognate_files_path import *
from .edit_distance_function_factory import WordDistanceFactory
from .edit_distance_function_factory import WordDistance

class WordDistanceAbsolute(WordDistanceFactory):
class WordDistanceAbsolute(WordDistance):
def __init__(self, primary, secondary, author:str = ""):
super().__init__(primary, secondary, author)
self.replace_distance = 2
Expand Down
6 changes: 3 additions & 3 deletions wordstats/edit_distance_function_factory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from wordstats.cognate_files_path import *
import configparser
from wordstats.file_handling.cognate_files_path import *
from wordstats.file_handling.file_operations import *
from .rules_db import TransformRules
from sqlalchemy import Table
from .base_service import BaseService, Base
Expand All @@ -9,7 +9,7 @@
# abstract class for creating a distance measure between two rules
# support for loading from .cfg file and loading rules is supplied
# abstract methods are specific to the distance measure
class WordDistanceFactory(ABC):
class WordDistance(ABC):

def __init__(self, primary, secondary, author:str = ""):
super().__init__()
Expand Down
7 changes: 2 additions & 5 deletions wordstats/edit_distance_overlap.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import codecs
import configparser
from .cognate_files_path import *
from .edit_distance_function_factory import WordDistanceFactory
from .edit_distance_function_factory import WordDistance

class WordDistanceOverlap(WordDistanceFactory):
class WordDistanceOverlap(WordDistance):
def __init__(self, primary, secondary, author:str = ""):
super().__init__(primary, secondary, author)
self.method_name = "overlap"
Expand Down
36 changes: 0 additions & 36 deletions wordstats/edit_distance_translate.py

This file was deleted.

Empty file.
38 changes: 38 additions & 0 deletions wordstats/file_handling/cognate_files_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from wordstats.config import DATA_FOLDER_COGNATES, WHITELIST, BLACKLIST, RULES, CANDIDATES
import os


def path_of_cognate_languages(primary, secondary):
    """
    Build the path of the data folder for a pair of languages.

    The folder is located relative to the parent of this module's
    directory: <module dir>/../<DATA_FOLDER_COGNATES>/<primary><secondary>

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :return: the folder path as a string (nothing is created on disk)
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    package_root = module_dir + os.sep + ".."

    language_pair = "{}{}".format(primary, secondary)
    relative_folder = "{}{}{}".format(DATA_FOLDER_COGNATES, os.sep, language_pair)

    return package_root + os.sep + relative_folder


def path_of_cognate_candidates(primary, secondary, method_name):
    """
    Path of the candidates file for a language pair and distance method.

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :param method_name: name of the method, used as a sub-folder
    :return: path of the CANDIDATES text file
    """
    candidates_path = _path_to_cognate_file(
        primary, secondary, file_name=CANDIDATES, method_name=method_name
    )
    return candidates_path


def path_of_cognate_blacklist(primary, secondary, author: str = ""):
    """
    Path of the blacklist file for a language pair.

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :param author: optional author; when non-empty the file name becomes
                   BLACKLIST + "_" + author
    :return: path of the blacklist text file
    """
    if len(author) == 0:
        file_name = BLACKLIST
    else:
        file_name = BLACKLIST + "_" + author
    return _path_to_cognate_file(primary, secondary, file_name)


def path_of_cognate_whitelist(primary, secondary, author: str = ""):
    """
    Path of the whitelist file for a language pair.

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :param author: optional author; when non-empty the file name becomes
                   WHITELIST + "_" + author
    :return: path of the whitelist text file
    """
    has_author = len(author) != 0
    file_name = (WHITELIST + "_" + author) if has_author else WHITELIST
    return _path_to_cognate_file(primary, secondary, file_name)


def path_of_cognate_rules(primary, secondary, method_name, author: str = ""):
    """
    Path of the rules file for a language pair and distance method.

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :param method_name: name of the method, used as a sub-folder
    :param author: optional author; when non-empty the file name becomes
                   RULES + "_" + author
    :return: path of the rules text file
    """
    if len(author) == 0:
        rules_file = RULES
    else:
        rules_file = RULES + "_" + author
    return _path_to_cognate_file(primary, secondary, rules_file, method_name)


def _path_to_cognate_file(primary, secondary, file_name, method_name=""):
    """
    Build the path of a cognate ".txt" file and make sure its folder exists.

    The result is <language pair folder>/[<method_name>/]<file_name>.txt

    :param primary: code of the primary language
    :param secondary: code of the secondary language
    :param file_name: file name without extension
    :param method_name: optional sub-folder; skipped when empty
    :return: path of the file as a string; the parent directories are
             created as a side effect, the file itself is not
    """
    file_path = path_of_cognate_languages(primary, secondary) + os.sep

    # Test the value, not the identity: `is not ""` depends on string
    # interning and triggers a SyntaxWarning on Python 3.8+.
    if method_name:
        file_path += method_name + os.sep
    file_path += file_name + ".txt"

    # Only the containing folder is created, so callers can open the file
    # for writing without further checks.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    return file_path
26 changes: 26 additions & 0 deletions wordstats/file_handling/file_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import codecs


# useful methods for handling files

def load_from_path(path):
    """
    Read the whole content of a UTF-8 encoded file.

    When the file does not exist a message is printed, an empty file is
    created at that location and the empty string is returned.

    :param path: path of the file to read
    :return: the content of the file as a string
    """
    try:
        with codecs.open(path, encoding="utf8") as file:
            return file.read()

    except FileNotFoundError:
        print(path + " not found, creating empty file.")
        # Create the file inside a `with` block so the handle is closed
        # immediately instead of being left open until garbage collection.
        with codecs.open(path, encoding="utf8", mode="w"):
            pass
        # A freshly created file is empty; no need to read it back.
        return ""


def save_to_file(path, content):
    """
    Write content to a file as UTF-8, replacing any previous content.

    :param path: path of the file to write
    :param content: string to store in the file
    :return: None
    """
    output = codecs.open(path, mode="w", encoding="utf8")
    try:
        output.write(content)
    finally:
        output.close()


def append_to_file(path, content):
    """
    Append one line to a UTF-8 encoded file, creating the file if needed.

    :param path: path of the file to append to
    :param content: string to append; a newline is added after it
    :return: None
    """
    with codecs.open(path, encoding="utf8", mode="a") as words_file:
        # write() emits the line in one call; writelines() on a string
        # would iterate over it character by character.
        words_file.write(content + "\n")
Loading

0 comments on commit 7ce271b

Please sign in to comment.