-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Simon David HERNANDEZ <simondhp@git.totum.one>
- Loading branch information
Simon David HERNANDEZ
committed
May 3, 2018
1 parent
72b23b9
commit 11769aa
Showing
19 changed files
with
133 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
src/kpext/kpext_data/corpus/* | ||
*.pycrfsuite | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# -*- coding: utf-8 -*- | ||
""" setup | ||
Package setup for kpext | ||
""" | ||
|
||
from setuptools import setup, find_packages | ||
|
||
# The README doubles as the long description shown on the package index.
# The encoding is explicit so the build does not depend on the machine's
# locale (a non-UTF-8 default would fail on non-ASCII README content).
with open('README.md', encoding='utf-8') as f:
    README = f.read()


# Package metadata and build configuration for the ``kpext`` package, whose
# sources live under ``src/``.
setup(name='python-kpext',
      version='v0.1.0.dev3',
      description='Python package for keyphrase extraction.',
      long_description=README,
      long_description_content_type='text/markdown',
      url='https://github.com/snovd/keyphrase-extraction',
      author='Simon D. Hernandez',
      author_email='py.kpext@totum.one',
      license='MIT',
      classifiers=[
          'Development Status :: 3 - Alpha',
          'Intended Audience :: Developers',
          'Topic :: Software Development :: Build Tools',
          'License :: OSI Approved :: MIT License',
          'Programming Language :: Python :: 3.2',
          'Programming Language :: Python :: 3.3',
          'Programming Language :: Python :: 3.4',
          'Programming Language :: Python :: 3.5',
          'Programming Language :: Python :: 3.6'
      ],
      keywords='keyphrase extraction',
      packages=find_packages('src'),
      package_dir={'': 'src', 'kpext': 'src/kpext'},
      # Ship the pre-trained models inside the installed package.
      package_data={'kpext': ['kpext_data/models/*']},
      python_requires='>=3, <4',
      # The metadata keyword is ``platforms`` (plural); ``platform`` is not a
      # recognised option and its value would be ignored with a warning.
      platforms='any',
      install_requires=['nltk', 'python-crfsuite'],
      zip_safe=False)
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
"""config/config | ||
Default corpus configs. | ||
""" | ||
import inspect
import os
import sys
from pathlib import Path
|
||
from kpext import kpext_data | ||
|
||
# Identifiers of the supported corpora; also used as keys of ``CORPUS``.
ACLRDTEC = "acl-rd-tec-2.0"
SEMEVAL2017 = "semeval2017-task10"


# Directory two levels above the file that defines ``kpext_data``
# (presumably the directory containing the ``kpext_data`` package folder --
# confirm that ``kpext_data`` is a regular package with an ``__init__.py``).
KPEXTDATA_PATH = str(Path(inspect.getfile(kpext_data)).parents[1])


# Check for default paths for corpus.
# Search order: current working directory, the user's home directory, then
# the location derived from the installed package.
DEFAULT_CORPUS_PATH = "kpext_data/corpus/" + SEMEVAL2017 + "/"
if Path("./" + DEFAULT_CORPUS_PATH).exists():
    CORPUS_PATH = "./" + DEFAULT_CORPUS_PATH
# ``pathlib`` does not expand "~" on its own, so the home-directory candidate
# is expanded explicitly; both the existence check and the stored path use
# the expanded form so the result can be opened directly.
elif Path(os.path.expanduser("~/" + DEFAULT_CORPUS_PATH)).exists():
    CORPUS_PATH = os.path.expanduser("~/" + DEFAULT_CORPUS_PATH)
elif Path(KPEXTDATA_PATH + "/" + DEFAULT_CORPUS_PATH).exists():
    CORPUS_PATH = KPEXTDATA_PATH + "/" + DEFAULT_CORPUS_PATH
else:
    # Nothing found: tell the user where the corpus may be placed and fall
    # back to the bare relative path.
    print("Warning: SemEval 2017 Task 10 corpus doesn't exists.", file=sys.stderr)
    print(" - Download from here https://scienceie.github.io/resources.html",
          file=sys.stderr)
    print(" - Use one of the following paths.", file=sys.stderr)
    print(" + %s" % (KPEXTDATA_PATH + "/" + DEFAULT_CORPUS_PATH), file=sys.stderr)
    print(" + ./%s" % DEFAULT_CORPUS_PATH, file=sys.stderr)
    print(" + ~/%s" % DEFAULT_CORPUS_PATH, file=sys.stderr)
    print(" - You can use pre-trained models.", file=sys.stderr)
    CORPUS_PATH = DEFAULT_CORPUS_PATH
|
||
# Registry of corpus descriptions, keyed by corpus identifier.  Each entry
# records the corpus id, its per-corpus options and, for SemEval 2017, the
# annotation format and the directory of every dataset split (``None`` where
# no directory is configured for that split).
CORPUS = {
    ACLRDTEC: {"_id": "acl-rd-tec-2.0", "options": {}},
    SEMEVAL2017: {
        "_id": "semeval2017-task10",
        "format": "brat",
        "format-description": "brat standoff format, http://brat.nlplab.org/standoff.html",
        "dataset": {
            "train-labeled": "%s/train2/" % CORPUS_PATH,
            "train-unlabeled": None,
            "dev-labeled": "%s/dev/" % CORPUS_PATH,
            "dev-unlabeled": None,
            "test-unlabeled": "%s/scienceie2017_test_unlabelled/" % CORPUS_PATH,
            "test-labeled": "%s/semeval_articles_test/" % CORPUS_PATH,
        },
        "options": {},
    },
    "options": {},
}

# Convenience aliases; the default corpus is the SemEval 2017 entry.
CORPUS_SEMEVAL2017_TASK10 = CORPUS[SEMEVAL2017]
CORPUS_ACL_RD_TEC_2_0 = CORPUS[ACLRDTEC]
CORPUS_DEFAULT = CORPUS_SEMEVAL2017_TASK10
|
||
# Check for default paths for models.
# Search order: current working directory, the user's home directory, then
# the location derived from the installed package.
DEFAULT_MODELS_PATH = "kpext_data/models/"
if Path("./" + DEFAULT_MODELS_PATH).exists():
    MODELS_PATH = "./" + DEFAULT_MODELS_PATH
# ``pathlib`` does not expand "~" on its own, so the home-directory candidate
# is expanded explicitly; both the existence check and the stored path use
# the expanded form so the result can be opened directly.
elif Path(os.path.expanduser("~/" + DEFAULT_MODELS_PATH)).exists():
    MODELS_PATH = os.path.expanduser("~/" + DEFAULT_MODELS_PATH)
elif Path(KPEXTDATA_PATH + "/" + DEFAULT_MODELS_PATH).exists():
    MODELS_PATH = KPEXTDATA_PATH + "/" + DEFAULT_MODELS_PATH
else:
    # Nothing found: list the accepted locations and fall back to the bare
    # relative path.
    print("Warning: Path to save models doesn't exists.", file=sys.stderr)
    print(" - Possible paths are:", file=sys.stderr)
    print(" + %s" % (KPEXTDATA_PATH + "/" + DEFAULT_MODELS_PATH), file=sys.stderr)
    print(" + %s" % ("./" + DEFAULT_MODELS_PATH), file=sys.stderr)
    print(" + %s" % ("~/" + DEFAULT_MODELS_PATH), file=sys.stderr)
    print(" - Default will be %s" % DEFAULT_MODELS_PATH, file=sys.stderr)
    MODELS_PATH = DEFAULT_MODELS_PATH


# Relative directory name used for generated output.
OUTPUT_PATH = "output/"
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters