pyproject.toml
[build-system]
requires = [
"setuptools>=58.2.0",
"wheel",
]
build-backend = "setuptools.build_meta"
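# note: per PEP 517/518, build front-ends such as pip typically build the package in an
# isolated environment containing the requirements listed above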
[project]
name = "fite"
version = "0.0.1"
description = "Forecaster Intergrated TAF Environment; a Python package for autocompletion of Terminal Aerodrome Forecasts."
authors = [
{name = "Jason Leaver", email = "jason.leaver@us.af.mil"},
]
license = {file = 'LICENSE'}
requires-python = ">=3.10"
dependencies = [
"numpy==1.23.5",
"pandas==1.5.1",
"torch==1.13.0",
"datasets==2.7.0",
"tokenizers==0.13.2",
"transformers==4.24.0",
"pyarrow==10.0.0",
"fastapi==0.87.0",
"toml==0.10.2",
]
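# all runtime dependencies are pinned to exact versions; with the setuptools backend
# declared above, `pip install .` resolves and installs this list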
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Developers",
"Intended Audience :: End Users/Desktop",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Topic :: Utilities",
]
[tool.fite]
root-path = "store/"
[[tool.fite.models]]
base-model = "gpt2"
dataset = "taf"
version = "base1"
model-name = "gpt2-taf-base1"
# if the model above were retrained as a new revision, the following values would be used instead:
# base-model = "gpt2-taf-base1"
# dataset = "taf"
# version = "2022-01-01"
# model-name = "gpt2-taf-base1.2022-01-01"
description = "A GPT-2 model trained on terminal aerodrome forecasts."
# whole-word tokens should be prefixed with the Unicode character \u0120
additional-tokens = ["TAF", "\u0120BECMG", "\u0120TEMPO"]
additional-special-tokens = ["\u0120LAST", "\u0120NO", "\u0120AMDS", "\u0120RMK", "\u0120COR"] #, "[metadata]"]
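# \u0120 is the "Ġ" marker GPT-2's byte-level BPE uses for a token preceded by a space,
# so "\u0120BECMG" encodes " BECMG" as a single token; these lists are presumably passed
# to the tokenizer's add_tokens / add_special_tokens when the model is prepared
# (assumption -- the exact wiring lives in the fite package, not in this file)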
# metadata-pattern is a regular expression used to extract metadata from each dataset entry;
# in the TAF example it extracts the maximum and minimum temperature groups from the TAF
# metadata-pattern = "(?P<TX>TXM?\\d{2})\\/\\d{4}Z\\sTN?(?P<TN>M?\\d{2})\\/\\d{4}Z$"
metadata-pattern = "(?:(TXM?/\\d{2}).+(TNM?/\\d{2}))$"
prompt-examples = [
"TAF [TX20 TN13] AMD KDAA 251400Z 2514/2616 VRB03KT 9000 BR",
"TAF [TX08 TN02] AMD KHOP 151230Z 1512/1614 23005KT 4800 -SHRA BR OVC004",
"TAF [TXM03 TNM08] AMD KRDR 141452Z 1414/1516",
"TAF [TX26 TN15] KFTK 041900Z 0419/0601 18012G22KT 9999",
"TAF [TX21 TN14] KFAF 031000Z 0310/0416 04006KT 9999 BKN007",
"TAF [TX19 TN09] AMD KFTK 011405Z 0114/0217 24005KT 9999",
"TAF [TX05 TN00] AMD KFTK 151405Z 1514/1618 12006KT 9999 -DZ",
"TAF [TX21 TN06] AMD KADW 130515Z 1305/1407 29015G25KT 9999",
"TAF [TX14 TN02] KFAF 151900Z 1519/1701 13009KT 9999 OVC020",
"TAF [TX04 TNM03] KFFO 130302Z 1303/1409 28009KT 9000 -SN BKN009 OVC020 620098",
"TAF [TX02 TNM03] AMD KFFO 121425Z 1214/1317 29009KT 6000",
"TAF [TX20 TN11] KMUI 311900Z 3119/0201 VRB06KT 9999 FEW015 OVC025",
"TAF [TX20 TN08] KMUI 021100Z 0211/0317 VRB06KT 9999 SCT015",
"TAF [TX02 TNM03] KFFO 121100Z 1211/1317 29012KT 9999 BKN022",
"TAF [TX01 TN00] AMD KMTC 152130Z 1521/1702 VRB06KT 2800 -SN OVC015",
"TAF [TX16 TN02] AMD KFFO 111351Z 1113/1217 11006KT 6000 -RA BR",
"TAF [TXM03 TNM07] KRDR 101000Z 1010/1116 02020G30KT 9999",
"TAF [TX11 TN00] AMD KADW 161523Z 1615/1715 29012G18KT 9999 BKN030 BKN200 510013",
"TAF [TX16 TN12] KLFI 090000Z 0900/1006 04012G18KT 9999 SKC",
"TAF [TXM07 TNM09] PABI 161400Z 1614/1720 10015G25KT 9999 FEW030 510043",
"TAF [TX03 TNM03] KBLV 160000Z 1600/1706 30009KT 9999 OVC020",
"TAF [TXM04 TNM13] KRDR 151000Z 1510/1616 35006KT 9999",
"TAF [TXM13 TNM24] AMD PAFB 011013Z 0110/0212 VRB03KT 9999",
"TAF [TX17 TN06] KGUS 061300Z 0613/0719 18009KT FEW150",
"TAF [TX22 TN13] AMD KWRI 120435Z 1204/1308 18015G25KT 9999 VCSH SCT012",
"TAF [TX06 TN03] PASY 101400Z 1014/1120 02030G40KT 9000 -SHRA SCT003 OVC010",
"TAF [TXM01 TNM07] AMD KRDR 092100Z 0921/1024 36010G20KT 8000 -SN OVC009",
"TAF [TXM08 TNM20] PAFB 160700Z 1607/1713 06010KT",
"TAF [TX18 TN06] KADW 100100Z 1001/1107 08006KT 9999 FEW200",
"TAF [TX11 TN03] AMD KDOV 161144Z 1611/1714",
"TAF [TX24 TN09] KLFI 121800Z 1218/1324 25009KT 9999",
"TAF [TX09 TNM01] KWRI 140200Z 1402/1508 31009KT 9999 SKC QNH3005INS",
"TAF [TXM06 TNM12] AMD KRDR 150742Z 1507/1608",
"TAF [TX19 TN13] KBLV 100000Z 1000/1106 14009KT 9999 FEW050",
"TAF [TX19 TN09] AMD KFTK 011530Z 0115/0217",
"TAF [TX24 TN19] AMD KLFI 111128Z 1111/1215",
"TAF [TXM13 TNM24] PAFB 010600Z 0106/0212 VRB03KT 9999 BKN040 BKN100",
"TAF [TX26 TN07] KDAA 071200Z 0712/0818 VRB06KT",
"TAF [TX02 TNM04] AMD KBLV 161934Z 1619/1722 28012G20KT 9999 SCT030 BKN050 BKN080",
"TAF [TXM09 TNM13] PAFB 151500Z 1515/1621 VRB03KT 8000 BR OVC006"
]
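# each example above follows the prompt layout the model completes: the "TAF [TXnn TNnn]"
# metadata header, an optional "AMD", the station ICAO, issue time, valid period, and then
# the forecast groups (wind, visibility, weather, cloud)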
[[tool.fite.models]]
base-model = "gpt2"
dataset = "bullets"
version = "base1"
model-name = "gpt2-bullets-base1"
description = "A GPT-2 model trained on epr bullets."
additional-tokens = []
additional-special-tokens = []
prompt-examples = [
"- Hanchoed",
]
[tool.setuptools]
# https://setuptools.pypa.io/en/latest/userguide/datafiles.html
include-package-data = true
[tool.setuptools.packages.find]
where = ["src","store/models/gpt2-taf-0.1.0"]
[tool.setuptools.package-data]
mypkg = ["*"]
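# "mypkg" is the placeholder package name from the setuptools data-files docs linked above;
# for this project it would presumably be the actual package name (e.g. "fite"), or "*" to
# include data for every discovered package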