-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
support both old and new fastText model #1319
Changes from 1 commit
9f9dd24
d7725ca
de39ab0
f0c3e25
5f5ace6
1509512
d7e5403
58a66c2
9c9d3ec
8ffb220
06ac316
3deb394
b038fdb
4f6aa4d
5c09bdf
5cdf4e6
55a2d37
aeb05c1
092ef86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -256,7 +256,7 @@ def load_binary_data(self, model_binary_file): | |
self.load_vectors(f) | ||
|
||
def load_model_params(self, file_handle): | ||
(dim, ws, epoch, minCount, neg, _, loss, model, bucket, minn, maxn, _, t) = self.struct_unpack(file_handle, '@12i1d') | ||
(_,_,dim, ws, epoch, minCount, neg, _, loss, model, bucket, minn, maxn, _, t) = self.struct_unpack(file_handle, '@14i1d') | ||
# Parameters stored by [Args::save](https://github.com/facebookresearch/fastText/blob/master/src/args.cc) | ||
self.size = dim | ||
self.window = ws | ||
|
@@ -275,7 +275,7 @@ def load_dict(self, file_handle): | |
# Vocab stored by [Dictionary::save](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) | ||
assert len(self.wv.vocab) == nwords, 'mismatch between vocab sizes' | ||
assert len(self.wv.vocab) == vocab_size, 'mismatch between vocab sizes' | ||
ntokens, = self.struct_unpack(file_handle, '@q') | ||
ntokens,pruneidx_size = self.struct_unpack(file_handle, '@2q') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Space after comma. |
||
for i in range(nwords): | ||
word_bytes = b'' | ||
char_byte = file_handle.read(1) | ||
|
@@ -289,6 +289,11 @@ def load_dict(self, file_handle): | |
assert self.wv.vocab[word].index == i, 'mismatch between gensim word index and fastText word index' | ||
self.wv.vocab[word].count = count | ||
|
||
for j in range(pruneidx_size): | ||
_,_ = self.struct_unpack(file_handle,'@2i') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Spaces after commas. |
||
|
||
_ = self.struct_unpack(file_handle,'@?') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please add a comment clarifying what this is for? |
||
|
||
def load_vectors(self, file_handle): | ||
num_vectors, dim = self.struct_unpack(file_handle, '@2q') | ||
# Vectors stored by [Matrix::save](https://github.com/facebookresearch/fastText/blob/master/src/matrix.cc) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PEP8: space after comma. Outer brackets not needed.