diff --git a/backup/dictionary.pkl b/backup/dictionary.pkl index ec8d41a..8531ce3 100644 Binary files a/backup/dictionary.pkl and b/backup/dictionary.pkl differ diff --git a/backup/processing_progress.txt b/backup/processing_progress.txt index 3f52546..ab9d635 100644 --- a/backup/processing_progress.txt +++ b/backup/processing_progress.txt @@ -1 +1 @@ -26874314,24799999999 \ No newline at end of file +28611477,26399999999 \ No newline at end of file diff --git a/dictionary.msgpack b/dictionary.msgpack index c0c5ab4..43d825e 100644 Binary files a/dictionary.msgpack and b/dictionary.msgpack differ diff --git a/lib/merge_batches.py b/lib/merge_batches.py index 41a4f35..a53ce1c 100644 --- a/lib/merge_batches.py +++ b/lib/merge_batches.py @@ -314,4 +314,4 @@ async def main(): finish_merge() if __name__ == "__main__": - main() + asyncio.run(main()) diff --git a/tokens.msgpack b/tokens.msgpack index f9cc2e4..79b754c 100644 Binary files a/tokens.msgpack and b/tokens.msgpack differ diff --git a/train.py b/train.py index e3e09f8..f1b7e58 100644 --- a/train.py +++ b/train.py @@ -106,7 +106,7 @@ async def main(retain=False): gc.collect() if (word_count + 1) % (PRUNE_FREQUENCY * 100) == 0: - await asyncio.run(merge_batches()) + await merge_batches() await create_batch(tree_store, TARGET_DICTIONARY_COUNT)