Make token2id mapping reproducible #1715

Merged
merged 5 commits on Nov 23, 2017
Changes from 3 commits
8 changes: 4 additions & 4 deletions gensim/corpora/dictionary.py
@@ -148,9 +148,9 @@ def doc2bow(self, document, allow_update=False, return_missing=False):

         token2id = self.token2id
         if allow_update or return_missing:
-            missing = {w: freq for w, freq in iteritems(counter) if w not in token2id}
+            missing = sorted((x for x in iteritems(counter) if x[0] not in token2id), key=lambda x: (x[1], x[0]))
Contributor (@menshikh-iv):
Sorting by (freq, w), are you sure that this is correct?

Contributor Author (@formi23), Nov 21, 2017:
@menshikh-iv, that's a good point. Either way works and ensures reproducibility. One might assume that sorting integers is faster than sorting strings, but the fallback comparison for words with the same frequency seems to introduce overhead (see the figure, where freq denotes frequency-word sorting and string denotes word-frequency sorting; the i, f, c, and d methods are as in the previous example).
[figure: string-integer benchmark comparing the two sort orders]

Since the words in token2id are guaranteed to be unique but the frequencies are not (and will likely overlap, given the patterns of natural language), sorting by token then frequency is probably the better choice, though this will vary from dataset to dataset. What's your preference?
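
For concreteness, a minimal sketch of the two candidate sort keys on toy data (plain Python 3 here; the PR itself uses the six helpers, and the tokens and counts are made up):

    from collections import Counter

    counter = Counter(["b", "a", "a", "c", "b"])  # toy document: b:2, a:2, c:1

    # (freq, word): ties on frequency fall back to comparing the word
    sorted(counter.items(), key=lambda x: (x[1], x[0]))
    # -> [('c', 1), ('a', 2), ('b', 2)]

    # (word, freq): unique words give a total order immediately
    sorted(counter.items())
    # -> [('a', 2), ('b', 2), ('c', 1)]

Both orders are deterministic; the difference is only how often the comparison has to fall back to the second element of the key.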

Contributor (@menshikh-iv), Nov 22, 2017:

IMO (w, freq) is better than (freq, w), simply because of the uniqueness of the tokens. For a bigger corpus, the situation gets even worse for (freq, w): frequencies obey Zipf's law -> more and more duplicate frequencies in the "tail" of the distribution.
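
A rough way to see the duplication described here (a hypothetical simulation, not from the PR): draw token ranks from a heavy-tailed distribution and count how many distinct tokens share each frequency.

    import random
    from collections import Counter

    random.seed(0)
    # Zipf-like sample: most draws hit a few head tokens, the rest form a long tail
    tokens = ["w%d" % min(int(random.paretovariate(1.0)), 1000) for _ in range(10000)]
    freqs = Counter(tokens)

    # how many distinct tokens share each frequency value?
    ties = Counter(freqs.values())
    print(ties.most_common(3))  # the lowest frequencies are shared by many tokens

Ties by frequency are the common case in the tail, so a (freq, w) key degenerates into comparing words anyway, while (w, freq) never needs the tie-breaker.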

Contributor Author (@formi23):
Made the changes.

             if allow_update:
-                for w in missing:
+                for w, _ in missing:
                     # new id = number of ids made so far;
                     # NOTE this assumes there are no gaps in the id sequence!
                     token2id[w] = len(token2id)
@@ -169,7 +169,7 @@ def doc2bow(self, document, allow_update=False, return_missing=False):
         # return tokenids, in ascending id order
         result = sorted(iteritems(result))
         if return_missing:
-            return result, missing
+            return result, dict(missing)
         else:
             return result
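
A quick usage sketch of the changed path (illustrative; the exact ids shown assume the (freq, word) ordering as of this commit):

    from gensim.corpora import Dictionary

    d = Dictionary()
    bow, missing = d.doc2bow("a b b c".split(), allow_update=True, return_missing=True)
    # missing is built from a sorted sequence, so repeated runs over the
    # same document assign the same ids: a -> 0, c -> 1, b -> 2 here
    print(d.token2id, missing)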

@@ -266,7 +266,7 @@ def compactify(self):
         logger.debug("rebuilding dictionary, shrinking gaps")

         # build mapping from old id -> new id
-        idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id))))
+        idmap = dict(izip(sorted(itervalues(self.token2id)), xrange(len(self.token2id))))

         # reassign mappings to new ids
         self.token2id = {token: idmap[tokenid] for token, tokenid in iteritems(self.token2id)}
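
And a toy illustration of why sorting the old ids matters in compactify (Python 3 built-ins used in place of the six aliases; the mapping is made up):

    # token2id with gaps, e.g. after filter_extremes dropped ids 1, 3, 4
    token2id = {"apple": 0, "pear": 5, "plum": 2}

    # the old code zipped values in dict-iteration order; sorting first pins the result
    idmap = dict(zip(sorted(token2id.values()), range(len(token2id))))
    # -> {0: 0, 2: 1, 5: 2}

    token2id = {t: idmap[i] for t, i in token2id.items()}
    # -> {'apple': 0, 'pear': 2, 'plum': 1}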