Skip to content

Commit

Permalink
Better checking of MS MARCO datasets.
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool committed Nov 29, 2021
1 parent 3365499 commit 5bad21d
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/main/java/io/anserini/search/SearchCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -323,13 +323,18 @@ public void run() {
// floating point precision issues). Just to retain exactly the same output as SearchMsmarco (which was used to,
// for example, generate Anserini leaderboard runs), we add an ugly hack here to dump the results in the order
// of the qids in the query files.
boolean isMSMARCO_passage = topics.firstKey().equals(2) && topics.get(2).get("title").equals("Androgen receptor define");
boolean isMAMARCO_doc = topics.firstKey().equals(2) && topics.get(2).get("title").equals("androgen receptor define");
if (isMSMARCO_passage || isMAMARCO_doc) {
boolean isMSMARCOv1_passage = topics.firstKey().equals(2) &&
topics.get(2).get("title").equals("Androgen receptor define") &&
topics.keySet().size() == 6980;
boolean isMAMARCOv1_doc = topics.firstKey().equals(2) &&
topics.get(2).get("title").equals("androgen receptor define") &&
topics.keySet().size() == 5193;

if (isMSMARCOv1_passage || isMAMARCOv1_doc) {
String raw = "";
try {
InputStream inputStream = null;
if (isMSMARCO_passage) {
if (isMSMARCOv1_passage) {
inputStream = TopicReader.class.getClassLoader().getResourceAsStream(Topics.MSMARCO_PASSAGE_DEV_SUBSET.path);
} else {
inputStream = TopicReader.class.getClassLoader().getResourceAsStream(Topics.MSMARCO_DOC_DEV.path);
Expand Down

0 comments on commit 5bad21d

Please sign in to comment.