-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 12 changed files with 196 additions and 36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package models; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.document.Document; | ||
import org.apache.lucene.document.Field.Store; | ||
import org.apache.lucene.document.StoredField; | ||
import org.apache.lucene.document.TextField; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.IndexReader; | ||
import org.apache.lucene.index.IndexWriter; | ||
import org.apache.lucene.index.IndexWriterConfig; | ||
import org.apache.lucene.search.BooleanClause; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.ScoreDoc; | ||
import org.apache.lucene.search.TopDocs; | ||
import org.apache.lucene.store.ByteBuffersDirectory; | ||
import org.apache.lucene.store.Directory; | ||
import org.apache.lucene.util.QueryBuilder; | ||
|
||
import io.ebean.Ebean; | ||
import ninja.lifecycle.Start; | ||
import util.standalone.StemmingAnalyzer; | ||
|
||
public class LuceneSearch implements SearchEngine { | ||
// setup analyzer | ||
Analyzer prodAna; | ||
// setup directory - for now in memory, alternative Option might be desirable | ||
final Directory prodIndex = new ByteBuffersDirectory(); | ||
|
||
// standard constructor that sets up used analyzer as a standard analyzer | ||
public LuceneSearch() { | ||
prodAna = new StemmingAnalyzer(); | ||
} | ||
|
||
@Start(order = 100) | ||
public void firstIndexing() { | ||
setup(); | ||
} | ||
|
||
@Override | ||
public void setup() { | ||
indexData(); | ||
} | ||
|
||
@Override | ||
public List<Integer> search(String searchText, int maxNumberOfHits) { | ||
List<Integer> results = new ArrayList<Integer>(); | ||
// setup query builder | ||
QueryBuilder builder = new QueryBuilder(prodAna); | ||
// setup queries | ||
BooleanQuery.Builder fullQuery = new BooleanQuery.Builder(); | ||
// check names | ||
Query queryN = builder.createBooleanQuery("name", searchText, BooleanClause.Occur.MUST); | ||
fullQuery.add(queryN, BooleanClause.Occur.SHOULD); | ||
// check short description | ||
Query querySD = builder.createBooleanQuery("overview", searchText, BooleanClause.Occur.MUST); | ||
fullQuery.add(querySD, BooleanClause.Occur.SHOULD); | ||
// check long description | ||
Query queryLD = builder.createBooleanQuery("description", searchText, BooleanClause.Occur.MUST); | ||
fullQuery.add(queryLD, BooleanClause.Occur.SHOULD); | ||
// setup index reader and searcher and perform search | ||
try { | ||
// setup | ||
IndexReader reader = DirectoryReader.open(prodIndex); | ||
IndexSearcher searcher = new IndexSearcher(reader); | ||
// search | ||
TopDocs topDocs = searcher.search(fullQuery.build(), maxNumberOfHits); | ||
ScoreDoc[] hits = topDocs.scoreDocs; | ||
for (ScoreDoc scoreDoc : hits) { | ||
results.add(Integer.parseInt(searcher.doc(scoreDoc.doc).get("id"))); | ||
} | ||
} catch (Exception e) { | ||
// TODO: handle exception | ||
e.printStackTrace(); | ||
} | ||
|
||
return results; | ||
} | ||
|
||
// currently indexes only products | ||
private void indexData() { | ||
|
||
// setup index writer + config | ||
IndexWriterConfig config = new IndexWriterConfig(prodAna); | ||
try { | ||
IndexWriter wri = new IndexWriter(prodIndex, config); | ||
// loop through all products to add to the index | ||
List<Product> products = getAllProducts(); | ||
for (Product product : products) { | ||
// create a 'document' (= an indexing target) | ||
Document prodDoc = new Document(); | ||
// Setup the fields in that document, we store the id so we can use it to retrieve search results | ||
StoredField prodId = new StoredField("id", product.getId()); | ||
TextField prodName = new TextField("name", product.getName(), Store.NO); | ||
TextField prodShortDesc = new TextField("overview", product.getDescriptionOverview(), Store.NO); | ||
TextField prodLongDesc = new TextField("description", product.getDescriptionDetail(), Store.NO); | ||
prodDoc.add(prodId); | ||
prodDoc.add(prodName); | ||
prodDoc.add(prodShortDesc); | ||
prodDoc.add(prodLongDesc); | ||
// add the document with the products information to the index writer | ||
wri.addDocument(prodDoc); | ||
} | ||
wri.close(); | ||
} catch (Exception e) { | ||
// TODO: handle exception | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
private List<Product> getAllProducts() { | ||
return Ebean.find(Product.class).findList(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package models; | ||
|
||
import java.util.List; | ||
|
||
/**
 * Contract for a text search backend.
 */
public interface SearchEngine {

    /**
     * Prepares the engine for use. The Lucene-backed implementation builds its
     * index in this method.
     */
    void setup();

    /**
     * Runs a search for the given text.
     *
     * @param searchText      the text to search for
     * @param maxNumberOfHits the maximum number of hits to return
     * @return the integer ids of the hits (the Lucene-backed implementation returns product ids)
     */
    List<Integer> search(String searchText, int maxNumberOfHits);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package util.standalone; | ||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.LowerCaseFilter; | ||
import org.apache.lucene.analysis.en.PorterStemFilter; | ||
import org.apache.lucene.analysis.standard.StandardTokenizer; | ||
import org.apache.lucene.analysis.Tokenizer; | ||
|
||
public class StemmingAnalyzer extends Analyzer { | ||
@Override | ||
protected TokenStreamComponents createComponents(String fieldName) { | ||
Tokenizer source = new StandardTokenizer(); | ||
return new TokenStreamComponents(source, new PorterStemFilter(new LowerCaseFilter(source))); | ||
} | ||
} |