From d7e220a4dbe4e11eaf882926e706c02ff6040e9e Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 30 Sep 2020 23:17:10 -0400 Subject: [PATCH] also escape the matched term depending on the analyzer used, it is possible for the search term itself to contain characters which should be escaped before generating html output. this commit adds escaping on the term itself, and adds some unit tests as well. --- search/highlight/format/html/html.go | 2 +- search/highlight/format/html/html_test.go | 68 ++++++++++++++++------- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/search/highlight/format/html/html.go b/search/highlight/format/html/html.go index 259a03795..5bf16ca92 100644 --- a/search/highlight/format/html/html.go +++ b/search/highlight/format/html/html.go @@ -60,7 +60,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h // start the tag rv += a.before // add the term itself - rv += string(f.Orig[termLocation.Start:termLocation.End]) + rv += html.EscapeString(string(f.Orig[termLocation.Start:termLocation.End])) // end the tag rv += a.after // update current diff --git a/search/highlight/format/html/html_test.go b/search/highlight/format/html/html_test.go index 6bb49dfeb..0652d477f 100644 --- a/search/highlight/format/html/html_test.go +++ b/search/highlight/format/html/html_test.go @@ -21,11 +21,13 @@ import ( "github.com/blevesearch/bleve/search/highlight" ) -func TestHTMLFragmentFormatter1(t *testing.T) { +func TestHTMLFragmentFormatter(t *testing.T) { tests := []struct { fragment *highlight.Fragment tlm search.TermLocationMap output string + start string + end string }{ { fragment: &highlight.Fragment{ @@ -43,25 +45,9 @@ func TestHTMLFragmentFormatter1(t *testing.T) { }, }, output: "the quick brown fox", + start: "", + end: "", }, - } - - emHTMLFormatter := NewFragmentFormatter("", "") - for _, test := range tests { - otl := highlight.OrderTermLocations(test.tlm) - result := emHTMLFormatter.Format(test.fragment, otl) - if result != test.output { - t.Errorf("expected `%s`, got `%s`", test.output, result) - } - } -} - -func TestHTMLFragmentFormatter2(t *testing.T) { - tests := []struct { - fragment *highlight.Fragment - tlm search.TermLocationMap - output string - }{ { fragment: &highlight.Fragment{ Orig: []byte("the quick brown fox"), @@ -78,11 +64,53 @@ func TestHTMLFragmentFormatter2(t *testing.T) { }, }, output: "the quick brown fox", + start: "", + end: "", + }, + // test html escaping + { + fragment: &highlight.Fragment{ + Orig: []byte(" quick brown & fox"), + Start: 0, + End: 23, + }, + tlm: search.TermLocationMap{ + "quick": []*search.Location{ + { + Pos: 2, + Start: 6, + End: 11, + }, + }, + }, + output: "<the> quick brown & fox", + start: "", + end: "", + }, + // test html escaping inside search term + { + fragment: &highlight.Fragment{ + Orig: []byte(" qu&ick brown & fox"), + Start: 0, + End: 24, + }, + tlm: search.TermLocationMap{ + "qu&ick": []*search.Location{ + { + Pos: 2, + Start: 6, + End: 12, + }, + }, + }, + output: "<the> qu&ick brown & fox", + start: "", + end: "", }, } - emHTMLFormatter := NewFragmentFormatter("", "") for _, test := range tests { + emHTMLFormatter := NewFragmentFormatter(test.start, test.end) otl := highlight.OrderTermLocations(test.tlm) result := emHTMLFormatter.Format(test.fragment, otl) if result != test.output {