Skip to content

Commit

Permalink
also escape the matched term (#1530)
Browse files Browse the repository at this point in the history
Depending on the analyzer used, it is possible for the search
term itself to contain characters which should be escaped
before generating HTML output. This commit adds escaping
on the term itself, and adds some unit tests as well.
  • Loading branch information
mschoch authored Jan 4, 2021
1 parent d6a3fe2 commit 4801b14
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 21 deletions.
2 changes: 1 addition & 1 deletion search/highlight/format/html/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
// start the <mark> tag
rv += a.before
// add the term itself
rv += string(f.Orig[termLocation.Start:termLocation.End])
rv += html.EscapeString(string(f.Orig[termLocation.Start:termLocation.End]))
// end the <mark> tag
rv += a.after
// update current
Expand Down
68 changes: 48 additions & 20 deletions search/highlight/format/html/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ import (
"github.com/blevesearch/bleve/search/highlight"
)

func TestHTMLFragmentFormatter1(t *testing.T) {
func TestHTMLFragmentFormatter(t *testing.T) {
tests := []struct {
fragment *highlight.Fragment
tlm search.TermLocationMap
output string
start string
end string
}{
{
fragment: &highlight.Fragment{
Expand All @@ -43,25 +45,9 @@ func TestHTMLFragmentFormatter1(t *testing.T) {
},
},
output: "the <b>quick</b> brown fox",
start: "<b>",
end: "</b>",
},
}

emHTMLFormatter := NewFragmentFormatter("<b>", "</b>")
for _, test := range tests {
otl := highlight.OrderTermLocations(test.tlm)
result := emHTMLFormatter.Format(test.fragment, otl)
if result != test.output {
t.Errorf("expected `%s`, got `%s`", test.output, result)
}
}
}

func TestHTMLFragmentFormatter2(t *testing.T) {
tests := []struct {
fragment *highlight.Fragment
tlm search.TermLocationMap
output string
}{
{
fragment: &highlight.Fragment{
Orig: []byte("the quick brown fox"),
Expand All @@ -78,11 +64,53 @@ func TestHTMLFragmentFormatter2(t *testing.T) {
},
},
output: "the <em>quick</em> brown fox",
start: "<em>",
end: "</em>",
},
// test html escaping
{
fragment: &highlight.Fragment{
Orig: []byte("<the> quick brown & fox"),
Start: 0,
End: 23,
},
tlm: search.TermLocationMap{
"quick": []*search.Location{
{
Pos: 2,
Start: 6,
End: 11,
},
},
},
output: "&lt;the&gt; <em>quick</em> brown &amp; fox",
start: "<em>",
end: "</em>",
},
// test html escaping inside search term
{
fragment: &highlight.Fragment{
Orig: []byte("<the> qu&ick brown & fox"),
Start: 0,
End: 24,
},
tlm: search.TermLocationMap{
"qu&ick": []*search.Location{
{
Pos: 2,
Start: 6,
End: 12,
},
},
},
output: "&lt;the&gt; <em>qu&amp;ick</em> brown &amp; fox",
start: "<em>",
end: "</em>",
},
}

emHTMLFormatter := NewFragmentFormatter("<em>", "</em>")
for _, test := range tests {
emHTMLFormatter := NewFragmentFormatter(test.start, test.end)
otl := highlight.OrderTermLocations(test.tlm)
result := emHTMLFormatter.Format(test.fragment, otl)
if result != test.output {
Expand Down

0 comments on commit 4801b14

Please sign in to comment.