Skip to content

Commit

Permalink
#6278: Improved whitespace handling when stripping HTML. Centralize w…
Browse files Browse the repository at this point in the history
…hitespace handling for quickpicks
  • Loading branch information
sdedic committed Aug 2, 2023
1 parent 2f504a8 commit b5e0253
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import javax.lang.model.type.ArrayType;
import javax.lang.model.type.TypeKind;
import javax.lang.model.type.TypeMirror;
import javax.swing.text.BadLocationException;
import javax.swing.text.StyledDocument;
import org.eclipse.lsp4j.Position;
import org.eclipse.lsp4j.Range;
Expand Down Expand Up @@ -399,24 +398,34 @@ public static String encode2JSON(String value) {

/**
* Simple conversion from HTML to plaintext. Removes all html tags incl. attributes,
* replaces BR, P and HR tags with newlines.
* replaces BR, P and HR tags with newlines. The method optionally collapses whitespaces:
* all whitespace characters are replaced by spaces, adjacent spaces collapsed to single one, leading
* and trailing spaces removed.
* @param s html text
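* @param collapseWhitespaces if {@code true}, whitespace characters are replaced by spaces, adjacent spaces are collapsed to a single one and leading/trailing spaces are removed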
* @return plaintext
*/
public static String html2plain(String s) {
public static String html2plain(String s, boolean collapseWhitespaces) {
if (s == null) {
return null;
}
boolean inTag = false;
boolean whitespace = false;

int tagStart = -1;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.length(); i++) {
char ch = s.charAt(i);
if (inTag) {
T: if (inTag) {
boolean alpha = Character.isAlphabetic(ch);
if (tagStart > 0 && !alpha) {
String t = s.substring(tagStart, i).toLowerCase(Locale.ENGLISH);
switch (t) {
case "br": case "p": case "hr": // NOI1N
sb.append("\n");
break;
ch ='\n'; // NOI18N
// continues to process 'ch' as if it came from the string, but `inTag` remains
// the same.
break T;
}
// prevent entering tagstart state again
tagStart = -2;
Expand All @@ -426,16 +435,40 @@ public static String html2plain(String s) {
} else if (tagStart == -1 && alpha) {
tagStart = i;
}
continue;
} else {
if (ch == '<') { // NOI18N
tagStart = -1;
inTag = true;
continue;
}
sb.append(ch);
}
if (collapseWhitespaces) {
if (ch == '\n') {
ch = ' ';
}
if (Character.isWhitespace(ch)) {
if (whitespace) {
continue;
}
ch = ' '; // NOI18N
whitespace = true;
} else {
whitespace = false;
}
}
sb.append(ch);
}
return sb.toString();
return collapseWhitespaces ? sb.toString().trim() : sb.toString();
}

/**
* Simple conversion from HTML to plaintext. Removes all html tags incl. attributes,
* replaces BR, P and HR tags with newlines.
* <p>
* Convenience overload: delegates to {@link #html2plain(String, boolean)} with whitespace
* collapsing turned off, so the result is neither collapsed nor trimmed.
* @param s html text; may be {@code null}
* @return plaintext, or {@code null} if {@code s} is {@code null}
*/
public static String html2plain(String s) {
// false = do not collapse whitespace
return html2plain(s, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.netbeans.api.project.SourceGroupModifier;
import org.netbeans.api.templates.CreateDescriptor;
import org.netbeans.api.templates.FileBuilder;
import org.netbeans.modules.java.lsp.server.Utils;
import org.netbeans.modules.java.lsp.server.input.QuickPickItem;
import org.netbeans.modules.java.lsp.server.input.ShowQuickPickParams;
import org.netbeans.modules.java.lsp.server.input.ShowInputBoxParams;
Expand Down Expand Up @@ -459,23 +460,7 @@ private static String findDetail(DataObject obj) {
}

static String stripHtml(String s) {
boolean inTag = false;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.length(); i++) {
char ch = s.charAt(i);
if (inTag) {
if (ch == '>') {
inTag = false;
}
} else {
if (ch == '<') {
inTag = true;
continue;
}
sb.append(ch);
}
}
return sb.toString();
return Utils.html2plain(s, true);
}

private static <T extends Exception> T raise(Class<T> clazz, Exception ex) throws T {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import org.eclipse.lsp4j.ApplyWorkspaceEditParams;
import org.eclipse.lsp4j.ApplyWorkspaceEditResponse;
import org.eclipse.lsp4j.ConfigurationParams;
Expand All @@ -37,6 +38,7 @@
import org.eclipse.lsp4j.WorkDoneProgressCreateParams;
import org.eclipse.lsp4j.WorkspaceFolder;
import org.eclipse.lsp4j.jsonrpc.messages.Either;
import org.netbeans.modules.java.lsp.server.Utils;
import org.netbeans.modules.java.lsp.server.input.QuickPickItem;
import org.netbeans.modules.java.lsp.server.input.ShowQuickPickParams;
import org.netbeans.modules.java.lsp.server.input.ShowMutliStepInputParams;
Expand Down Expand Up @@ -86,7 +88,16 @@ public CompletableFuture<String> execInHtmlPage(HtmlPageParams params) {

@Override
public CompletableFuture<List<QuickPickItem>> showQuickPick(ShowQuickPickParams params) {
return remote.showQuickPick(params);
// vscode from version 1.80.2 displays control characters in quickpicks. Let's strip them:
ShowQuickPickParams copy = new ShowQuickPickParams(
params.getTitle(), params.getPlaceHolder(), params.getCanPickMany(),
params.getItems().stream().map(
i -> new QuickPickItem(
i.getLabel(), Utils.html2plain(i.getDescription(), true), Utils.html2plain(i.getDetail(), true),
i.isPicked(), i.getUserData())
).collect(Collectors.toList())
);
return remote.showQuickPick(copy);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.eclipse.lsp4j.MessageType;
import org.eclipse.lsp4j.ShowMessageRequestParams;
import org.eclipse.lsp4j.jsonrpc.messages.Either;
import org.netbeans.modules.java.lsp.server.Utils;
import org.netbeans.modules.java.lsp.server.input.InputBoxStep;
import org.netbeans.modules.java.lsp.server.input.InputCallbackParams;
import org.netbeans.modules.java.lsp.server.input.InputService;
Expand Down Expand Up @@ -372,7 +373,7 @@ public <T extends NotifyDescriptor> CompletableFuture<T> clientNotifyCompletion(
List<QuickPickItem> items = new ArrayList<>(qpItems.size());
for (int i = 0; i < qpItems.size(); i++) {
NotifyDescriptor.QuickPick.Item item = qpItems.get(i);
items.add(new QuickPickItem(item.getLabel(), item.getDescription(), null, item.isSelected(), Integer.toString(i)));
items.add(new QuickPickItem(item.getLabel(), Utils.html2plain(item.getDescription(), true), null, item.isSelected(), Integer.toString(i)));
}
ShowQuickPickParams params = new ShowQuickPickParams(qp.getLabel(), qp.getTitle(), qp.isMultipleSelection(), items);
CompletableFuture<List<QuickPickItem>> qpF = client.showQuickPick(params);
Expand Down Expand Up @@ -431,7 +432,8 @@ public CompletableFuture<Either<QuickPickStep, InputBoxStep>> step(InputCallback
List<QuickPickItem> items = new ArrayList<>();
for (int i = 0; i < qpItems.size(); i++) {
NotifyDescriptor.QuickPick.Item item = qpItems.get(i);
items.add(new QuickPickItem(item.getLabel(), item.getDescription(), null, item.isSelected(), Integer.toString(i)));
items.add(new QuickPickItem(item.getLabel(),
Utils.html2plain(item.getDescription(), true), null, item.isSelected(), Integer.toString(i)));
}
QuickPickStep step = new QuickPickStep(ci.getEstimatedNumberOfInputs(), stepId,
null, input.getTitle(), ((NotifyDescriptor.QuickPick) input).isMultipleSelection(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,36 @@ public void testEncode2JSON() {
assertEquals("abcd", Utils.encode2JSON("abcd"));
assertEquals("'\\\"\\b\\t\\n\\r\\\\", Utils.encode2JSON("'\"\b\t\n\r\\"));
}

/**
* Tags are dropped while the text between them, including its spacing, is kept.
*/
@Test
public void testStripHtml() {
    assertEquals("Pre Text Post", Utils.html2plain("<div>Pre <span>Text</span> Post</div>"));
}

/**
* Newline characters must not appear in the collapsed output, whether they occur
* before the first tag, inside a tag, or between tags and text.
*/
@Test
public void testStripNewlines() {
    String html = "\n<div>Pre <span\n>\nText</span> Post\n</div>";
    assertEquals("Pre Text Post", Utils.html2plain(html, true));
}


/**
* Runs of spaces and tabs must shrink to one space; whitespace at the very
* beginning and end of the text must be dropped.
*/
@Test
public void testStripConsecutiveWhitespces() {
    String html = "\t <div> Pre <span> Text\t </span>\t\t Post </div>\t";
    assertEquals("Pre Text Post", Utils.html2plain(html, true));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,28 @@ public void testStripHtml() {
String result = LspTemplateUI.stripHtml(s);
assertEquals(expResult, result);
}

/**
* Newline characters in the input must not appear in the stripped text.
*/
@Test
public void testStripNewlines() {
    final String expected = "Pre Text Post";
    final String actual = LspTemplateUI.stripHtml("\n<div>Pre <span\n>\nText</span> Post\n</div>");
    assertEquals(expected, actual);
}


/**
* Adjacent whitespace characters must be reduced to a single space, with
* nothing left over at the start or end of the stripped text.
*/
@Test
public void testStripConsecutiveWhitespces() {
    final String expected = "Pre Text Post";
    final String actual = LspTemplateUI.stripHtml("\t <div> Pre <span> Text\t </span>\t\t Post </div>\t");
    assertEquals(expected, actual);
}
}

0 comments on commit b5e0253

Please sign in to comment.