Skip to content

Commit

Permalink
Sanitize URLs in file fields to handle invalid pipe characters ('|') C…
Browse files Browse the repository at this point in the history
…loses JabRef#11876.

- Introduced URLUtil.createUri() and URLUtil.create() to handle URL sanitization.
- Replaced direct calls to URI.create() and URI.create().toURL() with the new utility methods.
- URLs containing the pipe character ('|') are now properly encoded as '%7C' to prevent parsing errors.
- Added test cases to URLUtilTest to verify correct sanitization and URL creation.
  • Loading branch information
hitalo-siriano committed Nov 5, 2024
1 parent 4ec8bed commit 63f9562
Show file tree
Hide file tree
Showing 54 changed files with 210 additions and 173 deletions.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.jabref.gui.entryeditor.citationrelationtab.semanticscholar;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.List;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImporterPreferences;
import org.jabref.logic.importer.fetcher.CustomizableKeyFetcher;
Expand Down Expand Up @@ -38,7 +38,7 @@ public List<BibEntry> searchCitedBy(BibEntry entry) throws FetcherException {

URL citationsUrl;
try {
citationsUrl = URI.create(getAPIUrl("citations", entry)).toURL();
citationsUrl = URLUtil.create(getAPIUrl("citations", entry));
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand All @@ -62,7 +62,7 @@ public List<BibEntry> searchCiting(BibEntry entry) throws FetcherException {

URL referencesUrl;
try {
referencesUrl = URI.create(getAPIUrl("references", entry)).toURL();
referencesUrl = URLUtil.create(getAPIUrl("references", entry));
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -242,7 +241,7 @@ private List<LinkedFileViewModel> findAssociatedNotLinkedFiles(BibEntry entry) {

public boolean downloadFile(String urlText) {
try {
URL url = URI.create(urlText).toURL();
URL url = URLUtil.create(urlText);
addFromURLAndDownload(url);
return true;
} catch (MalformedURLException exception) {
Expand Down
34 changes: 26 additions & 8 deletions src/main/java/org/jabref/gui/fieldeditors/URLUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
Expand All @@ -24,8 +25,8 @@ private URLUtil() {
* Cleans URLs returned by Google search.
*
* <example>
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g.
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g.
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg
* </example>
*
* @param url the Google search URL string
Expand All @@ -39,7 +40,7 @@ public static String cleanGoogleSearchURL(String url) {
}
// Extract destination URL
try {
URL searchURL = URI.create(url).toURL();
URL searchURL = URLUtil.create(url);
// URL parameters
String query = searchURL.getQuery();
// no parameters
Expand All @@ -62,7 +63,8 @@ public static String cleanGoogleSearchURL(String url) {
}
}
return url;
} catch (MalformedURLException e) {
} catch (
MalformedURLException e) {
return url;
}
}
Expand All @@ -77,9 +79,11 @@ public static String cleanGoogleSearchURL(String url) {
*/
public static boolean isURL(String url) {
try {
URI.create(url).toURL();
URLUtil.create(url);
return true;
} catch (MalformedURLException | IllegalArgumentException e) {
} catch (
MalformedURLException |
IllegalArgumentException e) {
return false;
}
}
Expand All @@ -96,11 +100,12 @@ public static Optional<String> getSuffix(final String link, ExternalApplications
String strippedLink = link;
try {
// Try to strip the query string, if any, to get the correct suffix:
URL url = URI.create(link).toURL();
URL url = URLUtil.create(link);
if ((url.getQuery() != null) && (url.getQuery().length() < (link.length() - 1))) {
strippedLink = link.substring(0, link.length() - url.getQuery().length() - 1);
}
} catch (MalformedURLException e) {
} catch (
MalformedURLException e) {
// Don't report this error, since this getting the suffix is a non-critical
// operation, and this error will be triggered and reported elsewhere.
}
Expand Down Expand Up @@ -138,4 +143,17 @@ public static Optional<String> getSuffix(final String link, ExternalApplications
return Optional.ofNullable(suffix);
}
}

public static URL create(String url) throws MalformedURLException {
return URLUtil.createUri(url).toURL();
}

public static URI createUri(String url) {
try {
String urlFormat = url.replace("|", "%7C");
return new URI(urlFormat);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.gui.linkedfile;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.Optional;

Expand All @@ -11,6 +10,7 @@
import org.jabref.gui.actions.ActionHelper;
import org.jabref.gui.actions.SimpleCommand;
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.gui.preferences.GuiPreferences;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.TaskExecutor;
Expand Down Expand Up @@ -61,7 +61,7 @@ public void execute() {
}

try {
URL url = URI.create(urlforDownload.get()).toURL();
URL url = URLUtil.create(urlforDownload.get());
LinkedFileViewModel onlineFile = new LinkedFileViewModel(
new LinkedFile(url, ""),
entry,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.gui.externalfiletype.ExternalFileType;
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.gui.externalfiletype.UnknownExternalFileType;
import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.gui.frame.ExternalApplicationsPreferences;
import org.jabref.gui.util.FileDialogConfiguration;
import org.jabref.logic.FilePreferences;
Expand Down Expand Up @@ -139,7 +140,7 @@ public LinkedFile getNewLinkedFile() {

if (LinkedFile.isOnlineLink(link.getValue())) {
try {
return new LinkedFile(description.getValue(), URI.create(link.getValue()).toURL(), fileType, sourceUrl.getValue());
return new LinkedFile(description.getValue(), URLUtil.create(link.getValue()), fileType, sourceUrl.getValue());
} catch (MalformedURLException e) {
return new LinkedFile(description.getValue(), link.getValue(), fileType, sourceUrl.getValue());
}
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/gui/theme/StyleSheetFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
Expand All @@ -11,6 +10,8 @@
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;

import org.jabref.gui.fieldeditors.URLUtil;

import com.google.common.base.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -54,7 +55,7 @@ final class StyleSheetFile extends StyleSheet {

StyleSheetFile(URL url) {
this.url = url;
this.path = Path.of(URI.create(url.toExternalForm()));
this.path = Path.of(URLUtil.createUri(url.toExternalForm()));
reload();
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/importer/fetcher/ACS.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand Down Expand Up @@ -45,7 +45,7 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {

if (link != null) {
LOGGER.info("Fulltext PDF found @ ACS.");
return Optional.of(URI.create(source.replaceFirst("/abs/", "/pdf/")).toURL());
return Optional.of(URLUtil.create(source.replaceFirst("/abs/", "/pdf/")));
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand Down Expand Up @@ -48,7 +48,7 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
if (code == 200) {
LOGGER.info("Fulltext PDF found @ APS.");
try {
return Optional.of(URI.create(pdfRequestUrl).toURL());
return Optional.of(URLUtil.create(pdfRequestUrl));
} catch (MalformedURLException e) {
LOGGER.warn("APS returned malformed URL, cannot find PDF.");
}
Expand Down Expand Up @@ -77,7 +77,7 @@ private Optional<String> getId(String doi) {

URLConnection con;
try {
con = URI.create(doiRequest).toURL().openConnection();
con = URLUtil.create(doiRequest).openConnection();
con.connect();
con.getInputStream();
String[] urlParts = con.getURL().toString().split("abstract/");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
Expand All @@ -24,6 +23,7 @@
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.cleanup.EprintCleanup;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand Down Expand Up @@ -710,7 +710,7 @@ public ArXivEntry(Node item) {
if (linkTitle.equals(Optional.of("pdf"))) {
pdfUrlParsed = XMLUtil.getAttributeContent(linkNode, "href").map(url -> {
try {
return URI.create(url).toURL();
return URLUtil.create(url);
} catch (MalformedURLException e) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
Expand Down Expand Up @@ -37,7 +37,7 @@ public static Optional<BibEntry> getEntry(String entryUrl, ImportFormatPreferenc
String cleanURL = entryUrl.replace("%", "%25").replace(":", "%3A").replace("/", "%2F").replace("?", "%3F")
.replace("&", "%26").replace("=", "%3D");

URL url = URI.create(BibsonomyScraper.BIBSONOMY_SCRAPER + cleanURL + BibsonomyScraper.BIBSONOMY_SCRAPER_POST).toURL();
URL url = URLUtil.create(BibsonomyScraper.BIBSONOMY_SCRAPER + cleanURL + BibsonomyScraper.BIBSONOMY_SCRAPER_POST);
String bibtex = new URLDownload(url).asString();
return BibtexParser.singleFromString(bibtex, importFormatPreferences);
} catch (IOException | FetcherException ex) {
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.http.dto.SimpleHttpResponse;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand Down Expand Up @@ -103,13 +103,13 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherExc
Optional<String> id = entry.getField(StandardField.DOI);
if (id.isPresent()) {
String source = PDF_URL.formatted(id.get());
return Optional.of(URI.create(source).toURL());
return Optional.of(URLUtil.create(source));
}

// if using id fails, we can try the source URL
Optional<String> urlString = entry.getField(StandardField.URL);
if (urlString.isPresent()) {
return Optional.of(URI.create(urlString.get()).toURL());
return Optional.of(URLUtil.create(urlString.get()));
}

return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.Collections;
Expand All @@ -13,6 +12,7 @@
import java.util.concurrent.CompletionException;
import java.util.regex.Pattern;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.cleanup.FieldFormatterCleanup;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
Expand Down Expand Up @@ -122,7 +122,7 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc

URL doiURL;
try {
doiURL = URI.create(doi.get().getURIAsASCIIString()).toURL();
doiURL = URLUtil.create(doi.get().getURIAsASCIIString());
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand Down Expand Up @@ -219,7 +219,7 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
public Optional<String> getAgency(DOI doi) throws FetcherException, MalformedURLException {
Optional<String> agency = Optional.empty();
try {
URLDownload download = getUrlDownload(URI.create(DOI.AGENCY_RESOLVER + "/" + doi.getDOI()).toURL());
URLDownload download = getUrlDownload(URLUtil.create(DOI.AGENCY_RESOLVER + "/" + doi.getDOI()));
JSONObject response = new JSONArray(download.asString()).getJSONObject(0);
if (response != null) {
agency = Optional.ofNullable(response.optString("RA"));
Expand Down
Loading

0 comments on commit 63f9562

Please sign in to comment.