Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/1260 Detecting and Cleaning Galleries #8119

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import WordPressShared
/// HTML tags that require special handling before the text is shown in a UITextView.
///
class WPRichTextFormatter {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace)

typealias ParsedSource = (parsedString: String, attachments: [WPTextAttachment])
static let blockquoteIdentifier = "WPBLOCKQUOTEIDENTIFIER"
let blockquoteIndentation = CGFloat(20.0)
Expand Down Expand Up @@ -95,7 +95,7 @@ class WPRichTextFormatter {

let str = attrString.string
let regex = try! NSRegularExpression(pattern: HRTagProcessor.horizontalRuleIdentifier, options: .caseInsensitive)
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.characters.count))
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.count))

for match: NSTextCheckingResult in matches.reversed() {
let range = match.range
Expand Down Expand Up @@ -140,7 +140,7 @@ class WPRichTextFormatter {

let str = attrString.string
let regex = try! NSRegularExpression(pattern: type(of: self).blockquoteIdentifier, options: .caseInsensitive)
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.characters.count))
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.count))

for match: NSTextCheckingResult in matches.reversed() {

Expand Down Expand Up @@ -180,7 +180,7 @@ class WPRichTextFormatter {
func processAndExtractTags(_ string: String) -> ParsedSource {
var attachments = [WPTextAttachment]()

guard string.characters.count > 0 else {
guard string.count > 0 else {
return (string, attachments)
}

Expand Down Expand Up @@ -291,7 +291,7 @@ class HtmlTagProcessor {
parsedString += endTag

// Advance the scanner to account for the closing tag.
scanner.scanLocation += endTag.characters.count
scanner.scanLocation += endTag.count
}

return (success, parsedString)
Expand Down Expand Up @@ -332,12 +332,12 @@ class BlockquoteTagProcessor: HtmlTagProcessor {
if !matched {
return (parsedString, nil)
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace)

// If the blockquote contains no paragraphs just insert the marker after
// the tag.
if !parsedString.contains("<p>") {
var str = parsedString as NSString
let location = "<\(tagName)>".characters.count
let location = "<\(tagName)>".count
str = str.replacingCharacters(in: NSRange(location: location, length: 0), with: WPRichTextFormatter.blockquoteIdentifier) as NSString
parsedString = str as String
return (parsedString, nil)
Expand Down
140 changes: 119 additions & 21 deletions WordPressShared/WordPressShared/Core/Utility/RichContentFormatter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,16 @@ import Foundation

// Gallery Images
static let galleryImgTags = try! NSRegularExpression(pattern: "<img[^>]*data-orig-file[^>]*/>", options: .caseInsensitive)
static let galleryStartIdentifier = "WPGalleryStartIdentifier"
static let galleryEndIdentifier = "WPGalleryEndIdentifier"
static let galleryDivStart = try! NSRegularExpression(pattern: "<div[^>]*?\(galleryStartIdentifier)[^>]*?>", options: .caseInsensitive)
static let galleryDivEnd = try! NSRegularExpression(pattern: "</div[^>]*?\(galleryEndIdentifier)[^>]*?>", options: .caseInsensitive)

// Trailing BR Tags
static let trailingBRTags = try! NSRegularExpression(pattern: "(\\s*<br\\s*(/?)\\s*>\\s*)+$", options: .caseInsensitive)



Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe remove the newly added returns?

}


Expand All @@ -41,12 +48,13 @@ import Foundation
/// - Returns: The formatted string.
///
public class func formatContentString(_ string: String, isPrivateSite isPrivate: Bool) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}

var content = string
content = removeForbiddenTags(content)
content = normalizeGallery(content)
content = normalizeParagraphs(content)
content = removeInlineStyles(content)
content = (content as NSString).replacingHTMLEmoticonsWithEmoji() as String
Expand All @@ -64,24 +72,24 @@ import Foundation
/// - Returns: The formatted string.
///
public class func removeForbiddenTags(_ string: String) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}
var content = string

content = RegEx.styleTags.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: "")

content = RegEx.scriptTags.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: "")

content = RegEx.tableTags.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: "")

return content
Expand All @@ -96,7 +104,7 @@ import Foundation
/// - Returns: The formatted string.
///
public class func normalizeParagraphs(_ string: String) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}
var content = string
Expand All @@ -106,42 +114,132 @@ import Foundation
// Convert div tags to p tags
content = RegEx.divTagsStart.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: openPTag)

content = RegEx.divTagsEnd.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: closePTag)

// Remove duplicate/redundant p tags.
content = RegEx.pTagsStart.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: openPTag)

content = RegEx.pTagsEnd.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: closePTag)

content = filterNewLines(content)

return content
}


public class func normalizeGallery(_ string : String) -> String {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick, no space before :


guard string.count > 0 else {
return string
}

let iconGallery = "class='gallery"
let tiledGallery = "class=\"tiled-gallery"

let galleryStart = "<gallery>"
let galleryEnd = "</gallery>"

var content = string

content = scannerForGalleryBy(galleryType: iconGallery, inString: content)
content = scannerForGalleryBy(galleryType: tiledGallery, inString: content)
content = RegEx.galleryDivStart.stringByReplacingMatches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count), withTemplate: galleryStart)
content = RegEx.galleryDivEnd.stringByReplacingMatches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count), withTemplate: galleryEnd)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious about the use of regular expressions to handle string replacement rather than have it handled by the Scanner. Was there a particular reason that made regex a better choice?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thinking behind this was to the keep the logic a bit more simple. This function scanForGalleryBy(galleryType : String, inString : String) only scans and identifies where a gallery should start and end. Then clean it up with the RegEx, rather than using another Scanner. It could be done in one go, but this felt like a safe, reliable option to me.


return content
}

private class func scannerForGalleryBy(galleryType : String, inString : String) -> String {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method name implies a Scanner would be returned even though the return type is a String. Is there an alternative name that is more descriptive of this method's purpose?


let galleryScanner = Scanner(string: inString)
galleryScanner.charactersToBeSkipped = nil

let divStr = "div"

var str = ""
var tempStr: NSString? = ""

var divCounter = 0
var galleriesFound : Int = 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick, no space before :


while !galleryScanner.isAtEnd {


if galleriesFound <= 0 {
//check for gallery
galleryScanner.scanUpTo(galleryType, into: &tempStr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The if and else blocks share some similarity in the way the start. Maybe there is an opportunity for a small refactor to reuse some code?


if let tempStr = tempStr {
str += tempStr as String
}

tempStr = ""
if galleryScanner.isAtEnd {
break //nothing to see here
} else {
galleriesFound += 1
divCounter += 1
}

str += galleryType
str += RegEx.galleryStartIdentifier
galleryScanner.scanLocation += Int(galleryType.count)

} else {

//gallery found, proceed to find divs
galleryScanner.scanUpTo(divStr, into: &tempStr)

if let tempStr = tempStr {
str += tempStr as String
}

tempStr = ""
if galleryScanner.isAtEnd {
break //nothing to see here
}

if str.last == "/" {
divCounter -= 1
} else if str.last == "<" {
divCounter += 1
}

galleryScanner.scanLocation += 3
str += divStr

if divCounter == 0 {
galleriesFound -= 1
str += RegEx.galleryEndIdentifier
}
}
}

return str
}

public class func filterNewLines(_ string: String) -> String {
var content = string

var ranges = [NSRange]()
// We don't want to remove new lines from preformatted tag blocks,
// so get the ranges of such blocks.
let matches = RegEx.preTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.characters.count))
let matches = RegEx.preTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count))
if matches.count == 0 {

// No blocks found, so we'll parse the whole string.
ranges.append(NSRange(location: 0, length: content.characters.count))
ranges.append(NSRange(location: 0, length: content.count))

} else {

Expand All @@ -157,7 +255,7 @@ import Foundation
location = match.range.location + match.range.length
}

length = content.characters.count - location
length = content.count - location
ranges.append(NSRange(location: location, length: length))
}

Expand All @@ -182,14 +280,14 @@ import Foundation
/// - Returns: The formatted string.
///
public class func removeInlineStyles(_ string: String) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}
var content = string

content = RegEx.styleAttr.stringByReplacingMatches(in: content,
options: .reportCompletion,
range: NSRange(location: 0, length: content.characters.count),
range: NSRange(location: 0, length: content.count),
withTemplate: "")

return content
Expand All @@ -205,7 +303,7 @@ import Foundation
/// - Returns: The formatted string.
///
public class func resizeGalleryImageURL(_ string: String, isPrivateSite isPrivate: Bool) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}

Expand Down Expand Up @@ -241,7 +339,7 @@ import Foundation
mImageStr.replaceOccurrences(of: srcImgURLStr,
with: modifiedURL.absoluteString,
options: .literal,
range: NSRange(location: 0, length: imgElementStr.characters.count))
range: NSRange(location: 0, length: imgElementStr.count))

mContent.replaceCharacters(in: match.range, with: mImageStr as String)
}
Expand Down Expand Up @@ -283,13 +381,13 @@ import Foundation
/// - Returns: The formatted string.
///
public class func removeTrailingBreakTags(_ string: String) -> String {
guard string.characters.count > 0 else {
guard string.count > 0 else {
return string
}
var content = string.trim()
let matches = RegEx.trailingBRTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.characters.count))
let matches = RegEx.trailingBRTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count))
if let match = matches.first {
let index = content.characters.index(content.startIndex, offsetBy: match.range.location)
let index = content.index(content.startIndex, offsetBy: match.range.location)
content = content.substring(to: index)
}

Expand Down