-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/1260 Detecting and Cleaning Galleries #8119
Changes from 1 commit
9d73e2a
c7335aa
8ad922e
975fb9b
9f94aca
23b4c60
880fff1
5f28c6f
ef22dcf
0298a00
bdd8e66
6128bb3
88d5b26
d92c1a4
1affd4b
3225d36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ import WordPressShared | |
/// HTML tags that require special handling before the text is shown in a UITextView. | ||
/// | ||
class WPRichTextFormatter { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace) |
||
typealias ParsedSource = (parsedString: String, attachments: [WPTextAttachment]) | ||
static let blockquoteIdentifier = "WPBLOCKQUOTEIDENTIFIER" | ||
let blockquoteIndentation = CGFloat(20.0) | ||
|
@@ -95,7 +95,7 @@ class WPRichTextFormatter { | |
|
||
let str = attrString.string | ||
let regex = try! NSRegularExpression(pattern: HRTagProcessor.horizontalRuleIdentifier, options: .caseInsensitive) | ||
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.characters.count)) | ||
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.count)) | ||
|
||
for match: NSTextCheckingResult in matches.reversed() { | ||
let range = match.range | ||
|
@@ -140,7 +140,7 @@ class WPRichTextFormatter { | |
|
||
let str = attrString.string | ||
let regex = try! NSRegularExpression(pattern: type(of: self).blockquoteIdentifier, options: .caseInsensitive) | ||
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.characters.count)) | ||
let matches = regex.matches(in: str, options: NSRegularExpression.MatchingOptions.reportCompletion, range: NSMakeRange(0, str.count)) | ||
|
||
for match: NSTextCheckingResult in matches.reversed() { | ||
|
||
|
@@ -180,7 +180,7 @@ class WPRichTextFormatter { | |
func processAndExtractTags(_ string: String) -> ParsedSource { | ||
var attachments = [WPTextAttachment]() | ||
|
||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return (string, attachments) | ||
} | ||
|
||
|
@@ -291,7 +291,7 @@ class HtmlTagProcessor { | |
parsedString += endTag | ||
|
||
// Advance the scanner to account for the closing tag. | ||
scanner.scanLocation += endTag.characters.count | ||
scanner.scanLocation += endTag.count | ||
} | ||
|
||
return (success, parsedString) | ||
|
@@ -332,12 +332,12 @@ class BlockquoteTagProcessor: HtmlTagProcessor { | |
if !matched { | ||
return (parsedString, nil) | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace) |
||
// If the blockquote contains no paragraphs just insert the marker after | ||
// the tag. | ||
if !parsedString.contains("<p>") { | ||
var str = parsedString as NSString | ||
let location = "<\(tagName)>".characters.count | ||
let location = "<\(tagName)>".count | ||
str = str.replacingCharacters(in: NSRange(location: location, length: 0), with: WPRichTextFormatter.blockquoteIdentifier) as NSString | ||
parsedString = str as String | ||
return (parsedString, nil) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,16 @@ import Foundation | |
|
||
// Gallery Images | ||
static let galleryImgTags = try! NSRegularExpression(pattern: "<img[^>]*data-orig-file[^>]*/>", options: .caseInsensitive) | ||
static let galleryStartIdentifier = "WPGalleryStartIdentifier" | ||
static let galleryEndIdentifier = "WPGalleryEndIdentifier" | ||
static let galleryDivStart = try! NSRegularExpression(pattern: "<div[^>]*?\(galleryStartIdentifier)[^>]*?>", options: .caseInsensitive) | ||
static let galleryDivEnd = try! NSRegularExpression(pattern: "</div[^>]*?\(galleryEndIdentifier)[^>]*?>", options: .caseInsensitive) | ||
|
||
// Trailing BR Tags | ||
static let trailingBRTags = try! NSRegularExpression(pattern: "(\\s*<br\\s*(/?)\\s*>\\s*)+$", options: .caseInsensitive) | ||
|
||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe remove the newly added returns? |
||
} | ||
|
||
|
||
|
@@ -41,12 +48,13 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func formatContentString(_ string: String, isPrivateSite isPrivate: Bool) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
|
||
var content = string | ||
content = removeForbiddenTags(content) | ||
content = normalizeGallery(content) | ||
content = normalizeParagraphs(content) | ||
content = removeInlineStyles(content) | ||
content = (content as NSString).replacingHTMLEmoticonsWithEmoji() as String | ||
|
@@ -64,24 +72,24 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func removeForbiddenTags(_ string: String) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
var content = string | ||
|
||
content = RegEx.styleTags.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: "") | ||
|
||
content = RegEx.scriptTags.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: "") | ||
|
||
content = RegEx.tableTags.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: "") | ||
|
||
return content | ||
|
@@ -96,7 +104,7 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func normalizeParagraphs(_ string: String) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
var content = string | ||
|
@@ -106,42 +114,132 @@ import Foundation | |
// Convert div tags to p tags | ||
content = RegEx.divTagsStart.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: openPTag) | ||
|
||
content = RegEx.divTagsEnd.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: closePTag) | ||
|
||
// Remove duplicate/redundant p tags. | ||
content = RegEx.pTagsStart.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: openPTag) | ||
|
||
content = RegEx.pTagsEnd.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: closePTag) | ||
|
||
content = filterNewLines(content) | ||
|
||
return content | ||
} | ||
|
||
|
||
public class func normalizeGallery(_ string : String) -> String { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick, no space before |
||
|
||
guard string.count > 0 else { | ||
return string | ||
} | ||
|
||
let iconGallery = "class='gallery" | ||
let tiledGallery = "class=\"tiled-gallery" | ||
|
||
let galleryStart = "<gallery>" | ||
let galleryEnd = "</gallery>" | ||
|
||
var content = string | ||
|
||
content = scannerForGalleryBy(galleryType: iconGallery, inString: content) | ||
content = scannerForGalleryBy(galleryType: tiledGallery, inString: content) | ||
content = RegEx.galleryDivStart.stringByReplacingMatches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count), withTemplate: galleryStart) | ||
content = RegEx.galleryDivEnd.stringByReplacingMatches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count), withTemplate: galleryEnd) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious about the use of regular expressions to handle string replacement rather than have it handled by the Scanner. Was there a particular reason that made regex a better choice? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My thinking behind this was to the keep the logic a bit more simple. This function |
||
|
||
return content | ||
} | ||
|
||
private class func scannerForGalleryBy(galleryType : String, inString : String) -> String { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The method name implies a |
||
|
||
let galleryScanner = Scanner(string: inString) | ||
galleryScanner.charactersToBeSkipped = nil | ||
|
||
let divStr = "div" | ||
|
||
var str = "" | ||
var tempStr: NSString? = "" | ||
|
||
var divCounter = 0 | ||
var galleriesFound : Int = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick, no space before |
||
|
||
while !galleryScanner.isAtEnd { | ||
|
||
|
||
if galleriesFound <= 0 { | ||
//check for gallery | ||
galleryScanner.scanUpTo(galleryType, into: &tempStr) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
|
||
if let tempStr = tempStr { | ||
str += tempStr as String | ||
} | ||
|
||
tempStr = "" | ||
if galleryScanner.isAtEnd { | ||
break //nothing to see here | ||
} else { | ||
galleriesFound += 1 | ||
divCounter += 1 | ||
} | ||
|
||
str += galleryType | ||
str += RegEx.galleryStartIdentifier | ||
galleryScanner.scanLocation += Int(galleryType.count) | ||
|
||
} else { | ||
|
||
//gallery found, proceed to find divs | ||
galleryScanner.scanUpTo(divStr, into: &tempStr) | ||
|
||
if let tempStr = tempStr { | ||
str += tempStr as String | ||
} | ||
|
||
tempStr = "" | ||
if galleryScanner.isAtEnd { | ||
break //nothing to see here | ||
} | ||
|
||
if str.last == "/" { | ||
divCounter -= 1 | ||
} else if str.last == "<" { | ||
divCounter += 1 | ||
} | ||
|
||
galleryScanner.scanLocation += 3 | ||
str += divStr | ||
|
||
if divCounter == 0 { | ||
galleriesFound -= 1 | ||
str += RegEx.galleryEndIdentifier | ||
} | ||
} | ||
} | ||
|
||
return str | ||
} | ||
|
||
public class func filterNewLines(_ string: String) -> String { | ||
var content = string | ||
|
||
var ranges = [NSRange]() | ||
// We don't want to remove new lines from preformatted tag blocks, | ||
// so get the ranges of such blocks. | ||
let matches = RegEx.preTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.characters.count)) | ||
let matches = RegEx.preTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count)) | ||
if matches.count == 0 { | ||
|
||
// No blocks found, so we'll parse the whole string. | ||
ranges.append(NSRange(location: 0, length: content.characters.count)) | ||
ranges.append(NSRange(location: 0, length: content.count)) | ||
|
||
} else { | ||
|
||
|
@@ -157,7 +255,7 @@ import Foundation | |
location = match.range.location + match.range.length | ||
} | ||
|
||
length = content.characters.count - location | ||
length = content.count - location | ||
ranges.append(NSRange(location: location, length: length)) | ||
} | ||
|
||
|
@@ -182,14 +280,14 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func removeInlineStyles(_ string: String) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
var content = string | ||
|
||
content = RegEx.styleAttr.stringByReplacingMatches(in: content, | ||
options: .reportCompletion, | ||
range: NSRange(location: 0, length: content.characters.count), | ||
range: NSRange(location: 0, length: content.count), | ||
withTemplate: "") | ||
|
||
return content | ||
|
@@ -205,7 +303,7 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func resizeGalleryImageURL(_ string: String, isPrivateSite isPrivate: Bool) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
|
||
|
@@ -241,7 +339,7 @@ import Foundation | |
mImageStr.replaceOccurrences(of: srcImgURLStr, | ||
with: modifiedURL.absoluteString, | ||
options: .literal, | ||
range: NSRange(location: 0, length: imgElementStr.characters.count)) | ||
range: NSRange(location: 0, length: imgElementStr.count)) | ||
|
||
mContent.replaceCharacters(in: match.range, with: mImageStr as String) | ||
} | ||
|
@@ -283,13 +381,13 @@ import Foundation | |
/// - Returns: The formatted string. | ||
/// | ||
public class func removeTrailingBreakTags(_ string: String) -> String { | ||
guard string.characters.count > 0 else { | ||
guard string.count > 0 else { | ||
return string | ||
} | ||
var content = string.trim() | ||
let matches = RegEx.trailingBRTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.characters.count)) | ||
let matches = RegEx.trailingBRTags.matches(in: content, options: .reportCompletion, range: NSRange(location: 0, length: content.count)) | ||
if let match = matches.first { | ||
let index = content.characters.index(content.startIndex, offsetBy: match.range.location) | ||
let index = content.index(content.startIndex, offsetBy: match.range.location) | ||
content = content.substring(to: index) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Trailing Whitespace Violation: Lines should not have trailing whitespace. (trailing_whitespace)