Skip to content

Commit

Permalink
document: text encoding api
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelangel-dev committed Jan 21, 2025
1 parent e7d18e9 commit 71d4408
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 7 deletions.
6 changes: 3 additions & 3 deletions Sources/Automerge/Document.swift
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,16 @@ public final class Document: @unchecked Sendable {
/// Retrieve the current text encoding used by the document.
public var textEncoding: TextEncoding {
lock {
self.doc.wrapErrors { $0.textEncoding() }
self.doc.wrapErrors { $0.textEncoding().textEncoding }
}
}

/// Creates a new, empty Automerge document.
/// - Parameters:
/// - textEncoding: The encoding type for text within the document. Defaults to `.unicodeScalar`.
/// - logLevel: The level at which to generate logs into unified logging from actions within this document.
public init(textEncoding: TextEncoding = .unicodeCodePoint, logLevel: LogVerbosity = .errorOnly) {
doc = WrappedDoc(Doc.newWithTextEncoding(textEncoding: textEncoding))
public init(textEncoding: TextEncoding = .unicodeScalar, logLevel: LogVerbosity = .errorOnly) {
doc = WrappedDoc(Doc.newWithTextEncoding(textEncoding: textEncoding.ffi_textEncoding))
self.reportingLogLevel = logLevel
}

Expand Down
52 changes: 52 additions & 0 deletions Sources/Automerge/TextEncoding.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import AutomergeUniffi
import Foundation

/// An enumeration representing different types of text encoding.
///
/// Text encodings determine how text is processed across Automerge APIs.
///
/// The type is a payload-free enum, so it is a plain value that is safe to pass across
/// concurrency domains; the `Sendable` conformance is declared explicitly because public
/// types do not receive it implicitly.
public enum TextEncoding: Sendable {
    /// Text encoding using Grapheme Cluster.
    /// Grapheme clusters represent user-perceived characters, which may consist of multiple Unicode scalars.
    /// For example:
    /// - "é" (a Latin small letter "e" followed by a combining acute accent) is a single grapheme cluster.
    /// - Emoji with modifiers, like "👨‍👩‍👧‍👦" (family emoji), are grapheme clusters combining multiple scalars.
    case graphemeCluster

    /// Text encoding using Unicode scalar values.
    /// Unicode scalars are the fundamental building blocks of Unicode text, but they are not directly stored as-is.
    /// Instead, they are encoded into binary formats like UTF-8 or UTF-16 for persistence.
    case unicodeScalar

    /// Text encoding using UTF-8.
    /// A variable-width encoding representing characters in 1–4 bytes.
    case utf8

    /// Text encoding using UTF-16.
    /// A variable-width encoding using one or two 16-bit code units.
    case utf16
}

// MARK: - Adapters
/// Module-internal alias for the `TextEncoding` type declared in the `AutomergeUniffi` module,
/// so it can be referred to without clashing with this module's own public `TextEncoding`.
typealias FfiTextEncoding = AutomergeUniffi.TextEncoding

extension FfiTextEncoding {
    /// The public `TextEncoding` case that corresponds to this FFI-level encoding.
    ///
    /// The switch is exhaustive on purpose: a new FFI case must be mapped here
    /// before the module compiles again.
    var textEncoding: TextEncoding {
        let mapped: TextEncoding
        switch self {
        case .graphemeCluster:
            mapped = .graphemeCluster
        case .unicodeCodePoint:
            mapped = .unicodeScalar
        case .utf16CodeUnit:
            mapped = .utf16
        case .utf8CodeUnit:
            mapped = .utf8
        }
        return mapped
    }
}

extension TextEncoding {
    /// The FFI-level encoding value that corresponds to this public `TextEncoding` case.
    ///
    /// This is the inverse of `FfiTextEncoding.textEncoding`; the switch is exhaustive so
    /// every public case is required to have an FFI counterpart.
    var ffi_textEncoding: FfiTextEncoding {
        let mapped: FfiTextEncoding
        switch self {
        case .graphemeCluster:
            mapped = .graphemeCluster
        case .unicodeScalar:
            mapped = .unicodeCodePoint
        case .utf16:
            mapped = .utf16CodeUnit
        case .utf8:
            mapped = .utf8CodeUnit
        }
        return mapped
    }
}
6 changes: 3 additions & 3 deletions Tests/AutomergeTests/DocTests/AutomergeDocTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ final class AutomergeDocTests: XCTestCase {
}

func testDocumentTextEncodings_UTF8() throws {
let doc = Document(textEncoding: .utf8CodeUnit)
let doc = Document(textEncoding: .utf8)
let textId = try! doc.putObject(obj: ObjId.ROOT, key: "text", ty: .Text)

try doc.spliceText(obj: textId, start: 0, delete: 0, value: "init: ")
Expand All @@ -307,7 +307,7 @@ final class AutomergeDocTests: XCTestCase {
}

func testDocumentTextEncodings_UTF16() throws {
let doc = Document(textEncoding: .utf16CodeUnit)
let doc = Document(textEncoding: .utf16)
let textId = try! doc.putObject(obj: ObjId.ROOT, key: "text", ty: .Text)
try doc.spliceText(obj: textId, start: 0, delete: 0, value: "init: ")
try doc.spliceText(obj: textId, start: 6, delete: 0, value: "πŸ§‘β€πŸ§‘β€πŸ§’β€πŸ§’")
Expand All @@ -318,7 +318,7 @@ final class AutomergeDocTests: XCTestCase {
}

func testDocumentTextEncodings_UnicodeScalars() throws {
let doc = Document(textEncoding: .unicodeCodePoint)
let doc = Document(textEncoding: .unicodeScalar)
let textId = try! doc.putObject(obj: ObjId.ROOT, key: "text", ty: .Text)
try doc.spliceText(obj: textId, start: 0, delete: 0, value: "init: ")
try doc.spliceText(obj: textId, start: 6, delete: 0, value: "πŸ§‘β€πŸ§‘β€πŸ§’β€πŸ§’")
Expand Down
2 changes: 1 addition & 1 deletion Tests/AutomergeTests/TestText.swift
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class TextTestCase: XCTestCase {
}

func testTextCursorDocumentBoundaries() throws {
let doc = Document(textEncoding: .utf16CodeUnit)
let doc = Document(textEncoding: .utf16)
let content = "Hello family: πŸ§‘β€πŸ§‘β€πŸ§’β€πŸ§’"
let text = try doc.putObject(obj: ObjId.ROOT, key: "text", ty: .Text)
try doc.spliceText(obj: text, start: 0, delete: 0, value: content)
Expand Down

0 comments on commit 71d4408

Please sign in to comment.