JuliaLang · StefanKarpinski · Oct 27, 2014 · Nov 11, 2014 · Nov 11, 2014 · Nov 11, 2014
diff --git a/base/boot.jl b/base/boot.jl
@@ -128,7 +128,7 @@ export
     Int32, Int64, Int128, Ptr, Real, Signed, UInt, UInt8, UInt16, UInt32,
     UInt64, UInt128, Unsigned,
     # string types
-    Char, ASCIIString, ByteString, DirectIndexString, AbstractString, UTF8String,
+    Char, ByteVec, ASCIIString, ByteString, DirectIndexString, AbstractString, UTF8String,
     # errors
     BoundsError, DivideError, DomainError, Exception,
     InexactError, InterruptException, MemoryError, OverflowError,
@@ -195,10 +195,20 @@ bitstype 128 UInt128 <: Unsigned
 
 if is(Int,Int64)
     typealias UInt UInt64
+    immutable ByteVec <: DenseArray{UInt8,1}  # byte vector packed into a single integer field
+        x::Int128  # twice the width of Int on this branch (Int is Int64)
+    end
 else
     typealias UInt UInt32
+    immutable ByteVec <: DenseArray{UInt8,1}  # byte vector packed into a single integer field
+        x::Int64  # twice the width of UInt on this branch (UInt is UInt32)
+    end
 end
 
+## ideally ByteVec would be declared as a bits type like this, but that doesn't work:
+#
+# bitstype 128 ByteVec <: DenseArray{UInt8,1}
+
 abstract Exception
 
 type BoundsError        <: Exception end

diff --git a/base/bytes.jl b/base/bytes.jl
@@ -0,0 +1,110 @@
+import Core.ByteVec
+export ByteVec, Str
+
+ByteVec(a::Vector{UInt8}) = ccall(:jl_bytevec, ByteVec, (Ptr{UInt8}, Csize_t), a, length(a))  # C side copies the bytes
+ByteVec(s::AbstractString) = ByteVec(bytestring(s).data)  # presumably dispatches to the Vector{UInt8} method
+
+size(b::ByteVec) = (length(b),)  # one-dimensional: a single extent equal to the byte count
+
+function length(b::ByteVec)  # number of bytes stored in b
+    here = (b.x >>> (8*(sizeof(b.x)-1))) % Int  # inline form: count read from the top byte of x
+    there = -(b.x >> (8*sizeof(Int))) % Int  # out-of-line form: upper word holds the negated length
+    ifelse(b.x < 0, there, here)  # a negative x marks the out-of-line form
+end
+
+getindex(b::ByteVec, i::Real) =  # element access; the index is converted to Int first
+    box(UInt8, bytevec_ref(unbox(typeof(b.x), b.x), unbox(Int, Int(i))))  # bytevec_ref is not defined in this diff
+getu32(b::ByteVec, i::Int) =  # presumably a 32-bit read starting at byte index i (not exported)
+    box(UInt32, bytevec_ref32(unbox(typeof(b.x), b.x), unbox(Int, i)))  # bytevec_ref32 is not defined in this diff
+
+function ==(a::ByteVec, b::ByteVec)  # byte-wise equality
+    a_hi = (a.x >> (8*sizeof(Int))) % Int  # upper word: negative (-length) when out-of-line
+    b_hi = (b.x >> (8*sizeof(Int))) % Int
+    (a_hi != b_hi) | (a_hi >= 0) | (b_hi >= 0) && return a.x == b.x  # an inline operand or unequal upper words: compare raw bits
+    pa = reinterpret(Ptr{UInt8}, a.x % UInt)  # lower word holds the data pointer
+    pb = reinterpret(Ptr{UInt8}, b.x % UInt)
+    ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), pa, pb, -a_hi % UInt) == 0  # lengths are equal here, so compare contents
+end
+
+function cmp(a::ByteVec, b::ByteVec)  # three-way lexicographic comparison of the bytes
+    a_x, b_x = a.x, b.x
+    a_here, b_here = a_x >= 0, b_x >= 0  # non-negative x means the bytes are stored inline
+    if !(a_here & b_here)
+        if b_here  # only a is out-of-line: load its leading bytes as one word
+            a_x = unsafe_load(reinterpret(Ptr{typeof(a_x)}, a_x % UInt))  # NOTE(review): last byte is data here but the length byte in b
+        elseif a_here  # only b is out-of-line
+            b_x = unsafe_load(reinterpret(Ptr{typeof(b_x)}, b_x % UInt))  # NOTE(review): same caveat with the roles swapped
+        else  # both out-of-line: memcmp the common prefix, then break ties by length
+            pa = reinterpret(Ptr{UInt8}, a_x % UInt)
+            pb = reinterpret(Ptr{UInt8}, b_x % UInt)
+            la = -(a_x >>> (8*sizeof(Int))) % UInt
+            lb = -(b_x >>> (8*sizeof(Int))) % UInt
+            c = Int(ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), pa, pb, min(la,lb)))
+            return ifelse(c == 0, cmp(la,lb), sign(c))
+        end
+    end
+    cmp(bswap(a_x) $ typemin(typeof(a_x)), bswap(b_x) $ typemin(typeof(b_x)))  # sign-bit flip: order bytes as unsigned, like memcmp
+end
+isless(x::ByteVec, y::ByteVec) = cmp(x, y) < 0  # ordering derived from cmp
+
+start(b::ByteVec) = 1  # iteration state is the 1-based index
+next(b::ByteVec, i::Int) = (b[i], i+1)  # current byte and the following index
+done(b::ByteVec, i::Int) = length(b) < i  # finished once the index passes the last byte
+
+## ByteVec-based string type ##
+
+immutable Str <: AbstractString  # string type wrapping a ByteVec
+    data::ByteVec  # the string's bytes; next(::Str, ...) decodes them as UTF-8
+end
+Str(s::AbstractString) = Str(ByteVec(s))  # convert any string via ByteVec(s)
+
+@inline function endof(s::Str)  # byte index at which the last character starts
+    n = length(s.data)
+    @inbounds u = getu32(s.data, n-3)  # presumably the last four bytes; NOTE(review): index is < 1 when n < 4 — confirm getu32 copes
+    x = (u & 0xc0c0c0c0) $ 0x80808080  # a byte becomes zero here exactly when its top two bits are 10 (continuation)
+    n - leading_zeros(x) >>> 3  # step back one index per zeroed leading byte of x
+end
+
+function next(s::Str, k::Int)  # decode the character starting at byte index k; returns (char, next index)
+    a = bswap(getu32(s.data, k))  # word read at k, swapped so the first byte is taken from the top (see a >> 24)
+    0 <= reinterpret(Int32, a) && return Char(a >> 24), k + 1  # top bit clear: single-byte character
+    l = leading_ones(a)  # count of leading 1 bits, used as the sequence length
+    b = (a << l >> l) $ 0x808080  # clear the leading ones; flip the top bit of each of the three lower bytes
+    r = 32 - 8l  # bit width of the bytes beyond the l-byte sequence
+    c = b >> r  # keep only the l bytes of the sequence
+    t = (l != 1) & (l <= 4) & ((b & 0xc0c0c0) >> r == 0)  # acceptable length and every following byte had top bits 10
+    d = ( (c >> 24)         << 18) |  # assemble the code point from the per-byte payloads
+        (((c >> 16) & 0xff) << 12) |
+        (((c >>  8) & 0xff) <<  6) | (c & 0xff)
+    ifelse(t, Char(d), '\ufffd'), k + ifelse(t, l, 1)  # on failure yield U+FFFD and advance one byte
+end
+
+const mask = div(typemax(UInt),typemax(UInt8))  # 0x01 repeated in every byte of a word
+
+function length(s::Str)  # number of characters: byte count minus UTF-8 continuation bytes
+    x = s.data.x
+    if 0 <= x  # inline storage
+        lo, hi = x % UInt, (x >>> (8*sizeof(UInt))) % UInt
+        n = (hi >>> (8*(sizeof(UInt)-1))) % Int  # byte count from the top byte
+        n -= count_ones((mask & (lo >>> 7)) & ~(mask & (lo >>> 6)))  # bytes with bit 7 set and bit 6 clear
+        n -= count_ones((mask & (hi >>> 7)) & ~(mask & (hi >>> 6)))
+        return n
+    else  # out-of-line storage: walk the bytes behind the pointer
+        p = reinterpret(Ptr{UInt8}, x % UInt)
+        n = -(x >> (8*sizeof(Int))) % Int
+        for i = 1:n  # the range is fixed on entry, so updating n below does not shorten the loop
+            b = unsafe_load(p, i)
+            n -= (b & 0xc0) == 0x80
+        end
+        return n
+    end
+end
+
+## overload methods for efficiency ##
+
+sizeof(s::Str) = length(s.data)  # size in bytes, not characters
+
+    ==(s::Str, t::Str) =     ==(s.data, t.data)  # comparisons delegate to the ByteVec methods
+isless(s::Str, t::Str) = isless(s.data, t.data)
+   cmp(s::Str, t::Str) =    cmp(s.data, t.data)
+
diff --git a/base/exports.jl b/base/exports.jl
@@ -30,6 +30,7 @@ export
     BitArray,
     BitMatrix,
     BitVector,
+    ByteVec,
     CFILE,
     Cmd,
     Colon,

diff --git a/base/operators.jl b/base/operators.jl
@@ -111,9 +111,9 @@ const .≠ = .!=
 <<(x,y::Int32)    = no_op_err("<<", typeof(x))
 >>(x,y::Int32)    = no_op_err(">>", typeof(x))
 >>>(x,y::Int32)   = no_op_err(">>>", typeof(x))
-<<(x,y::Integer)  = x << convert(Int32,y)
->>(x,y::Integer)  = x >> convert(Int32,y)
->>>(x,y::Integer) = x >>> convert(Int32,y)
+<<(x,y::Integer)  = x << (y % Int32)  # the count is reduced with `% Int32` rather than convert
+>>(x,y::Integer)  = x >> (y % Int32)  # NOTE(review): counts outside the Int32 range wrap silently — confirm intended
+>>>(x,y::Integer) = x >>> (y % Int32)
 
 # fallback div, fld, and cld implementations
 # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,

diff --git a/base/sysimg.jl b/base/sysimg.jl
@@ -80,6 +80,7 @@ const DL_LOAD_PATH = ByteString[]
 
 # strings & printing
 include("char.jl")
+include("bytes.jl")
 include("ascii.jl")
 include("utf8.jl")
 include("utf16.jl")

diff --git a/base/utf8.jl b/base/utf8.jl
@@ -24,7 +24,7 @@ const utf8_trailing = [
     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5,
 ]
 
-is_utf8_start(byte::UInt8) = ((byte&0xc0)!=0x80)
+is_utf8_start(byte::Integer) = (byte & 0xc0) != 0x80  # true unless the top two bits are 10 (continuation byte)
 
 ## required core functionality ##
 
@@ -39,44 +39,19 @@ function endof(s::UTF8String)
 end
 length(s::UTF8String) = int(ccall(:u8_strlen, Csize_t, (Ptr{UInt8},), s.data))
 
-function next(s::UTF8String, i::Int)
-    # potentially faster version
-    # d = s.data
-    # a::UInt32 = d[i]
-    # if a < 0x80; return char(a); end
-    # #if a&0xc0==0x80; return '\ufffd'; end
-    # b::UInt32 = a<<6 + d[i+1]
-    # if a < 0xe0; return char(b - 0x00003080); end
-    # c::UInt32 = b<<6 + d[i+2]
-    # if a < 0xf0; return char(c - 0x000e2080); end
-    # return char(c<<6 + d[i+3] - 0x03c82080)
-
-    d = s.data
-    b = d[i]
-    if !is_utf8_start(b)
-        j = i-1
-        while 0 < j && !is_utf8_start(d[j])
-            j -= 1
-        end
-        if 0 < j && i <= j+utf8_trailing[d[j]+1] <= length(d)
-            # b is a continuation byte of a valid UTF-8 character
-            error("invalid UTF-8 character index")
-        end
-        # move past 1 byte in case the data is actually Latin-1
-        return '\ufffd', i+1
-    end
-    trailing = utf8_trailing[b+1]
-    if length(d) < i + trailing
-        return '\ufffd', i+1
-    end
-    c::UInt32 = 0
-    for j = 1:trailing+1
-        c <<= 6
-        c += d[i]
-        i += 1
-    end
-    c -= utf8_offset[trailing+1]
-    char(c), i
+function next(s::UTF8String, k::Int)  # decode the character starting at byte index k; returns (char, next index)
+    p = convert(Ptr{UInt32}, pointer(s.data) + k - 1)  # NOTE(review): k is unchecked and the load spans four bytes — confirm reads near the end are safe
+    a = bswap(unsafe_load(p))  # presumably puts the first byte in the most significant position (little-endian host)
+    l = leading_ones(a)
+    n = l + (~a >> 31)  # sequence length: l, plus 1 when the top bit of a is clear
+    b = (a << n >> n) $ 0x808080  # clear the top n bits; flip the top bit of each of the three lower bytes
+    r = 32 - 8n
+    c = b >> r  # keep only the n bytes of the sequence
+    t = (l != 1) & (l <= 4) & ((b & 0xc0c0c0) >> r == 0)  # acceptable lead byte and every following byte had top bits 10
+    d = ( (c >> 24)         << 18) |  # assemble the code point from the per-byte payloads
+        (((c >> 16) & 0xff) << 12) |
+        (((c >>  8) & 0xff) <<  6) | (c & 0xff)
+    ifelse(t, Char(d), '\ufffd'), k + ifelse(t, n, 1)  # on failure yield U+FFFD and advance one byte
 end
 
 function first_utf8_byte(ch::Char)

diff --git a/src/alloc.c b/src/alloc.c
@@ -396,6 +396,21 @@ jl_tuple_t *jl_tuple_fill(size_t n, jl_value_t *v)
     return tup;
 }
 
+DLLEXPORT jl_bytevec_struct_t jl_bytevec(const uint8_t *data, size_t n) // build a byte vector from n bytes at data
+{
+    jl_bytevec_struct_t b;
+    if (n < 2*sizeof(void*)) { // short enough to store inline
+        memcpy(b.here.data, data, n);
+        memset(b.here.data + n, 0, (2*sizeof(void*)-1) - n); // zero the unused inline bytes
+        b.here.length = n;
+    } else { // otherwise copy into a separately allocated buffer
+        b.there.data = allocb(n);
+        memcpy(b.there.data, data, n);
+        b.there.neglen = -n; // length stored negated; the GC change in this diff tests neglen < 0
+    }
+    return b;
+}
+
 DLLEXPORT jl_function_t *jl_new_closure(jl_fptr_t fptr, jl_value_t *env,
                                         jl_lambda_info_t *linfo)
 {

diff --git a/src/builtins.c b/src/builtins.c
@@ -1016,9 +1016,9 @@ void jl_init_primitives(void)
     add_builtin_func("tupleref",  jl_f_tupleref);
     add_builtin_func("tuplelen",  jl_f_tuplelen);
     add_builtin_func("getfield",  jl_f_get_field);
-    add_builtin_func("setfield!",  jl_f_set_field);
+    add_builtin_func("setfield!", jl_f_set_field);
     add_builtin_func("fieldtype", jl_f_field_type);
-    add_builtin_func("_expr", jl_f_new_expr);
+    add_builtin_func("_expr",     jl_f_new_expr);
 
     add_builtin_func("arraylen", jl_f_arraylen);
     add_builtin_func("arrayref", jl_f_arrayref);

diff --git a/src/codegen.cpp b/src/codegen.cpp
@@ -215,27 +215,32 @@ static Type *jl_ppvalue_llvmt;
 static Type* jl_parray_llvmt;
 static FunctionType *jl_func_sig;
 static Type *jl_pfptr_llvmt;
-static Type *T_int1;
-static Type *T_int8;
+
+static IntegerType *T_int1;
+static IntegerType *T_int8;
+static IntegerType *T_uint8;
+static IntegerType *T_int16;
+static IntegerType *T_uint16;
+static IntegerType *T_int32;
+static IntegerType *T_uint32;
+static IntegerType *T_int64;
+static IntegerType *T_uint64;
+static IntegerType *T_char;
+static IntegerType *T_size;
+
 static Type *T_pint8;
-static Type *T_uint8;
-static Type *T_int16;
 static Type *T_pint16;
-static Type *T_uint16;
-static Type *T_int32;
 static Type *T_pint32;
-static Type *T_uint32;
-static Type *T_int64;
 static Type *T_pint64;
-static Type *T_uint64;
-static Type *T_char;
-static Type *T_size;
 static Type *T_psize;
+
 static Type *T_float32;
 static Type *T_pfloat32;
 static Type *T_float64;
 static Type *T_pfloat64;
 static Type *T_void;
+static Type *T_vec_2word_ints;
+static Type *T_vec_2word_bytes;
 
 // type-based alias analysis nodes.  Indentation of comments indicates hierarchy.
 static MDNode* tbaa_user;           // User data
@@ -4228,6 +4233,9 @@ static void init_julia_llvm_env(Module *m)
     T_float64 = Type::getDoubleTy(getGlobalContext());
     T_pfloat64 = PointerType::get(T_float64, 0);
     T_void = Type::getVoidTy(jl_LLVMContext);
+    // vector types for byte vector code generation
+    T_vec_2word_ints = VectorType::get(T_size, 2);
+    T_vec_2word_bytes = VectorType::get(T_int8, 2*sizeof(void*));
 
     // This type is used to create undef Values which carry
     // metadata.
@@ -4655,6 +4663,7 @@ static void init_julia_llvm_env(Module *m)
     // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
     FPM->add(createLowerSimdLoopPass());        // Annotate loop marked with "simdloop" as LLVM parallel loop
     FPM->add(createLICMPass());                 // Hoist loop invariants
+    FPM->add(createEarlyCSEPass());             // Common subexpression elimination
     FPM->add(createLoopUnswitchPass());         // Unswitch loops.
     // Subsequent passes not stripping metadata from terminator
 #ifndef INSTCOMBINE_BUG

diff --git a/src/gc.c b/src/gc.c
@@ -764,6 +764,11 @@ static void push_root(jl_value_t *v, int d)
                 gc_push_root(elt, d);
         }
     }
+    else if (vt == (jl_value_t*)jl_bytevec_type) {
+        jl_bytevec_struct_t b = *(jl_bytevec_struct_t*)jl_data_ptr(v);
+        if (b.there.neglen < 0)
+            gc_setmark_buf(b.there.data);
+    }
     else if (((jl_datatype_t*)(vt))->name == jl_array_typename) {
         jl_array_t *a = (jl_array_t*)v;
         if (a->how == 3) {

diff --git a/src/init.c b/src/init.c
@@ -1102,6 +1102,8 @@ void jl_get_builtin_hooks(void)
     jl_bounds_exception    = jl_new_struct((jl_datatype_t*)core("BoundsError"));
     jl_memory_exception    = jl_new_struct((jl_datatype_t*)core("MemoryError"));
 
+    jl_bytevec_type = (jl_datatype_t*)core("ByteVec");
+    jl_bytevec_type->pointerfree = 0;
     jl_ascii_string_type = (jl_datatype_t*)core("ASCIIString");
     jl_utf8_string_type = (jl_datatype_t*)core("UTF8String");
     jl_symbolnode_type = (jl_datatype_t*)core("SymbolNode");