diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java index 178cf11c..b22d4812 100644 --- a/java/src/json/ext/ByteListDirectOutputStream.java +++ b/java/src/json/ext/ByteListDirectOutputStream.java @@ -3,14 +3,72 @@ import org.jcodings.Encoding; import org.jruby.util.ByteList; -import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; + +public class ByteListDirectOutputStream extends OutputStream { + private byte[] buffer; + private int length; -public class ByteListDirectOutputStream extends ByteArrayOutputStream { ByteListDirectOutputStream(int size) { - super(size); + buffer = new byte[size]; } public ByteList toByteListDirect(Encoding encoding) { - return new ByteList(buf, 0, count, encoding, false); + return new ByteList(buffer, 0, length, encoding, false); + } + + @Override + public void write(int b) throws IOException { + int currentLength = this.length; + int newLength = currentLength + 1; + byte[] buffer = ensureBuffer(this, newLength); + buffer[currentLength] = (byte) b; + this.length = newLength; + } + + @Override + public void write(byte[] bytes, int start, int length) throws IOException { + int currentLength = this.length; + int newLength = currentLength + length; + byte[] buffer = ensureBuffer(this, newLength); + System.arraycopy(bytes, start, buffer, currentLength, length); + this.length = newLength; + } + + @Override + public void write(byte[] bytes) throws IOException { + int myLength = this.length; + int moreLength = bytes.length; + int newLength = myLength + moreLength; + byte[] buffer = ensureBuffer(this, newLength); + System.arraycopy(bytes, 0, buffer, myLength, moreLength); + this.length = newLength; + } + + private static byte[] ensureBuffer(ByteListDirectOutputStream self, int minimumLength) { + byte[] buffer = self.buffer; + int myCapacity = buffer.length; + int diff = minimumLength - myCapacity; + if (diff > 0) { + buffer = self.buffer = grow(buffer, myCapacity, diff); + } + + return buffer; + } + + private static byte[] grow(byte[] oldBuffer, int myCapacity, int diff) { + // grow to double current buffer length or capacity + diff, whichever is greater + int newLength = myCapacity + Math.max(myCapacity, diff); + // check overflow + if (newLength < 0) { + // try just diff length in case it can fit + newLength = myCapacity + diff; + if (newLength < 0) { + throw new ArrayIndexOutOfBoundsException("cannot allocate array of size " + myCapacity + "+" + diff); + } + } + return Arrays.copyOf(oldBuffer, newLength); } } diff --git a/java/src/json/ext/ByteListTranscoder.java b/java/src/json/ext/ByteListTranscoder.java index 78d8037c..7ee9de34 100644 --- a/java/src/json/ext/ByteListTranscoder.java +++ b/java/src/json/ext/ByteListTranscoder.java @@ -143,9 +143,11 @@ protected void quoteStart() { * until the character before it. */ protected void quoteStop(int endPos) throws IOException { + int quoteStart = this.quoteStart; if (quoteStart != -1) { + ByteList src = this.src; append(src.unsafeBytes(), src.begin() + quoteStart, endPos - quoteStart); - quoteStart = -1; + this.quoteStart = -1; } } diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 36914b73..c46a1e47 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -5,11 +5,6 @@ */ package json.ext; -import json.ext.RuntimeInfo; - -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -22,23 +17,20 @@ import org.jruby.RubyHash; import org.jruby.RubyString; import org.jruby.RubySymbol; -import org.jruby.RubyException; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; -import org.jruby.exceptions.RaiseException; -import org.jruby.util.ConvertBytes; import org.jruby.util.IOOutputStream; -import org.jruby.util.StringSupport; import org.jruby.util.TypeConverter; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.OutputStream; import java.math.BigInteger; +import java.util.Set; -import static java.nio.charset.StandardCharsets.*; +import static java.nio.charset.StandardCharsets.UTF_8; public final class Generator { @@ -123,11 +115,37 @@ private static Handler getHandlerFor(Ruby run RuntimeInfo info = RuntimeInfo.forRuntime(runtime); RubyClass fragmentClass = info.jsonModule.get().getClass("Fragment"); if (Helpers.metaclass(object) != fragmentClass) break; - return (Handler) FRAGMENT_HANDLER; + return FRAGMENT_HANDLER; } return GENERIC_HANDLER; } + private static void generateFor(ThreadContext context, Session session, T object, OutputStream buffer) throws IOException { + switch (((RubyBasicObject) object).getNativeClassIndex()) { + case NIL : buffer.write(NULL_STRING); return; + case TRUE : buffer.write(TRUE_STRING); return; + case FALSE : buffer.write(FALSE_STRING); return; + case FLOAT : generateFloat(context, session, (RubyFloat) object, buffer); return; + case FIXNUM : generateFixnum(session, (RubyFixnum) object, buffer); return; + case BIGNUM : generateBignum((RubyBignum) object, buffer); return; + case SYMBOL : generateSymbol(context, session, (RubySymbol) object, buffer); return; + case STRING : + if (Helpers.metaclass(object) != context.runtime.getString()) break; + generateString(context, session, (RubyString) object, buffer); return; + case ARRAY : + if (Helpers.metaclass(object) != context.runtime.getArray()) break; + generateArray(context, session, (RubyArray) object, buffer); return; + case HASH : + if (Helpers.metaclass(object) != context.runtime.getHash()) break; + generateHash(context, session, (RubyHash) object, buffer); return; + case STRUCT : + RuntimeInfo info = RuntimeInfo.forRuntime(context.runtime); + RubyClass fragmentClass = info.jsonModule.get().getClass("Fragment"); + if (Helpers.metaclass(object) != fragmentClass) break; + generateFragment(context, session, object, buffer); return; + } + generateGeneric(context, session, object, buffer); + } /* Generator context */ @@ -143,10 +161,12 @@ private static Handler getHandlerFor(Ruby run * won't be part of the session. */ static class Session { + private static final int MAX_LONG_CHARS = Long.toString(Long.MIN_VALUE).length(); private GeneratorState state; private IRubyObject possibleState; private RuntimeInfo info; private StringEncoder stringEncoder; + private byte[] charBytes; Session(GeneratorState state) { this.state = state; @@ -169,10 +189,18 @@ public RuntimeInfo getInfo(ThreadContext context) { return info; } + public byte[] getCharBytes() { + byte[] charBytes = this.charBytes; + if (charBytes == null) charBytes = this.charBytes = new byte[MAX_LONG_CHARS]; + return charBytes; + } + public StringEncoder getStringEncoder(ThreadContext context) { if (stringEncoder == null) { GeneratorState state = getState(context); - stringEncoder = new StringEncoder(state.asciiOnly(), state.scriptSafe()); + stringEncoder = state.asciiOnly() ? + new StringEncoderAsciiOnly(state.scriptSafe()) : + new StringEncoder(state.scriptSafe()); } return stringEncoder; } @@ -216,8 +244,8 @@ private static class KeywordHandler extends Handler { private final byte[] keyword; - private KeywordHandler(String keyword) { - this.keyword = keyword.getBytes(UTF_8); + private KeywordHandler(byte[] keyword) { + this.keyword = keyword; } @Override @@ -239,364 +267,398 @@ void generate(ThreadContext context, Session session, T object, OutputStream buf /* Handlers */ - static final Handler BIGNUM_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyBignum object, OutputStream buffer) throws IOException { - BigInteger bigInt = object.getValue(); - buffer.write(bigInt.toString().getBytes(UTF_8)); - } - }; + static final Handler BIGNUM_HANDLER = new BignumHandler(); + static final Handler FIXNUM_HANDLER = new FixnumHandler(); + static final Handler FLOAT_HANDLER = new FloatHandler(); + static final Handler> ARRAY_HANDLER = new ArrayHandler(); + static final Handler HASH_HANDLER = new HashHandler(); + static final Handler STRING_HANDLER = new StringHandler(); + private static final byte[] TRUE_STRING = "true".getBytes(); + static final Handler TRUE_HANDLER = new KeywordHandler<>(TRUE_STRING); + private static final byte[] FALSE_STRING = "false".getBytes(); + static final Handler FALSE_HANDLER = new KeywordHandler<>(FALSE_STRING); + private static final byte[] NULL_STRING = "null".getBytes(); + static final Handler NIL_HANDLER = new KeywordHandler<>(NULL_STRING); + static final Handler FRAGMENT_HANDLER = new FragmentHandler(); + static final Handler SYMBOL_HANDLER = new SymbolHandler(); - static final Handler FIXNUM_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { - buffer.write(ConvertBytes.longToCharBytes(object.getLongValue())); - } - }; - - static final Handler FLOAT_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { - double value = object.getValue(); - - if (Double.isInfinite(value) || Double.isNaN(value)) { - GeneratorState state = session.getState(context); - - if (!state.allowNaN()) { - if (state.strict() && state.getAsJSON() != null) { - IRubyObject castedValue = state.getAsJSON().call(context, object); - if (castedValue != object) { - getHandlerFor(context.runtime, castedValue).generate(context, session, castedValue, buffer); - return; - } - } - - throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } - } + /** + * The default handler (Object#to_json): coerces the object + * to string using #to_s, and serializes that string. + */ + static final Handler OBJECT_HANDLER = new ObjectHandler(); - buffer.write(Double.toString(value).getBytes(UTF_8)); - } - }; + /** + * A handler that simply calls #to_json(state) on the + * given object. + */ + static final Handler GENERIC_HANDLER = new GenericHandler(); - private static final byte[] EMPTY_ARRAY_BYTES = "[]".getBytes(); - static final Handler> ARRAY_HANDLER = - new Handler>() { - @Override - int guessSize(ThreadContext context, Session session, RubyArray object) { - GeneratorState state = session.getState(context); - int depth = state.getDepth(); - int perItem = - 4 // prealloc - + (depth + 1) * state.getIndent().length() // indent - + 1 + state.getArrayNl().length(); // ',' arrayNl - return 2 + object.size() * perItem; - } + private static class BignumHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyBignum object, OutputStream buffer) throws IOException { + generateBignum(object, buffer); + } + } - @Override - void generate(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { - GeneratorState state = session.getState(context); - int depth = state.increaseDepth(context); + private static void generateBignum(RubyBignum object, OutputStream buffer) throws IOException { + BigInteger bigInt = object.getValue(); + buffer.write(bigInt.toString().getBytes(UTF_8)); + } - if (object.isEmpty()) { - buffer.write(EMPTY_ARRAY_BYTES); - state.decreaseDepth(); - return; - } + private static class FixnumHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { + generateFixnum(session, object, buffer); + } + } + + static void generateFixnum(Session session, RubyFixnum object, OutputStream buffer) throws IOException { + long i = object.getLongValue(); + if (i == 0) { + buffer.write('0'); + } else if (i == Long.MIN_VALUE) { + // special case to avoid -i + buffer.write(MIN_VALUE_BYTES_RADIX_10); + } else { + byte[] charBytes = session.getCharBytes(); + appendFixnum(buffer, charBytes, i); + } + } + + private static final byte[] MIN_VALUE_BYTES_RADIX_10 = ByteList.plain(Long.toString(Long.MIN_VALUE, 10)); - Ruby runtime = context.runtime; + // C: fbuffer_append_long + static void appendFixnum(OutputStream buffer, byte[] buf, long number) throws IOException { + int end = buf.length; + int len = fltoa(number, buf, end); + buffer.write(buf, end - len, len); + } + + static int fltoa(long number, byte[] buf, int end) { + boolean negative = number < 0; + int tmp = end; - ByteList indentUnit = state.getIndent(); - byte[] shift = Utils.repeat(indentUnit, depth); + if (negative) number = -number; + do { + buf[--tmp] = (byte) ((int) (number % 10) + '0'); + } while ((number /= 10) != 0); + if (negative) buf[--tmp] = '-'; + return end - tmp; + } + + private static class FloatHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { + generateFloat(context, session, object, buffer); + } + } - ByteList arrayNl = state.getArrayNl(); - byte[] delim = new byte[1 + arrayNl.length()]; - delim[0] = ','; - System.arraycopy(arrayNl.unsafeBytes(), arrayNl.begin(), delim, 1, - arrayNl.length()); + static void generateFloat(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { + double value = object.getValue(); - buffer.write((byte)'['); - buffer.write(arrayNl.bytes()); - boolean firstItem = true; + if (Double.isInfinite(value) || Double.isNaN(value)) { + GeneratorState state = session.getState(context); - for (int i = 0, t = object.getLength(); i < t; i++) { - IRubyObject element = object.eltInternal(i); - if (firstItem) { - firstItem = false; - } else { - buffer.write(delim); + if (!state.allowNaN()) { + if (state.strict() && state.getAsJSON() != null) { + IRubyObject castedValue = state.getAsJSON().call(context, object); + if (castedValue != object) { + getHandlerFor(context.runtime, castedValue).generate(context, session, castedValue, buffer); + return; } - buffer.write(shift); - Handler handler = getHandlerFor(runtime, element); - handler.generate(context, session, element, buffer); } + + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); + } + } - state.decreaseDepth(); - if (!arrayNl.isEmpty()) { - buffer.write(arrayNl.bytes()); - buffer.write(shift, 0, state.getDepth() * indentUnit.length()); - } + buffer.write(Double.toString(value).getBytes(UTF_8)); + } + + private static final byte[] EMPTY_ARRAY_BYTES = "[]".getBytes(); + private static class ArrayHandler extends Handler> { + @Override + int guessSize(ThreadContext context, Session session, RubyArray object) { + GeneratorState state = session.getState(context); + int depth = state.getDepth(); + int perItem = + 4 // prealloc + + (depth + 1) * state.getIndent().length() // indent + + 1 + state.getArrayNl().length(); // ',' arrayNl + return 2 + object.size() * perItem; + } + + @Override + void generate(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { + generateArray(context, session, object, buffer); + } + } - buffer.write((byte)']'); + static void generateArray(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { + GeneratorState state = session.getState(context); + int depth = state.increaseDepth(context); + + if (object.isEmpty()) { + buffer.write(EMPTY_ARRAY_BYTES); + state.decreaseDepth(); + return; + } + + ByteList indentUnit = state.getIndent(); + ByteList arrayNl = state.getArrayNl(); + byte[] arrayNLBytes = arrayNl.unsafeBytes(); + int arrayNLBegin = arrayNl.begin(); + int arrayNLSize = arrayNl.realSize(); + boolean arrayNLEmpty = arrayNLSize == 0; + + buffer.write('['); + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); + + int length = object.getLength(); + for (int i = 0; i < length; i++) { + IRubyObject element = object.eltInternal(i); + if (i > 0) { + buffer.write(','); + if (!arrayNLEmpty) { + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); + } } - }; + Utils.repeatWrite(buffer, indentUnit, depth); + generateFor(context, session, element, buffer); + } + + int oldDepth = state.decreaseDepth(); + if (!arrayNLEmpty) { + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); + Utils.repeatWrite(buffer, indentUnit, oldDepth); + } + + buffer.write((byte) ']'); + } private static final byte[] EMPTY_HASH_BYTES = "{}".getBytes(); - static final Handler HASH_HANDLER = - new Handler() { - @Override - int guessSize(ThreadContext context, Session session, RubyHash object) { - GeneratorState state = session.getState(context); - int perItem = + private static class HashHandler extends Handler { + @Override + int guessSize(ThreadContext context, Session session, RubyHash object) { + GeneratorState state = session.getState(context); + int perItem = 12 // key, colon, comma - + (state.getDepth() + 1) * state.getIndent().length() - + state.getSpaceBefore().length() - + state.getSpace().length(); - return 2 + object.size() * perItem; - } + + (state.getDepth() + 1) * state.getIndent().length() + + state.getSpaceBefore().length() + + state.getSpace().length(); + return 2 + object.size() * perItem; + } - @Override - void generate(ThreadContext context, final Session session, RubyHash object, final OutputStream buffer) throws IOException { - final GeneratorState state = session.getState(context); - final int depth = state.increaseDepth(context); + @Override + void generate(ThreadContext context, final Session session, RubyHash object, final OutputStream buffer) throws IOException { + generateHash(context, session, object, buffer); + } + } - if (object.isEmpty()) { - buffer.write(EMPTY_HASH_BYTES); - state.decreaseDepth(); - return; - } + static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException { + final GeneratorState state = session.getState(context); + final int depth = state.increaseDepth(context); - final ByteList objectNl = state.getObjectNl(); - byte[] objectNLBytes = objectNl.unsafeBytes(); - final byte[] indent = Utils.repeat(state.getIndent(), depth); - final ByteList spaceBefore = state.getSpaceBefore(); - final ByteList space = state.getSpace(); - - buffer.write((byte)'{'); - buffer.write(objectNLBytes); - - final boolean[] firstPair = new boolean[]{true}; - object.visitAll(context, new RubyHash.VisitorWithState() { - @Override - public void visit(ThreadContext context, RubyHash self, IRubyObject key, IRubyObject value, int index, boolean[] firstPair) { - try { - if (firstPair[0]) { - firstPair[0] = false; - } else { - buffer.write((byte) ','); - buffer.write(objectNLBytes); - } - if (!objectNl.isEmpty()) buffer.write(indent); - - Ruby runtime = context.runtime; - - IRubyObject keyStr; - RubyClass keyClass = key.getType(); - if (key instanceof RubyString) { - if (keyClass == runtime.getString()) { - keyStr = key; - } else { - keyStr = key.callMethod(context, "to_s"); - } - } else if (keyClass == runtime.getSymbol()) { - keyStr = key.asString(); - } else { - keyStr = TypeConverter.convertToType(key, runtime.getString(), "to_s"); - } - - if (keyStr.getMetaClass() == runtime.getString()) { - STRING_HANDLER.generate(context, session, (RubyString) keyStr, buffer); - } else { - Utils.ensureString(keyStr); - Handler keyHandler = getHandlerFor(runtime, keyStr); - keyHandler.generate(context, session, keyStr, buffer); - } - - buffer.write(spaceBefore.unsafeBytes()); - buffer.write((byte) ':'); - buffer.write(space.unsafeBytes()); - - Handler valueHandler = getHandlerFor(runtime, value); - valueHandler.generate(context, session, value, buffer); - } catch (Throwable t) { - Helpers.throwException(t); - } - } - }, firstPair); - state.decreaseDepth(); - if (!firstPair[0] && !objectNl.isEmpty()) { - buffer.write(objectNLBytes); - } - buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); - buffer.write((byte)'}'); - } - }; - - static final Handler STRING_HANDLER = - new Handler() { - @Override - int guessSize(ThreadContext context, Session session, RubyString object) { - // for most applications, most strings will be just a set of - // printable ASCII characters without any escaping, so let's - // just allocate enough space for that + the quotes - return 2 + object.getByteList().length(); - } + if (object.isEmpty()) { + buffer.write(EMPTY_HASH_BYTES); + state.decreaseDepth(); + return; + } - @Override - void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { - try { - object = ensureValidEncoding(context, object); - } catch (RaiseException re) { - RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage()); - exc.setCause(re.getException()); - throw exc.toThrowable(); - } + final ByteList objectNl = state.getObjectNl(); + byte[] objectNLBytes = objectNl.unsafeBytes(); + final byte[] indent = Utils.repeat(state.getIndent(), depth); + final ByteList spaceBefore = state.getSpaceBefore(); + final ByteList space = state.getSpace(); - StringEncoder stringEncoder = session.getStringEncoder(context); - ByteList byteList = object.getByteList(); - switch (object.scanForCodeRange()) { - case StringSupport.CR_7BIT: - stringEncoder.encodeASCII(context, byteList, buffer); - break; - case StringSupport.CR_VALID: - stringEncoder.encode(context, byteList, buffer); - break; - default: - throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); - } - } - }; - - static final Handler SYMBOL_HANDLER = - new Handler() { - @Override - int guessSize(ThreadContext context, Session session, RubySymbol object) { - GeneratorState state = session.getState(context); - if (state.strict()) { - return STRING_HANDLER.guessSize(context, session, object.asString()); - } else { - return GENERIC_HANDLER.guessSize(context, session, object); - } + buffer.write('{'); + buffer.write(objectNLBytes); + + boolean firstPair = true; + for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { + processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); + firstPair = false; + } + int oldDepth = state.decreaseDepth(); + if (!firstPair && !objectNl.isEmpty()) { + buffer.write(objectNLBytes); + } + Utils.repeatWrite(buffer, state.getIndent(), oldDepth); + buffer.write('}'); + } + + private static void processEntry(ThreadContext context, Session session, OutputStream buffer, RubyHash.RubyHashEntry entry, boolean firstPair, ByteList objectNl, byte[] indent, ByteList spaceBefore, ByteList space) { + IRubyObject key = (IRubyObject) entry.getKey(); + IRubyObject value = (IRubyObject) entry.getValue(); + + try { + if (!firstPair) { + buffer.write((byte) ','); + buffer.write(objectNl.unsafeBytes()); } + if (!objectNl.isEmpty()) buffer.write(indent); + + Ruby runtime = context.runtime; - @Override - void generate(ThreadContext context, Session session, RubySymbol object, OutputStream buffer) throws IOException { - GeneratorState state = session.getState(context); - if (state.strict()) { - STRING_HANDLER.generate(context, session, object.asString(), buffer); + IRubyObject keyStr; + RubyClass keyClass = key.getType(); + if (key instanceof RubyString) { + if (keyClass == runtime.getString()) { + keyStr = key; } else { - GENERIC_HANDLER.generate(context, session, object, buffer); + keyStr = key.callMethod(context, "to_s"); } + } else if (keyClass == runtime.getSymbol()) { + keyStr = ((RubySymbol) key).id2name(context); + } else { + keyStr = TypeConverter.convertToType(key, runtime.getString(), "to_s"); } - }; - - static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { - Encoding encoding = str.getEncoding(); - RubyString utf8String; - if (!(encoding == USASCIIEncoding.INSTANCE || encoding == UTF8Encoding.INSTANCE)) { - if (encoding == ASCIIEncoding.INSTANCE) { - utf8String = str.strDup(context.runtime); - utf8String.setEncoding(UTF8Encoding.INSTANCE); - switch (utf8String.getCodeRange()) { - case StringSupport.CR_7BIT: - return utf8String; - case StringSupport.CR_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Raise in 3.0.0 - context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8String; - } + + if (keyStr.getMetaClass() == runtime.getString()) { + generateString(context, session, (RubyString) keyStr, buffer); + } else { + Utils.ensureString(keyStr); + generateFor(context, session, keyStr, buffer); } - str = (RubyString) str.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + buffer.write(spaceBefore.unsafeBytes()); + buffer.write((byte) ':'); + buffer.write(space.unsafeBytes()); + + generateFor(context, session, value, buffer); + } catch (Throwable t) { + Helpers.throwException(t); } - return str; } - static final Handler TRUE_HANDLER = - new KeywordHandler<>("true"); - static final Handler FALSE_HANDLER = - new KeywordHandler<>("false"); - static final Handler NIL_HANDLER = - new KeywordHandler<>("null"); + private static class StringHandler extends Handler { + @Override + int guessSize(ThreadContext context, Session session, RubyString object) { + // for most applications, most strings will be just a set of + // printable ASCII characters without any escaping, so let's + // just allocate enough space for that + the quotes + return 2 + object.getByteList().length(); + } - /** - * The default handler (Object#to_json): coerces the object - * to string using #to_s, and serializes that string. - */ - static final Handler FRAGMENT_HANDLER = - new Handler() { - @Override - RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - GeneratorState state = session.getState(context); - IRubyObject result = object.callMethod(context, "to_json", state); - if (result instanceof RubyString) return (RubyString)result; - throw context.runtime.newTypeError("to_json must return a String"); - } + @Override + void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { + generateString(context, session, object, buffer); + } + } - @Override - void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString result = generateNew(context, session, object); - ByteList bytes = result.getByteList(); - buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); - } - }; + static void generateString(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { + session.getStringEncoder(context).generate(context, object, buffer); + } - /** - * The default handler (Object#to_json): coerces the object - * to string using #to_s, and serializes that string. - */ - static final Handler OBJECT_HANDLER = - new Handler() { - @Override - RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - RubyString str = object.asString(); - return STRING_HANDLER.generateNew(context, session, str); - } + private static class FragmentHandler extends Handler { + @Override + RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { + return generateFragmentNew(context, session, object); + } + + @Override + void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + generateFragment(context, session, object, buffer); + } + } + + static RubyString generateFragmentNew(ThreadContext context, Session session, IRubyObject object) { + GeneratorState state = session.getState(context); + IRubyObject result = object.callMethod(context, "to_json", state); + if (result instanceof RubyString) return (RubyString) result; + throw context.runtime.newTypeError("to_json must return a String"); + } + + static void generateFragment(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + RubyString result = generateFragmentNew(context, session, object); + ByteList bytes = result.getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + } - @Override - void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString str = object.asString(); - STRING_HANDLER.generate(context, session, str, buffer); + private static class SymbolHandler extends Handler { + @Override + int guessSize(ThreadContext context, Session session, RubySymbol object) { + GeneratorState state = session.getState(context); + if (state.strict()) { + return STRING_HANDLER.guessSize(context, session, object.asString()); + } else { + return GENERIC_HANDLER.guessSize(context, session, object); } - }; + } - /** - * A handler that simply calls #to_json(state) on the - * given object. - */ - static final Handler GENERIC_HANDLER = - new Handler() { - @Override - RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - GeneratorState state = session.getState(context); - if (state.strict()) { - if (state.getAsJSON() != null ) { - IRubyObject value = state.getAsJSON().call(context, object); - Handler handler = getHandlerFor(context.runtime, value); - if (handler == GENERIC_HANDLER) { - throw Utils.buildGeneratorError(context, object, value + " returned by as_json not allowed in JSON").toThrowable(); - } - return handler.generateNew(context, session, value); - } - throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } else if (object.respondsTo("to_json")) { - IRubyObject result = object.callMethod(context, "to_json", state); - if (result instanceof RubyString) return (RubyString)result; - throw context.runtime.newTypeError("to_json must return a String"); - } else { - return OBJECT_HANDLER.generateNew(context, session, object); + @Override + void generate(ThreadContext context, Session session, RubySymbol object, OutputStream buffer) throws IOException { + generateSymbol(context, session, object, buffer); + } + } + + static void generateSymbol(ThreadContext context, Session session, RubySymbol object, OutputStream buffer) throws IOException { + GeneratorState state = session.getState(context); + if (state.strict()) { + STRING_HANDLER.generate(context, session, object.asString(), buffer); + } else { + GENERIC_HANDLER.generate(context, session, object, buffer); + } + } + + private static class ObjectHandler extends Handler { + @Override + RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { + return generateObjectNew(context, session, object); + } + + @Override + void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + generateObject(context, session, object, buffer); + } + } + + static RubyString generateObjectNew(ThreadContext context, Session session, IRubyObject object) { + RubyString str = object.asString(); + return STRING_HANDLER.generateNew(context, session, str); + } + + static void generateObject(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + generateString(context, session, object.asString(), buffer); + } + + private static class GenericHandler extends Handler { + @Override + RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { + return generateGenericNew(context, session, object); + } + + @Override + void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + generateGeneric(context, session, object, buffer); + } + } + + static RubyString generateGenericNew(ThreadContext context, Session session, IRubyObject object) { + GeneratorState state = session.getState(context); + if (state.strict()) { + if (state.getAsJSON() != null ) { + IRubyObject value = state.getAsJSON().call(context, object); + Handler handler = getHandlerFor(context.runtime, value); + if (handler == GENERIC_HANDLER) { + throw Utils.buildGeneratorError(context, object, value + " returned by as_json not allowed in JSON").toThrowable(); } + return handler.generateNew(context, session, value); } + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); + } else if (object.respondsTo("to_json")) { + IRubyObject result = object.callMethod(context, "to_json", state); + if (result instanceof RubyString) return (RubyString)result; + throw context.runtime.newTypeError("to_json must return a String"); + } else { + return OBJECT_HANDLER.generateNew(context, session, object); + } + } - @Override - void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString result = generateNew(context, session, object); - ByteList bytes = result.getByteList(); - buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); - } - }; + static void generateGeneric(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + RubyString result = generateGenericNew(context, session, object); + ByteList bytes = result.getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + } } diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index ec944646..dc07ffa9 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -565,8 +565,8 @@ public int increaseDepth(ThreadContext context) { return depth; } - public void decreaseDepth() { - --depth; + public int decreaseDepth() { + return --depth; } /** diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 68fd81e3..d178d0bd 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -5,139 +5,291 @@ */ package json.ext; +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; +import org.jruby.Ruby; +import org.jruby.RubyException; +import org.jruby.RubyString; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.util.ByteList; +import org.jruby.util.StringSupport; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; /** * An encoder that reads from the given source and outputs its representation * to another ByteList. The source string is fully checked for UTF-8 validity, * and throws a GeneratorError if any problem is found. */ -final class StringEncoder extends ByteListTranscoder { - private final boolean asciiOnly, scriptSafe; +class StringEncoder extends ByteListTranscoder { + protected static final int CHAR_LENGTH_MASK = 7; + private static final byte[] BACKSLASH_DOUBLEQUOTE = {'\\', '"'}; + private static final byte[] BACKSLASH_BACKSLASH = {'\\', '\\'}; + private static final byte[] BACKSLASH_FORWARDSLASH = {'\\', '/'}; + private static final byte[] BACKSLASH_B = {'\\', 'b'}; + private static final byte[] BACKSLASH_F = {'\\', 'f'}; + private static final byte[] BACKSLASH_N = {'\\', 'n'}; + private static final byte[] BACKSLASH_R = {'\\', 'r'}; + private static final byte[] BACKSLASH_T = {'\\', 't'}; + + static final byte[] ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - private OutputStream out; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + static final byte[] ASCII_ONLY_ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, + }; + + static final byte[] SCRIPT_SAFE_ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, + }; + + private static final byte[] BACKSLASH_U2028 = "\\u2028".getBytes(StandardCharsets.US_ASCII); + private static final byte[] BACKSLASH_U2029 = "\\u2029".getBytes(StandardCharsets.US_ASCII); + + protected final byte[] escapeTable; + + OutputStream out; // Escaped characters will reuse this array, to avoid new allocations // or appending them byte-by-byte - private final byte[] aux = + protected final byte[] aux = new byte[] {/* First Unicode character */ '\\', 'u', 0, 0, 0, 0, /* Second unicode character (for surrogate pairs) */ '\\', 'u', 0, 0, 0, 0, /* "\X" characters */ '\\', 0}; - // offsets on the array above - private static final int ESCAPE_UNI1_OFFSET = 0; - private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6; - private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6; - /** Array used for code point decomposition in surrogates */ - private final char[] utf16 = new char[2]; - - private static final byte[] HEX = + + protected static final byte[] HEX = new byte[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - StringEncoder(boolean asciiOnly, boolean scriptSafe) { - this.asciiOnly = asciiOnly; - this.scriptSafe = scriptSafe; + StringEncoder(boolean scriptSafe) { + this(scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE); } - void encode(ThreadContext context, ByteList src, OutputStream out) throws IOException { - init(src); - this.out = out; - append('"'); - while (hasNext()) { - handleChar(readUtf8Char(context)); - } - quoteStop(pos); - append('"'); + StringEncoder(byte[] escapeTable) { + this.escapeTable = escapeTable; } - void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws IOException { - init(src); - this.out = out; + // C: generate_json_string + void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException { + object = ensureValidEncoding(context, object); + + ByteList byteList = object.getByteList(); + init(byteList); + out = buffer; append('"'); - while (hasNext()) { - handleChar(readASCIIChar()); + switch (object.scanForCodeRange()) { + case StringSupport.CR_7BIT: + case StringSupport.CR_VALID: + encode(byteList); + break; + default: + throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); } quoteStop(pos); append('"'); } - protected void append(int b) throws IOException { - out.write(b); + static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { + Encoding encoding = str.getEncoding(); + + if (encoding == USASCIIEncoding.INSTANCE || encoding == UTF8Encoding.INSTANCE) { + return str; + } + + return tryWeirdEncodings(context, str, encoding); } - protected void append(byte[] origin, int start, int length) throws IOException { - out.write(origin, start, length); + private static RubyString tryWeirdEncodings(ThreadContext context, RubyString str, Encoding encoding) { + Ruby runtime = context.runtime; + + RubyString utf8String; + + if (encoding == ASCIIEncoding.INSTANCE) { + utf8String = str.strDup(runtime); + utf8String.setEncoding(UTF8Encoding.INSTANCE); + switch (utf8String.getCodeRange()) { + case StringSupport.CR_7BIT: + return utf8String; + case StringSupport.CR_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8String; + } + } + + try { + str = (RubyString) str.encode(context, runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + } catch (RaiseException re) { + RubyException exc = Utils.buildGeneratorError(context, str, re.getMessage()); + exc.setCause(re.getException()); + throw exc.toThrowable(); + } + + return str; } - private void handleChar(int c) throws IOException { - switch (c) { - case '"': - case '\\': - escapeChar((char)c); - break; - case '\n': - escapeChar('n'); - break; - case '\r': - escapeChar('r'); - break; - case '\t': - escapeChar('t'); - break; - case '\f': - escapeChar('f'); - break; - case '\b': - escapeChar('b'); - break; - case '/': - if(scriptSafe) { - escapeChar((char)c); - break; + // C: convert_UTF8_to_JSON + void encode(ByteList src) throws IOException { + byte[] hexdig = HEX; + byte[] scratch = aux; + byte[] escapeTable = this.escapeTable; + + byte[] ptrBytes = src.unsafeBytes(); + int ptr = src.begin(); + int len = src.realSize(); + + int beg = 0; + int pos = 0; + + while (pos < len) { + int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); + int ch_len = escapeTable[ch]; + /* JSON encoding */ + + if (ch_len > 0) { + switch (ch_len) { + case 9: { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); + escapeAscii(ch, scratch, hexdig); + break; + } + case 11: { + int b2 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 1]); + if (b2 == 0x80) { + int b3 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 2]); + if (b3 == 0xA8) { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + append(BACKSLASH_U2028, 0, 6); + break; + } else if (b3 == 0xA9) { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + append(BACKSLASH_U2029, 0, 6); + break; + } + } + ch_len = 3; + // fallthrough + } + default: + pos += ch_len; + break; + } + } else { + pos++; } - case 0x2028: - case 0x2029: - if (scriptSafe) { - quoteStop(charStart); - escapeUtf8Char(c); + } + + if (beg < len) { + append(ptrBytes, ptr + beg, len - beg); + } + } + + protected int flushPos(int pos, int beg, byte[] ptrBytes, int ptr, int size) throws IOException { + if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } + return pos + size; + } + + protected void escapeAscii(int ch, byte[] scratch, byte[] hexdig) throws IOException { + switch (ch) { + case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; + case '\\': appendEscape(BACKSLASH_BACKSLASH); break; + case '/': appendEscape(BACKSLASH_FORWARDSLASH); break; + case '\b': appendEscape(BACKSLASH_B); break; + case '\f': appendEscape(BACKSLASH_F); break; + case '\n': appendEscape(BACKSLASH_N); break; + case '\r': appendEscape(BACKSLASH_R); break; + case '\t': appendEscape(BACKSLASH_T); break; + default: { + scratch[2] = '0'; + scratch[3] = '0'; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + append(scratch, 0, 6); break; } - default: - if (c >= 0x20 && c <= 0x7f || - (c >= 0x80 && !asciiOnly)) { - quoteStart(); - } else { - quoteStop(charStart); - escapeUtf8Char(c); - } } } - private void escapeChar(char c) throws IOException { - quoteStop(charStart); - aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c; - append(aux, ESCAPE_CHAR_OFFSET, 2); + private void appendEscape(byte[] escape) throws IOException { + append(escape, 0, 2); } - private void escapeUtf8Char(int codePoint) throws IOException { - int numChars = Character.toChars(codePoint, utf16, 0); - escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2); - if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2); - append(aux, ESCAPE_UNI1_OFFSET, 6 * numChars); + protected void append(int b) throws IOException { + out.write(b); } - private void escapeCodeUnit(char c, int auxOffset) { - for (int i = 0; i < 4; i++) { - aux[auxOffset + i] = HEX[(c >>> (12 - 4 * i)) & 0xf]; - } + protected void append(byte[] origin, int start, int length) throws IOException { + out.write(origin, start, length); } @Override diff --git a/java/src/json/ext/StringEncoderAsciiOnly.java b/java/src/json/ext/StringEncoderAsciiOnly.java new file mode 100644 index 00000000..de1af284 --- /dev/null +++ b/java/src/json/ext/StringEncoderAsciiOnly.java @@ -0,0 +1,116 @@ +/* + * This code is copyrighted work by Daniel Luz . + * + * Distributed under the Ruby license: https://www.ruby-lang.org/en/about/license.txt + */ +package json.ext; + +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; +import org.jruby.RubyException; +import org.jruby.RubyString; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.util.ByteList; +import org.jruby.util.StringSupport; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + +/** + * An encoder that reads from the given source and outputs its representation + * to another ByteList. The source string is fully checked for UTF-8 validity, + * and throws a GeneratorError if any problem is found. + */ +final class StringEncoderAsciiOnly extends StringEncoder { + StringEncoderAsciiOnly(boolean scriptSafe) { + super(scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE); + } + + // C: convert_UTF8_to_ASCII_only_JSON + void encode(ByteList src) throws IOException { + byte[] hexdig = HEX; + byte[] scratch = aux; + byte[] escapeTable = this.escapeTable; + + byte[] ptrBytes = src.unsafeBytes(); + int ptr = src.begin(); + int len = src.realSize(); + + int beg = 0; + int pos = 0; + + while (pos < len) { + int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); + int ch_len = escapeTable[ch]; + + if (ch_len != 0) { + switch (ch_len) { + case 9: { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); + escapeAscii(ch, scratch, hexdig); + break; + } + default: { + int wchar = 0; + ch_len = ch_len & CHAR_LENGTH_MASK; + + switch(ch_len) { + case 2: + wchar = ptrBytes[ptr + pos] & 0x1F; + break; + case 3: + wchar = ptrBytes[ptr + pos] & 0x0F; + break; + case 4: + wchar = ptrBytes[ptr + pos] & CHAR_LENGTH_MASK; + break; + } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (ptrBytes[ptr + pos +i] & 0x3F); + } + + beg = pos = flushPos(pos, beg, ptrBytes, ptr, ch_len); + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + append(scratch, 0, 6); + } else { + int hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (wchar >> 10); + lo = 0xDC00 + (wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + append(scratch, 0, 12); + } + + break; + } + } + } else { + pos++; + } + } + + if (beg < len) { + append(ptrBytes, ptr + beg, len - beg); + } + } +} diff --git a/java/src/json/ext/Utils.java b/java/src/json/ext/Utils.java index 87139cdb..38491d2e 100644 --- a/java/src/json/ext/Utils.java +++ b/java/src/json/ext/Utils.java @@ -16,6 +16,9 @@ import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; +import java.io.IOException; +import java.io.OutputStream; + /** * Library of miscellaneous utility functions */ @@ -81,11 +84,25 @@ static byte[] repeat(ByteList a, int n) { static byte[] repeat(byte[] a, int begin, int length, int n) { if (length == 0) return ByteList.NULL_ARRAY; + + if (n == 1 && begin == 0 && length == a.length) return a; + int resultLen = length * n; byte[] result = new byte[resultLen]; for (int pos = 0; pos < resultLen; pos += length) { System.arraycopy(a, begin, result, pos, length); } + return result; } + + static void repeatWrite(OutputStream out, ByteList a, int n) throws IOException { + byte[] bytes = a.unsafeBytes(); + int begin = a.begin(); + int length = a.length(); + + for (int i = 0; i < n; i++) { + out.write(bytes, begin, length); + } + } }