Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
512-bit experiment
  • Loading branch information
steveatgh committed Aug 11, 2023
commit 940432867a05d1529edafa2ec0322ec0946807b7
40 changes: 15 additions & 25 deletions src/main/java/org/simdjson/CharactersClassifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,33 @@ class CharactersClassifier {

private static final byte LOW_NIBBLE_MASK = 0x0f;
private static final ByteVector WHITESPACE_TABLE = ByteVector.fromArray(
ByteVector.SPECIES_256,
ByteVector.SPECIES_512,
new byte[]{
' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100,
' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100,
' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100,
' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100
},
0
);
private static final ByteVector OP_TABLE = ByteVector.fromArray(
ByteVector.SPECIES_256,
ByteVector.SPECIES_512,
new byte[]{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0
},
0
);

JsonCharacterBlock classify(ByteVector chunk0, ByteVector chunk1) {
VectorShuffle<Byte> chunk0Low = extractLowNibble(chunk0).toShuffle();
VectorShuffle<Byte> chunk1Low = extractLowNibble(chunk1).toShuffle();

long whitespace = eq(
chunk0,
WHITESPACE_TABLE.rearrange(chunk0Low),
chunk1,
WHITESPACE_TABLE.rearrange(chunk1Low)
);

ByteVector curlified0 = curlify(chunk0);
ByteVector curlified1 = curlify(chunk1);
long op = eq(
curlified0,
OP_TABLE.rearrange(chunk0Low),
curlified1,
OP_TABLE.rearrange(chunk1Low)
);
/**
 * Classifies every byte of {@code chunk} as whitespace and/or operator.
 *
 * @param chunk vector of input bytes to classify
 * @return a {@link JsonCharacterBlock} built from the whitespace bitmask and the operator bitmask
 */
JsonCharacterBlock classify(ByteVector chunk) {
// The result of extractLowNibble is turned into a shuffle, i.e. per-lane indexes into the
// lookup tables (presumably the low 4 bits of each byte; the helper is not shown here).
VectorShuffle<Byte> chunkLow = extractLowNibble(chunk).toShuffle();

// A lane counts as whitespace when its byte equals the WHITESPACE_TABLE entry picked by the shuffle.
long whitespace = eq(chunk, WHITESPACE_TABLE.rearrange(chunkLow));

// curlify ORs each byte with 0x20, which maps '[' and ']' onto '{' and '}', so OP_TABLE
// only needs the ':', '{', ',' and '}' entries.
ByteVector curlified = curlify(chunk);
long op = eq(curlified, OP_TABLE.rearrange(chunkLow));

return new JsonCharacterBlock(whitespace, op);
}
Expand All @@ -55,9 +47,7 @@ private ByteVector curlify(ByteVector vector) {
return vector.or((byte) 0x20);
}

private long eq(ByteVector chunk0, ByteVector mask0, ByteVector chunk1, ByteVector mask1) {
long rLo = chunk0.eq(mask0).toLong();
long rHi = chunk1.eq(mask1).toLong();
return rLo | (rHi << 32);
/**
 * Compares {@code chunk} with {@code mask} lane by lane.
 *
 * @param chunk vector of input bytes
 * @param mask vector of expected byte values, one per lane
 * @return the comparison mask packed into a {@code long} by {@code toLong()}; a set bit marks an equal lane
 */
private long eq(ByteVector chunk, ByteVector mask) {
return chunk.eq(mask).toLong();
}
}
14 changes: 6 additions & 8 deletions src/main/java/org/simdjson/JsonStringScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,22 @@ class JsonStringScanner {
private long prevEscaped = 0;

JsonStringScanner() {
VectorSpecies<Byte> species = ByteVector.SPECIES_256;
VectorSpecies<Byte> species = ByteVector.SPECIES_512;
this.backslashMask = ByteVector.broadcast(species, (byte) '\\');
this.quoteMask = ByteVector.broadcast(species, (byte) '"');
}

JsonStringBlock next(ByteVector chunk0, ByteVector chunk1) {
long backslash = eq(chunk0, chunk1, backslashMask);
JsonStringBlock next(ByteVector chunk) {
long backslash = eq(chunk, backslashMask);
long escaped = findEscaped(backslash);
long quote = eq(chunk0, chunk1, quoteMask) & ~escaped;
long quote = eq(chunk, quoteMask) & ~escaped;
long inString = prefixXor(quote) ^ prevInString;
prevInString = inString >> 63;
return new JsonStringBlock(quote, inString);
}

private long eq(ByteVector chunk0, ByteVector chunk1, ByteVector mask) {
long rLo = chunk0.eq(mask).toLong();
long rHi = chunk1.eq(mask).toLong();
return rLo | (rHi << 32);
/**
 * Compares {@code chunk} with {@code mask} lane by lane.
 *
 * @param chunk vector of input bytes
 * @param mask vector to compare against, one value per lane
 * @return the comparison mask packed into a {@code long} by {@code toLong()}; a set bit marks an equal lane
 */
private long eq(ByteVector chunk, ByteVector mask) {
return chunk.eq(mask).toLong();
}

private long findEscaped(long backslash) {
Expand Down
17 changes: 7 additions & 10 deletions src/main/java/org/simdjson/StructuralIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import jdk.incubator.vector.ByteVector;

import static jdk.incubator.vector.ByteVector.SPECIES_256;
import static jdk.incubator.vector.ByteVector.SPECIES_512;
import static jdk.incubator.vector.VectorOperators.UNSIGNED_LE;

class StructuralIndexer {
Expand All @@ -22,17 +22,16 @@ class StructuralIndexer {
}

void step(byte[] buffer, int offset, int blockIndex) {
ByteVector chunk0 = ByteVector.fromArray(SPECIES_256, buffer, offset);
ByteVector chunk1 = ByteVector.fromArray(SPECIES_256, buffer, offset + 32);
ByteVector chunk = ByteVector.fromArray(SPECIES_512, buffer, offset);

JsonStringBlock strings = stringScanner.next(chunk0, chunk1);
JsonCharacterBlock characters = classifier.classify(chunk0, chunk1);
JsonStringBlock strings = stringScanner.next(chunk);
JsonCharacterBlock characters = classifier.classify(chunk);

long scalar = characters.scalar();
long nonQuoteScalar = scalar & ~strings.quote();
long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar;
prevScalar = nonQuoteScalar >>> 63;
long unescaped = lteq(chunk0, chunk1, (byte) 0x1F);
long unescaped = lteq(chunk, (byte) 0x1F);
// TODO: utf-8 validation
long potentialScalarStart = scalar & ~followsNonQuoteScalar;
long potentialStructuralStart = characters.op() | potentialScalarStart;
Expand All @@ -41,10 +40,8 @@ void step(byte[] buffer, int offset, int blockIndex) {
unescapedCharsError |= strings.nonQuoteInsideString(unescaped);
}

private long lteq(ByteVector chunk0, ByteVector chunk1, byte scalar) {
long rLo = chunk0.compare(UNSIGNED_LE, scalar).toLong();
long rHi = chunk1.compare(UNSIGNED_LE, scalar).toLong();
return rLo | (rHi << 32);
/**
 * Finds the lanes of {@code chunk} whose byte is less than or equal to {@code scalar},
 * using the unsigned comparison {@code UNSIGNED_LE}.
 *
 * @param chunk vector of input bytes
 * @param scalar upper bound each lane is compared against
 * @return the comparison mask packed into a {@code long} by {@code toLong()}; a set bit marks a matching lane
 */
private long lteq(ByteVector chunk, byte scalar) {
return chunk.compare(UNSIGNED_LE, scalar).toLong();
}

void finish(int blockIndex) {
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/org/simdjson/TapeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
import static org.simdjson.Tape.START_OBJECT;
import static org.simdjson.Tape.STRING;
import static org.simdjson.Tape.TRUE_VALUE;
import static jdk.incubator.vector.ByteVector.SPECIES_256;
import static jdk.incubator.vector.ByteVector.SPECIES_512;

class TapeBuilder {

private static final byte SPACE = 0x20;
private static final byte BACKSLASH = '\\';
private static final byte QUOTE = '"';
private static final int BYTES_PROCESSED = 32;
// private static final int BYTES_PROCESSED = 32;
private static final int BYTES_PROCESSED = 64;
private static final byte[] ESCAPE_MAP = new byte[]{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Expand Down Expand Up @@ -198,7 +199,7 @@ private void visitString(byte[] buffer, int idx) {
int src = idx + 1;
int dst = stringBufferIdx + Integer.BYTES;
while (true) {
ByteVector srcVec = ByteVector.fromArray(SPECIES_256, buffer, src);
ByteVector srcVec = ByteVector.fromArray(SPECIES_512, buffer, src);
srcVec.intoArray(stringBuffer, dst);
long backslashBits = srcVec.eq(BACKSLASH).toLong();
long quoteBits = srcVec.eq(QUOTE).toLong();
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/BenchmarkCorrectnessTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ public void countUniqueTwitterUsersWithDefaultProfile() throws IOException {
private static byte[] loadTwitterJson() throws IOException {
try (InputStream is = BenchmarkCorrectnessTest.class.getResourceAsStream("/twitter.json")) {
return is.readAllBytes();
}
}
}
}
9 changes: 4 additions & 5 deletions src/test/java/org/simdjson/CharactersClassifierTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.StringUtils.chunk0;
import static org.simdjson.StringUtils.chunk1;
import static org.simdjson.StringUtils.chunk;

public class CharactersClassifierTest {

Expand All @@ -16,7 +15,7 @@ public void classifiesOperators() {
String str = "a{bc}1:2,3[efg]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

// when
JsonCharacterBlock block = classifier.classify(chunk0(str), chunk1(str));
JsonCharacterBlock block = classifier.classify(chunk(str));

// then
assertThat(block.op()).isEqualTo(0x4552);
Expand All @@ -39,7 +38,7 @@ public void classifiesControlCharactersAsOperators() {
}, UTF_8);

// when
JsonCharacterBlock block = classifier.classify(chunk0(str), chunk1(str));
JsonCharacterBlock block = classifier.classify(chunk(str));

// then
assertThat(block.op()).isEqualTo(0x28);
Expand All @@ -53,7 +52,7 @@ public void classifiesWhitespaces() {
String str = "a bc\t1\n2\r3efgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

// when
JsonCharacterBlock block = classifier.classify(chunk0(str), chunk1(str));
JsonCharacterBlock block = classifier.classify(chunk(str));

// then
assertThat(block.whitespace()).isEqualTo(0x152);
Expand Down
25 changes: 12 additions & 13 deletions src/test/java/org/simdjson/JsonStringScannerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
import org.junit.jupiter.params.provider.ValueSource;

import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.StringUtils.chunk0;
import static org.simdjson.StringUtils.chunk1;
import static org.simdjson.StringUtils.chunk;
import static org.simdjson.StringUtils.padWithSpaces;

public class JsonStringScannerTest {
Expand All @@ -18,7 +17,7 @@ public void testUnquotedString() {
String str = padWithSpaces("abc 123");

// when
JsonStringBlock block = stringScanner.next(chunk0(str), chunk1(str));
JsonStringBlock block = stringScanner.next(chunk(str));

// then
assertThat(block.quote()).isEqualTo(0);
Expand All @@ -31,7 +30,7 @@ public void testQuotedString() {
String str = padWithSpaces("\"abc 123\"");

// when
JsonStringBlock block = stringScanner.next(chunk0(str), chunk1(str));
JsonStringBlock block = stringScanner.next(chunk(str));

// then
assertThat(block.quote()).isEqualTo(0x101);
Expand All @@ -44,7 +43,7 @@ public void testStartingQuotes() {
String str = padWithSpaces("\"abc 123");

// when
JsonStringBlock block = stringScanner.next(chunk0(str), chunk1(str));
JsonStringBlock block = stringScanner.next(chunk(str));

// then
assertThat(block.quote()).isEqualTo(0x1);
Expand All @@ -58,8 +57,8 @@ public void testQuotedStringSpanningMultipleBlocks() {
String str1 = " c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 d0 d1 d2 d3 d4 d5 d6 d7 d8 d\" def";

// when
JsonStringBlock firstBlock = stringScanner.next(chunk0(str0), chunk1(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk0(str1), chunk1(str1));
JsonStringBlock firstBlock = stringScanner.next(chunk(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk(str1));

// then
assertThat(firstBlock.quote()).isEqualTo(0x10);
Expand All @@ -77,7 +76,7 @@ public void testEscapedQuote(String str) {
String padded = padWithSpaces(str);

// when
JsonStringBlock block = stringScanner.next(chunk0(padded), chunk1(padded));
JsonStringBlock block = stringScanner.next(chunk(padded));

// then
assertThat(block.quote()).isEqualTo(0);
Expand All @@ -91,8 +90,8 @@ public void testEscapedQuoteSpanningMultipleBlocks() {
String str1 = padWithSpaces("\"def");

// when
JsonStringBlock firstBlock = stringScanner.next(chunk0(str0), chunk1(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk0(str1), chunk1(str1));
JsonStringBlock firstBlock = stringScanner.next(chunk(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk(str1));

// then
assertThat(firstBlock.quote()).isEqualTo(0);
Expand All @@ -110,7 +109,7 @@ public void testUnescapedQuote(String str) {
String padded = padWithSpaces(str);

// when
JsonStringBlock block = stringScanner.next(chunk0(padded), chunk1(padded));
JsonStringBlock block = stringScanner.next(chunk(padded));

// then
assertThat(block.quote()).isEqualTo(0x1L << str.indexOf('"'));
Expand All @@ -124,8 +123,8 @@ public void testUnescapedQuoteSpanningMultipleBlocks() {
String str1 = padWithSpaces("\\\"abc");

// when
JsonStringBlock firstBlock = stringScanner.next(chunk0(str0), chunk1(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk0(str1), chunk1(str1));
JsonStringBlock firstBlock = stringScanner.next(chunk(str0));
JsonStringBlock secondBlock = stringScanner.next(chunk(str1));

// then
assertThat(firstBlock.quote()).isEqualTo(0);
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/SimdJsonParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public void testEmptyObject() {
SimdJsonParser parser = new SimdJsonParser();
byte[] json = toBytes("{}");

// when
// when
JsonValue jsonValue = parser.parse(json, json.length);

// then
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/org/simdjson/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ static String padWithSpaces(String str) {
return new String(padded, UTF_8);
}


/**
 * Loads the start of the UTF-8 encoding of {@code str} into a 512-bit byte vector.
 *
 * @param str text to encode; its UTF-8 form must be at least as long as the 512-bit species
 *            (64 bytes), otherwise {@code fromArray} fails its bounds check
 * @return a {@code SPECIES_512} vector read from offset 0 of the encoded bytes
 */
static ByteVector chunk(String str) {
return ByteVector.fromArray(ByteVector.SPECIES_512, str.getBytes(UTF_8), 0);
}

/**
 * Loads the start of the UTF-8 encoding of {@code str} into a 256-bit byte vector.
 *
 * @param str text to encode; its UTF-8 form must be at least as long as the 256-bit species
 *            (32 bytes), otherwise {@code fromArray} fails its bounds check
 * @return a {@code SPECIES_256} vector read from offset 0 of the encoded bytes
 */
static ByteVector chunk0(String str) {
return ByteVector.fromArray(ByteVector.SPECIES_256, str.getBytes(UTF_8), 0);
}
Expand Down