Commit 49ee30a2 authored by Wouter van Oortmerssen's avatar Wouter van Oortmerssen Committed by GitHub

Merge pull request #3978 from TGIshib/key

Find by key on C# and Java (2)
parents 223ebebb 7c69c5dc
......@@ -131,6 +131,36 @@ object are prefixed with `Get`, e.g.:
monster.GetPos(preconstructedPos);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Storing dictionaries in a FlatBuffer
FlatBuffers doesn't support dictionaries natively, but there is support to
emulate their behavior with vectors and binary search, which means you
can have fast lookups directly from a FlatBuffer without having to unpack
your data into a `Dictionary` or similar.
To use it:
- Designate one of the fields in a table as the "key" field. You do this
by setting the `key` attribute on this field, e.g.
`name:string (key)`.
You may only have one key field, and it must be of string or scalar type.
- Write out tables of this type as usual, collect their offsets in an
array.
- Instead of calling the standard generated method,
e.g.: `Monster.createTestarrayoftablesVector`,
call `CreateMySortedVectorOfTables` in C# or
`createSortedVectorOfTables` (from the `FlatBufferBuilder` object) in Java,
which will first sort all offsets such that the tables they refer to
are sorted by the key field, then serialize it.
- Now when you're accessing the FlatBuffer, you can use `LookupByKey`
to access elements of the vector, e.g.:
`Monster.lookupByKey(tablesVectorOffset, "Frodo", dataBuffer)`,
which returns an object of the corresponding table type,
or `null` if not found.
`LookupByKey` performs a binary search, so should have a similar speed to
`Dictionary`, though may be faster because of better caching. `LookupByKey`
only works if the vector has been sorted; it will likely not find elements
if it hasn't been sorted.
## Text parsing
There currently is no support for parsing text (Schema's and JSON) directly
......
......@@ -389,6 +389,31 @@ public class FlatBufferBuilder {
return copy;
}
/**
 * Writes the given table offsets into the buffer as a vector of offsets.
 *
 * @param offsets Offsets of the tables.
 * @return Returns offset of the vector.
 */
public int createVectorOfTables(int[] offsets) {
  notNested();
  final int count = offsets.length;
  startVector(Constants.SIZEOF_INT, count, Constants.SIZEOF_INT);
  // Elements are added from the last array entry down to the first.
  int i = count;
  while (i-- > 0) {
    addOffset(offsets[i]);
  }
  return endVector();
}
/**
 * Create a vector of tables sorted by their key field.
 *
 * The {@code offsets} array is handed to {@code obj.sortTables} together with
 * this builder's buffer before being written out with
 * {@link #createVectorOfTables(int[])}.
 *
 * @param obj Instance of the table subclass.
 * @param offsets Offsets of the tables.
 * @return Returns offset of the sorted vector.
 */
public <T extends Table> int createSortedVectorOfTables(T obj, int[] offsets) {
  obj.sortTables(offsets, bb);
  final int vectorOffset = createVectorOfTables(offsets);
  return vectorOffset;
}
/**
* Encode the string `s` in the buffer using UTF-8. If {@code s} is
* already a {@link CharBuffer}, this method is allocation free.
......
......@@ -61,6 +61,11 @@ public class Table {
return vtable_offset < bb.getShort(vtable) ? bb.getShort(vtable + vtable_offset) : 0;
}
/**
 * Look up a field's position through the vtable of a table in {@code bb}.
 *
 * The table position is derived from the buffer capacity rather than from
 * {@code bb.array().length}, so direct and read-only buffers (which have no
 * accessible backing array) are supported.
 *
 * @param vtable_offset Offset of the field's entry inside the vtable.
 * @param offset Position of the table, measured back from the end of the buffer.
 * @param bb A {@code ByteBuffer} holding the table.
 * @return The value stored in the vtable entry plus the table position.
 */
protected static int __offset(int vtable_offset, int offset, ByteBuffer bb) {
  // Despite its name, `vtable` first holds the table position; the vtable
  // itself sits `bb.getInt(vtable)` bytes before it.
  int vtable = bb.capacity() - offset;
  return bb.getShort(vtable + vtable_offset - bb.getInt(vtable)) + vtable;
}
/**
* Retrieve a relative offset.
*
......@@ -70,6 +75,10 @@ public class Table {
protected int __indirect(int offset) {
  // The int stored at `offset` is added to `offset` itself.
  final int relative = bb.getInt(offset);
  return offset + relative;
}
/**
 * Retrieve a relative offset from an explicitly supplied buffer.
 *
 * @param offset Position in {@code bb} where the relative offset is stored.
 * @param bb A {@code ByteBuffer} to read from.
 * @return {@code offset} plus the int stored at {@code offset}.
 */
protected static int __indirect(int offset, ByteBuffer bb) {
  final int relative = bb.getInt(offset);
  return offset + relative;
}
/**
* Create a Java `String` from UTF-8 data stored inside the FlatBuffer.
......@@ -188,6 +197,72 @@ public class Table {
}
return true;
}
/**
 * Sort table offsets in place, ordering them with {@link #keysCompare}.
 *
 * @param offsets An 'int' indexes of the tables into the bb.
 * @param bb A {@code ByteBuffer} to get the tables.
 */
protected void sortTables(int[] offsets, ByteBuffer bb) {
  final int count = offsets.length;
  // Box the offsets so they can be sorted with a custom comparison.
  Integer[] boxed = new Integer[count];
  int next = 0;
  for (int value : offsets) {
    boxed[next++] = value;
  }
  Arrays.sort(boxed, (first, second) -> keysCompare(first, second, bb));
  // Copy the sorted order back into the caller's array.
  for (int j = 0; j < count; j++) {
    offsets[j] = boxed[j];
  }
}
/**
 * Compare two tables by the key.
 *
 * This base implementation treats every pair as equal and always returns 0.
 *
 * @param o1 An 'Integer' index of the first key into the bb.
 * @param o2 An 'Integer' index of the second key into the bb.
 * @param bb A {@code ByteBuffer} to get the keys.
 * @return Always 0 in this implementation.
 */
protected int keysCompare(Integer o1, Integer o2, ByteBuffer bb) {
  return 0;
}
/**
 * Compare two strings in the buffer.
 *
 * Bytes are read with absolute {@code bb.get(int)} calls instead of through
 * {@code bb.array()}, so the comparison also works for direct and read-only
 * buffers and for buffers with a non-zero array offset.
 *
 * NOTE(review): bytes are compared as signed Java {@code byte} values, so
 * bytes of 0x80 and above order before ASCII bytes.
 *
 * @param offset_1 An 'int' index of the first string into the bb.
 * @param offset_2 An 'int' index of the second string into the bb.
 * @param bb A {@code ByteBuffer} to get the strings.
 * @return A negative value, zero, or a positive value when the first string
 *         orders before, equal to, or after the second.
 */
protected static int compareStrings(int offset_1, int offset_2, ByteBuffer bb) {
  // Each argument points at a relative offset; follow it to the string itself.
  offset_1 += bb.getInt(offset_1);
  offset_2 += bb.getInt(offset_2);
  int len_1 = bb.getInt(offset_1);
  int len_2 = bb.getInt(offset_2);
  // The string bytes start right after the int length prefix.
  int startPos_1 = offset_1 + SIZEOF_INT;
  int startPos_2 = offset_2 + SIZEOF_INT;
  int len = Math.min(len_1, len_2);
  for (int i = 0; i < len; i++) {
    byte b1 = bb.get(startPos_1 + i);
    byte b2 = bb.get(startPos_2 + i);
    if (b1 != b2) {
      return b1 - b2;
    }
  }
  // The common prefix is identical: the shorter string orders first.
  return len_1 - len_2;
}
/**
 * Compare string from the buffer with a key given as a byte array.
 *
 * Bytes are read with absolute {@code bb.get(int)} calls instead of through
 * {@code bb.array()}, so the comparison also works for direct and read-only
 * buffers and for buffers with a non-zero array offset.
 *
 * NOTE(review): bytes are compared as signed Java {@code byte} values, so
 * bytes of 0x80 and above order before ASCII bytes.
 *
 * @param offset_1 An 'int' index of the first string into the bb.
 * @param key Second string as a byte array.
 * @param bb A {@code ByteBuffer} to get the first string.
 * @return A negative value, zero, or a positive value when the buffer string
 *         orders before, equal to, or after {@code key}.
 */
protected static int compareStrings(int offset_1, byte[] key, ByteBuffer bb) {
  // The argument points at a relative offset; follow it to the string itself.
  offset_1 += bb.getInt(offset_1);
  int len_1 = bb.getInt(offset_1);
  int len_2 = key.length;
  // The string bytes start right after the int length prefix.
  int startPos_1 = offset_1 + Constants.SIZEOF_INT;
  int len = Math.min(len_1, len_2);
  for (int i = 0; i < len; i++) {
    byte b1 = bb.get(startPos_1 + i);
    if (b1 != key[i]) {
      return b1 - key[i];
    }
  }
  // The common prefix is identical: the shorter string orders first.
  return len_1 - len_2;
}
}
/// @endcond
......@@ -295,6 +295,18 @@ namespace FlatBuffers
PutInt(_vectorNumElems);
return new VectorOffset(Offset);
}
/// <summary>
/// Writes the given table offsets into the buffer as a vector of offsets.
/// </summary>
/// <param name="offsets">Offsets of the tables.</param>
public VectorOffset CreateVectorOfTables<T>(Offset<T>[] offsets) where T : class
{
    NotNested();
    int count = offsets.Length;
    StartVector(sizeof(int), count, sizeof(int));
    // Elements are added from the last array entry down to the first.
    int i = count;
    while (i-- > 0)
    {
        AddOffset(offsets[i].Value);
    }
    return EndVector();
}
/// @cond FLATBUFFERS_INTENRAL
public void Nested(int obj)
......
......@@ -37,11 +37,22 @@ namespace FlatBuffers
return vtableOffset < bb.GetShort(vtable) ? (int)bb.GetShort(vtable + vtableOffset) : 0;
}
// Look up a field's position through the vtable of a table in "bb".
// "offset" is the table position measured back from the end of the buffer.
protected static int __offset(int vtableOffset, int offset, ByteBuffer bb)
{
    int table = bb.Length - offset;
    // The int stored at the table position is the distance back to its vtable.
    int slot = table + vtableOffset - bb.GetInt(table);
    return (int)bb.GetShort(slot) + table;
}
// Follow the relative offset stored at "offset" in this table's buffer.
protected int __indirect(int offset)
{
    int relative = bb.GetInt(offset);
    return offset + relative;
}
// Follow the relative offset stored at "offset" in the supplied buffer.
protected static int __indirect(int offset, ByteBuffer bb)
{
    int relative = bb.GetInt(offset);
    return offset + relative;
}
// Create a .NET String from UTF-8 data stored inside the flatbuffer.
protected string __string(int offset)
......@@ -103,7 +114,40 @@ namespace FlatBuffers
return true;
}
// Compare two strings stored in the ByteBuffer, byte by byte.
// Returns the first byte difference, or the length difference when one
// string is a prefix of the other.
protected static int CompareStrings(int offset_1, int offset_2, ByteBuffer bb)
{
    // Each argument points at a relative offset; follow it to the string.
    offset_1 += bb.GetInt(offset_1);
    offset_2 += bb.GetInt(offset_2);
    int firstLength = bb.GetInt(offset_1);
    int secondLength = bb.GetInt(offset_2);
    // The string bytes start right after the int length prefix.
    int firstStart = offset_1 + sizeof(int);
    int secondStart = offset_2 + sizeof(int);
    int common = Math.Min(firstLength, secondLength);
    byte[] data = bb.Data;
    for (int i = 0; i < common; i++)
    {
        int diff = data[firstStart + i] - data[secondStart + i];
        if (diff != 0)
        {
            return diff;
        }
    }
    return firstLength - secondLength;
}
// Compare a string stored in the ByteBuffer with a key given as a byte array.
// Returns the first byte difference, or the length difference when one
// is a prefix of the other.
protected static int CompareStrings(int offset_1, byte[] key, ByteBuffer bb)
{
    // The argument points at a relative offset; follow it to the string.
    offset_1 += bb.GetInt(offset_1);
    int storedLength = bb.GetInt(offset_1);
    int keyLength = key.Length;
    // The string bytes start right after the int length prefix.
    int storedStart = offset_1 + sizeof(int);
    int common = Math.Min(storedLength, keyLength);
    byte[] data = bb.Data;
    for (int i = 0; i < common; i++)
    {
        int diff = data[storedStart + i] - key[i];
        if (diff != 0)
        {
            return diff;
        }
    }
    return storedLength - keyLength;
}
}
}
......@@ -669,6 +669,89 @@ void GenStructBody(const StructDef &struct_def, std::string *code_ptr, const cha
}
}
// Returns the target-language expression for the total size of the byte
// buffer named `bb_name`.
// C# uses the buffer's `Length` property. Java uses `capacity()` rather than
// `array().length`, because `array()` throws for direct and read-only
// ByteBuffers.
std::string GenByteBufferLength(const char *bb_name) {
  std::string bb_len = bb_name;
  if (lang_.language == IDLOptions::kCSharp) bb_len += ".Length";
  else bb_len += ".capacity()";
  return bb_len;
}
// Builds the text of a call to the static `__offset` helper for the key field.
// When `num` is given it names the offset variable to pass (used by the sort
// comparison); otherwise the call is built for the lookup loop, which has
// `bb` and `tableOffset` in scope.
std::string GenOffsetGetter(flatbuffers::FieldDef *key_field, const char *num = nullptr) {
  std::string call = "__offset(" + NumToString(key_field->value.offset) + ", ";
  if (num != nullptr) {
    const bool is_csharp = lang_.language == IDLOptions::kCSharp;
    call += num;
    call += is_csharp ? ".Value, builder.DataBuffer)" : ", _bb)";
    return call;
  }
  call += GenByteBufferLength("bb");
  call += " - tableOffset, bb)";
  return call;
}
// Emits the body of one binary-search step of the generated lookup method:
// it assigns `tableOffset` for the element at `start + middle` and then
// assigns `comp` with the result of comparing that element's key to `key`.
// The emitted code expects `vectorLocation`, `start`, `middle`, `bb`, `key`,
// `comp`, `tableOffset` (and `byteKey` for string keys) to be in scope.
std::string GenLookupKeyGetter(flatbuffers::FieldDef *key_field) {
  std::string key_getter = " ";
  key_getter += "tableOffset = __indirect(vectorLocation + 4 * (start + middle)";
  key_getter += ", bb);\n ";
  if (key_field->value.type.base_type == BASE_TYPE_STRING) {
    // String keys are compared bytewise against the pre-encoded key.
    key_getter += "comp = " + FunctionStart('C') + "ompareStrings(";
    key_getter += GenOffsetGetter(key_field);
    key_getter += ", byteKey, bb);\n";
  }
  else {
    auto get_val = GenGetter(key_field->value.type) +
                   "(" + GenOffsetGetter(key_field) + ")";
    if (lang_.language == IDLOptions::kCSharp) {
      // Fixed: this previously emitted the misspelled `.CompateTo`, which
      // does not exist and broke compilation of generated C# scalar lookups.
      key_getter += "comp = " + get_val + ".CompareTo(key);\n";
    }
    else {
      // Java: compare through a typed local with explicit three-way logic.
      key_getter += GenTypeGet(key_field->value.type) + " val = ";
      key_getter += get_val + ";\n";
      key_getter += " comp = val > key ? 1 : val < key ? -1 : 0;\n";
    }
  }
  return key_getter;
}
// Emits the target-language expression/statements that compare the key
// fields of two tables identified by the offsets `o1` and `o2`. The result
// is spliced into the Java `keysCompare` body or into the C# sort lambda.
std::string GenKeyGetter(flatbuffers::FieldDef *key_field) {
std::string key_getter = "";
// Name of the buffer variable in the emitted code: the builder's data
// buffer in C#, the `_bb` parameter in Java.
auto data_buffer = (lang_.language == IDLOptions::kCSharp) ?
"builder.DataBuffer" : "_bb";
if (key_field->value.type.base_type == BASE_TYPE_STRING) {
// String keys: emit a call to the runtime string comparison helper. Java
// gets a full `return ...;` statement, C# only the bare expression.
if (lang_.language == IDLOptions::kJava)
key_getter += " return ";
key_getter += FunctionStart('C') + "ompareStrings(";
key_getter += GenOffsetGetter(key_field, "o1") + ", ";
key_getter += GenOffsetGetter(key_field, "o2") + ", " + data_buffer + ")";
if (lang_.language == IDLOptions::kJava)
key_getter += ";";
}
else {
// Scalar keys: read both values from the buffer and compare them.
// NOTE(review): substr(2) presumably strips a two-character buffer name
// (e.g. "bb") from the front of GenGetter's result so that `data_buffer`
// can be prepended instead - confirm against GenGetter.
auto field_getter = data_buffer + GenGetter(key_field->value.type).substr(2) +
"(" + GenOffsetGetter(key_field, "o1") + ")";
if (lang_.language == IDLOptions::kCSharp) {
// C#: <value of o1>.CompareTo(<value of o2>)
key_getter += field_getter;
field_getter = data_buffer + GenGetter(key_field->value.type).substr(2) +
"(" + GenOffsetGetter(key_field, "o2") + ")";
key_getter += ".CompareTo(" + field_getter + ")";
}
else {
// Java: load both values into typed locals, then return -1, 0 or 1.
key_getter += "\n " + GenTypeGet(key_field->value.type) + " val_1 = ";
key_getter += field_getter + ";\n " + GenTypeGet(key_field->value.type);
key_getter += " val_2 = ";
field_getter = data_buffer + GenGetter(key_field->value.type).substr(2) +
"(" + GenOffsetGetter(key_field, "o2") + ")";
key_getter += field_getter + ";\n";
key_getter += " return val_1 > val_2 ? 1 : val_1 < val_2 ? -1 : 0;\n ";
}
}
return key_getter;
}
void GenStruct(StructDef &struct_def, std::string *code_ptr) {
if (struct_def.generated) return;
std::string &code = *code_ptr;
......@@ -960,6 +1043,7 @@ void GenStruct(StructDef &struct_def, std::string *code_ptr) {
}
}
code += "\n";
flatbuffers::FieldDef *key_field = nullptr;
if (struct_def.fixed) {
// create a struct constructor function
code += " public static " + GenOffsetType(struct_def) + " ";
......@@ -1048,6 +1132,7 @@ void GenStruct(StructDef &struct_def, std::string *code_ptr) {
it != struct_def.fields.vec.end(); ++it) {
auto &field = **it;
if (field.deprecated) continue;
if (field.key) key_field = &field;
code += " public static void " + FunctionStart('A') + "dd";
code += MakeCamel(field.name);
code += "(FlatBufferBuilder builder, ";
......@@ -1130,6 +1215,53 @@ void GenStruct(StructDef &struct_def, std::string *code_ptr) {
code += "); }\n";
}
}
// Emit the key-based helpers (sort comparison and binary-search lookup).
// key_field is only assigned while iterating the fields above, so make sure
// it was actually set before dereferencing it.
if (struct_def.has_key && key_field != nullptr) {
  const bool is_java = lang_.language == IDLOptions::kJava;
  const bool string_key =
      key_field->value.type.base_type == BASE_TYPE_STRING;
  if (is_java) {
    // Java: override Table.keysCompare so sortTables can order the offsets.
    code += "\n @Override\n protected int keysCompare(";
    code += "Integer o1, Integer o2, ByteBuffer _bb) {";
    code += GenKeyGetter(key_field);
    code += " }\n";
  }
  else {
    // C#: emit a static helper that sorts the offsets and writes the vector.
    code += "\n public static VectorOffset ";
    code += "CreateMySortedVectorOfTables(FlatBufferBuilder builder, ";
    code += "Offset<" + struct_def.name + ">";
    code += "[] offsets) {\n";
    code += " Array.Sort(offsets, (Offset<" + struct_def.name +
            "> o1, Offset<" + struct_def.name + "> o2) => " + GenKeyGetter(key_field);
    code += ");\n";
    code += " return builder.CreateVectorOfTables(offsets);\n }\n";
  }

  code += "\n public static " + struct_def.name + " " + FunctionStart('L');
  code += "ookupByKey(" + GenVectorOffsetType();
  code += " vectorOffset, " + GenTypeGet(key_field->value.type);
  code += " key, ByteBuffer bb) {\n";
  // Only string keys need (and can have) a UTF-8 byte representation;
  // emitting this for a scalar key would not compile.
  if (string_key) {
    code += " byte[] byteKey = ";
    if (is_java)
      code += "key.getBytes(StandardCharsets.UTF_8);\n";
    else
      code += "System.Text.Encoding.UTF8.GetBytes(key);\n";
  }
  code += " int vectorLocation = " + GenByteBufferLength("bb");
  // The vector offset is a plain int in Java and a VectorOffset in C#.
  code += is_java ? " - vectorOffset;\n" : " - vectorOffset.Value;\n";
  code += " int span = ";
  code += "bb." + FunctionStart('G') + "etInt(vectorLocation), ";
  // `middle` is declared inside the loop below; declaring it here as well
  // made the generated method fail to compile (duplicate local variable).
  code += "start = 0, comp, tableOffset; \n";
  code += " vectorLocation += 4;\n";
  code += " while (span != 0) {\n";
  code += " int middle = span / 2;\n";
  code += GenLookupKeyGetter(key_field);
  code += " if (comp > 0) span = middle;\n";
  code += " else if (comp < 0) {\n";
  code += " middle++;\n";
  code += " start += middle;\n";
  code += " span -= middle;\n";
  code += " }\n";
  code += " else return new " + struct_def.name;
  code += "().__init(tableOffset, bb);\n";
  code += " }\n";
  code += " return null;\n";
  code += " }\n";
}
code += "}";
// Java does not need the closing semi-colon on class definitions.
code += (lang_.language != IDLOptions::kJava) ? ";" : "";
......
......@@ -39,6 +39,19 @@ namespace FlatBuffers.Test
// better for performance.
var fbb = new FlatBufferBuilder(1);
StringOffset[] names = { fbb.CreateString("Frodo"), fbb.CreateString("Barney"), fbb.CreateString("Wilma") };
Offset<Monster>[] off = new Offset<Monster>[3];
Monster.StartMonster(fbb);
Monster.AddName(fbb, names[0]);
off[0] = Monster.EndMonster(fbb);
Monster.StartMonster(fbb);
Monster.AddName(fbb, names[1]);
off[1] = Monster.EndMonster(fbb);
Monster.StartMonster(fbb);
Monster.AddName(fbb, names[2]);
off[2] = Monster.EndMonster(fbb);
var sortMons = Monster.CreateMySortedVectorOfTables(fbb, off);
// We set up the same values as monsterdata.json:
var str = fbb.CreateString("MyMonster");
......@@ -79,6 +92,7 @@ namespace FlatBuffers.Test
Monster.AddTest4(fbb, test4);
Monster.AddTestarrayofstring(fbb, testArrayOfString);
Monster.AddTestbool(fbb, false);
Monster.AddTestarrayoftables(fbb, sortMons);
var mon = Monster.EndMonster(fbb);
Monster.FinishMonsterBuffer(fbb, mon);
......@@ -102,6 +116,16 @@ namespace FlatBuffers.Test
// the mana field should retain its default value
Assert.AreEqual(monster.MutateMana((short)10), false);
Assert.AreEqual(monster.Mana, (short)150);
// Accessing a vector of tables sorted by key
Assert.AreEqual(monster.GetTestarrayoftables(0).Name, "Barney");
Assert.AreEqual(monster.GetTestarrayoftables(1).Name, "Frodo");
Assert.AreEqual(monster.GetTestarrayoftables(2).Name, "Wilma");
// Example of searching for a table by the key
Assert.IsTrue(Monster.LookupByKey(sortMons, "Frodo", fbb.DataBuffer) != null);
Assert.IsTrue(Monster.LookupByKey(sortMons, "Barney", fbb.DataBuffer) != null);
Assert.IsTrue(Monster.LookupByKey(sortMons, "Wilma", fbb.DataBuffer)!= null);
// testType is an existing field and mutating it should succeed
Assert.AreEqual(monster.TestType, Any.Monster);
......
......@@ -51,6 +51,19 @@ class JavaTest {
// better for performance.
FlatBufferBuilder fbb = new FlatBufferBuilder(1);
int[] names = {fbb.createString("Frodo"), fbb.createString("Barney"), fbb.createString("Wilma")};
int[] off = new int[3];
Monster.startMonster(fbb);
Monster.addName(fbb, names[0]);
off[0] = Monster.endMonster(fbb);
Monster.startMonster(fbb);
Monster.addName(fbb, names[1]);
off[1] = Monster.endMonster(fbb);
Monster.startMonster(fbb);
Monster.addName(fbb, names[2]);
off[2] = Monster.endMonster(fbb);
int sortMons = fbb.createSortedVectorOfTables(new Monster(), off);
// We set up the same values as monsterdata.json:
int str = fbb.createString("MyMonster");
......@@ -84,6 +97,7 @@ class JavaTest {
Monster.addTestarrayofstring(fbb, testArrayOfString);
Monster.addTestbool(fbb, false);
Monster.addTesthashu32Fnv1(fbb, Integer.MAX_VALUE + 1L);
Monster.addTestarrayoftables(fbb, sortMons);
int mon = Monster.endMonster(fbb);
Monster.finishMonsterBuffer(fbb, mon);
......@@ -121,6 +135,16 @@ class JavaTest {
// the mana field should retain its default value
TestEq(monster.mutateMana((short)10), false);
TestEq(monster.mana(), (short)150);
// Accessing a vector of tables sorted by key
TestEq(monster.testarrayoftables(0).name(), "Barney");
TestEq(monster.testarrayoftables(1).name(), "Frodo");
TestEq(monster.testarrayoftables(2).name(), "Wilma");
// Example of searching for a table by the key
TestEq(Monster.lookupByKey(sortMons, "Frodo", fbb.dataBuffer()).name(), "Frodo");
TestEq(Monster.lookupByKey(sortMons, "Barney", fbb.dataBuffer()).name(), "Barney");
TestEq(Monster.lookupByKey(sortMons, "Wilma", fbb.dataBuffer()).name(), "Wilma");
// testType is an existing field and mutating it should succeed
TestEq(monster.testType(), (byte)Any.Monster);
......
......@@ -129,6 +129,31 @@ public sealed class Monster : Table {
return new Offset<Monster>(o);
}
public static void FinishMonsterBuffer(FlatBufferBuilder builder, Offset<Monster> offset) { builder.Finish(offset.Value, "MONS"); }
// Sorts the given Monster offsets by their key field (vtable offset 10),
// then writes them out as a vector of tables.
public static VectorOffset CreateMySortedVectorOfTables(FlatBufferBuilder builder, Offset<Monster>[] offsets) {
  Array.Sort(offsets, (Offset<Monster> o1, Offset<Monster> o2) => {
    return CompareStrings(
        __offset(10, o1.Value, builder.DataBuffer),
        __offset(10, o2.Value, builder.DataBuffer),
        builder.DataBuffer);
  });
  return builder.CreateVectorOfTables(offsets);
}
// Binary-searches a key-sorted vector of Monster tables for the entry whose
// key string equals "key". Returns the matching Monster, or null if no entry
// matches. The vector must have been sorted by key beforehand.
public static Monster LookupByKey(VectorOffset vectorOffset, string key, ByteBuffer bb) {
  byte[] byteKey = System.Text.Encoding.UTF8.GetBytes(key);
  // The vector offset is measured back from the end of the buffer.
  int vectorLocation = bb.Length - vectorOffset.Value;
  // "middle" is declared inside the loop only; declaring it here as well is
  // a compile error (CS0136).
  int span = bb.GetInt(vectorLocation), start = 0, comp, tableOffset;
  // Skip the element count to reach the first element.
  vectorLocation += 4;
  while (span != 0) {
    int middle = span / 2;
    tableOffset = __indirect(vectorLocation + 4 * (start + middle), bb);
    comp = CompareStrings(__offset(10, bb.Length - tableOffset, bb), byteKey, bb);
    if (comp > 0) span = middle;
    else if (comp < 0) {
      // Continue in the part of the range after the probed element.
      middle++;
      start += middle;
      span -= middle;
    }
    else return new Monster().__init(tableOffset, bb);
  }
  return null;
}
};
......
......@@ -135,5 +135,28 @@ public final class Monster extends Table {
return o;
}
public static void finishMonsterBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset, "MONS"); }
/**
 * Compares two Monster tables by their key string (vtable offset 10).
 */
@Override
protected int keysCompare(Integer o1, Integer o2, ByteBuffer _bb) {
  final int firstKey = __offset(10, o1, _bb);
  final int secondKey = __offset(10, o2, _bb);
  return compareStrings(firstKey, secondKey, _bb);
}
/**
 * Binary-searches a key-sorted vector of Monster tables for the entry whose
 * key string equals {@code key}. The vector must have been sorted by key.
 *
 * @param vectorOffset Offset of the vector, measured back from the end of {@code bb}.
 * @param key The key string to look for.
 * @param bb A {@code ByteBuffer} holding the vector and its tables.
 * @return The matching Monster, or {@code null} if no entry matches.
 */
public static Monster lookupByKey(int vectorOffset, String key, ByteBuffer bb) {
  byte[] byteKey = key.getBytes(StandardCharsets.UTF_8);
  // vectorOffset is a plain int in Java (there is no `.Value` member), and
  // capacity() is used because array() throws for direct/read-only buffers.
  int vectorLocation = bb.capacity() - vectorOffset;
  // `middle` is declared inside the loop only; declaring it here as well is
  // a duplicate-variable compile error.
  int span = bb.getInt(vectorLocation), start = 0, comp, tableOffset;
  // Skip the element count to reach the first element.
  vectorLocation += 4;
  while (span != 0) {
    int middle = span / 2;
    tableOffset = __indirect(vectorLocation + 4 * (start + middle), bb);
    comp = compareStrings(__offset(10, bb.capacity() - tableOffset, bb), byteKey, bb);
    if (comp > 0) span = middle;
    else if (comp < 0) {
      // Continue in the part of the range after the probed element.
      middle++;
      start += middle;
      span -= middle;
    }
    else return new Monster().__init(tableOffset, bb);
  }
  return null;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment