Skip to content

Commit

Permalink
GH-40937: [Java] Implement Holder-based functions for ViewVarCharVector & ViewVarBinaryVector (#44187)
Browse files Browse the repository at this point in the history

* GitHub Issue: #40936
* GitHub Issue: #40937

Authored-by: chenweiguo.vc <chenweiguo.vc@bytedance.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
ViggoC committed Sep 26, 2024
1 parent 9fa78d0 commit bc923bd
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,31 @@ public byte[] getObject(int index) {
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarBinaryHolder holder) {
  // Populates `holder` with a reference to the bytes stored at `index` without copying them.
  // For a null slot only holder.isSet is written; buffer/start/end keep whatever they held.
  final int dataLength = getValueLength(index);
  if (isSet(index) == 0) {
    holder.isSet = 0;
    return;
  }
  holder.isSet = 1;

  // Byte position of this element's view slot inside viewBuffer.
  final long slotOffset = (long) index * ELEMENT_SIZE;
  if (dataLength > INLINE_SIZE) {
    // Long value: the slot holds (length, prefix, buffer index, offset); the bytes themselves
    // live in one of the data buffers.
    final long bufferIndexPosition = slotOffset + LENGTH_WIDTH + PREFIX_WIDTH;
    final int bufferIndex = viewBuffer.getInt(bufferIndexPosition);
    final int dataOffset = viewBuffer.getInt(bufferIndexPosition + BUF_INDEX_WIDTH);
    holder.buffer = dataBuffers.get(bufferIndex);
    holder.start = dataOffset;
    holder.end = dataOffset + dataLength;
  } else {
    // Short value: the bytes are stored inline in the view slot, right after the length field.
    final long dataOffset = slotOffset + LENGTH_WIDTH;
    holder.buffer = viewBuffer;
    // NOTE(review): the offset is narrowed to int as in the original code; presumably
    // holder.start/end are int fields - confirm very large vectors cannot overflow here.
    holder.start = (int) dataOffset;
    holder.end = (int) dataOffset + dataLength;
  }
}

/*----------------------------------------------------------------*
Expand All @@ -150,8 +173,10 @@ public void get(int index, NullableViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarBinaryHolder holder) {
  // Writes the bytes in holder.buffer[holder.start, holder.end) to the slot at `index`.
  // A non-nullable holder always carries a value, so the slot must be marked valid; the
  // nullable overload does the same before calling setBytes.
  BitVectorHelper.setBit(validityBuffer, index);
  final int start = holder.start;
  final int length = holder.end - start;
  setBytes(index, holder.buffer, start, length);
  lastSet = index;
}

/**
Expand All @@ -162,8 +187,9 @@ public void set(int index, ViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarBinaryHolder holder) {
  // Same as set(index, holder), but first calls handleSafe with the value length
  // (presumably to grow the buffers so that slot `index` and the value fit - confirm).
  final int length = holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand All @@ -174,8 +200,15 @@ public void setSafe(int index, ViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarBinaryHolder holder) {
  // Writes the holder's value to the slot at `index`, or marks the slot null when
  // holder.isSet == 0. In both cases `index` becomes the last-set position.
  if (holder.isSet == 0) {
    setNull(index);
  } else {
    // Mark the slot valid before writing the view entry and data.
    BitVectorHelper.setBit(validityBuffer, index);
    final int start = holder.start;
    final int length = holder.end - start;
    setBytes(index, holder.buffer, start, length);
  }
  lastSet = index;
}

/**
Expand All @@ -186,8 +219,9 @@ public void set(int index, NullableViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarBinaryHolder holder) {
  // Same as set(index, holder), but first calls handleSafe so the slot at `index` is usable.
  // An unset holder carries no data: its start/end may be stale from an earlier use (get()
  // leaves them untouched for null slots), so they must not drive the capacity request.
  final int length = holder.isSet == 0 ? 0 : holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/*----------------------------------------------------------------*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,31 @@ public void read(int index, ReusableBuffer<?> buffer) {
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarCharHolder holder) {
  // Populates `holder` with a reference to the bytes stored at `index` without copying them.
  // For a null slot only holder.isSet is written; buffer/start/end keep whatever they held.
  final int dataLength = getValueLength(index);
  if (isSet(index) == 0) {
    holder.isSet = 0;
    return;
  }
  holder.isSet = 1;

  // Byte position of this element's view slot inside viewBuffer.
  final long slotOffset = (long) index * ELEMENT_SIZE;
  if (dataLength > INLINE_SIZE) {
    // Long value: the slot holds (length, prefix, buffer index, offset); the bytes themselves
    // live in one of the data buffers.
    final long bufferIndexPosition = slotOffset + LENGTH_WIDTH + PREFIX_WIDTH;
    final int bufferIndex = viewBuffer.getInt(bufferIndexPosition);
    final int dataOffset = viewBuffer.getInt(bufferIndexPosition + BUF_INDEX_WIDTH);
    holder.buffer = dataBuffers.get(bufferIndex);
    holder.start = dataOffset;
    holder.end = dataOffset + dataLength;
  } else {
    // Short value: the bytes are stored inline in the view slot, right after the length field.
    final long dataOffset = slotOffset + LENGTH_WIDTH;
    holder.buffer = viewBuffer;
    // NOTE(review): the offset is narrowed to int as in the original code; presumably
    // holder.start/end are int fields - confirm very large vectors cannot overflow here.
    holder.start = (int) dataOffset;
    holder.end = (int) dataOffset + dataLength;
  }
}

/*----------------------------------------------------------------*
Expand All @@ -162,8 +184,10 @@ public void get(int index, NullableViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarCharHolder holder) {
  // Writes the bytes in holder.buffer[holder.start, holder.end) to the slot at `index`.
  // A non-nullable holder always carries a value, so the slot must be marked valid; the
  // nullable overload does the same before calling setBytes.
  BitVectorHelper.setBit(validityBuffer, index);
  final int start = holder.start;
  final int length = holder.end - start;
  setBytes(index, holder.buffer, start, length);
  lastSet = index;
}

/**
Expand All @@ -174,8 +198,9 @@ public void set(int index, ViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarCharHolder holder) {
  // Same as set(index, holder), but first calls handleSafe with the value length
  // (presumably to grow the buffers so that slot `index` and the value fit - confirm).
  final int length = holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand All @@ -186,9 +211,15 @@ public void setSafe(int index, ViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarCharHolder holder) {
  // Writes the holder's value to the slot at `index`, or marks the slot null when
  // holder.isSet == 0. In both cases `index` becomes the last-set position.
  if (holder.isSet == 0) {
    setNull(index);
  } else {
    // Mark the slot valid before writing the view entry and data.
    BitVectorHelper.setBit(validityBuffer, index);
    final int start = holder.start;
    final int length = holder.end - start;
    setBytes(index, holder.buffer, start, length);
  }
  lastSet = index;
}

/**
Expand All @@ -199,9 +230,9 @@ public void set(int index, NullableViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarCharHolder holder) {
  // Same as set(index, holder), but first calls handleSafe so the slot at `index` is usable.
  // An unset holder carries no data: its start/end may be stale from an earlier use (get()
  // leaves them untouched for null slots), so they must not drive the capacity request.
  final int length = holder.isSet == 0 ? 0 : holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.CommonUtil;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder;
import org.apache.arrow.vector.holders.NullableViewVarCharHolder;
import org.apache.arrow.vector.holders.ValueHolder;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.Types;
Expand All @@ -63,7 +67,7 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestVarCharViewVector {
public class TestVariableWidthViewVector {

// short string (length <= 12)
private static final byte[] STR0 = "0123456".getBytes(StandardCharsets.UTF_8);
Expand Down Expand Up @@ -371,6 +375,136 @@ public void testMixedAllocation() {
}
}

@Test
public void testSetNullableViewVarCharHolder() {
  // Exercises the holder-based get/set/setSafe API of ViewVarCharVector: plain writes, a null
  // write, holder-to-holder copies, and overwriting slots that already hold data.
  try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
    // Start from an empty allocation so every write goes through setSafe's capacity handling.
    viewVarCharVector.allocateNew(0, 0);
    final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);

    final NullableViewVarCharHolder stringHolder = new NullableViewVarCharHolder();

    // set not null
    final int size = strings.size();
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarCharVector, i, strings.get(i), stringHolder);
    }

    // set null
    setAndCheck(viewVarCharVector, 6, null, stringHolder);

    // copy by holder
    // len < 12
    copyAndCheck(viewVarCharVector, stringHolder, 0, 7);
    // len > 12
    copyAndCheck(viewVarCharVector, stringHolder, 2, 8);
    // null
    copyAndCheck(viewVarCharVector, stringHolder, 6, 9);

    // test overwrite: write the same strings back in reverse order
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder);
    }

    // Overwrite the previously-null slot with a long value. Encode explicitly as UTF-8, like the
    // STR* fixtures, instead of depending on the platform default charset.
    final String longString = generateRandomString(128);
    setAndCheck(
        viewVarCharVector, 6, longString.getBytes(StandardCharsets.UTF_8), stringHolder);
  }
}

@Test
public void testSetNullableViewVarBinaryHolder() {
  // Exercises the holder-based get/set/setSafe API of ViewVarBinaryVector: plain writes, a null
  // write, holder-to-holder copies, and overwriting slots that already hold data.
  try (final ViewVarBinaryVector viewVarBinaryVector =
      new ViewVarBinaryVector("myvector", allocator)) {
    // Start from an empty allocation so every write goes through setSafe's capacity handling.
    viewVarBinaryVector.allocateNew(0, 0);
    final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);

    final NullableViewVarBinaryHolder holder = new NullableViewVarBinaryHolder();

    // set not null
    final int size = strings.size();
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarBinaryVector, i, strings.get(i), holder);
    }

    // set null
    setAndCheck(viewVarBinaryVector, 6, null, holder);

    // copy by holder
    // len < 12
    copyAndCheck(viewVarBinaryVector, holder, 0, 7);
    // len > 12
    copyAndCheck(viewVarBinaryVector, holder, 2, 8);
    // null
    copyAndCheck(viewVarBinaryVector, holder, 6, 9);

    // test overwrite: write the same values back in reverse order
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
    }

    // Overwrite the previously-null slot with a long value. Encode explicitly as UTF-8, like the
    // STR* fixtures, instead of depending on the platform default charset.
    final String longString = generateRandomString(128);
    setAndCheck(viewVarBinaryVector, 6, longString.getBytes(StandardCharsets.UTF_8), holder);
  }
}

private static void copyAndCheck(
    BaseVariableWidthViewVector vector, ValueHolder holder, int fromIndex, int toIndex) {
  // Reads slot `fromIndex` into the holder and writes it back to slot `toIndex` through the
  // holder API that matches the vector's concrete type, then checks both slots hold equal bytes.
  if (vector instanceof ViewVarCharVector) {
    final ViewVarCharVector charVector = (ViewVarCharVector) vector;
    final NullableViewVarCharHolder charHolder = (NullableViewVarCharHolder) holder;
    charVector.get(fromIndex, charHolder);
    charVector.setSafe(toIndex, charHolder);
  }

  if (vector instanceof ViewVarBinaryVector) {
    final ViewVarBinaryVector binaryVector = (ViewVarBinaryVector) vector;
    final NullableViewVarBinaryHolder typedHolder = (NullableViewVarBinaryHolder) holder;
    binaryVector.get(fromIndex, typedHolder);
    binaryVector.setSafe(toIndex, typedHolder);
  }

  final byte[] sourceBytes = vector.get(fromIndex);
  final byte[] copiedBytes = vector.get(toIndex);
  assertArrayEquals(sourceBytes, copiedBytes);
}

private void setAndCheck(
    ViewVarCharVector vector, int index, byte[] str, NullableViewVarCharHolder stringHolder) {
  // Writes `str` (or null) to slot `index` through the nullable holder and verifies the
  // vector returns the same bytes. The scratch buffer is released in `finally` so a failing
  // setSafe or assertion does not leave an unreleased allocation behind.
  ArrowBuf buf = null;
  try {
    if (str == null) {
      stringHolder.isSet = 0;
    } else {
      // Stage the bytes in a scratch buffer; the holder references the range [0, str.length).
      buf = allocator.buffer(str.length);
      buf.setBytes(0, str);
      stringHolder.isSet = 1;
      stringHolder.start = 0;
      stringHolder.end = str.length;
      stringHolder.buffer = buf;
    }
    vector.setSafe(index, stringHolder);

    // verify results
    assertArrayEquals(str, vector.get(index));
  } finally {
    // buf stays null for the null-value case; the original code passed null here as well.
    AutoCloseables.closeNoChecked(buf);
  }
}

private void setAndCheck(
    ViewVarBinaryVector vector, int index, byte[] str, NullableViewVarBinaryHolder binaryHolder) {
  // Writes `str` (or null) to slot `index` through the nullable holder and verifies the
  // vector returns the same bytes. The scratch buffer is released in `finally` so a failing
  // setSafe or assertion does not leave an unreleased allocation behind.
  ArrowBuf buf = null;
  try {
    if (str == null) {
      binaryHolder.isSet = 0;
    } else {
      // Stage the bytes in a scratch buffer; the holder references the range [0, str.length).
      buf = allocator.buffer(str.length);
      buf.setBytes(0, str);
      binaryHolder.isSet = 1;
      binaryHolder.start = 0;
      binaryHolder.end = str.length;
      binaryHolder.buffer = buf;
    }
    vector.setSafe(index, binaryHolder);

    // verify results
    assertArrayEquals(str, vector.get(index));
  } finally {
    // buf stays null for the null-value case; the original code passed null here as well.
    AutoCloseables.closeNoChecked(buf);
  }
}

@Test
public void testAllocationIndexOutOfBounds() {
assertThrows(
Expand Down

0 comments on commit bc923bd

Please sign in to comment.