Skip to content

Commit

Permalink
Saving all the progress for using micro-benchmarks
Browse files Browse the repository at this point in the history
Signed-off-by: Navneet Verma <navneev@amazon.com>
  • Loading branch information
navneet1v committed Mar 28, 2024
1 parent cdd0860 commit e3b890c
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 4 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ buildscript {
version_qualifier = System.getProperty("build.version_qualifier", "")
opensearch_group = "org.opensearch"
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
simd_enabled = System.getProperty("simd.enabled", "true")
simd_enabled = System.getProperty("simd.enabled", "false")

version_tokens = opensearch_version.tokenize('-')
opensearch_build = version_tokens[0] + '.0'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package org.opensearch.knn.index.codec.KNN80Codec;

import lombok.Getter;
import lombok.Setter;
import org.opensearch.knn.index.codec.util.BinaryDocValuesSub;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocIDMerger;
Expand All @@ -15,10 +17,17 @@
/**
* A per-document kNN numeric value.
*/
class KNN80BinaryDocValues extends BinaryDocValues {
public class KNN80BinaryDocValues extends BinaryDocValues {

private DocIDMerger<BinaryDocValuesSub> docIDMerger;

@Setter
private long cost;

@Getter
@Setter
private long liveDocs;

/**
 * Creates doc values that hold on to the supplied merger.
 *
 * The {@code cost} and {@code liveDocs} fields are not initialised here; they keep the
 * default {@code long} value of 0 until their Lombok-generated setters are called.
 *
 * @param docIdMerger merger over {@link BinaryDocValuesSub} instances; the reference is stored
 *                    as-is, without a null check or copy
 */
KNN80BinaryDocValues(DocIDMerger<BinaryDocValuesSub> docIdMerger) {
this.docIDMerger = docIdMerger;
}
Expand Down Expand Up @@ -54,7 +63,7 @@ public boolean advanceExact(int target) throws IOException {

@Override
public long cost() {
throw new UnsupportedOperationException();
return cost;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import lombok.NonNull;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.util.Bits;
import org.opensearch.common.StopWatch;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.core.common.bytes.BytesArray;
Expand Down Expand Up @@ -110,6 +111,17 @@ public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer,
throws IOException {
// Get values to be indexed
BinaryDocValues values = valuesProducer.getBinary(field);
// int maxNumberOfDocs = this.state.segmentInfo.maxDoc();
// if (valuesProducer instanceof KNN80DocValuesReader) {
// MergeState mergeState = ((KNN80DocValuesReader) valuesProducer).getMergeState();
// Bits[] liveDocsArray = mergeState.liveDocs;
// int liveDocs = 0;
// for(Bits bit : liveDocsArray) {
//
// }
// }


KNNCodecUtil.Pair pair = KNNCodecUtil.getFloats(values);
if (pair.vectors.length == 0 || pair.docs.length == 0) {
logger.info("Skipping engine index creation as there are no vectors or docs in the documents");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

package org.opensearch.knn.index.codec.KNN80Codec;

import lombok.Getter;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.opensearch.knn.index.codec.util.BinaryDocValuesSub;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
Expand All @@ -15,11 +20,14 @@
import org.apache.lucene.index.MergeState;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

/**
* Reader for KNNDocValues from the segments
*/
@Getter
@Log4j2
class KNN80DocValuesReader extends EmptyDocValuesProducer {

private final MergeState mergeState;
Expand All @@ -30,6 +38,8 @@ class KNN80DocValuesReader extends EmptyDocValuesProducer {

@Override
public BinaryDocValues getBinary(FieldInfo field) {
long cost = 0;
long liveDocsCount = 0;
try {
List<BinaryDocValuesSub> subs = new ArrayList<>(this.mergeState.docValuesProducers.length);
for (int i = 0; i < this.mergeState.docValuesProducers.length; i++) {
Expand All @@ -41,11 +51,33 @@ public BinaryDocValues getBinary(FieldInfo field) {
values = docValuesProducer.getBinary(readerFieldInfo);
}
if (values != null) {
cost += values.cost();
Bits liveDocs = this.mergeState.liveDocs[i];
if (liveDocs != null) {
log.info("There are some deleted docs present");
// A liveDocs bitset exists, so walk the values and count only the docs that are still live.
int docId;
for(docId = values.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId =
values.nextDoc()) {
if (liveDocs.get(docId)) {
liveDocsCount++;
}
}
// Fetch the doc values again: the counting loop above ran the previous iterator to NO_MORE_DOCS.
values = docValuesProducer.getBinary(readerFieldInfo);
} else {
// liveDocs is null, i.e. there is no deletion bitset for this reader, so treat every doc as live
// and use the iterator's cost as the count.
liveDocsCount += values.cost();
}
subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], values));
}
}
}
return new KNN80BinaryDocValues(DocIDMerger.of(subs, mergeState.needsIndexSort));
KNN80BinaryDocValues knn80BinaryDocValues = new KNN80BinaryDocValues(DocIDMerger.of(subs, mergeState.needsIndexSort));
knn80BinaryDocValues.setCost(cost);
knn80BinaryDocValues.setLiveDocs(liveDocsCount);
log.info("There are {} live docs, {} cost", liveDocsCount, cost);
return knn80BinaryDocValues;
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@

package org.opensearch.knn.index.codec.util;

import lombok.extern.log4j.Log4j2;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80BinaryDocValues;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;

@Log4j2
public class KNNCodecUtil {

public static final String HNSW_EXTENSION = ".hnsw";
Expand Down Expand Up @@ -42,6 +45,11 @@ public static KNNCodecUtil.Pair getFloats(BinaryDocValues values) throws IOExcep
ArrayList<float[]> vectorList = new ArrayList<>();
ArrayList<Integer> docIdList = new ArrayList<>();
SerializationMode serializationMode = SerializationMode.COLLECTION_OF_FLOATS;
long liveDocs = 0;
if(values instanceof KNN80BinaryDocValues) {
liveDocs = ((KNN80BinaryDocValues) values).getLiveDocs();
}

for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
BytesRef bytesref = values.binaryValue();
try (ByteArrayInputStream byteStream = new ByteArrayInputStream(bytesref.bytes, bytesref.offset, bytesref.length)) {
Expand All @@ -52,6 +60,8 @@ public static KNNCodecUtil.Pair getFloats(BinaryDocValues values) throws IOExcep
}
docIdList.add(doc);
}
log.info("The cost of the iterator is : {} and docIds are: {} and liveDocs : {}", values.cost(),
docIdList.size(), liveDocs);
return new KNNCodecUtil.Pair(
docIdList.stream().mapToInt(Integer::intValue).toArray(),
vectorList.toArray(new float[][] {}),
Expand Down

0 comments on commit e3b890c

Please sign in to comment.