|
27 | 27 | public class TestOptimizedScalarQuantizer extends LuceneTestCase {
|
28 | 28 | static final byte[] ALL_BITS = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
|
29 | 29 |
|
| 30 | + static float[] deQuantize(byte[] quantized, byte bits, float[] interval, float[] centroid) { |
| 31 | + float[] dequantized = new float[quantized.length]; |
| 32 | + float a = interval[0]; |
| 33 | + float b = interval[1]; |
| 34 | + int nSteps = (1 << bits) - 1; |
| 35 | + double step = (b - a) / nSteps; |
| 36 | + for (int h = 0; h < quantized.length; h++) { |
| 37 | + double xi = (double) (quantized[h] & 0xFF) * step + a; |
| 38 | + dequantized[h] = (float) (xi + centroid[h]); |
| 39 | + } |
| 40 | + return dequantized; |
| 41 | + } |
| 42 | + |
| 43 | + public void testQuantizationQuality() { |
| 44 | + int dims = 16; |
| 45 | + int numVectors = 32; |
| 46 | + float[][] vectors = new float[numVectors][]; |
| 47 | + float[] centroid = new float[dims]; |
| 48 | + for (int i = 0; i < numVectors; ++i) { |
| 49 | + vectors[i] = new float[dims]; |
| 50 | + for (int j = 0; j < dims; ++j) { |
| 51 | + vectors[i][j] = randomFloat(); |
| 52 | + centroid[j] += vectors[i][j]; |
| 53 | + } |
| 54 | + } |
| 55 | + for (int j = 0; j < dims; ++j) { |
| 56 | + centroid[j] /= numVectors; |
| 57 | + } |
| 58 | + // similarity doesn't matter for this test |
| 59 | + OptimizedScalarQuantizer osq = |
| 60 | + new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT); |
| 61 | + float[] scratch = new float[dims]; |
| 62 | + for (byte bit : ALL_BITS) { |
| 63 | + float eps = (1f / (float) (1 << (bit))); |
| 64 | + byte[] destination = new byte[dims]; |
| 65 | + for (int i = 0; i < numVectors; ++i) { |
| 66 | + System.arraycopy(vectors[i], 0, scratch, 0, dims); |
| 67 | + OptimizedScalarQuantizer.QuantizationResult result = |
| 68 | + osq.scalarQuantize(scratch, destination, bit, centroid); |
| 69 | + assertValidResults(result); |
| 70 | + assertValidQuantizedRange(destination, bit); |
| 71 | + |
| 72 | + float[] dequantized = |
| 73 | + deQuantize( |
| 74 | + destination, |
| 75 | + bit, |
| 76 | + new float[] {result.lowerInterval(), result.upperInterval()}, |
| 77 | + centroid); |
| 78 | + float mae = 0; |
| 79 | + for (int k = 0; k < dims; ++k) { |
| 80 | + mae += Math.abs(dequantized[k] - vectors[i][k]); |
| 81 | + } |
| 82 | + mae /= dims; |
| 83 | + assertTrue("bits: " + bit + " mae: " + mae + " > eps: " + eps, mae <= eps); |
| 84 | + } |
| 85 | + } |
| 86 | + } |
| 87 | + |
30 | 88 | public void testAbusiveEdgeCases() {
|
31 | 89 | // large zero array
|
32 | 90 | for (VectorSimilarityFunction vectorSimilarityFunction : VectorSimilarityFunction.values()) {
|
|
0 commit comments