@@ -454,7 +454,7 @@ import numpy as np
454
454
from simsimd import fma, wsum
455
455
456
456
# Let's take two FullHD video frames
457
- first_frame = np.random.randn(1920 * 1024 ).astype(np.uint8)
457
+ first_frame = np.random.randn(1920 * 1080 ).astype(np.uint8)
458
458
second_frame = np.random.randn(1920 * 1080 ).astype(np.uint8)
459
459
average_frame = np.empty_like(first_frame)
460
460
wsum(first_frame, second_frame, alpha = 0.5 , beta = 0.5 , out = average_frame)
@@ -479,7 +479,7 @@ alpha = 0.7 # Weight for the diffuse component
479
479
beta = 0.3 # Weight for the specular component
480
480
481
481
# Formula: color = alpha * light_intensity * diffuse_component + beta * specular_component
482
- fma(light_intensity, diffuse_component, specular_component,
482
+ fma(light_intensity, diffuse_component, specular_component,
483
483
dtype = " float16" , # Optional, unless it can't be inferred from the input
484
484
alpha = alpha, beta = beta, out = output_color)
485
485
@@ -499,7 +499,7 @@ ndim = 1536 # OpenAI Ada embeddings
499
499
matrix1 = np.packbits(np.random.randint(2 , size = (10_000 , ndim)).astype(np.uint8))
500
500
matrix2 = np.packbits(np.random.randint(2 , size = (1_000 , ndim)).astype(np.uint8))
501
501
502
- distances = simsimd.cdist(matrix1, matrix2,
502
+ distances = simsimd.cdist(matrix1, matrix2,
503
503
metric = " hamming" , # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions
504
504
out_dtype = " uint8" , # so we can use `uint8` instead of `float64` to save memory.
505
505
threads = 0 , # Use all CPU cores with OpenMP.
@@ -541,8 +541,38 @@ with ThreadPoolExecutor(max_workers=num_threads) as executor:
541
541
futures.append(executor.submit(compute_batch, start_idx, end_idx))
542
542
543
543
# Collect results from all threads
544
- results = [future.result() for future in futures]
545
- ```
544
+ results = [future.result() for future in futures]
545
+ ```
546
+
547
+ ### Half-Precision Brain-Float Numbers
548
+
549
+ The "brain-float-16" is a popular machine learning format.
550
+ It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
551
+ [Unlike NumPy](https://github.com/numpy/numpy/issues/19808), you can already use the `bf16` datatype in SimSIMD.
552
+ Luckily, to downcast ` f32 ` to ` bf16 ` you only have to drop the last 16 bits:
553
+
554
+ ``` py
555
+ import numpy as np
556
+ import simsimd as simd
557
+
558
+ a = np.random.randn(ndim).astype(np.float32)
559
+ b = np.random.randn(ndim).astype(np.float32)
560
+
561
+ # NumPy doesn't natively support brain-float, so we need a trick!
562
+ # Luckily, it's very easy to reduce the representation accuracy
563
+ # by simply masking the low 16-bits of our 32-bit single-precision
564
+ # numbers. We can also add `0x8000` to round the numbers.
565
+ a_f32rounded = ((a.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
566
+ b_f32rounded = ((b.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
567
+
568
+ # To represent them as brain-floats, we need to drop the second half
569
+ a_bf16 = np.right_shift(a_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
570
+ b_bf16 = np.right_shift(b_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
571
+
572
+ # Now we can compare the results
573
+ expected = np.inner(a_f32rounded, b_f32rounded)
574
+ result = simd.inner(a_bf16, b_bf16, " bf16" )
575
+ ```
546
576
547
577
### Helper Functions
548
578
@@ -693,23 +723,48 @@ Binary similarity functions are available only for `u8` types.
693
723
694
724
### Half-Precision Floating-Point Numbers
695
725
696
- Rust has no native support for half-precision floating-point numbers, but SimSIMD provides a ` f16 ` type.
697
- It has no functionality - it is a ` transparent ` wrapper around ` u16 ` and can be used with ` half ` or any other half-precision library .
726
+ Rust has no native support for half-precision floating-point numbers, but SimSIMD provides a ` f16 ` type with built-in conversion methods .
727
+ The underlying ` u16 ` representation is publicly accessible for direct bit manipulation .
698
728
699
729
``` rust
700
- use simsimd :: SpatialSimilarity ;
701
- use simsimd :: f16 as SimF16 ;
730
+ use simsimd :: {SpatialSimilarity , f16};
731
+
732
+ fn main () {
733
+ // Create f16 vectors using built-in conversion methods
734
+ let vector_a : Vec <f16 > = vec! [1.0 , 2.0 , 3.0 ]. iter (). map (| & x | f16 :: from_f32 (x )). collect ();
735
+ let vector_b : Vec <f16 > = vec! [4.0 , 5.0 , 6.0 ]. iter (). map (| & x | f16 :: from_f32 (x )). collect ();
736
+
737
+ // Compute the cosine similarity
738
+ let cosine_similarity = f16 :: cosine (& vector_a , & vector_b )
739
+ . expect (" Vectors must be of the same length" );
740
+
741
+ println! (" Cosine Similarity: {}" , cosine_similarity );
742
+
743
+ // Direct bit manipulation
744
+ let half = f16 :: from_f32 (3.14159 );
745
+ let bits = half . 0 ; // Access raw u16 representation
746
+ let reconstructed = f16 (bits );
747
+
748
+ // Convert back to f32
749
+ let float_value = half . to_f32 ();
750
+ }
751
+ ```
752
+
753
+ For interoperability with the ` half ` crate:
754
+
755
+ ``` rust
756
+ use simsimd :: {SpatialSimilarity , f16 as SimF16 };
702
757
use half :: f16 as HalfF16 ;
703
758
704
759
fn main () {
705
- let vector_a : Vec <HalfF16 > = ...
706
- let vector_b : Vec <HalfF16 > = ...
760
+ let vector_a : Vec <HalfF16 > = vec! [ 1.0 , 2.0 , 3.0 ] . iter () . map ( | & x | HalfF16 :: from_f32 ( x )) . collect ();
761
+ let vector_b : Vec <HalfF16 > = vec! [ 4.0 , 5.0 , 6.0 ] . iter () . map ( | & x | HalfF16 :: from_f32 ( x )) . collect ();
707
762
708
- let buffer_a : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (a_half . as_ptr () as * const SimF16 , a_half . len ()) };
709
- let buffer_b : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (b_half . as_ptr () as * const SimF16 , b_half . len ()) };
763
+ // Safe reinterpret cast due to identical memory layout
764
+ let buffer_a : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (vector_a . as_ptr () as * const SimF16 , vector_a . len ()) };
765
+ let buffer_b : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (vector_b . as_ptr () as * const SimF16 , vector_b . len ()) };
710
766
711
- // Compute the cosine similarity between vector_a and vector_b
712
- let cosine_similarity = SimF16 :: cosine (& vector_a , & vector_b )
767
+ let cosine_similarity = SimF16 :: cosine (buffer_a , buffer_b )
713
768
. expect (" Vectors must be of the same length" );
714
769
715
770
println! (" Cosine Similarity: {}" , cosine_similarity );
@@ -719,31 +774,41 @@ fn main() {
719
774
### Half-Precision Brain-Float Numbers
720
775
721
776
The "brain-float-16" is a popular machine learning format.
722
- It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
777
+ It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
723
778
[Unlike NumPy](https://github.com/numpy/numpy/issues/19808), you can already use the `bf16` datatype in SimSIMD.
724
- Luckily, to downcast ` f32 ` to ` bf16 ` you only have to drop the last 16 bits:
779
+ SimSIMD provides a ` bf16 ` type with built-in conversion methods and direct bit access.
725
780
726
- ``` py
727
- import numpy as np
728
- import simsimd as simd
781
+ ``` rust
782
+ use simsimd :: {SpatialSimilarity , bf16, f16};
729
783
730
- a = np.random.randn(ndim).astype(np.float32)
731
- b = np.random.randn(ndim).astype(np.float32)
784
+ fn main () {
785
+ // Create bf16 vectors using built-in conversion methods
786
+ let vector_a : Vec <bf16 > = vec! [1.0 , 2.0 , 3.0 ]. iter (). map (| & x | bf16 :: from_f32 (x )). collect ();
787
+ let vector_b : Vec <bf16 > = vec! [4.0 , 5.0 , 6.0 ]. iter (). map (| & x | bf16 :: from_f32 (x )). collect ();
732
788
733
- # NumPy doesn't natively support brain-float, so we need a trick!
734
- # Luckily, it's very easy to reduce the representation accuracy
735
- # by simply masking the low 16-bits of our 32-bit single-precision
736
- # numbers. We can also add `0x8000` to round the numbers.
737
- a_f32rounded = ((a.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
738
- b_f32rounded = ((b.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
789
+ // Compute the cosine similarity
790
+ let cosine_similarity = bf16 :: cosine (& vector_a , & vector_b )
791
+ . expect (" Vectors must be of the same length" );
792
+
793
+ println! (" Cosine Similarity: {}" , cosine_similarity );
739
794
740
- # To represent them as brain-floats, we need to drop the second half
741
- a_bf16 = np.right_shift(a_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
742
- b_bf16 = np.right_shift(b_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
795
+ // Direct bit manipulation
796
+ let brain_half = bf16 :: from_f32 (3.14159 );
797
+ let bits = brain_half . 0 ; // Access raw u16 representation
798
+ let reconstructed = bf16 (bits );
799
+
800
+ // Convert back to f32
801
+ let float_value = brain_half . to_f32 ();
743
802
744
- # Now we can compare the results
745
- expected = np.inner(a_f32rounded, b_f32rounded)
746
- result = simd.inner(a_bf16, b_bf16, " bf16" )
803
+ // Compare precision differences
804
+ let original = 3.14159_f32 ;
805
+ let f16_roundtrip = f16 :: from_f32 (original ). to_f32 ();
806
+ let bf16_roundtrip = bf16 :: from_f32 (original ). to_f32 ();
807
+
808
+ println! (" Original: {}" , original );
809
+ println! (" f16 roundtrip: {}" , f16_roundtrip );
810
+ println! (" bf16 roundtrip: {}" , bf16_roundtrip );
811
+ }
747
812
```
748
813
749
814
### Dynamic Dispatch in Rust
@@ -760,6 +825,7 @@ println!("uses ice: {}", capabilities::uses_ice());
760
825
println! (" uses genoa: {}" , capabilities :: uses_genoa ());
761
826
println! (" uses sapphire: {}" , capabilities :: uses_sapphire ());
762
827
println! (" uses turin: {}" , capabilities :: uses_turin ());
828
+ println! (" uses sierra: {}" , capabilities :: uses_sierra ());
763
829
```
764
830
765
831
## Using SimSIMD in JavaScript
@@ -776,13 +842,13 @@ This will automatically happen unless you install the package with the `--ignore
776
842
After you install it, you will be able to call the SimSIMD functions on various ` TypedArray ` variants:
777
843
778
844
``` js
779
- const { sqeuclidean , cosine , inner , hamming , jaccard } = require (' simsimd' );
845
+ const { sqeuclidean , cosine , inner , hamming , jaccard } = require (" simsimd" );
780
846
781
847
const vectorA = new Float32Array ([1.0 , 2.0 , 3.0 ]);
782
848
const vectorB = new Float32Array ([4.0 , 5.0 , 6.0 ]);
783
849
784
850
const distance = sqeuclidean (vectorA, vectorB);
785
- console .log (' Squared Euclidean Distance:' , distance);
851
+ console .log (" Squared Euclidean Distance:" , distance);
786
852
```
787
853
788
854
Other numeric types and precision levels are supported as well.
@@ -798,8 +864,8 @@ When doing machine learning and vector search with high-dimensional vectors you
798
864
You may want to project values from the $[-1, 1]$ range to the $[-127, 127]$ range and then cast them to `Int8Array`:
799
865
800
866
``` js
801
- const quantizedVectorA = new Int8Array (vectorA .map (v => ( v * 127 ) ));
802
- const quantizedVectorB = new Int8Array (vectorB .map (v => ( v * 127 ) ));
867
+ const quantizedVectorA = new Int8Array (vectorA .map (( v ) => v * 127 ));
868
+ const quantizedVectorB = new Int8Array (vectorB .map (( v ) => v * 127 ));
803
869
const distance = cosine (quantizedVectorA, quantizedVectorB);
804
870
```
805
871
@@ -808,7 +874,7 @@ You can map all positive values to `1` and all negative values and zero to `0`,
808
874
After that, Hamming and Jaccard distances can be computed.
809
875
810
876
``` js
811
- const { toBinary , hamming } = require (' simsimd' );
877
+ const { toBinary , hamming } = require (" simsimd" );
812
878
813
879
const binaryVectorA = toBinary (vectorA);
814
880
const binaryVectorB = toBinary (vectorB);
@@ -919,7 +985,7 @@ int main() {
919
985
simsimd_cos_f32(f32s, f32s, 1536, &distance);
920
986
simsimd_cos_f64(f64s, f64s, 1536, &distance);
921
987
simsimd_cos_bf16(bf16s, bf16s, 1536, &distance);
922
-
988
+
923
989
// Euclidean distance between two vectors
924
990
simsimd_l2sq_i8(i8s, i8s, 1536, &distance);
925
991
simsimd_l2sq_u8(u8s, u8s, 1536, &distance);
@@ -1036,7 +1102,7 @@ To explicitly disable half-precision support, define the following macro before
1036
1102
> This flag does just that and is used to produce the `simsimd.so` shared library, as well as the Python and other bindings.
1037
1103
1038
1104
For Arm: `SIMSIMD_TARGET_NEON`, `SIMSIMD_TARGET_SVE`, `SIMSIMD_TARGET_SVE2`, `SIMSIMD_TARGET_NEON_F16`, `SIMSIMD_TARGET_SVE_F16`, `SIMSIMD_TARGET_NEON_BF16`, `SIMSIMD_TARGET_SVE_BF16`.
1039
- For x86: (`SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`.
1105
+ For x86: `SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`.
1040
1106
1041
1107
> By default, SimSIMD automatically infers the target architecture and pre-compiles as many kernels as possible.
1042
1108
> In some cases, you may want to explicitly disable some of the kernels.
@@ -1064,7 +1130,7 @@ In general there are a few principles that SimSIMD follows:
1064
1130
1065
1131
Possibly, in the future:
1066
1132
1067
- - Best effort computation silencing `NaN` components in low-precision inputs.
1133
+ - Best effort computation silencing `NaN` components in low-precision inputs.
1068
1134
- Detect overflows and report the distance with a "signaling" `NaN`.
1069
1135
1070
1136
Last but not least - don't build unless there is a demand for it.
@@ -1199,7 +1265,7 @@ SimSIMD defines `dot` and `vdot` kernels as:
1199
1265
1200
1266
Where $\bar{b_i}$ is the complex conjugate of $b_i$.
1201
1267
Putting that into Python code for scalar arrays:
1202
-
1268
+
1203
1269
``` python
1204
1270
def dot (a : List[number], b : List[number]) -> number:
1205
1271
a_real, a_imaginary = a[0 ::2 ], a[1 ::2 ]
0 commit comments