Skip to content

MAINT: Reuse common routines in covariance #3227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 6 additions & 22 deletions cpp/daal/src/algorithms/covariance/covariance_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,7 @@ public:
/* Sum input array elements in case of non-normalized data */
for (DAAL_INT i = 0; i < nRows; i++)
{
PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (DAAL_INT j = 0; j < _nFeatures; j++)
{
sumsPtr[j] += dataBlock[i * _nFeatures + j];
}
daal::internal::MathInst<algorithmFPType, cpu>::vAdd(_nFeatures, sumsPtr, dataBlock + i * _nFeatures, sumsPtr);
}
}
}
Expand Down Expand Up @@ -405,10 +400,7 @@ services::Status updateDenseCrossProductAndSums(bool isNormalized, size_t nFeatu
{
return services::Status(services::ErrorMemoryAllocationFailed);
}
for (size_t i = 0; i < nFeatures; i++)
{
sums[i] = resultSums[i];
}
daal::services::internal::daal_memcpy_s(sums, nFeatures * sizeof(algorithmFPType), resultSums, nFeatures * sizeof(algorithmFPType));
for (size_t i = 0; i < nFeatures; i++)
{
PRAGMA_FORCE_SIMD
Expand All @@ -421,10 +413,8 @@ services::Status updateDenseCrossProductAndSums(bool isNormalized, size_t nFeatu
}
else
{
for (size_t i = 0; i < nFeatures * nFeatures; i++)
{
crossProduct[i] = resultCrossProduct[i];
}
daal::services::internal::daal_memcpy_s(crossProduct, nFeatures * nFeatures * sizeof(algorithmFPType), resultCrossProduct,
nFeatures * nFeatures * sizeof(algorithmFPType));
}
}
else
Expand Down Expand Up @@ -538,10 +528,7 @@ void mergeCrossProductAndSums(size_t nFeatures, const algorithmFPType * partialC
nObservations[0] += partialNObservations[0];

/* Merge sums */
for (size_t i = 0; i < nFeatures; i++)
{
sums[i] += partialSums[i];
}
daal::internal::MathInst<algorithmFPType, cpu>::vAdd(nFeatures, sums, partialSums, sums);
}
}

Expand Down Expand Up @@ -578,10 +565,7 @@ services::Status finalizeCovariance(size_t nFeatures, algorithmFPType nObservati
DAAL_CHECK_MALLOC(diagInvSqrtsArray.get());

algorithmFPType * diagInvSqrts = diagInvSqrtsArray.get();
for (size_t i = 0; i < nFeatures; i++)
{
diagInvSqrts[i] = 1.0 / daal::internal::MathInst<algorithmFPType, cpu>::sSqrt(crossProduct[i * nFeatures + i]);
}
daal::internal::MathInst<algorithmFPType, cpu>::vInvSqrtI(nFeatures, crossProduct, nFeatures + 1, diagInvSqrts, 1);

for (size_t i = 0; i < nFeatures; i++)
{
Expand Down
7 changes: 7 additions & 0 deletions cpp/daal/src/externals/service_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ struct Math

static fpType sCdfNormInv(fpType in) { return _impl<fpType, cpu>::sCdfNormInv(in); }

static void vAdd(SizeType n, const fpType * a, const fpType * b, fpType * y) { _impl<fpType, cpu>::vAdd(n, a, b, y); }

static void vPowx(SizeType n, const fpType * in, fpType in1, fpType * out) { _impl<fpType, cpu>::vPowx(n, in, in1, out); }

static void vPowxAsLnExp(SizeType n, const fpType * in, fpType in1, fpType * out)
Expand All @@ -86,6 +88,11 @@ struct Math

static void vSqrt(SizeType n, const fpType * in, fpType * out) { _impl<fpType, cpu>::vSqrt(n, in, out); }

static void vInvSqrtI(SizeType n, const fpType * a, const SizeType inca, fpType * b, const SizeType incb)
{
_impl<fpType, cpu>::vInvSqrtI(n, a, inca, b, incb);
}

static void vLog(SizeType n, const fpType * in, fpType * out) { _impl<fpType, cpu>::vLog(n, in, out); }

static void vLog1p(SizeType n, const fpType * in, fpType * out) { _impl<fpType, cpu>::vLog1p(n, in, out); }
Expand Down
14 changes: 14 additions & 0 deletions cpp/daal/src/externals/service_math_mkl.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ struct MklMath<double, cpu>
return r;
}

static void vAdd(SizeType n, const double * a, const double * b, double * y) { __DAAL_MKLFN_CALL_MATH(vdAdd, (n, a, b, y)); }

static void vPowx(SizeType n, const double * in, double in1, double * out)
{
__DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
Expand Down Expand Up @@ -138,6 +140,11 @@ struct MklMath<double, cpu>
__DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
}

static void vInvSqrtI(SizeType n, const double * a, const SizeType inca, double * b, const SizeType incb)
{
__DAAL_MKLFN_CALL_MATH(vmdInvSqrtI, (n, a, inca, b, incb, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
}

static void vLog(SizeType n, const double * in, double * out)
{
__DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
Expand Down Expand Up @@ -213,6 +220,8 @@ struct MklMath<float, cpu>
return r;
}

static void vAdd(SizeType n, const float * a, const float * b, float * y) { __DAAL_MKLFN_CALL_MATH(vsAdd, (n, a, b, y)); }

static void vPowx(SizeType n, const float * in, float in1, float * out)
{
__DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
Expand Down Expand Up @@ -250,6 +259,11 @@ struct MklMath<float, cpu>
__DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
}

static void vInvSqrtI(SizeType n, const float * a, const SizeType inca, float * b, const SizeType incb)
{
__DAAL_MKLFN_CALL_MATH(vmsInvSqrtI, (n, a, inca, b, incb, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
}

static void vLog(SizeType n, const float * in, float * out)
{
__DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE)));
Expand Down
24 changes: 24 additions & 0 deletions cpp/daal/src/externals/service_math_ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ struct RefMath<double, cpu>
// Not implemented
static double sCdfNormInv(double in) { return std::numeric_limits<double>::quiet_NaN(); }

static void vAdd(SizeType n, const double * a, const double * b, double * y)
{
#pragma omp simd
for (SizeType i = 0; i < n; ++i) y[i] = a[i] + b[i];
}

static void vPowx(SizeType n, const double * in, double in1, double * out)
{
#pragma omp simd
Expand Down Expand Up @@ -117,6 +123,12 @@ struct RefMath<double, cpu>
for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]);
}

static void vInvSqrtI(SizeType n, const double * a, const SizeType inca, double * b, const SizeType incb)
{
#pragma omp simd
for (SizeType i = 0; i < n; ++i) b[i * incb] = 1.0 / sqrt(a[i * inca]);
}

static void vLog(SizeType n, const double * in, double * out)
{
#pragma omp simd
Expand Down Expand Up @@ -167,6 +179,12 @@ struct RefMath<float, cpu>
// Not implemented
static float sCdfNormInv(float in) { return std::numeric_limits<float>::quiet_NaN(); }

static void vAdd(SizeType n, const float * a, const float * b, float * y)
{
#pragma omp simd
for (SizeType i = 0; i < n; ++i) y[i] = a[i] + b[i];
}

static void vPowx(SizeType n, const float * in, float in1, float * out)
{
#pragma omp simd
Expand Down Expand Up @@ -214,6 +232,12 @@ struct RefMath<float, cpu>
for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]);
}

static void vInvSqrtI(SizeType n, const float * a, const SizeType inca, float * b, const SizeType incb)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this is inca and incb usually 1? If so there might be a better way writing this so the compiler optimizes the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MKL has different variants of these functions. There is a variant in which they are both fixed at 1, but this PR uses it for a case where one of them is not equal to 1.

{
#pragma omp simd
for (SizeType i = 0; i < n; ++i) b[i * incb] = 1.0f / sqrtf(a[i * inca]);
}

static void vLog(SizeType n, const float * in, float * out)
{
#pragma omp simd
Expand Down
Loading