Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix BOF in the last channel handling logic of SSE ShuffleChannel #5735

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions src/layer/arm/shufflechannel_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
float32x4_t _p0 = vld1q_f32(ptr0);
float32x4_t _p1 = vld1q_f32(ptr1);
Expand All @@ -130,6 +130,21 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down Expand Up @@ -364,7 +379,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -378,6 +393,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 8;
outptr0 += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down Expand Up @@ -598,7 +628,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -611,6 +641,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down
51 changes: 48 additions & 3 deletions src/layer/x86/shufflechannel_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 8;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m256 _p0 = _mm256_loadu_ps(ptr0);
__m256 _p1 = _mm256_loadu_ps(ptr1);
Expand All @@ -134,6 +134,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 16;
outptr += 16;
}

for (int i = 0; i < 16; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -372,7 +387,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -387,6 +402,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 8;
outptr += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -607,7 +637,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -620,6 +650,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down
Loading