🐛 Fix AVR maths used by Stepper (#25338)

tombrazier · thinkyhead · commit e65e38afafec · 2023-04-07T17:32:30.000-05:00
diff --git a/Marlin/src/HAL/AVR/math.h b/Marlin/src/HAL/AVR/math.h
@@ -27,13 +27,14 @@
 
 // intRes = longIn1 * longIn2 >> 24
 // uses:
-// A[tmp] to store 0
-// B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
-// note that the lower two bytes and the upper byte of the 48bit result are not calculated.
-// this can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
-// B A are bits 24-39 and are the returned value
-// C B A is longIn1
-// D C B A is longIn2
+// r1, r0 for the result of mul.
+// [tmp1] to store 0.
+// [tmp2] to store bits 16-23 of the 56 bit result. The top bit of [tmp2] is used for rounding.
+// Note that the lower two bytes and the upper two bytes of the 56 bit result are not calculated.
+// This can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
+// [intRes] (A B) is bits 24-39 and is the returned value.
+// [longIn1] (C B A) is a 24 bit parameter.
+// [longIn2] (D C B A) is a 32 bit parameter.
 //
 FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) {
   uint8_t tmp1;
@@ -66,11 +67,9 @@ FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2
     A("add %[tmp2], r1")
     A("adc %A[intRes], %[tmp1]")
     A("adc %B[intRes], %[tmp1]")
-    A("lsr %[tmp2]")
-    A("adc %A[intRes], %[tmp1]")
-    A("adc %B[intRes], %[tmp1]")
     A("mul %D[longIn2], %A[longIn1]")
-    A("add %A[intRes], r0")
+    A("lsl %[tmp2]")
+    A("adc %A[intRes], r0")
     A("adc %B[intRes], r1")
     A("mul %D[longIn2], %B[longIn1]")
     A("add %B[intRes], r0")
@@ -85,22 +84,25 @@ FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2
   return intRes;
 }
 
-// intRes = intIn1 * intIn2 >> 16
+// intRes = intIn1 * intIn2 >> 8
 // uses:
-// r26 to store 0
-// r27 to store the byte 1 of the 24 bit result
-FORCE_INLINE static uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) {
+// r1, r0 for the result of mul. After the second mul, r0 holds bits 0-7 of the 24 bit result and
+// the top bit of r0 is used for rounding.
+// [tmp] to store 0.
+// [intRes] (A B) is bits 8-15 and is the returned value.
+// [charIn1] is an 8 bit parameter.
+// [intIn2] (B A) is a 16 bit parameter.
+//
+FORCE_INLINE static uint16_t MultiU8X16toH16(uint8_t charIn1, uint16_t intIn2) {
   uint8_t tmp;
   uint16_t intRes;
   __asm__ __volatile__ (
     A("clr %[tmp]")
     A("mul %[charIn1], %B[intIn2]")
     A("movw %A[intRes], r0")
     A("mul %[charIn1], %A[intIn2]")
-    A("add %A[intRes], r1")
-    A("adc %B[intRes], %[tmp]")
-    A("lsr r0")
-    A("adc %A[intRes], %[tmp]")
+    A("lsl r0")
+    A("adc %A[intRes], r1")
     A("adc %B[intRes], %[tmp]")
     A("clr r1")
       : [intRes] "=&r" (intRes),
diff --git a/Marlin/src/module/stepper.cpp b/Marlin/src/module/stepper.cpp
@@ -2062,7 +2062,7 @@ uint32_t Stepper::calc_timer_interval(uint32_t step_rate) {
         const uint8_t rate_mod_256 = (step_rate & 0x00FF);
         const uintptr_t table_address = uintptr_t(&speed_lookuptable_fast[uint8_t(step_rate >> 8)][0]),
                         gain = uint16_t(pgm_read_word(table_address + 2));
-        return uint16_t(pgm_read_word(table_address)) - MultiU16X8toH16(rate_mod_256, gain);
+        return uint16_t(pgm_read_word(table_address)) - MultiU8X16toH16(rate_mod_256, gain);
       }
       else { // lower step rates
         uintptr_t table_address = uintptr_t(&speed_lookuptable_slow[0][0]);
diff --git a/Marlin/src/module/stepper.h b/Marlin/src/module/stepper.h
@@ -114,11 +114,11 @@
   #define TIMER_READ_ADD_AND_STORE_CYCLES 13UL
 
   // The base ISR
-  #define ISR_BASE_CYCLES  1000UL
+  #define ISR_BASE_CYCLES  996UL
 
   // Linear advance base time is 32 cycles
   #if ENABLED(LIN_ADVANCE)
-    #define ISR_LA_BASE_CYCLES 32UL
+    #define ISR_LA_BASE_CYCLES 30UL
   #else
     #define ISR_LA_BASE_CYCLES 0UL
   #endif

Original file line number	Diff line number	Diff line change
`@@ -2062,7 +2062,7 @@ uint32_t Stepper::calc_timer_interval(uint32_t step_rate) {`
`2062`	`2062`	`const uint8_t rate_mod_256 = (step_rate & 0x00FF);`
`2063`	`2063`	`const uintptr_t table_address = uintptr_t(&speed_lookuptable_fast[uint8_t(step_rate >> 8)][0]),`
`2064`	`2064`	`gain = uint16_t(pgm_read_word(table_address + 2));`
`2065`		`- return uint16_t(pgm_read_word(table_address)) - MultiU16X8toH16(rate_mod_256, gain);`
	`2065`	`+ return uint16_t(pgm_read_word(table_address)) - MultiU8X16toH16(rate_mod_256, gain);`
`2066`	`2066`	`}`
`2067`	`2067`	`else { // lower step rates`
`2068`	`2068`	`uintptr_t table_address = uintptr_t(&speed_lookuptable_slow[0][0]);`