5 * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
9 * each step looks like:
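22 * hi = R8 - high word of the product, written by MULHWU (register chosen by this code, not fixed by hardware)
23 * lo = R9 - low word of the product, written by MULLW (register chosen by this code, not fixed by hardware)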
/*
 * Multiply-accumulate over a vector of digits: per the comments below,
 * each step forms the 64-bit product b[i]*m as hi:lo, folds in the high
 * word carried from the previous step (oldhi, R10), adds the low word
 * into p[i], and keeps the new high word for the next step.
 * R3 walks b and R6 walks p; R5 holds the multiplier m.
 */
28 TEXT mpvecdigmuladd(SB),$0
/*
 * Both pointers are biased by -4 because the update-form MOVWU accesses
 * 4(Rn) and leaves Rn pointing at the word it just touched, so the first
 * access lands on element 0.
 */
33 SUB $4, R3 /* b -= 4 bytes: pre-decrement for the MOVWU load below */
34 SUB $4, R6 /* p -= 4 bytes: pre-decrement for the MOVWU store below */
40 MOVWU 4(R3),R9 /* lo = b[i]; R3 advances to this word */
41 MOVW 4(R6),R11 /* tmp = p[i]; plain load, R6 is not advanced yet */
42 MULHWU R9,R5,R8 /* hi = (b[i] * m)>>32, unsigned high word; must read R9 before MULLW overwrites it */
43 MULLW R9,R5,R9 /* lo = low 32 bits of b[i] * m */
44 ADDC R10,R9 /* lo += oldhi, recording the carry out */
45 ADDE R0,R8 /* hi += carry from the add above (R0 presumably reads as zero - TODO confirm) */
46 ADDC R9,R11 /* tmp += lo, recording the carry out */
47 ADDE R0,R8 /* hi += carry from the add above (same assumption about R0) */
48 MOVWU R11,4(R6) /* p[i] = tmp; R6 advances to this word */
49 MOVW R8,R10 /* oldhi = hi, carried into the next digit */
/*
 * R6 was advanced by the MOVWU store above, so 4(R6) here is the word
 * after the last one written. NOTE(review): no instruction modifying R11
 * between this load and the store is visible here - confirm against the
 * full file.
 */
52 MOVW 4(R6),R11 /* tmp = p[i] (next word of p) */
54 MOVWU R11,4(R6) /* p[i] = tmp; R6 advances to this word */