/*
 * Excerpt of what appears to be an ARM memory-fill routine in Plan 9
 * assembler syntax (operands read: source(s) first, destination last).
 * The visible stores write the value held in R4 through the pointer R(TO),
 * advancing it towards the end pointer R(TOE).
 *
 * NOTE(review): only some lines of the routine are present here -- the
 * entry point, the conditional branches, most labels and the return are
 * not visible -- so how control moves between the steps below is presumed
 * from the existing comments; confirm against the complete file.
 * NOTE(review): the integer at the start of each line looks like a line
 * number carried over from the original file -- confirm before assembling.
 */
/* TMP is the register number used as R(TMP) for scratch values below. */
4 TMP = 3 /* N and TMP don't overlap */
/* TOE = TO + N: the address at which filling stops. */
11 ADD R(N), R(TO), R(TOE) /* to end pointer */
/* Compare the byte count with 4; the branch that consumes the flags is not visible here. */
13 CMP $4, R(N) /* need at least 4 bytes to copy */
/*
 * R4 |= R4 << 16.
 * NOTE(review): presumably the low 16 bits already hold the fill byte
 * twice (the instructions doing that are not visible) -- confirm.
 */
18 ORR R(4)<<16, R(4) /* replicate to word */
20 _4align: /* align on 4 */
/* TMP = TO & 3, setting the condition flags (.S); zero means TO is 4-byte aligned. */
21 AND.S $3, R(TO), R(TMP)
/* Store one byte from R4 at TO, then advance TO by 1 (.P = post-increment). */
24 MOVBU.P R(4), 1(R(TO)) /* implicit write back */
/* TMP = TOE - 15; presumably the bound TO is compared against before each 16-byte store. */
28 SUB $15, R(TOE), R(TMP) /* do 16-byte chunks if possible */
/*
 * Copy the fill word into R5.
 * NOTE(review): R6 and R7 are presumably loaded the same way on lines not
 * visible here, since the multi-register store below writes R4-R7.
 */
32 MOVW R4, R5 /* replicate */
/* Store R4-R7 (16 bytes) at TO, incrementing after and writing the new address back to TO (.IA.W). */
40 MOVM.IA.W [R4-R7], (R(TO))
/* TMP = TOE - 3; presumably the bound TO is compared against before each word store. */
44 SUB $3, R(TOE), R(TMP) /* do remaining words if possible */
/* Store one word from R4 at TO, then advance TO by 4. */
49 MOVW.P R(4), 4(R(TO)) /* implicit write back */
/* Store one byte from R4 at TO, then advance TO by 1; presumably the final byte-at-a-time tail. */
56 MOVBU.P R(4), 1(R(TO)) /* implicit write back */