6 * aligned about 1.0us/call and 17.4mb/sec
7 * unaligned is about 3.1mb/sec
10 MOVW n+8(FP), R3 /* R3 is count */
11 MOVW s1+0(FP), R4 /* R4 is pointer1 */
12 MOVW s2+4(FP), R5 /* R5 is pointer2 */
13 ADDU R3,R4, R6 /* R6 is end pointer1 */
16 * if not at least 4 chars,
17 * don't even mess around.
18 * 3 chars to guarantee any
19 * rounding up to a word
20 * boundary and 4 characters
21 * to get at least maybe one
28 * test if both pointers
29 * are similarly word aligned
36 * byte at a time to word align
49 * turn R3 into end pointer1-15
50 * cmp 16 at a time while there's room
75 * turn R3 into end pointer1-3
76 * cmp 4 at a time while there's room
86 BNE R8,R9, ne /* only works because big endian */
91 * last loop, cmp byte at a time