+ /* Compute the CRC on as many N z_word_t blocks as are available. */
+ blks = len / (N * W);
+ len -= blks * N * W;
+ words = (z_word_t const *)buf;
+
+ /* Do endian check at execution time instead of compile time, since ARM
+ processors can change the endianess at execution time. If the
+ compiler knows what the endianess will be, it can optimize out the
+ check and the unused branch. */
+ endian = 1;
+ if (*(unsigned char *)&endian) {
+ /* Little endian. */
+
+ z_crc_t crc0;
+ z_word_t word0;
+#if N > 1
+ z_crc_t crc1;
+ z_word_t word1;
+#if N > 2
+ z_crc_t crc2;
+ z_word_t word2;
+#if N > 3
+ z_crc_t crc3;
+ z_word_t word3;
+#if N > 4
+ z_crc_t crc4;
+ z_word_t word4;
+#if N > 5
+ z_crc_t crc5;
+ z_word_t word5;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+ /* Initialize the CRC for each braid. */
+ crc0 = crc;
+#if N > 1
+ crc1 = 0;
+#if N > 2
+ crc2 = 0;
+#if N > 3
+ crc3 = 0;
+#if N > 4
+ crc4 = 0;
+#if N > 5
+ crc5 = 0;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+ /*
+ Process the first blks-1 blocks, computing the CRCs on each braid
+ independently.
+ */
+ while (--blks) {
+ /* Load the word for each braid into registers. */
+ word0 = crc0 ^ words[0];
+#if N > 1
+ word1 = crc1 ^ words[1];
+#if N > 2
+ word2 = crc2 ^ words[2];
+#if N > 3
+ word3 = crc3 ^ words[3];
+#if N > 4
+ word4 = crc4 ^ words[4];
+#if N > 5
+ word5 = crc5 ^ words[5];
+#endif
+#endif
+#endif
+#endif
+#endif
+ words += N;
+
+ /* Compute and update the CRC for each word. The loop should
+ get unrolled. */
+ crc0 = crc_braid_table[0][word0 & 0xff];
+#if N > 1
+ crc1 = crc_braid_table[0][word1 & 0xff];
+#if N > 2
+ crc2 = crc_braid_table[0][word2 & 0xff];
+#if N > 3
+ crc3 = crc_braid_table[0][word3 & 0xff];
+#if N > 4
+ crc4 = crc_braid_table[0][word4 & 0xff];
+#if N > 5
+ crc5 = crc_braid_table[0][word5 & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+ for (k = 1; k < W; k++) {
+ crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
+#if N > 1
+ crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
+#if N > 2
+ crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
+#if N > 3
+ crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
+#if N > 4
+ crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
+#if N > 5
+ crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+ }
+ }
+
+ /*
+ Process the last block, combining the CRCs of the N braids at the
+ same time.
+ */
+ crc = crc_word(crc0 ^ words[0]);
+#if N > 1
+ crc = crc_word(crc1 ^ words[1] ^ crc);
+#if N > 2
+ crc = crc_word(crc2 ^ words[2] ^ crc);
+#if N > 3
+ crc = crc_word(crc3 ^ words[3] ^ crc);
+#if N > 4
+ crc = crc_word(crc4 ^ words[4] ^ crc);
+#if N > 5
+ crc = crc_word(crc5 ^ words[5] ^ crc);
+#endif
+#endif
+#endif
+#endif
+#endif
+ words += N;
+ }
+ else {
+ /* Big endian. */
+
+ z_word_t crc0, word0, comb;
+#if N > 1
+ z_word_t crc1, word1;
+#if N > 2
+ z_word_t crc2, word2;
+#if N > 3
+ z_word_t crc3, word3;
+#if N > 4
+ z_word_t crc4, word4;
+#if N > 5
+ z_word_t crc5, word5;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+ /* Initialize the CRC for each braid. */
+ crc0 = byte_swap(crc);
+#if N > 1
+ crc1 = 0;
+#if N > 2
+ crc2 = 0;
+#if N > 3
+ crc3 = 0;
+#if N > 4
+ crc4 = 0;
+#if N > 5
+ crc5 = 0;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+ /*
+ Process the first blks-1 blocks, computing the CRCs on each braid
+ independently.
+ */
+ while (--blks) {
+ /* Load the word for each braid into registers. */
+ word0 = crc0 ^ words[0];
+#if N > 1
+ word1 = crc1 ^ words[1];
+#if N > 2
+ word2 = crc2 ^ words[2];
+#if N > 3
+ word3 = crc3 ^ words[3];
+#if N > 4
+ word4 = crc4 ^ words[4];
+#if N > 5
+ word5 = crc5 ^ words[5];
+#endif
+#endif
+#endif
+#endif
+#endif
+ words += N;
+
+ /* Compute and update the CRC for each word. The loop should
+ get unrolled. */
+ crc0 = crc_braid_big_table[0][word0 & 0xff];
+#if N > 1
+ crc1 = crc_braid_big_table[0][word1 & 0xff];
+#if N > 2
+ crc2 = crc_braid_big_table[0][word2 & 0xff];
+#if N > 3
+ crc3 = crc_braid_big_table[0][word3 & 0xff];
+#if N > 4
+ crc4 = crc_braid_big_table[0][word4 & 0xff];
+#if N > 5
+ crc5 = crc_braid_big_table[0][word5 & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+ for (k = 1; k < W; k++) {
+ crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
+#if N > 1
+ crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
+#if N > 2
+ crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
+#if N > 3
+ crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
+#if N > 4
+ crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
+#if N > 5
+ crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+ }
+ }
+
+ /*
+ Process the last block, combining the CRCs of the N braids at the
+ same time.
+ */
+ comb = crc_word_big(crc0 ^ words[0]);
+#if N > 1
+ comb = crc_word_big(crc1 ^ words[1] ^ comb);
+#if N > 2
+ comb = crc_word_big(crc2 ^ words[2] ^ comb);
+#if N > 3
+ comb = crc_word_big(crc3 ^ words[3] ^ comb);
+#if N > 4
+ comb = crc_word_big(crc4 ^ words[4] ^ comb);
+#if N > 5
+ comb = crc_word_big(crc5 ^ words[5] ^ comb);
+#endif
+#endif
+#endif
+#endif
+#endif
+ words += N;
+ crc = byte_swap(comb);
+ }
+
+ /*
+ Update the pointer to the remaining bytes to process.
+ */
+ buf = (unsigned char const *)words;