Here are my best strategies for reversing n-bit words: n a(n) == number of butterfly steps 0 0 1 0 2 1 [ 1, 1 ] 3 1 [ 1, 1, 1 ] 4 2 [ 2, 2 ] 5 2 [ 2, 1, 2 ] 6 2 [ 3, 3 ] 7 2 [ 3 1 3 ] 8 3 [ 4 4 ] 9 2 [ 3 3 3 ] 10 3 [ 5, 5 ] 11 3 [ 1, 9, 1 ] 12 3 [ 3, 3, 3, 3 ] 13 3 [ 6, 1, 6 ] 14 3 [ 7, 7 ] 15 3 [ 5, 5, 5 ] 16 4 [ 8, 8 ] 17 4 [ 8, 1, 8 ] 18 3 [ 9, 9 ] 19 3 [ 9, 1, 9 ] 20 4 [ 10, 10 ] [ 9, 1, 1, 9 ] 21 3 [ 9, 3, 9 ] 22 4 [ 9, 5, 9 ] [ 11, 11 ] 23 4 [ 9, 3, 3, 9 ] [ 11, 1, 11 ] 24 4 [ 8, 8, 8 ] [ 9, 3, 3, 9 ] 25 4 [ 5, 5, 5, 5 ] 26 4 [ 13, 13 ] 27 3 [ 9, 9, 9 ] 28 4 [ 7, 7, 7, 7 ] [ 14, 14 ] 29 4 [ 14, 1, 14 ] [ 9, 1, 9, 1, 9 ] 30 4 [ 10, 10, 10 ] 31 4 [ 5 X 3, 1, 5 X 3 ] 32 5 [ 16, 16 ] 33 4 [ 11, 11, 11 ] 34 5 [ 17, 17 ] 63 4 [ 27, 9, 27 ] 64 6 [ 32, 32 ] 81 4 [ 27, 27, 27 ] 243 5 [ 81, 81, 81 ] This coincides with http://oeis.org/A064415 a(1) = 0, a(n) = iter(n) if n is even, a(n) = iter(n)-1 if n is odd, where iter(n) = A003434(n) = smallest number of iterations of Euler totient function phi needed to reach 1. A064415 ,0,1,1,2,2,2,2,3,2,3,3,3,3,3,3,4,4,3,3,4,3,4,4,4,4,4,3,4,4,4,4,5,4,5,4,4,4,4,4,5,5,4,4,5,4,5,5, ... For all integers m >0 and n>0 a(m*n)=a(m)+a(n). The function a(n) is completely additive. The smallest integer q which satisfy the equation a(q)=n is 2^q, the greatest is 3^q. And apparently also with: http://oeis.org/A054725 a[1]=1; a[m]= sum[a[p-1]], where sum is over all primes (not necessarily distinct), p, that divide m. A054725 ,1,1,1,2,2,2,2,3,2,3,3,3,3,3,3,4,4,3,3,4,3,4,4,4,4,4,3,4,4,4,4,5,4,5,4,4,4,4,4,5,5,4,4,5,4,5,5,5, ... This one has a(0) "wrong". For your viewing pleasure, the 63 and 31 bit reversal routines that have the minimal number of butterfly steps: ulong jj3_revbin63(ulong x) // a(63) = 4 { ulong z; #if 1 z = (x ^ (x >> 2)) & 0x1249249249249249UL; x ^= z ^ (z << 2); // low 3 bits reversed z = (x ^ (x >> 6)) & 0x01c0e070381c0e07UL; x ^= z ^ (z << 6); // low 9 bits reversed z = (x ^ (x >> 18)) & 0x00001ff0000001ffUL; x ^= z ^ (z << 18); // low 27 bits reversed z = (x ^ (x >> 36)) & 0x0000000007ffffffUL; x ^= z ^ (z << 36); // low 63 bits reversed #else // same code using GCC's binary literals z = (x ^ (x >> 2)) & 0b0001001001001001001001001001001001001001001001001001001001001001UL; x ^= z ^ (z << 2); z = (x ^ (x >> 6)) & 0b0000000111000000111000000111000000111000000111000000111000000111UL; x ^= z ^ (z << 6); z = (x ^ (x >> 18)) & 0b0000000000000000000111111111000000000000000000000000000111111111UL; x ^= z ^ (z << 18); z = (x ^ (x >> 36)) & 0b0000000000000000000000000000000000000111111111111111111111111111UL; x ^= z ^ (z << 36); #endif // x = (x>>63) | (x<<1); // rotate left by 1 to have all 64 bits reversed return x; } uint jj3_revbin31(uint x) // a(31) = 4 as well! { uint z; #if 1 z = (x ^ (x >> 2)) & 0x12491249U; x ^= z ^ (z << 2); z = (x ^ (x >> 6)) & 0x00380038U; x ^= z ^ (z << 6); z = (x ^ (x >> 12)) & 0x00070007U; x ^= z ^ (z << 12); z = (x ^ (x >> 16)) & 0x00007fffU; x ^= z ^ (z << 16); #else // same code using GCC's binary literals z = (x ^ (x >> 2)) & 0b00010010010010010001001001001001U; x ^= z ^ (z << 2); z = (x ^ (x >> 6)) & 0b00000000001110000000000000111000U; x ^= z ^ (z << 6); z = (x ^ (x >> 12)) & 0b00000000000001110000000000000111U; x ^= z ^ (z << 12); z = (x ^ (x >> 16)) & 0b00000000000000000111111111111111U; x ^= z ^ (z << 16); #endif // x = (x>>31) | (x<<1); // rotate left by 1 to have all 32 bits reversed return x; } I'd be quite surprised to see anything better! Best, jj