namespace System { 
    // Little-Endian based approaches most likely do not work on big-endian hardware.
    // Code based on examples found in
    // Bit Twiddling hacks: 
    // https://graphics.stanford.edu/~seander/bithacks.html
    // Chess programming wiki:
    // https://chessprogramming.wikispaces.com/BitScan
    public static class Bits
    {

        public static int PopCount(ulong value)
        {
            // Use a SWAR technique to count the population in parrallel.
            // I have no idea about naming these constants.
            const ulong k1 = ~0ul / 3;
            const ulong k2 = ~0ul / 5;
            const ulong k4 = ~0ul / 17;
            const ulong kf = ~0ul / 255;

            value = value - ((value >> 1) & k1);
            value = (value & k2) + ((value >> 2) & k2);
            value = (value + (value >> 4)) & k4;
            value = (value * kf) >> 56; //
            return (int) value;
        }

        public static int PopCount(long value) => PopCount((ulong) value);

        // these might be faster due to the cheaper opcode encoding of constants.
        public static int PopCount(uint value)
        {
            const uint k1 = ~0u / 3;
            const uint k2 = ~0u / 5;
            const uint k4 = ~0u / 17;
            const uint kf = ~0u / 255;

            value = value - ((value >> 1) & k1);
            value = (value & k2) + ((value >> 2) & k2);
            value = (value + (value >> 4)) & k4;
            value = (value * kf) >> 24;
            return (int) value;
        }

        public static int PopCount(int value) => PopCount((uint) value);
        // CoreCLR optimizes these rotations into rol/ror opcodes:
        // See CoreCLR#1830
        public static ulong RotateLeft(ulong value, int offset)
        {
            return (value << (offset & 63)) | (value >> ((64 - offset) & 63));
        }

        public static ulong RotateLeft(uint value, int offset)
        {
            return (value << (offset & 31)) | (value >> ((32 - offset) & 31));
        }

        public static ulong RotateRight(ulong value, int offset)
        {
            return (value >> (offset & 63)) | (value << ((64 - offset) & 63));
        }

        public static ulong RotateRight(uint value, int offset)
        {
            return (value >> (offset & 31)) | (value << ((32 - offset) & 31));
        }

        // How this works::
        // In the count trailing zeros function we perform least significant bit (lsb) isolation.
        // Doing so we take our long value and convert it into something like (2^N), where n is the lsb. 
        // Multiplying anything by an integer than only has the n-th bit set is equivalent to
        // a left shift by n. This constant represents 64 overlapping 6 bit sequences (0-63) which are 
        // accessible by shifting, and then extracting the highest byte in the word. These unique values
        // essentially allow us to treat this as a minimal perfect hash into the table below to look up
        // the bit positions.
        // We don't use traditional lsb isolation (x & (x-1)) which just has the n-th bit set, but a form
        // that sets all lower bits as well. Thus we now have 2*(2^N)-1. For the most part this just means
        // we have to increase the value we right shift by one (e.g. instead of shifting by 57 we shift by 58).
        // This form is slightly faster on intel core architecture.
        // THe other reason to do this, is that we can reuse the same const and table for count leading zeroes.
        private const ulong Debruijn64 = 0x03F79D71B4CB0A89UL;

        private static readonly int[] Debruijn64Table =
        {
            0,  47, 1,  56, 48, 27, 2,  60,
            57, 49, 41, 37, 28, 16, 3,  61,
            54, 58, 35, 52, 50, 42, 21, 44,
            38, 32, 29, 23, 17, 11, 4,  62,
            46, 55, 26, 59, 40, 36, 15, 53,
            34, 51, 20, 43, 31, 22, 10, 45,
            25, 39, 14, 33, 19, 30, 9,  24,
            13, 18, 8,  12, 7,   6, 5,  63
        };

        private const uint Debruijn32 = 0x07C4ACDDU;

        private static readonly int[] Debruijn32Table =
        {
            0,  9,  1,  10, 13, 21, 2,  29,
            11, 14, 16, 18, 22, 25, 3,  30,
            8,  12, 20, 28, 15, 17, 24,  7,
            19, 27, 23,  6, 26,  5,  4, 31
        };

        // BitScanReverse/BitScanForward could be an interesting compliment to these methods as we then would not need
        // this ^ 63 or the check against zero (bitscans are undefined on zero).
        public static int CountLeadingZeroes(ulong value)
        {
            if (value == 0) return 64;
            value |= value >> 1;
            value |= value >> 2;
            value |= value >> 4;
            value |= value >> 8;
            value |= value >> 16;
            value |= value >> 32;
            return Debruijn64Table[(value * Debruijn64) >> 58] ^ 63;
        }

        public static int CountLeadingZeroes(long value) => CountLeadingZeroes((ulong) value);

        public static int CountLeadingZeroes(uint value)
        {
            if (value == 0) return 32;
            value |= value >> 1;
            value |= value >> 2;
            value |= value >> 4;
            value |= value >> 8;
            value |= value >> 16;
            return Debruijn32Table[(value * Debruijn32) >> 27] ^ 31;
        }

        public static int CountLeadingZeroes(int value) => CountLeadingZeroes((uint) value);

        public static int CountTrailingZeroes(ulong value)
        {
            if (value == 0) return 64;
            return Debruijn64Table[((value ^ (value - 1)) * Debruijn64) >> 58];
        }

        public static int CountTrailingZeroes(long value) => CountTrailingZeroes((ulong) value);

        public static int CountTrailingZeroes(uint value)
        {
            if (value == 0) return 32;
            return Debruijn32Table[((value ^ (value - 1)) * Debruijn32) >> 27];
        }

        public static int CountTrailingZeroes(int value) => CountTrailingZeroes((uint) value);

        public static bool ReadBit(ulong value, int offset)
        {
            return (value & (1ul << offset)) != 0;
        }

        public static bool ReadBit(long value, int offset) => ReadBit((ulong) value, offset);


        public static bool ReadBit(uint value, int offset)
        {
            return (value & (1u << offset)) != 0;
        }

        public static bool ReadBit(int value, int offset) => ReadBit((uint) value, offset);


        public static ulong WriteBit(ulong value, int offset, bool toSet)
        {
            return (value & ~(0x1ul << offset)) | ((toSet ? 1ul : 0ul) << offset);
        }

        public static long WriteBit(long value, int offset, bool toSet) => (long) WriteBit((ulong) value, offset, toSet);

        public static uint WriteBit(uint value, int offset, bool toSet)
        {
            return (value & ~(0x1u << offset)) | ((toSet ? 1u : 0u) << offset);
        }

        public static int WriteBit(int value, int offset, bool toSet) => (int) WriteBit((uint) value, offset, toSet);


        public static byte ReadByte(ulong value, int offset)
        {
            return (byte) ((value & (0xFFul << (offset * 8))) >> (offset * 8));
        }

        public static byte ReadByte(long value, int offset) => ReadByte((ulong) value, offset);

        public static byte ReadByte(uint value, int offset)
        {
            return (byte) ((value & (0xFFu << (offset * 8))) >> (offset * 8));
        }

        public static byte ReadByte(int value, int offset) => ReadByte((uint) value, offset);

        public static short ReadInt16(ulong value, int offset)
        {
            return (short) ((value & (0xFFFFul << (offset * 16))) >> (offset * 16));
        }

        public static short ReadInt16(long value, int offset) => ReadInt16((ulong) value, offset);

        public static short ReadInt16(uint value, int offset)
        {
            return (short) ((value & (0xFFFFu << (offset * 16))) >> (offset * 16));
        }

        public static short ReadInt16(int value, int offset) => ReadInt16((uint) value, offset);


        public static int ReadInt32(ulong value, int offset)
        {
            return (int) ((value & (~0u << (offset * 32))) >> (offset * 32));
        }

        public static int ReadInt32(long value, int offset) => ReadInt32((ulong) value, offset);


        public static ulong WriteByte(ulong value, int offset, byte toSet)
        {
            return (value & ~(0xFFul << (offset * 8))) | ((ulong) toSet << (offset * 8));
        }

        public static long WriteByte(long value, int offset, byte toSet)
            => (long) WriteByte((ulong) value, offset, toSet);

        public static uint WriteByte(uint value, int offset, byte toSet)
        {
            return (value & ~(0xFFu << (offset * 8))) | ((uint) toSet << (offset * 8));
        }

        public static int WriteByte(int value, int offset, byte toSet) => (int) WriteByte((uint) value, offset, toSet);

        // sign extension is the devil and screws this all up.
        // Essentially, when converting a smaller signed type to ulong, the highest bit is extended to
        // the remaining bits. For a positive value this works as expected, but for negative number
        // this has the unexpected result of setting all the higher bits, thus using something like
        // setInt16(value,0,unchecked((short)0x8000)) will result setting the value to 0xFFFFFFFFFF8000
        // to fix this we first convert to the unsigned of the same type.
        public static ulong WriteInt16(ulong value, int offset, short toSet)
        {
            return (value & ~(0xFFFFul << (offset * 16))) | ((ulong) (ushort) toSet << (offset * 16));
        }

        public static long WriteInt16(long value, int offset, short toSet)
            => (long) WriteInt16((ulong) value, offset, toSet);

        public static uint WriteInt16(uint value, int offset, short toSet)
        {
            return (value & ~(0xFFFFu << (offset * 16))) | ((uint) (ushort) toSet << (offset * 16));
        }

        public static int WriteInt16(int value, int offset, short toSet)
            => (int) WriteInt16((uint) value, offset, toSet);


        public static ulong WriteInt32(ulong value, int offset, int toSet)
        {
            return (value & ~(0xFFFFFFFFul << (offset * 32))) | ((ulong) (uint) toSet << (offset * 32));
        }

        public static long WriteInt32(long value, int offset, int toSet)
            => (long) WriteInt32((ulong) value, offset, toSet);
    }
}