Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save OrganizationUsername/ff2c30e4c761a41ec6533e95bef097a1 to your computer and use it in GitHub Desktop.

Select an option

Save OrganizationUsername/ff2c30e4c761a41ec6533e95bef097a1 to your computer and use it in GitHub Desktop.
max float value's index
#LINQPad optimize+
//https://blog.marcgravell.com/2018/01/sorting-myself-out-extreme-edition.html
//https://stackoverflow.com/questions/66631585/efficient-comparison-of-2-integers-against-another-2
//https://github.com/Treit/MiscBenchmarks/blob/main/MinMaxNumericValues/Benchmark.cs
//https://medium.com/@norm.bryar/faster-use-of-bool-f541d349fce4
//https://www.extremeoptimization.com/Documentation/Vector-and-Matrix/Vectors/Accessing-vector-elements.aspx
//SmallTest st = new SmallTest();
//st.GlobalSetup();
//st.OrdinaryMax().Dump();
//st.VectorMax().Dump();
Main();
void Main() { var summary = BenchmarkRunner.Run<SmallTest>(); }
//[ShortRunJob]
[MemoryDiagnoser]
public class SmallTest
{
[Params(80_000_000)] public int IterationCount { get; set; }
Random ran;
float[] numbers;
int[] indices;
[GlobalSetup]
public void GlobalSetup()
{
ran = new Random(1);
numbers = new float[IterationCount];
indices = new int[IterationCount];
for (var i = 0; i < IterationCount; i++) { numbers[i] = (ran.NextInt64()); indices[i] = i; }
}
//[Benchmark]
public float OrdinaryMax()
{
var result = float.MinValue;
foreach (var n in numbers) { result = Math.Max(n, result); }
return result;
}
//[Benchmark]
public float IfMax()
{
var result = float.MinValue;
foreach (var n in numbers) { if (n > result) { result = n; } }
return result;
}
[Benchmark(Baseline = true)]
public int IfMaxIndex()
{
var result = float.MinValue;
var index = -1;
for (var i = 0; i < numbers.Length; i++)
{
var n = numbers[i];
if (n > result) { result = n; index = i; }
}
return index;
}
//[Benchmark]
public int IfMaxIndexSpan()
{
var result = float.MinValue;
var index = -1;
var numberSpan = numbers.AsSpan();
for (var i = 0; i < numberSpan.Length; i++)
{
var n = numberSpan[i];
if (n > result) { result = n; index = i; }
}
return index;
}
//[Benchmark]
public float TernaryMax()
{
var result = float.MinValue;
foreach (var n in numbers) { result = (n > result) ? n : result; /* https://github.com/dotnet/fsharp/issues/13098 */}
return result;
}
//[Benchmark]
public int TernaryMaxIndex()
{
var result = float.MinValue;
var index = -1;
for (var i = 0; i < numbers.Length; i++)
{
var n = numbers[i];
index = ((n > result) ? i : index);
result = (n > result) ? n : result;
/* https://github.com/dotnet/fsharp/issues/13098 */
}
return index;
}
//[Benchmark]
public float VectorMax() /* https://github.com/CBGonzalez/SIMDIntro */
{
var result = float.MinValue;
var vResult = new Vector<float>(float.MinValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
{
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count) { vResult = Vector.Max(new Vector<float>(numbers, i), vResult); }
for (; i < numbers.Length; i++) { result = (numbers[i] > result) ? numbers[i] : result; }
}
for (var i = 0; i < Vector<float>.Count; i++) { result = (vResult[i] > result) ? vResult[i] : result; }
return result;
}
//[Benchmark]
public int VectorMaxIndex()
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MaxValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var tempIndices = new Vector<int>();
{
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count)
{
tempIndices = new Vector<int>(new[] { 0 + i, 1 + i, 2 + i, 3 + i, 4 + i, 5 + i, 6 + i, 7 + i });
var nextThing = new Vector<float>(numbers, i);
var greaterMask = Vector.GreaterThan(nextThing, vResult);
vResult = Vector.Max(nextThing, vResult);
vIndices = Vector.Min(Vector.Multiply(greaterMask, tempIndices), vIndices);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
}
for (var i = 0; i < Vector<float>.Count; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (vResult[i] > result) ? vResult[i] : result;
}
return -indexResult;
}
//[Benchmark]
public int VectorMaxIndexArray()
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MaxValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var tempIndices = new Vector<int>();
var tempArray = new int[8];
{
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count)
{
tempArray[0] = 0 + i;
tempArray[1] = 1 + i;
tempArray[2] = 2 + i;
tempArray[3] = 3 + i;
tempArray[4] = 4 + i;
tempArray[5] = 5 + i;
tempArray[6] = 6 + i;
tempArray[7] = 7 + i;
tempIndices = new Vector<int>(tempArray);
var nextThing = new Vector<float>(numbers, i);
var greaterMask = Vector.GreaterThan(nextThing, vResult);
vResult = Vector.Max(nextThing, vResult);
vIndices = Vector.Min(Vector.Multiply(greaterMask, tempIndices), vIndices);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
}
for (var i = 0; i < Vector<float>.Count; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (vResult[i] > result) ? vResult[i] : result;
}
return -indexResult;
}
//[Benchmark]
public int VectorMaxIndexArrayBitwiseAnd()
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MaxValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var tempIndices = new Vector<int>();
var tempArray = new int[8];
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count)
{
tempArray[0] = 0 + i;
tempArray[1] = 1 + i;
tempArray[2] = 2 + i;
tempArray[3] = 3 + i;
tempArray[4] = 4 + i;
tempArray[5] = 5 + i;
tempArray[6] = 6 + i;
tempArray[7] = 7 + i;
tempIndices = new Vector<int>(tempArray);
var nextThing = new Vector<float>(numbers, i);
var greaterThan = Vector.GreaterThan(nextThing, vResult);
var multiplication = Vector.BitwiseAnd(greaterThan, tempIndices);
vIndices = Vector.Max(multiplication, vIndices);
vResult = Vector.Max(nextThing, vResult);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
for (var j = 0; j < Vector<float>.Count; j++)
{
indexResult = (numbers[j] > result) ? vIndices[j] : indexResult;
result = (vResult[j] > result) ? vResult[j] : result;
}
return -indexResult;
}
//[Benchmark]
public unsafe int VectorMaxIndexArrayBitwiseSpan()
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MinValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var tempArray = new int[8];
var numberSpan = numbers.AsSpan();
var indicesSpan = indices.AsSpan();
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count)
{
var nextThing = new Vector<float>(numberSpan.Slice(i, 8));
var greaterThan = Vector.GreaterThan(nextThing, vResult);
var multiplication = Vector.BitwiseAnd(greaterThan, new Vector<int>(indicesSpan.Slice(i, 8)));
vIndices = Vector.Max(multiplication, vIndices);
vResult = Vector.Max(nextThing, vResult);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
for (var j = 0; j < Vector<float>.Count; j++)
{
indexResult = (numbers[j] > result) ? vIndices[j] : indexResult;
result = (vResult[j] > result) ? vResult[j] : result;
}
return indexResult;
}
[Benchmark]
public unsafe int VectorMaxIndexArrayBitwiseUnaligned()
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MinValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var i = 0;
ref var numbersReference = ref numbers[0];
ref var indicesReference = ref indices[0];
for (; i < length - remaining; i += Vector<float>.Count)
{
var nextThing = Unsafe.ReadUnaligned<Vector<float>>(ref Unsafe.As<float, byte>(ref Unsafe.Add(ref numbersReference, i)));
var nextIndices = Unsafe.ReadUnaligned<Vector<int>>(ref Unsafe.As<int, byte>(ref Unsafe.Add(ref indicesReference, i)));
var greaterThan = Vector.GreaterThan(nextThing, vResult);
var multiplication = Vector.BitwiseAnd(greaterThan, nextIndices);
vIndices = Vector.Max(multiplication, vIndices);
vResult = Vector.Max(nextThing, vResult);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
for (var j = 0; j < Vector<float>.Count; j++)
{
indexResult = (numbers[j] > result) ? vIndices[j] : indexResult;
result = (vResult[j] > result) ? vResult[j] : result;
}
return indexResult;
}
[Benchmark]
public unsafe int MaxIndexV()
{
int i = 0;
Vector256<float> result = Vector256.Create(float.MinValue);
Vector256<int> current = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7);
Vector256<int> indexor = Vector256.Create(8);
Vector256<int> indices = Vector256<int>.Zero;
Span<int> vIndices = stackalloc int[Vector256<int>.Count];
for (; i <= numbers.Length - Vector256<float>.Count; i += Vector256<float>.Count)
{
var v0 = Vector256.LoadUnsafe(ref numbers[i]);
var mask = Vector256.GreaterThan(v0, result);
result = Vector256.ConditionalSelect(mask, v0, result);
indices = Vector256.ConditionalSelect(mask.AsInt32(), current, indices);
current += indexor;
}
Vector256.StoreUnsafe(indices, ref vIndices[0]);
float maxValue = float.MinValue;
int index = 0;
for (; i < numbers.Length; i++)
{
if (numbers[i] > maxValue)
{
maxValue = numbers[i];
index = i;
}
}
for (int j = 0; j < vIndices.Length; j++)
{
if (numbers[vIndices[j]] > maxValue)
{
maxValue = numbers[vIndices[j]];
index = vIndices[j];
}
}
return index;
}
[Benchmark]
public int MaxIndexV2()
{
int i = 0;
Vector256<float> result = Vector256.Create(float.MinValue);
Vector256<int> current = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7);
Vector256<int> indexor = Vector256.Create(8);
Vector256<int> indices = Vector256<int>.Zero;
Span<int> vIndices = stackalloc int[Vector256<int>.Count];
for (; i <= numbers.Length - Vector256<float>.Count; i += Vector256<float>.Count)
{
var v0 = Vector256.LoadUnsafe(ref numbers[i]);
var mask = Vector256.GreaterThan(v0, result);
result = Vector256.ConditionalSelect(mask, v0, result);
indices = Vector256.ConditionalSelect(mask.AsInt32(), current, indices);
/* current += indexor; current += indexor generates bad code gen because indexor is a vector constant */
FixCodeGen(ref current, ref indexor);
}
Vector256.StoreUnsafe(indices, ref vIndices[0]);
float maxValue = float.MinValue;
int index = 0;
for (; i < numbers.Length; i++)
{
if (numbers[i] > maxValue)
{
maxValue = numbers[i];
index = i;
}
}
for (int j = 0; j < vIndices.Length; j++)
{
if (numbers[vIndices[j]] > maxValue)
{
maxValue = numbers[vIndices[j]];
index = vIndices[j];
}
}
return index;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] static void FixCodeGen(ref Vector256<int> current, ref Vector256<int> indexor) { current += indexor; }
//[Benchmark]
public unsafe int VectorMaxIndexArrayBitwiseUnalignedLessMax() //I can't beat max
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MinValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var allOnes = new Vector<int>(int.MaxValue); /*Convert.ToString(allOnes[0], 2).Dump();*/
byte[] b = BitConverter.GetBytes(Convert.ToInt32(Convert.ToString(int.MaxValue, 2), 2));
var oneFloat = BitConverter.ToSingle(b, 0);
var allOnesF = new Vector<float>(oneFloat); //System.Numerics.IBinaryNumber<System.Single>.AllBitsSet
var i = 0;
ref var numbersReference = ref numbers[0];
ref var indicesReference = ref indices[0];
for (; i < length - remaining; i += Vector<float>.Count)
{
var nextThing = Unsafe.ReadUnaligned<Vector<float>>(ref Unsafe.As<float, byte>(ref Unsafe.Add(ref numbersReference, i)));
var nextIndices = Unsafe.ReadUnaligned<Vector<int>>(ref Unsafe.As<int, byte>(ref Unsafe.Add(ref indicesReference, i)));
var greaterThan = Vector.GreaterThan(nextThing, vResult);
var multiplication = Vector.BitwiseAnd(greaterThan, nextIndices);
//vIndices = Vector.Max(multiplication, vIndices).Dump("max"); /* Keep this one. The two lines below aren't worth it. */
vIndices = Vector.BitwiseAnd(allOnes, vIndices);
vIndices = Vector.Add(Vector.BitwiseAnd(greaterThan, nextIndices), vIndices);
vResult = Vector.Max(nextThing, vResult);
//vResult = Vector.BitwiseAnd<float>(allOnesF, vResult);
//vResult = Vector.Add(Vector.BitwiseAnd<float>(Vector.ConvertToSingle(greaterThan), nextThing), vResult);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
for (var j = 0; j < Vector<float>.Count; j++)
{
indexResult = (numbers[j] > result) ? vIndices[j] : indexResult;
result = (vResult[j] > result) ? vResult[j] : result;
}
return indexResult;
}
//[Benchmark]
public int VectorMaxIndexArrayVector() //VectorMaxIndexArray was better
{
var result = float.MinValue;
var indexResult = int.MinValue;
var vResult = new Vector<float>(float.MinValue);
var vIndices = new Vector<int>(int.MaxValue);
var length = numbers.Length;
var remaining = length % Vector<float>.Count;
var tempIndices = new Vector<int>();
var tempArray = new int[8];
var greaterMask = new Vector<int>(0);
var nextThing = new Vector<float>(0);
{
var i = 0;
for (; i < length - remaining; i += Vector<float>.Count)
{
tempArray[0] = 0 + i;
tempArray[1] = 1 + i;
tempArray[2] = 2 + i;
tempArray[3] = 3 + i;
tempArray[4] = 4 + i;
tempArray[5] = 5 + i;
tempArray[6] = 6 + i;
tempArray[7] = 7 + i;
tempIndices = new Vector<int>(tempArray);
nextThing = new Vector<float>(numbers, i);
greaterMask = Vector.GreaterThan(nextThing, vResult);
vResult = Vector.Max(nextThing, vResult);
vIndices = Vector.Min(Vector.Multiply(greaterMask, tempIndices), vIndices);
}
for (; i < numbers.Length; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (numbers[i] > result) ? numbers[i] : result;
}
}
for (var i = 0; i < Vector<float>.Count; i++)
{
indexResult = (numbers[i] > result) ? vIndices[i] : indexResult;
result = (vResult[i] > result) ? vResult[i] : result;
}
return -indexResult;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment