Skip to content

Instantly share code, notes, and snippets.

@HadrienG2
Last active April 3, 2020 15:31
Show Gist options
  • Select an option

  • Save HadrienG2/b21b35319470e2a025d6c3fe8a8792c3 to your computer and use it in GitHub Desktop.

Select an option

Save HadrienG2/b21b35319470e2a025d6c3fe8a8792c3 to your computer and use it in GitHub Desktop.

Revisions

  1. HadrienG2 revised this gist Apr 3, 2020. 1 changed file with 19 additions and 19 deletions.
    38 changes: 19 additions & 19 deletions demo.cpp
    Original file line number Diff line number Diff line change
    @@ -312,19 +312,19 @@ int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    // Move to 1-based and negative indexing
    const int neg_1based_virtual_bin = -global_virtual_bin - 1;

    // Now we have the right index for the first overflow bins, but the indices
    // are wrong after crossing "regular" bins, because our global bin counter
    // takes regular bins into account when it should only consider overflow.
    // At this point, we have an index that represents a count of all bins, both
    // regular and overflow, that are located before the current bin when
    // enumerating histogram bins in row-major order.
    //
    // To fix this, we need to know how many regular bins exist before the
    // current bin (in row-major order), and increment the virtual bin index by
    // this amount. This will un-do the counting of regular bins, thus giving
    // us a count of overflow bins.
    // We will next count the number of _regular_ bins which are located before
    // the current bin, and by removing this offset from the above index, we
    // will get a count of overflow bins that are located before the current bin
    // in row-major order. Which is what we want as our overflow bin index.
    //
    int total_regular_bins_before = 0;

    // First, we need to know how many regular bins we leave behind us for each
    // step on each axis, assuming the bin from which we come was regular.
    // step on each axis, assuming that the bin from which we come was regular.
    //
    // If mathematically inclined, you can also think of this as the size of an
    // hyperplane of regular bins when projecting on lower-numbered dimensions.
    @@ -336,26 +336,26 @@ int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    bin_sizes[dim] = bin_sizes[dim-1] * axes[dim-1]->GetNBinsNoOver();
    }

    // Then, starting from the _last_ dimension...
    // Then, starting from the _last_ histogram dimension...
    for (int dim = NDIMS-1; dim >= 0; --dim) {
    // We can tell how many regular bins lie before us, when accounting for
    // previous full regular bin hyperplanes on this axis.
    // We can tell how many regular bins lie before us on this axis,
    // accounting for the underflow bin of this axis if it has one.
    const int num_underflow_bins = static_cast<int>(!axes[dim]->CanGrow());
    const int num_regular_bins_before =
    std::max(virtual_bins[dim] - num_underflow_bins, 0);
    total_regular_bins_before += num_regular_bins_before * bin_sizes[dim];

    // If we are on an overflow or underflow bin on this axis, we know that
    // we don't need to look at the remaining axes. Projecting on the
    // remaining dimensions would only take us into an hyperplane of
    // over/underflow bins for the current axis, and we know that by
    // construction there will be no regular bins in there.
    // we don't need to look at the remaining axes. Projecting on those
    // dimensions would only take us into an hyperplane of over/underflow
    // bins for the current axis, and we know that by construction there
    // will be no regular bins in there.
    if (bins[dim] < 1) break;
    }

    // At this point, we know how many regular bins would be crossed by
    // enumerating all bins before the current one in row-major order, so we can
    // emit a corrected overflow bin axis that doesn't account for regular bins.
    // Now that we know how many bins lie before us, and how many of those are
    // regular bins, we can trivially deduce how many overflow bins lie before
    // us, and emit that as our global overflow bin index.
    return neg_1based_virtual_bin + total_regular_bins_before;
    }

    @@ -450,7 +450,7 @@ std::array<int, NDIMS> ComputeLocalBins(
    int num_regular_bins_before = 0;
    for (std::size_t dim = NDIMS-1; dim > 0; --dim) {
    // Let's start by computing the contribution of the underflow
    // hyperplane, in which we know there will be no regular bins
    // hyperplane (if any), in which we know there will be no regular bins
    const int num_underflow_hyperplanes =
    static_cast<int>(!axes[dim]->CanGrow());
    const int bins_in_underflow_hyperplane =
  2. HadrienG2 revised this gist Mar 31, 2020. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion demo.cpp
    Original file line number Diff line number Diff line change
    @@ -231,7 +231,7 @@ std::array<int, NDIMS> ComputeLocalBinsRaw(
    result[dim] = global_bin % axis_bin_counts[dim];
    global_bin /= axis_bin_counts[dim];
    }
    ASSERT(global_bin == 0, "Input global axis is out of range");
    ASSERT(global_bin == 0, "Input global bin index is out of range");
    return result;
    }

  3. HadrienG2 revised this gist Mar 31, 2020. 1 changed file with 9 additions and 6 deletions.
    15 changes: 9 additions & 6 deletions demo.cpp
    Original file line number Diff line number Diff line change
    @@ -27,7 +27,10 @@ class EqAxisMock : public IAxis
    public:
    EqAxisMock(double from, double to, int nBinsNoOver, bool canGrow) :
    m_from(from), m_to(to), m_nBinsNoOver(nBinsNoOver), m_canGrow(canGrow)
    {}
    {
    ASSERT(from < to, "Axis limits must be provided in the right order");
    ASSERT(nBinsNoOver > 0, "Invalid number of bins");
    }

    bool CanGrow() const final override {
    return m_canGrow;
    @@ -196,8 +199,8 @@ int ComputeGlobalBinRaw(const std::array<int, NDIMS>& zero_based_bins,
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    ASSERT(zero_based_bins[dim] >= 0,
    "Expected a zero-based local bin index, got a negative one");
    ASSERT(axis_bin_counts[dim] >= 0,
    "Expected a positive bin count, got a negative one");
    ASSERT(axis_bin_counts[dim] >= 1,
    "Expected a positive bin count, got a negative or null one");
    ASSERT(zero_based_bins[dim] < axis_bin_counts[dim],
    "Input local bin index is outside of input axis range");
    result += zero_based_bins[dim] * bin_size;
    @@ -223,8 +226,8 @@ std::array<int, NDIMS> ComputeLocalBinsRaw(
    "Expected a zero-based global bin index, got a negative one");
    std::array<int, NDIMS> result;
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    ASSERT(axis_bin_counts[dim] >= 0,
    "Expected a positive bin count, got a negative one");
    ASSERT(axis_bin_counts[dim] >= 1,
    "Expected a positive bin count, got a negative or null one");
    result[dim] = global_bin % axis_bin_counts[dim];
    global_bin /= axis_bin_counts[dim];
    }
    @@ -464,7 +467,7 @@ std::array<int, NDIMS> ComputeLocalBins(
    // case if some axes are growable, and thus don't have overflow bins.
    if (overflow_bins_per_regular_hyperplane != 0) {
    // If so, we start by cutting off the contribution of the underflow
    // and overflow hyperplanes, to focus specifically on those bins...
    // and overflow hyperplanes, to focus specifically on regular bins.
    const int overflow_bins_in_regular_hyperplanes =
    std::clamp(
    unprocessed_previous_overflow_bins
  4. HadrienG2 revised this gist Mar 31, 2020. 1 changed file with 5 additions and 2 deletions.
    7 changes: 5 additions & 2 deletions demo.cpp
    Original file line number Diff line number Diff line change
    @@ -590,17 +590,20 @@ int main() {
    std::cout << array.back() << ')';
    };
    auto test_iteration = [&](const auto& bins, const auto& axes) {
    // We start from a set of local bin indices
    std::cout << "On true bin ";
    print_array(bins);
    std::cout << ": ";

    // We are able to convert them into a global histogram bin index, and
    // to convert such an index back into local bin indices.
    int global_bin = ComputeGlobalBin(bins, axes);
    auto computed_bins = ComputeLocalBins(global_bin, axes);
    std::cout << "Global bin " << global_bin << " aka ";
    print_array(computed_bins);

    // Once you have those, you can do everything including global
    // FindBin (= local FindBin + local-to-global idx conversion) or
    // Once you have those primitives, you can do everything including
    // global FindBin (= local FindBin + local-to-global idx conversion) or
    // global bin boundary computations (= global-to-local idx
    // conversion + local bin boundary computations)
    if (verbose) {
  5. HadrienG2 revised this gist Mar 31, 2020. 1 changed file with 112 additions and 75 deletions.
    187 changes: 112 additions & 75 deletions demo.cpp
    Original file line number Diff line number Diff line change
    @@ -103,6 +103,9 @@ class EqAxisMock : public IAxis


    // Apply a function to each element of an array and return the array of results
    //
    // You'll need to specify the "Out" type because C++ is bad at type inference.
    //
    template <typename Out, typename In, std::size_t NDIMS, typename UnaryFunction>
    std::array<Out, NDIMS> Map(const std::array<In, NDIMS>& inputs,
    UnaryFunction&& f) {
    @@ -135,12 +138,12 @@ std::array<int, NDIMS> GetNBinsNoOver(const std::array<const IAxis*, NDIMS>& axe
    //
    // A reader experienced with functional programming may recognize a Zip->Map
    // graph and wonder why I'm not just implementing Zip. The answer is that C++'s
    // poor type inference, limited type system vocabulary and ultra-verbose lambda
    // poor type inference, limited type system vocabulary and super-verbose lambda
    // syntax make any nontrivial combination of functional programming constructs
    // look like an unreadable mess.
    //
    // Specialized combinators stress the broken parts of C++ less, and are thus
    // preferrable over their general counterparts unless you need the generality.
    // Specialized combinators ask less from the broken parts of C++, and are thus
    // preferrable over their general counterparts when you don't need generality.
    //
    template <typename Out, typename In, std::size_t NDIMS, typename BinaryFunction>
    std::array<Out, NDIMS> ZipAxisMap(const std::array<In, NDIMS>& inputs,
    @@ -234,13 +237,17 @@ std::array<int, NDIMS> ComputeLocalBinsRaw(
    // convention to a "virtual bin" convention where the underflow bin has index 0
    // and the overflow bin has index N+1 where N is the axis' regular bin count.
    //
    // For growable axes, subtract 1 to index to remain zero-based.
    // For growable axes, subtract 1 from regular indices so that the indexing
    // convention remains zero-based (this means that there will be no "holes" in
    // global binning, which matters more than the choice of regular index base)
    //
    int LocalBinToVirtualBin(int bin, const IAxis& axis) {
    switch (bin) {
    case -1:
    ASSERT(!axis.CanGrow(), "Growable axes shouldn't underflow");
    return 0;
    case -2:
    ASSERT(!axis.CanGrow(), "Growable axes shouldn't overflow");
    return axis.GetNBins() - 1;
    default:
    ASSERT(bin != 0, "0 is not a valid local bin index");
    @@ -254,19 +261,21 @@ int LocalBinToVirtualBin(int bin, const IAxis& axis) {
    // Convert back from zero-based virtual bins to the standard under/overflow bin
    // indexing convention.
    //
    // For growable axes, add 1 to index to become one-based.
    // For growable axes, add 1 in order to go back to the usual 1-based regular bin
    // indexing convention, thus reversing the effect of LocalBinToVirtualBin.
    //
    int VirtualBinToLocalBin(int virtual_bin, const IAxis& axis) {
    if (virtual_bin == 0) {
    if ((!axis.CanGrow()) && (virtual_bin == 0)) {
    return -1;
    } else if (virtual_bin == (axis.GetNBins() - 1)) {
    } else if ((!axis.CanGrow()) && (virtual_bin == (axis.GetNBins() - 1))) {
    return -2;
    } else {
    ASSERT(virtual_bin >= 1,
    const int regular_bin_offset = -static_cast<int>(axis.CanGrow());
    ASSERT(virtual_bin >= 1 + regular_bin_offset,
    "Expected a zero-based virtual bin index, got a negative one");
    ASSERT(virtual_bin <= axis.GetNBinsNoOver(),
    ASSERT(virtual_bin <= axis.GetNBinsNoOver() + regular_bin_offset,
    "Input virtual bin index is out of axis range");
    return virtual_bin + static_cast<int>(axis.CanGrow());
    return virtual_bin - regular_bin_offset;
    }
    }

    @@ -286,9 +295,9 @@ int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    ) + 1;
    }

    // Convert bin indices to another coordinate system where the underflow bin
    // has coordinate 0, regular bins have coordinates [1, N], and the overflow
    // bin has coordinate N+1, where N is GetNBinsNoOver().
    // Convert bin indices to a zero-based coordinate system where the underflow
    // bin (if any) has coordinate 0 and the overflow bin (if any) has
    // coordinate N-1, where N is the axis' total number of bins.
    std::array<int, NDIMS> virtual_bins =
    ZipAxisMap<int>(bins, axes, LocalBinToVirtualBin);

    @@ -301,20 +310,20 @@ int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    const int neg_1based_virtual_bin = -global_virtual_bin - 1;

    // Now we have the right index for the first overflow bins, but the indices
    // are wrong after crossing "regular" bins, because the global bin counter
    // are wrong after crossing "regular" bins, because our global bin counter
    // takes regular bins into account when it should only consider overflow.
    //
    // To fix this, we need to know how many regular bins exist before the
    // current bin (in row-major order), and increment the virtual bin index by
    // this amount. This will un-do the counting of regular bins, thus giving
    // us a count of overflow bins.
    //
    int num_regular_bins_before = 0;
    int total_regular_bins_before = 0;

    // First, we need to know how many regular bins we leave behind us for each
    // step on each axis, assuming the bin from which we come was regular.
    //
    // If mathematically inclined, you can think of this as the size of an
    // If mathematically inclined, you can also think of this as the size of an
    // hyperplane of regular bins when projecting on lower-numbered dimensions.
    // See the docs of ComputeLocalBins for more on this worldview.
    //
    @@ -327,21 +336,24 @@ int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    // Then, starting from the _last_ dimension...
    for (int dim = NDIMS-1; dim >= 0; --dim) {
    // We can tell how many regular bins lie before us, when accounting for
    // previous hyperplanes on this axis.
    const int num_bins_before = std::max(virtual_bins[dim] - 1, 0);
    num_regular_bins_before += num_bins_before * bin_sizes[dim];
    // previous full regular bin hyperplanes on this axis.
    const int num_underflow_bins = static_cast<int>(!axes[dim]->CanGrow());
    const int num_regular_bins_before =
    std::max(virtual_bins[dim] - num_underflow_bins, 0);
    total_regular_bins_before += num_regular_bins_before * bin_sizes[dim];

    // If we are on an overflow or underflow bin on this axis, we know that
    // we don't need to look at the remaining axes. Projecting on these
    // other axes would only take us into an hyperplane of over/underflow
    // bins for this axis, where there are no regular bins to account for.
    // we don't need to look at the remaining axes. Projecting on the
    // remaining dimensions would only take us into an hyperplane of
    // over/underflow bins for the current axis, and we know that by
    // construction there will be no regular bins in there.
    if (bins[dim] < 1) break;
    }

    // At this point, we know how many regular bins would be crossed by
    // enumerating all bins before the current one in row-major order, so we can
    // emit a corrected overflow bin axis that doesn't account for regular bins.
    return neg_1based_virtual_bin + num_regular_bins_before;
    return neg_1based_virtual_bin + total_regular_bins_before;
    }


    @@ -374,17 +386,17 @@ std::array<int, NDIMS> ComputeLocalBins(
    // one slides along a histogram axis. Using a 2D binning case as a simple
    // motivating example...
    //
    // -1 -2 -3 -4 <- No regular bins on the first bin of axis 1
    // -5 1 2 -6 <- Some of them on middle bins of axis 1
    // -1 -2 -3 -4 <- No regular bins on the underflow line of axis 1
    // -5 1 2 -6 <- Some of them on middle lines of axis 1
    // -7 3 4 -8
    // -9 -10 -11 -12 <- No regular bins on the last bin of axis 1
    // -9 -10 -11 -12 <- No regular bins on the overflow line of axis 1
    //
    // As we go to higher dimensions, the geometry becomes more complex, but
    // if we replace "line" with "plane", we get a similar picture in 3D as we
    // if we replace "line" with "plane", we get a similar picture in 3D when we
    // slide along axis 2:
    //
    // No regular bins on the Some of them on the No regular bins again
    // first plane of axis 2 middle planes of ax.2 on last plane of ax.2
    // No regular bins on the Some of them on the No regular bins again
    // UF plane of axis 2 regular planes of ax.2 on the OF plane of ax.2
    //
    // -1 -2 -3 -4 -17 -18 -19 -20 -29 -30 -31 -32
    // -5 -6 -7 -8 -21 1 2 -22 -33 -34 -35 -36
    @@ -400,18 +412,19 @@ std::array<int, NDIMS> ComputeLocalBins(
    // regular bins before the overflow bin we're currently looking at:
    //
    // - Start by processing the last histogram axis.
    // - Ignore the first and last hyperplane of overflow bins on this axis.
    // - Ignore the first and last hyperplane on this axis, which only contain
    // underflow and overflow bins respectively.
    // - Count how many complete hyperplanes of regular bins lie before us on
    // this axis, which we can do indirectly in our overflow bin based
    // reasoning by computing the perimeter of the regular region and dividing
    // our overflow bin count by that amount.
    // - Now we counted complete hyperplanes on the last histogram axis, but
    // we need to process the last incomplete hyperplane, if any.
    // our "regular" overflow bin count by that amount.
    // - Now we counted previous hyperplanes on this last histogram axis, but
    // we need to process the hyperplane that our bin is located in, if any.
    // * For this, we reduce our overflow bin count to a count of
    // _unaccounted_ overflow bins in the current hyperplane...
    // * ...which allows us to recursively continue the computation by
    // processing the next (well, previous) histogram axis in the context
    // of this hyperplane, as done above.
    // of this hyperplane, in the same manner as above.
    //
    // Alright, now that the general plan is sorted out, let's compute some
    // quantities that we are going to need, namely the total number of bins per
    @@ -429,46 +442,74 @@ std::array<int, NDIMS> ComputeLocalBins(
    bins_per_hyperplane[dim] = curr_bins_per_hyperplane;
    }

    // And now, starting from the last axis...
    // Given that, and starting from the last axis...
    int unprocessed_previous_overflow_bins = corrected_virtual_overflow_bin;
    int num_regular_bins_before = 0;
    for (std::size_t dim = NDIMS-1; dim > 0; --dim) {
    // From the total number of bins per hyperplane and the number of
    // Let's start by computing the contribution of the underflow
    // hyperplane, in which we know there will be no regular bins
    const int num_underflow_hyperplanes =
    static_cast<int>(!axes[dim]->CanGrow());
    const int bins_in_underflow_hyperplane =
    num_underflow_hyperplanes * bins_per_hyperplane[dim-1];

    // Next, from the total number of bins per hyperplane and the number of
    // regular bins per hyperplane that has them, we deduce the number of
    // overflow bins per hyperplane that has regular bins.
    const int overflow_bins_per_hyperplane =
    const int overflow_bins_per_regular_hyperplane =
    bins_per_hyperplane[dim-1] - regular_bins_per_hyperplane[dim-1];

    // From this and the count of overflow bins before us, we deduce how
    // many overflow bins before the current bin do not lie on this axis'
    // underflow and overflow hyperplanes.
    const int of_bins_wo_overflow_hyperplanes =
    std::clamp(
    unprocessed_previous_overflow_bins - bins_per_hyperplane[dim-1],
    0,
    overflow_bins_per_hyperplane * axes[dim]->GetNBinsNoOver()
    );

    // From this quantity, in turn, we can deduce how many _full_
    // hyperplanes worth of regular bins are located before the current bin.
    if (overflow_bins_per_hyperplane != 0) {
    // Let's assume that there _are_ overflow bins...
    // This allows us to answer a key question: are there any under/overflow
    // bins on the hyperplanes that have regular bins? It may not be the
    // case if some axes are growable, and thus don't have overflow bins.
    if (overflow_bins_per_regular_hyperplane != 0) {
    // If so, we start by cutting off the contribution of the underflow
    // and overflow hyperplanes, to focus specifically on those bins...
    const int overflow_bins_in_regular_hyperplanes =
    std::clamp(
    unprocessed_previous_overflow_bins
    - bins_in_underflow_hyperplane,
    0,
    overflow_bins_per_regular_hyperplane
    * axes[dim]->GetNBinsNoOver()
    );

    // We count how many _complete_ "regular" hyperplanes that leaves
    // before us, and account for those in our regular bin count.
    const int num_regular_hyperplanes_before =
    of_bins_wo_overflow_hyperplanes / overflow_bins_per_hyperplane;
    overflow_bins_in_regular_hyperplanes
    / overflow_bins_per_regular_hyperplane;
    num_regular_bins_before +=
    num_regular_hyperplanes_before * regular_bins_per_hyperplane[dim-1];
    num_regular_hyperplanes_before
    * regular_bins_per_hyperplane[dim-1];

    // This only leaves the _current_ hyperplane as a possible source of
    // more regular bins that we haven't accounted for yet. We'll take those
    // into account while processing previous dimensions.
    unprocessed_previous_overflow_bins
    = of_bins_wo_overflow_hyperplanes % overflow_bins_per_hyperplane;
    // more regular bins that we haven't accounted for yet. We'll take
    // those into account while processing previous dimensions.
    unprocessed_previous_overflow_bins =
    overflow_bins_in_regular_hyperplanes
    % overflow_bins_per_regular_hyperplane;
    } else {
    // If there are no overflow bins per hyperplane in the lower
    // hyperplane, then the previous computation should have found none
    ASSERT(of_bins_wo_overflow_hyperplanes == 0,
    "Logic error detected in the global -> local bin algorithm");
    // If there are no overflow bins in regular hyperplane, then the
    // rule changes: observing _one_ overflow bin after the underflow
    // hyperplane means that _all_ regular hyperplanes on this axis are
    // already before us.
    if (unprocessed_previous_overflow_bins >= bins_in_underflow_hyperplane) {
    num_regular_bins_before +=
    axes[dim]->GetNBinsNoOver()
    * regular_bins_per_hyperplane[dim-1];
    }

    // In this case, we're done, because the current bin may only lie
    // in the underflow or underflow hyperplane of this axis. Which
    // means that there are no further regular bins to be accounted for
    // in the current hyperplane.
    unprocessed_previous_overflow_bins = 0;
    }

    // No need to continue this loop if we've taken into account all
    // overflow bins that were associated with regular bins.
    if (unprocessed_previous_overflow_bins == 0) break;
    }

    // By the time we reach the first axis, there should only be at most one
    @@ -477,27 +518,27 @@ std::array<int, NDIMS> ComputeLocalBins(
    // -1 1 2 3 -2
    // ^ ^
    // | |
    // | Option 2: one overflow bin before
    // | Option 2: one overflow bin before us
    // |
    // Option 1: no overflow bin before
    // Option 1: no overflow bin before us
    //
    ASSERT(unprocessed_previous_overflow_bins <= 1,
    "Logic error detected in the global -> local bin algorithm");
    num_regular_bins_before +=
    unprocessed_previous_overflow_bins * axes[0]->GetNBinsNoOver();

    // Now that we know the number of regular bins before us, we can get add
    // this to the overflow bin index to get a global zero-based bin index
    // accounting for both under/overflow bins and regular bins, just like we
    // had in the ComputeGlobalBin() implementation...
    // Now that we know the number of regular bins before us, we can add this to
    // to the zero-based overflow bin index that we started with to get a global
    // zero-based bin index accounting for both under/overflow bins and regular
    // bins, just like what we had in the ComputeGlobalBin() implementation.
    const int global_virtual_bin =
    corrected_virtual_overflow_bin + num_regular_bins_before;

    // ...then we can go back to per-axis zero-based "virtual" bin indices...
    // We can then easily go back to zero-based "virtual" bin indices...
    const std::array<int, NDIMS> virtual_bins =
    ComputeLocalBinsRaw(global_virtual_bin, GetNBins(axes));

    // And finally, return to the -1/-2 overflow bin indexing convention.
    // ...and from that go back to the -1/-2 overflow bin indexing convention.
    return ZipAxisMap<int>(virtual_bins, axes, VirtualBinToLocalBin);
    }

    @@ -507,13 +548,9 @@ std::array<int, NDIMS> ComputeLocalBins(

    int main() {
    // Set up some test axes
    //
    // FIXME: Doesn't work yet when one axis can grow, and thus does not have
    // overflow and underflow bins. Some code somewhere must assume that
    // those bins always exist...
    const EqAxisMock eq_axis_0(3.0, 6.0, 3, false);
    const EqAxisMock eq_axis_0(3.0, 6.0, 3, true);
    const EqAxisMock eq_axis_1(7.0, 9.5, 5, false);
    const EqAxisMock eq_axis_2(-2., -1., 2, false);
    const EqAxisMock eq_axis_2(-2., -1., 2, true);

    // Should I print out GetBinFrom and GetBinTo values?
    const bool verbose = true;
  6. HadrienG2 revised this gist Mar 30, 2020. 1 changed file with 499 additions and 130 deletions.
    629 changes: 499 additions & 130 deletions demo.cpp
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,5 @@
    #include <algorithm>
    #include <array>
    #include <exception>
    #include <iostream>
    #include <utility>
    @@ -9,6 +10,7 @@


    struct IAxis {
    virtual bool CanGrow() const = 0;
    virtual int GetNBinsNoOver() const = 0;
    virtual int GetNOverflowBins() const = 0;
    int GetNBins() const {
    @@ -23,23 +25,27 @@ struct IAxis {
    class EqAxisMock : public IAxis
    {
    public:
    EqAxisMock(double from, double to, int nBinsNoOver) :
    m_from(from), m_to(to), m_nBinsNoOver(nBinsNoOver)
    EqAxisMock(double from, double to, int nBinsNoOver, bool canGrow) :
    m_from(from), m_to(to), m_nBinsNoOver(nBinsNoOver), m_canGrow(canGrow)
    {}

    bool CanGrow() const final override {
    return m_canGrow;
    }

    int GetNBinsNoOver() const final override {
    return m_nBinsNoOver;
    }

    int GetNOverflowBins() const final override {
    return 2;
    return m_canGrow ? 0 : 2;
    }

    int FindBin(double x) const final override {
    if (x < m_from) {
    return -1;
    return m_canGrow ? 0 : -1;
    } else if (x >= m_to) {
    return -2;
    return m_canGrow ? 0 : -2;
    } else {
    return (x - m_from) / GetBinWidth() + 1;
    }
    @@ -48,32 +54,43 @@ class EqAxisMock : public IAxis
    double GetBinFrom(int bin) const final override {
    switch (bin) {
    case -1:
    ASSERT(!m_canGrow,
    "Growable axes don't have an underflow bin");
    return -std::numeric_limits<double>::infinity();
    case -2:
    ASSERT(!m_canGrow,
    "Growable axes don't have an overflow bin");
    return m_to;
    default:
    ASSERT((bin >= 1) || (bin <= GetNBinsNoOver()),
    "GetBinFrom got an out-of-bounds bin index");
    ASSERT(bin != 0, "0 is not a valid local bin index");
    ASSERT(bin <= GetNBinsNoOver(),
    "Local bin index is out of axis range");
    return m_from + (bin - 1) * GetBinWidth();
    }
    }

    double GetBinTo(int bin) const final override {
    switch (bin) {
    case -1:
    ASSERT(!m_canGrow,
    "Growable axes don't have an underflow bin");
    return m_from;
    case -2:
    ASSERT(!m_canGrow,
    "Growable axes don't have an overflow bin");
    return std::numeric_limits<double>::infinity();
    default:
    ASSERT((bin >= 1) || (bin <= GetNBinsNoOver()),
    "GetBinTo got an out-of-bounds bin index");
    ASSERT(bin != 0, "0 is not a valid local bin index");
    ASSERT(bin <= GetNBinsNoOver(),
    "Local bin index is out of axis range");
    return m_from + bin * GetBinWidth();
    }
    }

    private:
    double m_from, m_to;
    int m_nBinsNoOver;
    bool m_canGrow;

    // NOTE: Specific to equidistant axis, so general algorithm shouldn't use it
    double GetBinWidth() const {
    @@ -82,175 +99,527 @@ class EqAxisMock : public IAxis
    };


    // TODO: Generalize the algorithm to N-d: make it take a vector of (IAxis, int)
    // pairs and implement it using a loop.
    int ComputeGlobalBin(const IAxis& axis_0, int bin_0,
    const IAxis& axis_1, int bin_1) {
    // ---


    // Apply a function to each element of an array and return the array of results
    template <typename Out, typename In, std::size_t NDIMS, typename UnaryFunction>
    std::array<Out, NDIMS> Map(const std::array<In, NDIMS>& inputs,
    UnaryFunction&& f) {
    std::array<Out, NDIMS> outputs;
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    outputs[dim] = f(inputs[dim]);
    }
    return outputs;
    }


    // Shorthand for calling GetNBins on an array of axes
    template <std::size_t NDIMS>
    std::array<int, NDIMS> GetNBins(const std::array<const IAxis*, NDIMS>& axes) {
    return Map<int>(axes,
    [](const IAxis* axis) { return axis->GetNBins(); });
    }


    // Shorthand for calling GetNBinsNoOver on an array of axes
    template <std::size_t NDIMS>
    std::array<int, NDIMS> GetNBinsNoOver(const std::array<const IAxis*, NDIMS>& axes) {
    return Map<int>(axes,
    [](const IAxis* axis) { return axis->GetNBinsNoOver(); });
    }


    // Given an array of inputs and an array of axis pointers of the same size,
    // apply a binary function to each (input, axis) pair and return the results.
    //
    // A reader experienced with functional programming may recognize a Zip->Map
    // graph and wonder why I'm not just implementing Zip. The answer is that C++'s
    // poor type inference, limited type system vocabulary and ultra-verbose lambda
    // syntax make any nontrivial combination of functional programming constructs
    // look like an unreadable mess.
    //
    // Specialized combinators stress the broken parts of C++ less, and are thus
    // preferrable over their general counterparts unless you need the generality.
    //
    template <typename Out, typename In, std::size_t NDIMS, typename BinaryFunction>
    std::array<Out, NDIMS> ZipAxisMap(const std::array<In, NDIMS>& inputs,
    const std::array<const IAxis*, NDIMS>& axes,
    BinaryFunction&& f) {
    std::array<Out, NDIMS> outputs;
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    outputs[dim] = f(inputs[dim], *axes[dim]);
    }
    return outputs;
    }


    // Shorthand for calling GetBinFrom from an array of axes to an array of bins
    template <std::size_t NDIMS>
    std::array<double, NDIMS> GetBinFrom(const std::array<int, NDIMS>& bins,
    const std::array<const IAxis*, NDIMS>& axes) {
    return ZipAxisMap<double>(
    bins,
    axes,
    [](int bin, const IAxis& axis) { return axis.GetBinFrom(bin); }
    );
    }


    // Shorthand for calling GetBinTo from an array of axes to an array of bins
    template <std::size_t NDIMS>
    std::array<double, NDIMS> GetBinTo(const std::array<int, NDIMS>& bins,
    const std::array<const IAxis*, NDIMS>& axes) {
    return ZipAxisMap<double>(
    bins,
    axes,
    [](int bin, const IAxis& axis) { return axis.GetBinTo(bin); }
    );
    }


    // Compute a zero-based global bin index given...
    //
    // - A set of zero-based per-axis bin indices
    // - The number of considered bins on each axis (can be either GetNBinsNoOver
    // or GetNBins depending on what you are trying to do)
    // - A policy of treating all bins are regular (i.e. no negative indices)
    //
    template <std::size_t NDIMS>
    int ComputeGlobalBinRaw(const std::array<int, NDIMS>& zero_based_bins,
    const std::array<int, NDIMS>& axis_bin_counts) {
    int result = 0;
    int bin_size = 1;
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    ASSERT(zero_based_bins[dim] >= 0,
    "Expected a zero-based local bin index, got a negative one");
    ASSERT(axis_bin_counts[dim] >= 0,
    "Expected a positive bin count, got a negative one");
    ASSERT(zero_based_bins[dim] < axis_bin_counts[dim],
    "Input local bin index is outside of input axis range");
    result += zero_based_bins[dim] * bin_size;
    bin_size *= axis_bin_counts[dim];
    }
    return result;
    }


    // Compute zero-based local bin indices given...
    //
    // - A zero-based global bin index
    // - The number of considered bins on each axis (can be either GetNBinsNoOver
    // or GetNBins depending on what you are trying to do)
    // - A policy of treating all bins are regular (i.e. no negative indices)
    //
    template <std::size_t NDIMS>
    std::array<int, NDIMS> ComputeLocalBinsRaw(
    int global_bin,
    const std::array<int, NDIMS>& axis_bin_counts
    ) {
    ASSERT(global_bin >= 0,
    "Expected a zero-based global bin index, got a negative one");
    std::array<int, NDIMS> result;
    for (std::size_t dim = 0; dim < NDIMS; ++dim) {
    ASSERT(axis_bin_counts[dim] >= 0,
    "Expected a positive bin count, got a negative one");
    result[dim] = global_bin % axis_bin_counts[dim];
    global_bin /= axis_bin_counts[dim];
    }
    ASSERT(global_bin == 0, "Input global axis is out of range");
    return result;
    }


    // Convert a local axis bins from the standard -1/-2 under/overflow bin indexing
    // convention to a "virtual bin" convention where the underflow bin has index 0
    // and the overflow bin has index N+1 where N is the axis' regular bin count.
    //
    // For growable axes, subtract 1 to index to remain zero-based.
    //
    int LocalBinToVirtualBin(int bin, const IAxis& axis) {
    switch (bin) {
    case -1:
    return 0;
    case -2:
    return axis.GetNBins() - 1;
    default:
    ASSERT(bin != 0, "0 is not a valid local bin index");
    ASSERT(bin <= axis.GetNBinsNoOver(),
    "Input local bin index is out of axis range");
    return bin - static_cast<int>(axis.CanGrow());
    }
    }


    // Convert back from zero-based virtual bins to the standard under/overflow bin
    // indexing convention.
    //
    // For growable axes, add 1 to index to become one-based.
    //
    int VirtualBinToLocalBin(int virtual_bin, const IAxis& axis) {
    if (virtual_bin == 0) {
    return -1;
    } else if (virtual_bin == (axis.GetNBins() - 1)) {
    return -2;
    } else {
    ASSERT(virtual_bin >= 1,
    "Expected a zero-based virtual bin index, got a negative one");
    ASSERT(virtual_bin <= axis.GetNBinsNoOver(),
    "Input virtual bin index is out of axis range");
    return virtual_bin + static_cast<int>(axis.CanGrow());
    }
    }


    // Compute the global index of a certain bin on an N-dimensional histogram,
    // knowing the local bin indices as returned by calling FindBin on each axis.
    template <std::size_t NDIMS>
    int ComputeGlobalBin(const std::array<int, NDIMS>& bins,
    const std::array<const IAxis*, NDIMS>& axes) {
    // Get regular bins out of the way
    if ((bin_0 >= 1) && (bin_1 >= 1)) {
    return bin_0 + (bin_1 - 1) * axis_0.GetNBinsNoOver();
    if (std::all_of(bins.cbegin(), bins.cend(),
    [](int bin) { return bin >= 1; }))
    {
    return ComputeGlobalBinRaw(
    Map<int>(bins, [](int bin) { return bin - 1; }),
    GetNBinsNoOver(axes)
    ) + 1;
    }

    // Convert bin indices to another coordinate system where the underflow bin
    // has coordinate 0, regular bins have coordinates [1, N], and the overflow
    // bin has coordinate N+1, where N is GetNBinsNoOver().
    auto compute_virtual_bin = [](const IAxis& axis, int bin) {
    switch (bin) {
    case -1:
    return 0;
    case -2:
    return axis.GetNBins() - 1;
    default:
    ASSERT((bin >= 1) || (bin <= axis.GetNBinsNoOver()),
    "Received an invalid local bin index as input");
    return bin;
    }
    };
    const int virtual_bin_0 = compute_virtual_bin(axis_0, bin_0);
    ASSERT((virtual_bin_0 >= 0) || (virtual_bin_0 < axis_0.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 0");
    const int virtual_bin_1 = compute_virtual_bin(axis_1, bin_1);
    ASSERT((virtual_bin_1 >= 0) || (virtual_bin_1 < axis_1.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 1");

    // Deduce what the global bin index would be in this coordinate system
    const int global_virtual_bin = virtual_bin_0 + virtual_bin_1 * axis_0.GetNBins();
    ASSERT((global_virtual_bin >= 0) || (global_virtual_bin < axis_0.GetNBins() * axis_1.GetNBins()),
    "Computed global virtual bin index is out of expected range");

    // Move to negative and 1-based indexing
    std::array<int, NDIMS> virtual_bins =
    ZipAxisMap<int>(bins, axes, LocalBinToVirtualBin);

    // Deduce what the global bin index would be in this coordinate system that
    // unifies regular and overflow bins.
    const int global_virtual_bin = ComputeGlobalBinRaw(virtual_bins,
    GetNBins(axes));

    // Move to 1-based and negative indexing
    const int neg_1based_virtual_bin = -global_virtual_bin - 1;

    // Now we have the right index for the first overflow bins, but the indices
    // are wrong after crossing "regular" bins, because the count of overflow
    // bins takes these into account when it shouldn't.
    // are wrong after crossing "regular" bins, because the global bin counter
    // takes regular bins into account when it should only consider overflow.
    //
    // To fix this, we need to know how many regular bins exist before the
    // current bin (in row-major order), and increment the virtual bin index by
    // this amount. This will un-do the counting of regular bins as overflow.
    // this amount. This will un-do the counting of regular bins, thus giving
    // us a count of overflow bins.
    //
    int num_regular_bins_before = 0;

    // Rows of regular bins can be taken into account with just a clamp trick
    const int num_rows_before = std::clamp(virtual_bin_1 - 1, 0, axis_1.GetNBinsNoOver());
    num_regular_bins_before += num_rows_before * axis_0.GetNBinsNoOver();
    // First, we need to know how many regular bins we leave behind us for each
    // step on each axis, assuming the bin from which we come was regular.
    //
    // If mathematically inclined, you can think of this as the size of an
    // hyperplane of regular bins when projecting on lower-numbered dimensions.
    // See the docs of ComputeLocalBins for more on this worldview.
    //
    std::array<int, NDIMS> bin_sizes;
    bin_sizes[0] = 1;
    for (std::size_t dim = 1; dim < NDIMS; ++dim) {
    bin_sizes[dim] = bin_sizes[dim-1] * axes[dim-1]->GetNBinsNoOver();
    }

    // For columns, however, we need to know whether we are on a regular row
    // or an under/overflow row.
    if (bin_1 >= 1) {
    const int num_cols_before = std::clamp(virtual_bin_0 - 1, 0, axis_1.GetNBinsNoOver());
    num_regular_bins_before += num_cols_before;
    // Then, starting from the _last_ dimension...
    for (int dim = NDIMS-1; dim >= 0; --dim) {
    // We can tell how many regular bins lie before us, when accounting for
    // previous hyperplanes on this axis.
    const int num_bins_before = std::max(virtual_bins[dim] - 1, 0);
    num_regular_bins_before += num_bins_before * bin_sizes[dim];

    // If we are on an overflow or underflow bin on this axis, we know that
    // we don't need to look at the remaining axes. Projecting on these
    // other axes would only take us into an hyperplane of over/underflow
    // bins for this axis, where there are no regular bins to account for.
    if (bins[dim] < 1) break;
    }

    // And with this correction, we should be good
    // At this point, we know how many regular bins would be crossed by
    // enumerating all bins before the current one in row-major order, so we can
    // emit a corrected overflow bin axis that doesn't account for regular bins.
    return neg_1based_virtual_bin + num_regular_bins_before;
    }


    // TODO: Generalize the algorithm to N-d: make it take a vector of IAxis as
    // input, return a vector of Int as output, and implement it using a loop.
    std::pair<int, int> ComputeLocalBins(int global_bin, const IAxis& axis_0, const IAxis& axis_1) {
    // Given a global histogram bin index as generated by ComputeGlobalBin above,
    // go back to per-axis "local" bin coordinates.
    template<std::size_t NDIMS>
    std::array<int, NDIMS> ComputeLocalBins(
    int global_bin,
    const std::array<const IAxis*, NDIMS> axes
    ) {
    // Get regular bins out of the way
    if (global_bin >= 1) {
    const int bin_0 = ((global_bin - 1) % axis_0.GetNBinsNoOver()) + 1;
    const int bin_1 =
    (((global_bin - 1) - (bin_0 - 1)) / axis_0.GetNBinsNoOver()) + 1;
    return std::make_pair(bin_0, bin_1);
    return Map<int>(
    ComputeLocalBinsRaw(global_bin - 1, GetNBinsNoOver(axes)),
    [](int bin) { return bin + 1; }
    );
    }

    // Convert our negative index to something positive and 0-based, as that is
    // more convenient to work with. Note, however, that this is _not_
    // equivalent to the virtual_bin that we had before, because what we have
    // here is a count of overflow bins, not of all bins.
    // here is a count of overflow bins, not of all bins...
    ASSERT(global_bin != 0, "Received an invalid global bin index as input");
    const int corrected_virtual_bin = -global_bin - 1;

    // This overflow bin count, accounts for...
    // - At most one full row of "top" underflow bins, which are the axis 0 bins
    // associated with the axis 1 underflow bin.
    // - Any number of axis 0 (underflow, overflow) bin pairs, with regular
    // bins in the middle.
    // - Possibly one trailing axis 0 underflow bin, if we are on the overflow
    // bin of such a pair.
    // - At most one full row of "bottom" overflow bins, which are the axis 0
    // bins associated with the axis 1 overflow bin.
    const int corrected_virtual_overflow_bin = -global_bin - 1;

    // ...so we need to retrieve and bring back the regular bin count, and this
    // is where the fun begins.
    //
    // The main difficulty is that the number of regular bins is not fixed as
    // one slides along a histogram axis. Using a 2D binning case as a simple
    // motivating example...
    //
    // We can suppress the contribution of the top and bottom row of
    // under/overflow bins like so:
    const int middle_overflow_bins =
    std::clamp(corrected_virtual_bin - axis_0.GetNBins(),
    0,
    2 * axis_1.GetNBinsNoOver());

    // Then, we can deduce the number of rows of regular bins that we must add
    // back into the index, taking into account the fact that we must also add
    // a row if there is only one underflow bin before us (i.e. we are the
    // overflow bin of a row), like so:
    const int middle_regular_bins =
    ((middle_overflow_bins + 1) / 2) * axis_0.GetNBinsNoOver();

    // With this regular bin count, we can recover the same kind of virtual bin
    // index that we had in ComputeGlobalBin()...
    const int global_virtual_bin = corrected_virtual_bin + middle_regular_bins;
    ASSERT((global_virtual_bin >= 0) || (global_virtual_bin < axis_0.GetNBins() * axis_1.GetNBins()),
    "Computed global virtual bin index is out of expected range");

    // ...then from that we can deduce "virtual" local bin indices on each axis
    const int virtual_bin_1 = (global_virtual_bin / axis_0.GetNBins());
    ASSERT((virtual_bin_1 >= 0) || (virtual_bin_1 < axis_1.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 1");
    const int virtual_bin_0 = (global_virtual_bin % axis_0.GetNBins());
    ASSERT((virtual_bin_0 >= 0) || (virtual_bin_0 < axis_0.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 0");

    // And from that point, we can go back to the -1/-2 overflow convention
    auto compute_bin = [](const IAxis& axis, int virtual_bin) -> int {
    if (virtual_bin == 0) {
    return -1;
    } else if (virtual_bin == (axis.GetNBins() - 1)) {
    return -2;
    // -1 -2 -3 -4 <- No regular bins on the first bin of axis 1
    // -5 1 2 -6 <- Some of them on middle bins of axis 1
    // -7 3 4 -8
    // -9 -10 -11 -12 <- No regular bins on the last bin of axis 1
    //
    // As we go to higher dimensions, the geometry becomes more complex, but
    // if we replace "line" with "plane", we get a similar picture in 3D as we
    // slide along axis 2:
    //
    // No regular bins on the Some of them on the No regular bins again
    // first plane of axis 2 middle planes of ax.2 on last plane of ax.2
    //
    // -1 -2 -3 -4 -17 -18 -19 -20 -29 -30 -31 -32
    // -5 -6 -7 -8 -21 1 2 -22 -33 -34 -35 -36
    // -9 -10 -11 -12 -23 3 4 -24 -37 -37 -39 -40
    // -13 -14 -15 -16 -25 -26 -27 -28 -41 -42 -43 -44
    //
    // We can generalize this to N dimensions by saying that as we slide along
    // the last axis of an N-d histogram, we see an hyperplane full of overflow
    // bins, then some hyperplanes with regular bins in the "middle" surrounded
    // by overflow bins, then a last hyperplane full of overflow bins.
    //
    // From this, we can devise a recursive algorithm to recover the number of
    // regular bins before the overflow bin we're currently looking at:
    //
    // - Start by processing the last histogram axis.
    // - Ignore the first and last hyperplane of overflow bins on this axis.
    // - Count how many complete hyperplanes of regular bins lie before us on
    // this axis, which we can do indirectly in our overflow bin based
    // reasoning by computing the perimeter of the regular region and dividing
    // our overflow bin count by that amount.
    // - Now we counted complete hyperplanes on the last histogram axis, but
    // we need to process the last incomplete hyperplane, if any.
    // * For this, we reduce our overflow bin count to a count of
    // _unaccounted_ overflow bins in the current hyperplane...
    // * ...which allows us to recursively continue the computation by
    // processing the next (well, previous) histogram axis in the context
    // of this hyperplane, as done above.
    //
    // Alright, now that the general plan is sorted out, let's compute some
    // quantities that we are going to need, namely the total number of bins per
    // hyperplane (overflow and regular) and the number of regular bins per
    // hyperplane on the hyperplanes that have them.
    //
    std::array<int, NDIMS-1> bins_per_hyperplane;
    std::array<int, NDIMS-1> regular_bins_per_hyperplane;
    int curr_bins_per_hyperplane = 1;
    int curr_regular_bins_per_hyperplane = 1;
    for (std::size_t dim = 0; dim < NDIMS-1; ++dim) {
    curr_regular_bins_per_hyperplane *= axes[dim]->GetNBinsNoOver();
    regular_bins_per_hyperplane[dim] = curr_regular_bins_per_hyperplane;
    curr_bins_per_hyperplane *= axes[dim]->GetNBins();
    bins_per_hyperplane[dim] = curr_bins_per_hyperplane;
    }

    // And now, starting from the last axis...
    int unprocessed_previous_overflow_bins = corrected_virtual_overflow_bin;
    int num_regular_bins_before = 0;
    for (std::size_t dim = NDIMS-1; dim > 0; --dim) {
    // From the total number of bins per hyperplane and the number of
    // regular bins per hyperplane that has them, we deduce the number of
    // overflow bins per hyperplane that has regular bins.
    const int overflow_bins_per_hyperplane =
    bins_per_hyperplane[dim-1] - regular_bins_per_hyperplane[dim-1];

    // From this and the count of overflow bins before us, we deduce how
    // many overflow bins before the current bin do not lie on this axis'
    // underflow and overflow hyperplanes.
    const int of_bins_wo_overflow_hyperplanes =
    std::clamp(
    unprocessed_previous_overflow_bins - bins_per_hyperplane[dim-1],
    0,
    overflow_bins_per_hyperplane * axes[dim]->GetNBinsNoOver()
    );

    // From this quantity, in turn, we can deduce how many _full_
    // hyperplanes worth of regular bins are located before the current bin.
    if (overflow_bins_per_hyperplane != 0) {
    // Let's assume that there _are_ overflow bins...
    const int num_regular_hyperplanes_before =
    of_bins_wo_overflow_hyperplanes / overflow_bins_per_hyperplane;
    num_regular_bins_before +=
    num_regular_hyperplanes_before * regular_bins_per_hyperplane[dim-1];

    // This only leaves the _current_ hyperplane as a possible source of
    // more regular bins that we haven't accounted for yet. We'll take those
    // into account while processing previous dimensions.
    unprocessed_previous_overflow_bins
    = of_bins_wo_overflow_hyperplanes % overflow_bins_per_hyperplane;
    } else {
    return virtual_bin;
    // If there are no overflow bins per hyperplane in the lower
    // hyperplane, then the previous computation should have found none
    ASSERT(of_bins_wo_overflow_hyperplanes == 0,
    "Logic error detected in the global -> local bin algorithm");
    }
    };
    return std::make_pair(compute_bin(axis_0, virtual_bin_0),
    compute_bin(axis_1, virtual_bin_1));
    }

    // By the time we reach the first axis, there should only be at most one
    // full row of regular bins before us:
    //
    // -1 1 2 3 -2
    // ^ ^
    // | |
    // | Option 2: one overflow bin before
    // |
    // Option 1: no overflow bin before
    //
    ASSERT(unprocessed_previous_overflow_bins <= 1,
    "Logic error detected in the global -> local bin algorithm");
    num_regular_bins_before +=
    unprocessed_previous_overflow_bins * axes[0]->GetNBinsNoOver();

    // Now that we know the number of regular bins before us, we can get add
    // this to the overflow bin index to get a global zero-based bin index
    // accounting for both under/overflow bins and regular bins, just like we
    // had in the ComputeGlobalBin() implementation...
    const int global_virtual_bin =
    corrected_virtual_overflow_bin + num_regular_bins_before;

    // ...then we can go back to per-axis zero-based "virtual" bin indices...
    const std::array<int, NDIMS> virtual_bins =
    ComputeLocalBinsRaw(global_virtual_bin, GetNBins(axes));

    // And finally, return to the -1/-2 overflow bin indexing convention.
    return ZipAxisMap<int>(virtual_bins, axes, VirtualBinToLocalBin);
    }


    // ---


    int main() {
    // Set up a pair of test axes
    EqAxisMock axis_0(3.0, 6.0, 3);
    EqAxisMock axis_1(7.0, 9.5, 5);
    // Set up some test axes
    //
    // FIXME: Doesn't work yet when one axis can grow, and thus does not have
    // overflow and underflow bins. Some code somewhere must assume that
    // those bins always exist...
    const EqAxisMock eq_axis_0(3.0, 6.0, 3, false);
    const EqAxisMock eq_axis_1(7.0, 9.5, 5, false);
    const EqAxisMock eq_axis_2(-2., -1., 2, false);

    // Should I print out GetBinFrom and GetBinTo values?
    const bool verbose = true;

    // For the purpose of testing the generality of the code, only access these
    // axes through a minimal virtual interface
    const IAxis& axis_0 = eq_axis_0;
    const IAxis& axis_1 = eq_axis_1;
    const IAxis& axis_2 = eq_axis_2;

    // Enumerate all the bins
    auto enumerate_bins = [](const IAxis& axis) -> std::vector<int> {
    const auto enumerate_bins = [](const IAxis& axis) -> std::vector<int> {
    std::vector<int> all_bins;
    all_bins.push_back(-1);
    if (!axis.CanGrow()) all_bins.push_back(-1);
    for (int bin = 1; bin <= axis.GetNBinsNoOver(); ++bin) {
    all_bins.push_back(bin);
    }
    all_bins.push_back(-2);
    if (!axis.CanGrow()) all_bins.push_back(-2);
    return all_bins;
    };
    std::vector<int> all_bins_0 = enumerate_bins(axis_0);
    std::vector<int> all_bins_1 = enumerate_bins(axis_1);
    const std::vector<int> all_bins_0 = enumerate_bins(axis_0);
    const std::vector<int> all_bins_1 = enumerate_bins(axis_1);
    const std::vector<int> all_bins_2 = enumerate_bins(axis_2);

    // We'll test for the 1D, 2D and 3D cases, with these common parts:
    int last_regular_bin;
    int last_overflow_bin;
    auto test_setup = [&] {
    last_regular_bin = 0;
    last_overflow_bin = 0;
    };
    const auto print_array = [](const auto& array) {
    std::cout << '(';
    for (std::size_t dim = 0; dim < array.size() - 1; ++dim) {
    std::cout << array[dim] << ", ";
    }
    std::cout << array.back() << ')';
    };
    auto test_iteration = [&](const auto& bins, const auto& axes) {
    std::cout << "On true bin ";
    print_array(bins);
    std::cout << ": ";

    int global_bin = ComputeGlobalBin(bins, axes);
    auto computed_bins = ComputeLocalBins(global_bin, axes);
    std::cout << "Global bin " << global_bin << " aka ";
    print_array(computed_bins);

    // Once you have those, you can do everything including global
    // FindBin (= local FindBin + local-to-global idx conversion) or
    // global bin boundary computations (= global-to-local idx
    // conversion + local bin boundary computations)
    if (verbose) {
    std::cout << " goes from ";
    print_array(GetBinFrom(bins, axes));
    std::cout << " to ";
    print_array(GetBinTo(bins, axes));
    }
    std::cout << std::endl;

    // Check correctness of the results
    if (std::all_of(bins.cbegin(), bins.cend(),
    [](int bin) { return bin >= 1; }))
    {
    ASSERT(global_bin == last_regular_bin + 1,
    "Computed global regular bin index is incorrect");
    last_regular_bin = global_bin;
    } else {
    ASSERT(global_bin == last_overflow_bin - 1,
    "Computed global overflow bin index is incorrect");
    last_overflow_bin = global_bin;
    }
    ASSERT(computed_bins == bins,
    "Computed local bin indices are incorrect");
    };

    // Display the bins boundaries
    // Test the 1D case
    std::cout << "1D case:" << std::endl;
    test_setup();
    for (const int bin_0: all_bins_0) {
    test_iteration(std::array{bin_0}, std::array{&axis_0});
    }

    // Test the 2D case
    std::cout << std::endl << "2D case:" << std::endl << "---" << std::endl;
    test_setup();
    for (const int bin_1: all_bins_1) {
    for (const int bin_0: all_bins_0) {
    std::cout << "On true bin (" << bin_0 << ", " << bin_1 << "):\t";
    int global_bin = ComputeGlobalBin(axis_0, bin_0, axis_1, bin_1);
    auto [computed_bin_0, computed_bin_1] =
    ComputeLocalBins(global_bin, axis_0, axis_1);
    std::cout << "Global bin " << global_bin
    << " aka (" << computed_bin_0 << ", "
    << computed_bin_1 << ')'
    << " goes from (" << axis_0.GetBinFrom(bin_0) << ", "
    << axis_1.GetBinFrom(bin_1) << ')'
    << " to (" << axis_0.GetBinTo(bin_0) << ", "
    << axis_1.GetBinTo(bin_1) << ')'
    << std::endl;
    test_iteration(std::array{bin_0, bin_1},
    std::array{&axis_0, &axis_1});
    }
    std::cout << "---" << std::endl;
    }

    // Test the 3D case
    std::cout << std::endl << "3D case:" << std::endl << "===" << std::endl;
    test_setup();
    for (const int bin_2: all_bins_2) {
    std::cout << "---" << std::endl;
    for (const int bin_1: all_bins_1) {
    for (const int bin_0: all_bins_0) {
    test_iteration(std::array{bin_0, bin_1, bin_2},
    std::array{&axis_0, &axis_1, &axis_2});
    }
    std::cout << "---" << std::endl;
    }
    std::cout << "===" << std::endl;
    }
    }
  7. HadrienG2 renamed this gist Mar 27, 2020. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  8. HadrienG2 created this gist Mar 27, 2020.
    256 changes: 256 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,256 @@
    #include <algorithm>
    #include <exception>
    #include <iostream>
    #include <utility>
    #include <vector>


    #define ASSERT(x, msg) if (!(x)) throw std::runtime_error(msg)


    struct IAxis {
    virtual int GetNBinsNoOver() const = 0;
    virtual int GetNOverflowBins() const = 0;
    int GetNBins() const {
    return GetNBinsNoOver() + GetNOverflowBins();
    }
    virtual int FindBin(double x) const = 0;
    virtual double GetBinFrom(int bin) const = 0;
    virtual double GetBinTo(int bin) const = 0;
    };


    class EqAxisMock : public IAxis
    {
    public:
    EqAxisMock(double from, double to, int nBinsNoOver) :
    m_from(from), m_to(to), m_nBinsNoOver(nBinsNoOver)
    {}

    int GetNBinsNoOver() const final override {
    return m_nBinsNoOver;
    }

    int GetNOverflowBins() const final override {
    return 2;
    }

    int FindBin(double x) const final override {
    if (x < m_from) {
    return -1;
    } else if (x >= m_to) {
    return -2;
    } else {
    return (x - m_from) / GetBinWidth() + 1;
    }
    }

    double GetBinFrom(int bin) const final override {
    switch (bin) {
    case -1:
    return -std::numeric_limits<double>::infinity();
    case -2:
    return m_to;
    default:
    ASSERT((bin >= 1) || (bin <= GetNBinsNoOver()),
    "GetBinFrom got an out-of-bounds bin index");
    return m_from + (bin - 1) * GetBinWidth();
    }
    }

    double GetBinTo(int bin) const final override {
    switch (bin) {
    case -1:
    return m_from;
    case -2:
    return std::numeric_limits<double>::infinity();
    default:
    ASSERT((bin >= 1) || (bin <= GetNBinsNoOver()),
    "GetBinTo got an out-of-bounds bin index");
    return m_from + bin * GetBinWidth();
    }
    }

    private:
    double m_from, m_to;
    int m_nBinsNoOver;

    // NOTE: Specific to equidistant axis, so general algorithm shouldn't use it
    double GetBinWidth() const {
    return (m_to - m_from) / m_nBinsNoOver;
    }
    };


    // TODO: Generalize the algorithm to N-d: make it take a vector of (IAxis, int)
    // pairs and implement it using a loop.
    int ComputeGlobalBin(const IAxis& axis_0, int bin_0,
    const IAxis& axis_1, int bin_1) {
    // Get regular bins out of the way
    if ((bin_0 >= 1) && (bin_1 >= 1)) {
    return bin_0 + (bin_1 - 1) * axis_0.GetNBinsNoOver();
    }

    // Convert bin indices to another coordinate system where the underflow bin
    // has coordinate 0, regular bins have coordinates [1, N], and the overflow
    // bin has coordinate N+1, where N is GetNBinsNoOver().
    auto compute_virtual_bin = [](const IAxis& axis, int bin) {
    switch (bin) {
    case -1:
    return 0;
    case -2:
    return axis.GetNBins() - 1;
    default:
    ASSERT((bin >= 1) || (bin <= axis.GetNBinsNoOver()),
    "Received an invalid local bin index as input");
    return bin;
    }
    };
    const int virtual_bin_0 = compute_virtual_bin(axis_0, bin_0);
    ASSERT((virtual_bin_0 >= 0) || (virtual_bin_0 < axis_0.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 0");
    const int virtual_bin_1 = compute_virtual_bin(axis_1, bin_1);
    ASSERT((virtual_bin_1 >= 0) || (virtual_bin_1 < axis_1.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 1");

    // Deduce what the global bin index would be in this coordinate system
    const int global_virtual_bin = virtual_bin_0 + virtual_bin_1 * axis_0.GetNBins();
    ASSERT((global_virtual_bin >= 0) || (global_virtual_bin < axis_0.GetNBins() * axis_1.GetNBins()),
    "Computed global virtual bin index is out of expected range");

    // Move to negative and 1-based indexing
    const int neg_1based_virtual_bin = -global_virtual_bin - 1;

    // Now we have the right index for the first overflow bins, but the indices
    // are wrong after crossing "regular" bins, because the count of overflow
    // bins takes these into account when it shouldn't.
    //
    // To fix this, we need to know how many regular bins exist before the
    // current bin (in row-major order), and increment the virtual bin index by
    // this amount. This will un-do the counting of regular bins as overflow.
    //
    int num_regular_bins_before = 0;

    // Rows of regular bins can be taken into account with just a clamp trick
    const int num_rows_before = std::clamp(virtual_bin_1 - 1, 0, axis_1.GetNBinsNoOver());
    num_regular_bins_before += num_rows_before * axis_0.GetNBinsNoOver();

    // For columns, however, we need to know whether we are on a regular row
    // or an under/overflow row.
    if (bin_1 >= 1) {
    const int num_cols_before = std::clamp(virtual_bin_0 - 1, 0, axis_1.GetNBinsNoOver());
    num_regular_bins_before += num_cols_before;
    }

    // And with this correction, we should be good
    return neg_1based_virtual_bin + num_regular_bins_before;
    }


    // TODO: Generalize the algorithm to N-d: make it take a vector of IAxis as
    // input, return a vector of Int as output, and implement it using a loop.
    std::pair<int, int> ComputeLocalBins(int global_bin, const IAxis& axis_0, const IAxis& axis_1) {
    // Get regular bins out of the way
    if (global_bin >= 1) {
    const int bin_0 = ((global_bin - 1) % axis_0.GetNBinsNoOver()) + 1;
    const int bin_1 =
    (((global_bin - 1) - (bin_0 - 1)) / axis_0.GetNBinsNoOver()) + 1;
    return std::make_pair(bin_0, bin_1);
    }

    // Convert our negative index to something positive and 0-based, as that is
    // more convenient to work with. Note, however, that this is _not_
    // equivalent to the virtual_bin that we had before, because what we have
    // here is a count of overflow bins, not of all bins.
    ASSERT(global_bin != 0, "Received an invalid global bin index as input");
    const int corrected_virtual_bin = -global_bin - 1;

    // This overflow bin count, accounts for...
    // - At most one full row of "top" underflow bins, which are the axis 0 bins
    // associated with the axis 1 underflow bin.
    // - Any number of axis 0 (underflow, overflow) bin pairs, with regular
    // bins in the middle.
    // - Possibly one trailing axis 0 underflow bin, if we are on the overflow
    // bin of such a pair.
    // - At most one full row of "bottom" overflow bins, which are the axis 0
    // bins associated with the axis 1 overflow bin.
    //
    // We can suppress the contribution of the top and bottom row of
    // under/overflow bins like so:
    const int middle_overflow_bins =
    std::clamp(corrected_virtual_bin - axis_0.GetNBins(),
    0,
    2 * axis_1.GetNBinsNoOver());

    // Then, we can deduce the number of rows of regular bins that we must add
    // back into the index, taking into account the fact that we must also add
    // a row if there is only one underflow bin before us (i.e. we are the
    // overflow bin of a row), like so:
    const int middle_regular_bins =
    ((middle_overflow_bins + 1) / 2) * axis_0.GetNBinsNoOver();

    // With this regular bin count, we can recover the same kind of virtual bin
    // index that we had in ComputeGlobalBin()...
    const int global_virtual_bin = corrected_virtual_bin + middle_regular_bins;
    ASSERT((global_virtual_bin >= 0) || (global_virtual_bin < axis_0.GetNBins() * axis_1.GetNBins()),
    "Computed global virtual bin index is out of expected range");

    // ...then from that we can deduce "virtual" local bin indices on each axis
    const int virtual_bin_1 = (global_virtual_bin / axis_0.GetNBins());
    ASSERT((virtual_bin_1 >= 0) || (virtual_bin_1 < axis_1.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 1");
    const int virtual_bin_0 = (global_virtual_bin % axis_0.GetNBins());
    ASSERT((virtual_bin_0 >= 0) || (virtual_bin_0 < axis_0.GetNBins()),
    "Computed local virtual bin index is out of expected range for axis 0");

    // And from that point, we can go back to the -1/-2 overflow convention
    auto compute_bin = [](const IAxis& axis, int virtual_bin) -> int {
    if (virtual_bin == 0) {
    return -1;
    } else if (virtual_bin == (axis.GetNBins() - 1)) {
    return -2;
    } else {
    return virtual_bin;
    }
    };
    return std::make_pair(compute_bin(axis_0, virtual_bin_0),
    compute_bin(axis_1, virtual_bin_1));
    }


    int main() {
    // Set up a pair of test axes
    EqAxisMock axis_0(3.0, 6.0, 3);
    EqAxisMock axis_1(7.0, 9.5, 5);

    // Enumerate all the bins
    auto enumerate_bins = [](const IAxis& axis) -> std::vector<int> {
    std::vector<int> all_bins;
    all_bins.push_back(-1);
    for (int bin = 1; bin <= axis.GetNBinsNoOver(); ++bin) {
    all_bins.push_back(bin);
    }
    all_bins.push_back(-2);
    return all_bins;
    };
    std::vector<int> all_bins_0 = enumerate_bins(axis_0);
    std::vector<int> all_bins_1 = enumerate_bins(axis_1);

    // Display the bins boundaries
    for (const int bin_1: all_bins_1) {
    for (const int bin_0: all_bins_0) {
    std::cout << "On true bin (" << bin_0 << ", " << bin_1 << "):\t";
    int global_bin = ComputeGlobalBin(axis_0, bin_0, axis_1, bin_1);
    auto [computed_bin_0, computed_bin_1] =
    ComputeLocalBins(global_bin, axis_0, axis_1);
    std::cout << "Global bin " << global_bin
    << " aka (" << computed_bin_0 << ", "
    << computed_bin_1 << ')'
    << " goes from (" << axis_0.GetBinFrom(bin_0) << ", "
    << axis_1.GetBinFrom(bin_1) << ')'
    << " to (" << axis_0.GetBinTo(bin_0) << ", "
    << axis_1.GetBinTo(bin_1) << ')'
    << std::endl;
    }
    }
    }