/*
 * Decompiled with CFR 0.152.
 */
package boofcv.alg.misc.impl;

import boofcv.alg.misc.PixelMathLambdas;
import boofcv.concurrency.BoofConcurrency;
import boofcv.struct.image.GrayF32;
import boofcv.struct.image.GrayF64;
import boofcv.struct.image.GrayI16;
import boofcv.struct.image.GrayS16;
import boofcv.struct.image.GrayS32;
import boofcv.struct.image.GrayS64;
import boofcv.struct.image.GrayS8;
import boofcv.struct.image.GrayU16;
import boofcv.struct.image.GrayU8;

public class ImplPixelMath_MT {
    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of a
     * {@code byte} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(byte[] input, int inputStart, int inputStride, byte[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_I8 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of a
     * {@code short} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(short[] input, int inputStart, int inputStride, short[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_I16 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of an
     * {@code int} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(int[] input, int inputStart, int inputStride, int[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_S32 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of a
     * {@code long} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(long[] input, int inputStart, int inputStride, long[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_S64 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of a
     * {@code float} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(float[] input, int inputStart, int inputStride, float[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_F32 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Applies {@code function} to each element of a {@code rows} x {@code cols} region of a
     * {@code double} array and stores the returned value at the matching output position.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}; {@code cols} consecutive elements
     * are processed. The range {@code 0, rows} is passed to {@code BoofConcurrency.loopFor}, which
     * invokes the per-row body (presumably concurrently and with an exclusive upper bound --
     * confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element with the source value
     */
    public static void lambda1(double[] input, int inputStart, int inputStride, double[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function1_F64 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = function.process(input[src]);
            }
        });
    }

    /**
     * Combines two {@code byte} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(byte[] inputA, int inputStartA, int inputStrideA, byte[] inputB, int inputStartB, int inputStrideB, byte[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_I8 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Combines two {@code short} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(short[] inputA, int inputStartA, int inputStrideA, short[] inputB, int inputStartB, int inputStrideB, short[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_I16 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Combines two {@code int} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(int[] inputA, int inputStartA, int inputStrideA, int[] inputB, int inputStartB, int inputStrideB, int[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_S32 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Combines two {@code long} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(long[] inputA, int inputStartA, int inputStrideA, long[] inputB, int inputStartB, int inputStrideB, long[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_S64 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Combines two {@code float} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(float[] inputA, int inputStartA, int inputStrideA, float[] inputB, int inputStartB, int inputStrideB, float[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_F32 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Combines two {@code double} arrays element by element: for every position in a
     * {@code rows} x {@code cols} region, {@code function} receives the value from {@code inputA}
     * and the value from {@code inputB}, and its result is written to {@code output}.
     *
     * <p>For row {@code y} the indices start at {@code inputStartA + y * inputStrideA},
     * {@code inputStartB + y * inputStrideB} and {@code outputStart + y * outputStride}; all three
     * advance by one per element for {@code cols} elements. The range {@code 0, rows} is passed to
     * {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably concurrently and
     * with an exclusive upper bound -- confirm against that helper).
     *
     * @param inputA first source array
     * @param inputStartA index in {@code inputA} of the first element of row 0
     * @param inputStrideA index step between consecutive rows of {@code inputA}
     * @param inputB second source array
     * @param inputStartB index in {@code inputB} of the first element of row 0
     * @param inputStrideB index step between consecutive rows of {@code inputB}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     * @param function operation invoked once per element pair, A value first
     */
    public static void lambda2(double[] inputA, int inputStartA, int inputStrideA, double[] inputB, int inputStartB, int inputStrideB, double[] output, int outputStart, int outputStride, int rows, int cols, PixelMathLambdas.Function2_F64 function) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int beginA = inputStartA + row * inputStrideA;
            final int endA = beginA + cols;
            int b = inputStartB + row * inputStrideB;
            int dst = outputStart + row * outputStride;
            for (int a = beginA; a < endA; a++, b++, dst++) {
                output[dst] = function.process(inputA[a], inputB[b]);
            }
        });
    }

    /**
     * Writes the absolute value of each signed {@code byte} in a {@code rows} x {@code cols}
     * region of {@code input} to the matching position in {@code output}.
     *
     * <p>{@code Math.abs} is evaluated on the value promoted to {@code int} and the result is
     * narrowed back to {@code byte}, so {@code Byte.MIN_VALUE} (whose magnitude 128 does not fit)
     * is stored unchanged.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(byte[] input, int inputStart, int inputStride, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (byte)Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes the absolute value of each signed {@code short} in a {@code rows} x {@code cols}
     * region of {@code input} to the matching position in {@code output}.
     *
     * <p>{@code Math.abs} is evaluated on the value promoted to {@code int} and the result is
     * narrowed back to {@code short}, so {@code Short.MIN_VALUE} (whose magnitude 32768 does not
     * fit) is stored unchanged.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(short[] input, int inputStart, int inputStride, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (short)Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes {@code Math.abs} of each {@code int} in a {@code rows} x {@code cols} region of
     * {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(int[] input, int inputStart, int inputStride, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes {@code Math.abs} of each {@code long} in a {@code rows} x {@code cols} region of
     * {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(long[] input, int inputStart, int inputStride, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes {@code Math.abs} of each {@code float} in a {@code rows} x {@code cols} region of
     * {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(float[] input, int inputStart, int inputStride, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes {@code Math.abs} of each {@code double} in a {@code rows} x {@code cols} region of
     * {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void abs(double[] input, int inputStart, int inputStride, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = Math.abs(input[src]);
            }
        });
    }

    /**
     * Writes the arithmetic negation of each signed {@code byte} in a {@code rows} x {@code cols}
     * region of {@code input} to the matching position in {@code output}.
     *
     * <p>Unary minus promotes the operand to {@code int}, so the result is explicitly narrowed
     * back to {@code byte}; without that cast the assignment is rejected by the compiler. As a
     * consequence of the narrowing, {@code Byte.MIN_VALUE} negates to itself.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(byte[] input, int inputStart, int inputStride, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // -byte is an int expression; narrow explicitly so the store compiles
                output[indexDst] = (byte)(-input[indexSrc]);
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Writes the arithmetic negation of each signed {@code short} in a {@code rows} x {@code cols}
     * region of {@code input} to the matching position in {@code output}.
     *
     * <p>Unary minus promotes the operand to {@code int}, so the result is explicitly narrowed
     * back to {@code short}; without that cast the assignment is rejected by the compiler. As a
     * consequence of the narrowing, {@code Short.MIN_VALUE} negates to itself.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(short[] input, int inputStart, int inputStride, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // -short is an int expression; narrow explicitly so the store compiles
                output[indexDst] = (short)(-input[indexSrc]);
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Writes the arithmetic negation of each {@code int} in a {@code rows} x {@code cols} region
     * of {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(int[] input, int inputStart, int inputStride, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = -input[src];
            }
        });
    }

    /**
     * Writes the arithmetic negation of each {@code long} in a {@code rows} x {@code cols} region
     * of {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(long[] input, int inputStart, int inputStride, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = -input[src];
            }
        });
    }

    /**
     * Writes the arithmetic negation of each {@code float} in a {@code rows} x {@code cols} region
     * of {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(float[] input, int inputStart, int inputStride, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = -input[src];
            }
        });
    }

    /**
     * Writes the arithmetic negation of each {@code double} in a {@code rows} x {@code cols}
     * region of {@code input} to the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void negative(double[] input, int inputStart, int inputStride, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = -input[src];
            }
        });
    }

    /**
     * Multiplies each {@code byte} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFF}, by {@code value}, rounds with {@code Math.round} and stores the
     * result narrowed to {@code byte}.
     *
     * <p>No range check is applied: the narrowing cast keeps only the low 8 bits of the rounded
     * product.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(byte[] input, int inputStart, int inputStride, double value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)(input[src] & 0xFF) * value;
                output[dst] = (byte)Math.round(product);
            }
        });
    }

    /**
     * Multiplies each signed {@code byte} of a {@code rows} x {@code cols} region by
     * {@code value}, rounds with {@code Math.round} and stores the result narrowed to
     * {@code byte}.
     *
     * <p>No range check is applied: the narrowing cast keeps only the low 8 bits of the rounded
     * product.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(byte[] input, int inputStart, int inputStride, double value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)input[src] * value;
                output[dst] = (byte)Math.round(product);
            }
        });
    }

    /**
     * Multiplies each {@code short} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFFFF}, by {@code value}, rounds with {@code Math.round} and stores the
     * result narrowed to {@code short}.
     *
     * <p>No range check is applied: the narrowing cast keeps only the low 16 bits of the rounded
     * product.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(short[] input, int inputStart, int inputStride, double value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)(input[src] & 0xFFFF) * value;
                output[dst] = (short)Math.round(product);
            }
        });
    }

    /**
     * Multiplies each signed {@code short} of a {@code rows} x {@code cols} region by
     * {@code value}, rounds with {@code Math.round} and stores the result narrowed to
     * {@code short}.
     *
     * <p>No range check is applied: the narrowing cast keeps only the low 16 bits of the rounded
     * product.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(short[] input, int inputStart, int inputStride, double value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)input[src] * value;
                output[dst] = (short)Math.round(product);
            }
        });
    }

    /**
     * Multiplies each {@code int} of a {@code rows} x {@code cols} region by {@code value},
     * rounds with {@code Math.round} and stores the result narrowed to {@code int}.
     *
     * <p>No range check is applied: the narrowing cast keeps only the low 32 bits of the rounded
     * product.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(int[] input, int inputStart, int inputStride, double value, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)input[src] * value;
                output[dst] = (int)Math.round(product);
            }
        });
    }

    /**
     * Multiplies each {@code long} of a {@code rows} x {@code cols} region by {@code value} in
     * {@code double} arithmetic and stores the {@code Math.round} result.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(long[] input, int inputStart, int inputStride, double value, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = (double)input[src] * value;
                output[dst] = Math.round(product);
            }
        });
    }

    /**
     * Multiplies each {@code float} of a {@code rows} x {@code cols} region by {@code value} and
     * stores the product at the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(float[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = input[src] * value;
            }
        });
    }

    /**
     * Multiplies each {@code double} of a {@code rows} x {@code cols} region by {@code value} and
     * stores the product at the matching position in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(double[] input, int inputStart, int inputStride, double value, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = input[src] * value;
            }
        });
    }

    /**
     * Multiplies each {@code byte} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFF}, by {@code value}, rounds with {@code Math.round}, limits the
     * result to {@code [lower, upper]} and stores it narrowed to {@code byte}.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round}, before any
     * narrowing. Narrowing to {@code int} first would let a product outside the {@code int} range
     * wrap around and end up at the wrong bound. The lower bound is applied first and the upper
     * bound second, so if {@code lower > upper} the stored value is {@code upper}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(byte[] input, int inputStart, int inputStride, double value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // keep the full long result so an out-of-int-range product cannot wrap before clamping
                long rounded = Math.round((double)(input[indexSrc] & 0xFF) * value);
                long clamped = Math.min(Math.max(rounded, lower), upper);
                output[indexDst] = (byte)clamped;
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Multiplies each signed {@code byte} of a {@code rows} x {@code cols} region by
     * {@code value}, rounds with {@code Math.round}, limits the result to {@code [lower, upper]}
     * and stores it narrowed to {@code byte}.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round}, before any
     * narrowing. Narrowing to {@code int} first would let a product outside the {@code int} range
     * wrap around and end up at the wrong bound. The lower bound is applied first and the upper
     * bound second, so if {@code lower > upper} the stored value is {@code upper}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(byte[] input, int inputStart, int inputStride, double value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // keep the full long result so an out-of-int-range product cannot wrap before clamping
                long rounded = Math.round((double)input[indexSrc] * value);
                long clamped = Math.min(Math.max(rounded, lower), upper);
                output[indexDst] = (byte)clamped;
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Multiplies each {@code short} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFFFF}, by {@code value}, rounds with {@code Math.round}, limits the
     * result to {@code [lower, upper]} and stores it narrowed to {@code short}.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round}, before any
     * narrowing. Narrowing to {@code int} first would let a product outside the {@code int} range
     * wrap around and end up at the wrong bound. The lower bound is applied first and the upper
     * bound second, so if {@code lower > upper} the stored value is {@code upper}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(short[] input, int inputStart, int inputStride, double value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // keep the full long result so an out-of-int-range product cannot wrap before clamping
                long rounded = Math.round((double)(input[indexSrc] & 0xFFFF) * value);
                long clamped = Math.min(Math.max(rounded, lower), upper);
                output[indexDst] = (short)clamped;
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Multiplies each signed {@code short} of a {@code rows} x {@code cols} region by
     * {@code value}, rounds with {@code Math.round}, limits the result to {@code [lower, upper]}
     * and stores it narrowed to {@code short}.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round}, before any
     * narrowing. Narrowing to {@code int} first would let a product outside the {@code int} range
     * wrap around and end up at the wrong bound. The lower bound is applied first and the upper
     * bound second, so if {@code lower > upper} the stored value is {@code upper}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(short[] input, int inputStart, int inputStride, double value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // keep the full long result so an out-of-int-range product cannot wrap before clamping
                long rounded = Math.round((double)input[indexSrc] * value);
                long clamped = Math.min(Math.max(rounded, lower), upper);
                output[indexDst] = (short)clamped;
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Multiplies each {@code int} of a {@code rows} x {@code cols} region by {@code value},
     * rounds with {@code Math.round}, limits the result to {@code [lower, upper]} and stores it.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round}, before it is
     * narrowed to {@code int}. Narrowing first would let a product outside the {@code int} range
     * wrap around and end up at the wrong bound. The lower bound is applied first and the upper
     * bound second, so if {@code lower > upper} the stored value is {@code upper}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(int[] input, int inputStart, int inputStride, double value, int lower, int upper, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, y -> {
            int indexSrc = inputStart + y * inputStride;
            int indexDst = outputStart + y * outputStride;
            int end = indexSrc + cols;
            while (indexSrc < end) {
                // keep the full long result so an out-of-int-range product cannot wrap before clamping
                long rounded = Math.round((double)input[indexSrc] * value);
                long clamped = Math.min(Math.max(rounded, lower), upper);
                // clamped now lies between two int values, so the narrowing is lossless
                output[indexDst] = (int)clamped;
                ++indexSrc;
                ++indexDst;
            }
        });
    }

    /**
     * Multiplies each {@code long} of a {@code rows} x {@code cols} region by {@code value} in
     * {@code double} arithmetic, rounds with {@code Math.round}, limits the result to
     * {@code [lower, upper]} and stores it.
     *
     * <p>The lower bound is applied first and the upper bound second.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(long[] input, int inputStart, int inputStride, double value, long lower, long upper, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final long rounded = Math.round((double)input[src] * value);
                final long raised = rounded < lower ? lower : rounded;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Multiplies each {@code float} of a {@code rows} x {@code cols} region by {@code value},
     * limits the product to {@code [lower, upper]} and stores it.
     *
     * <p>The limits are applied with plain {@code <} and {@code >} comparisons, lower bound first;
     * a NaN product fails both comparisons and is therefore stored as NaN.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(float[] input, int inputStart, int inputStride, float value, float lower, float upper, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final float product = input[src] * value;
                final float raised = product < lower ? lower : product;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Multiplies each {@code double} of a {@code rows} x {@code cols} region by {@code value},
     * limits the product to {@code [lower, upper]} and stores it.
     *
     * <p>The limits are applied with plain {@code <} and {@code >} comparisons, lower bound first;
     * a NaN product fails both comparisons and is therefore stored as NaN.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param lower smallest value that may be stored
     * @param upper largest value that may be stored
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(double[] input, int inputStart, int inputStride, double value, double lower, double upper, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double product = input[src] * value;
                final double raised = product < lower ? lower : product;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Multiplies each {@code byte} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFF}, by {@code value} and stores the {@code float} product in
     * {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)(input[src] & 0xFF) * value;
            }
        });
    }

    /**
     * Multiplies each signed {@code byte} of a {@code rows} x {@code cols} region by
     * {@code value} and stores the {@code float} product in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)input[src] * value;
            }
        });
    }

    /**
     * Multiplies each {@code short} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFFFF}, by {@code value} and stores the {@code float} product in
     * {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiplyU_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)(input[src] & 0xFFFF) * value;
            }
        });
    }

    /**
     * Multiplies each signed {@code short} of a {@code rows} x {@code cols} region by
     * {@code value} and stores the {@code float} product in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)input[src] * value;
            }
        });
    }

    /**
     * Converts each {@code int} of a {@code rows} x {@code cols} region to {@code float},
     * multiplies it by {@code value} and stores the product in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(int[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)input[src] * value;
            }
        });
    }

    /**
     * Converts each {@code long} of a {@code rows} x {@code cols} region to {@code float},
     * multiplies it by {@code value} and stores the product in {@code output}.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param value multiplier applied to every element
     * @param output destination array of {@code float} products
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void multiply_A(long[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                output[dst] = (float)input[src] * value;
            }
        });
    }

    /**
     * Divides each {@code byte} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFF}, by {@code denominator} in {@code double} arithmetic, rounds with
     * {@code Math.round} and stores the result narrowed to {@code byte}.
     *
     * <p>Neither the denominator nor the range of the quotient is checked here; the narrowing
     * cast keeps only the low 8 bits of the rounded value.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void divideU_A(byte[] input, int inputStart, int inputStride, double denominator, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double quotient = (double)(input[src] & 0xFF) / denominator;
                output[dst] = (byte)Math.round(quotient);
            }
        });
    }

    /**
     * Divides each signed {@code byte} of a {@code rows} x {@code cols} region by
     * {@code denominator} in {@code double} arithmetic, rounds with {@code Math.round} and stores
     * the result narrowed to {@code byte}.
     *
     * <p>Neither the denominator nor the range of the quotient is checked here; the narrowing
     * cast keeps only the low 8 bits of the rounded value.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void divide_A(byte[] input, int inputStart, int inputStride, double denominator, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double quotient = (double)input[src] / denominator;
                output[dst] = (byte)Math.round(quotient);
            }
        });
    }

    /**
     * Divides each {@code short} of a {@code rows} x {@code cols} region, read as an unsigned
     * value via {@code & 0xFFFF}, by {@code denominator} in {@code double} arithmetic, rounds
     * with {@code Math.round} and stores the result narrowed to {@code short}.
     *
     * <p>Neither the denominator nor the range of the quotient is checked here; the narrowing
     * cast keeps only the low 16 bits of the rounded value.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void divideU_A(short[] input, int inputStart, int inputStride, double denominator, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double quotient = (double)(input[src] & 0xFFFF) / denominator;
                output[dst] = (short)Math.round(quotient);
            }
        });
    }

    /**
     * Divides each signed {@code short} of a {@code rows} x {@code cols} region by
     * {@code denominator} in {@code double} arithmetic, rounds with {@code Math.round} and stores
     * the result narrowed to {@code short}.
     *
     * <p>Neither the denominator nor the range of the quotient is checked here; the narrowing
     * cast keeps only the low 16 bits of the rounded value.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void divide_A(short[] input, int inputStart, int inputStride, double denominator, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double quotient = (double)input[src] / denominator;
                output[dst] = (short)Math.round(quotient);
            }
        });
    }

    /**
     * Divides each {@code int} of a {@code rows} x {@code cols} region by {@code denominator} in
     * {@code double} arithmetic, rounds with {@code Math.round} and stores the result narrowed to
     * {@code int}.
     *
     * <p>Neither the denominator nor the range of the quotient is checked here; the narrowing
     * cast keeps only the low 32 bits of the rounded value.
     *
     * <p>For row {@code y} the source index starts at {@code inputStart + y * inputStride} and the
     * destination index at {@code outputStart + y * outputStride}. The range {@code 0, rows} is
     * passed to {@code BoofConcurrency.loopFor}, which invokes the per-row body (presumably
     * concurrently and with an exclusive upper bound -- confirm against that helper).
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of row 0
     * @param inputStride index step between consecutive rows of {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of row 0
     * @param outputStride index step between consecutive rows of {@code output}
     * @param rows number of rows, passed as the upper bound to {@code BoofConcurrency.loopFor}
     * @param cols number of consecutive elements processed in each row
     */
    public static void divide_A(int[] input, int inputStart, int inputStride, double denominator, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            final int srcBegin = inputStart + row * inputStride;
            final int srcEnd = srcBegin + cols;
            int dst = outputStart + row * outputStride;
            for (int src = srcBegin; src < srcEnd; src++, dst++) {
                final double quotient = (double)input[src] / denominator;
                output[dst] = (int)Math.round(quotient);
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} (the element is converted to {@code double} first) and stores the
     * quotient, rounded with {@link Math#round(double)}, in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(long[] input, int inputStart, int inputStride, double denominator, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double quotient = (double)input[src] / denominator;
                output[dst] = Math.round(quotient);
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic and stores the quotient in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(float[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float quotient = input[src] / denominator;
                output[dst] = quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} and stores the quotient in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(double[] input, int inputStart, int inputStride, double denominator, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double quotient = input[src] / denominator;
                output[dst] = quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, rounds the quotient with {@link Math#round(double)}, limits it to
     * {@code [lower, upper]} and stores it, narrowed to {@code byte}, in {@code output}.
     * Input bytes are masked with {@code 0xFF}, i.e. read as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round} before any
     * narrowing, so a quotient outside the {@code int} range (tiny or zero denominator) is
     * limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divideU_A(byte[] input, int inputStart, int inputStride, double denominator, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Keep the rounded value as long until it has been limited: Math.round saturates
                // at Long.MIN_VALUE/MAX_VALUE, and casting that to int first would wrap
                // (e.g. +Infinity -> Long.MAX_VALUE -> -1) and pick the wrong bound.
                long quotient = Math.round((input[src] & 0xFF) / denominator);
                if (quotient < lower) {
                    quotient = lower;
                }
                if (quotient > upper) {
                    quotient = upper;
                }
                output[dst] = (byte)quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, rounds the quotient with {@link Math#round(double)}, limits it to
     * {@code [lower, upper]} and stores it, narrowed to {@code byte}, in {@code output}.
     * Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round} before any
     * narrowing, so a quotient outside the {@code int} range (tiny or zero denominator) is
     * limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(byte[] input, int inputStart, int inputStride, double denominator, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Keep the rounded value as long until it has been limited: Math.round saturates
                // at Long.MIN_VALUE/MAX_VALUE, and casting that to int first would wrap
                // (e.g. +Infinity -> Long.MAX_VALUE -> -1) and pick the wrong bound.
                long quotient = Math.round(input[src] / denominator);
                if (quotient < lower) {
                    quotient = lower;
                }
                if (quotient > upper) {
                    quotient = upper;
                }
                output[dst] = (byte)quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, rounds the quotient with {@link Math#round(double)}, limits it to
     * {@code [lower, upper]} and stores it, narrowed to {@code short}, in {@code output}.
     * Input shorts are masked with {@code 0xFFFF}, i.e. read as unsigned 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round} before any
     * narrowing, so a quotient outside the {@code int} range (tiny or zero denominator) is
     * limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divideU_A(short[] input, int inputStart, int inputStride, double denominator, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Keep the rounded value as long until it has been limited: Math.round saturates
                // at Long.MIN_VALUE/MAX_VALUE, and casting that to int first would wrap
                // (e.g. +Infinity -> Long.MAX_VALUE -> -1) and pick the wrong bound.
                long quotient = Math.round((input[src] & 0xFFFF) / denominator);
                if (quotient < lower) {
                    quotient = lower;
                }
                if (quotient > upper) {
                    quotient = upper;
                }
                output[dst] = (short)quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, rounds the quotient with {@link Math#round(double)}, limits it to
     * {@code [lower, upper]} and stores it, narrowed to {@code short}, in {@code output}.
     * Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round} before any
     * narrowing, so a quotient outside the {@code int} range (tiny or zero denominator) is
     * limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(short[] input, int inputStart, int inputStride, double denominator, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Keep the rounded value as long until it has been limited: Math.round saturates
                // at Long.MIN_VALUE/MAX_VALUE, and casting that to int first would wrap
                // (e.g. +Infinity -> Long.MAX_VALUE -> -1) and pick the wrong bound.
                long quotient = Math.round(input[src] / denominator);
                if (quotient < lower) {
                    quotient = lower;
                }
                if (quotient > upper) {
                    quotient = upper;
                }
                output[dst] = (short)quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, rounds the quotient with {@link Math#round(double)}, limits it to
     * {@code [lower, upper]} and stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The limits are applied to the {@code long} returned by {@code Math.round} before it is
     * narrowed to {@code int}, so a quotient outside the {@code int} range (small or zero
     * denominator) is limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(int[] input, int inputStart, int inputStride, double denominator, int lower, int upper, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Keep the rounded value as long until it has been limited: Math.round saturates
                // at Long.MIN_VALUE/MAX_VALUE, and casting that to int first would wrap
                // (e.g. +Infinity -> Long.MAX_VALUE -> -1) and pick the wrong bound.
                long quotient = Math.round(input[src] / denominator);
                if (quotient < lower) {
                    quotient = lower;
                }
                if (quotient > upper) {
                    quotient = upper;
                }
                // After limiting to int bounds the value always fits in an int.
                output[dst] = (int)quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} (the element is converted to {@code double} first), rounds the
     * quotient with {@link Math#round(double)}, limits it to {@code [lower, upper]} and stores
     * it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(long[] input, int inputStart, int inputStride, double denominator, long lower, long upper, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                long rounded = Math.round((double)input[src] / denominator);
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = Math.min(Math.max(rounded, lower), upper);
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic, limits the quotient to
     * {@code [lower, upper]} and stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(float[] input, int inputStart, int inputStride, float denominator, float lower, float upper, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float quotient = input[src] / denominator;
                // Plain comparisons (not Math.min/max) so NaN and signed zero pass through untouched.
                quotient = quotient < lower ? lower : quotient;
                quotient = quotient > upper ? upper : quotient;
                output[dst] = quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator}, limits the quotient to {@code [lower, upper]} and stores it in
     * {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(double[] input, int inputStart, int inputStride, double denominator, double lower, double upper, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double quotient = input[src] / denominator;
                // Plain comparisons (not Math.min/max) so NaN and signed zero pass through untouched.
                quotient = quotient < lower ? lower : quotient;
                quotient = quotient > upper ? upper : quotient;
                output[dst] = quotient;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic and stores the quotient in the
     * {@code float} array {@code output}. Input bytes are masked with {@code 0xFF}, i.e. read
     * as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divideU_A(byte[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float unsigned = (float)(input[src] & 0xFF);
                output[dst] = unsigned / denominator;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic and stores the quotient in the
     * {@code float} array {@code output}. Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(byte[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float signed = (float)input[src];
                output[dst] = signed / denominator;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic and stores the quotient in the
     * {@code float} array {@code output}. Input shorts are masked with {@code 0xFFFF}, i.e.
     * read as unsigned 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divideU_A(short[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float unsigned = (float)(input[src] & 0xFFFF);
                output[dst] = unsigned / denominator;
            }
        });
    }

    /**
     * Divides each element of a {@code rows} x {@code cols} window of {@code input} by
     * {@code denominator} in {@code float} arithmetic and stores the quotient in the
     * {@code float} array {@code output}. Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(short[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float signed = (float)input[src];
                output[dst] = signed / denominator;
            }
        });
    }

    /**
     * Converts each element of a {@code rows} x {@code cols} window of {@code input} to
     * {@code float}, divides it by {@code denominator} and stores the quotient in the
     * {@code float} array {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(int[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float numerator = (float)input[src];
                output[dst] = numerator / denominator;
            }
        });
    }

    /**
     * Converts each element of a {@code rows} x {@code cols} window of {@code input} to
     * {@code float}, divides it by {@code denominator} and stores the quotient in the
     * {@code float} array {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param denominator divisor applied to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void divide_A(long[] input, int inputStart, int inputStride, float denominator, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float numerator = (float)input[src];
                output[dst] = numerator / denominator;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the sum, narrowed to {@code byte}, in {@code output}.
     * Input bytes are masked with {@code 0xFF}, i.e. read as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int unsigned = input[src] & 0xFF;
                output[dst] = (byte)(unsigned + value);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the sum, narrowed to {@code byte}, in {@code output}.
     * Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = input[src] + value;
                output[dst] = (byte)sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the sum, narrowed to {@code short}, in {@code output}.
     * Input shorts are masked with {@code 0xFFFF}, i.e. read as unsigned 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int unsigned = input[src] & 0xFFFF;
                output[dst] = (short)(unsigned + value);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the sum, narrowed to {@code short}, in {@code output}.
     * Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = input[src] + value;
                output[dst] = (short)sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} using {@code int} arithmetic and stores the sum in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(int[] input, int inputStart, int inputStride, int value, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = input[src] + value;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} using {@code long} arithmetic and stores the sum in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(long[] input, int inputStart, int inputStride, long value, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                long sum = input[src] + value;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the sum in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(float[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float sum = input[src] + value;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the sum in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(double[] input, int inputStart, int inputStride, double value, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double sum = input[src] + value;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} sum to {@code [lower, upper]} and stores it,
     * narrowed to {@code byte}, in {@code output}. Input bytes are masked with {@code 0xFF},
     * i.e. read as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = (input[src] & 0xFF) + value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (byte)Math.min(Math.max(sum, lower), upper);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} sum to {@code [lower, upper]} and stores it,
     * narrowed to {@code byte}, in {@code output}. Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = input[src] + value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (byte)Math.min(Math.max(sum, lower), upper);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} sum to {@code [lower, upper]} and stores it,
     * narrowed to {@code short}, in {@code output}. Input shorts are masked with
     * {@code 0xFFFF}, i.e. read as unsigned 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = (input[src] & 0xFFFF) + value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (short)Math.min(Math.max(sum, lower), upper);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} sum to {@code [lower, upper]} and stores it,
     * narrowed to {@code short}, in {@code output}. Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int sum = input[src] + value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (short)Math.min(Math.max(sum, lower), upper);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the sum to {@code [lower, upper]} and stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * <p>The sum is formed and limited in {@code long} arithmetic, so an addition that would
     * overflow {@code int} is limited to the correct bound instead of wrapping around first.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(int[] input, int inputStart, int inputStride, int value, int lower, int upper, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                // Widen before adding: an int sum can wrap (e.g. Integer.MAX_VALUE + 1) and
                // would then be limited to lower although the true sum exceeds upper.
                long sum = (long)input[src] + value;
                if (sum < lower) {
                    sum = lower;
                }
                if (sum > upper) {
                    sum = upper;
                }
                // After limiting to int bounds the value always fits in an int.
                output[dst] = (int)sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} using {@code long} arithmetic, limits the sum to {@code [lower, upper]}
     * and stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(long[] input, int inputStart, int inputStride, long value, long lower, long upper, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                long sum = input[src] + value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = Math.min(Math.max(sum, lower), upper);
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic, limits the sum to {@code [lower, upper]} and
     * stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(float[] input, int inputStart, int inputStride, float value, float lower, float upper, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float sum = input[src] + value;
                // Plain comparisons (not Math.min/max) so NaN and signed zero pass through untouched.
                sum = sum < lower ? lower : sum;
                sum = sum > upper ? upper : sum;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the sum to {@code [lower, upper]} and stores it in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(double[] input, int inputStart, int inputStride, double value, double lower, double upper, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double sum = input[src] + value;
                // Plain comparisons (not Math.min/max) so NaN and signed zero pass through untouched.
                sum = sum < lower ? lower : sum;
                sum = sum > upper ? upper : sum;
                output[dst] = sum;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the sum in the {@code float} array
     * {@code output}. Input bytes are masked with {@code 0xFF}, i.e. read as unsigned 8-bit
     * values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float unsigned = (float)(input[src] & 0xFF);
                output[dst] = unsigned + value;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the sum in the {@code float} array
     * {@code output}. Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float signed = (float)input[src];
                output[dst] = signed + value;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the sum in the {@code float} array
     * {@code output}. Input shorts are masked with {@code 0xFFFF}, i.e. read as unsigned
     * 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plusU_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float unsigned = (float)(input[src] & 0xFFFF);
                output[dst] = unsigned + value;
            }
        });
    }

    /**
     * Adds {@code value} to each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the sum in the {@code float} array
     * {@code output}. Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float signed = (float)input[src];
                output[dst] = signed + value;
            }
        });
    }

    /**
     * Converts each element of a {@code rows} x {@code cols} window of {@code input} to
     * {@code float}, adds {@code value} and stores the sum in the {@code float} array
     * {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(int[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float element = (float)input[src];
                output[dst] = element + value;
            }
        });
    }

    /**
     * Converts each element of a {@code rows} x {@code cols} window of {@code input} to
     * {@code float}, adds {@code value} and stores the sum in the {@code float} array
     * {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount added to every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void plus_A(long[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float element = (float)input[src];
                output[dst] = element + value;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the difference, narrowed to {@code byte}, in {@code output}.
     * Input bytes are masked with {@code 0xFF}, i.e. read as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minusU_A(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int unsigned = input[src] & 0xFF;
                output[dst] = (byte)(unsigned - value);
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the difference, narrowed to {@code byte}, in {@code output}.
     * Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int difference = input[src] - value;
                output[dst] = (byte)difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the difference, narrowed to {@code short}, in {@code output}.
     * Input shorts are masked with {@code 0xFFFF}, i.e. read as unsigned 16-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minusU_A(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int unsigned = input[src] & 0xFFFF;
                output[dst] = (short)(unsigned - value);
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the difference, narrowed to {@code short}, in {@code output}.
     * Input shorts are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int difference = input[src] - value;
                output[dst] = (short)difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} using {@code int} arithmetic and stores the difference in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(int[] input, int inputStart, int inputStride, int value, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int difference = input[src] - value;
                output[dst] = difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} using {@code long} arithmetic and stores the difference in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(long[] input, int inputStart, int inputStride, long value, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                long difference = input[src] - value;
                output[dst] = difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} in {@code float} arithmetic and stores the difference in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(float[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                float difference = input[src] - value;
                output[dst] = difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input} and stores the difference in {@code output}.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(double[] input, int inputStart, int inputStride, double value, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                double difference = input[src] - value;
                output[dst] = difference;
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} difference to {@code [lower, upper]} and stores
     * it, narrowed to {@code byte}, in {@code output}. Input bytes are masked with
     * {@code 0xFF}, i.e. read as unsigned 8-bit values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minusU_A(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int difference = (input[src] & 0xFF) - value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (byte)Math.min(Math.max(difference, lower), upper);
            }
        });
    }

    /**
     * Subtracts {@code value} from each element of a {@code rows} x {@code cols} window of
     * {@code input}, limits the {@code int} difference to {@code [lower, upper]} and stores
     * it, narrowed to {@code byte}, in {@code output}. Input bytes are read as signed values.
     * One row is processed per {@code BoofConcurrency.loopFor} iteration.
     *
     * @param input source array
     * @param inputStart index in {@code input} of the first element of the first row
     * @param inputStride index offset between the starts of consecutive rows in {@code input}
     * @param value amount subtracted from every element
     * @param lower smallest value written; applied before {@code upper}
     * @param upper largest value written
     * @param output destination array
     * @param outputStart index in {@code output} of the first element of the first row
     * @param outputStride index offset between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements to process in each row
     */
    public static void minus_A(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int stop = src + cols; src < stop; src++, dst++) {
                int difference = input[src] - value;
                // Lower limit first, then upper, so upper wins if the limits are inverted.
                output[dst] = (byte)Math.min(Math.max(difference, lower), upper);
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input shorts are read as unsigned (masked with {@code 0xFFFF}) and the limited result is
     * narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minusU_A(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = (input[src] & 0xFFFF) - value;
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (short)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input shorts are read as signed and the limited result is narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_A(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = input[src] - value;
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (short)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_A(int[] input, int inputStart, int inputStride, int value, int lower, int upper, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = input[src] - value;
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_A(long[] input, int inputStart, int inputStride, long value, long lower, long upper, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                long diff = input[src] - value;
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_A(float[] input, int inputStart, int inputStride, float value, float lower, float upper, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                float diff = input[src] - value;
                // Plain comparisons (not Math.min/max) so NaN and signed-zero handling is untouched.
                float raised = diff < lower ? lower : diff;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Writes {@code input - value}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_A(double[] input, int inputStart, int inputStride, double value, double lower, double upper, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                double diff = input[src] - value;
                // Plain comparisons (not Math.min/max) so NaN and signed-zero handling is untouched.
                double raised = diff < lower ? lower : diff;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; input bytes are read as unsigned
     * (masked with {@code 0xFF}) before conversion to float.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minusU_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFF;
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; input bytes are read as signed.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minus_A(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                byte pixel = input[src];
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; input shorts are read as unsigned
     * (masked with {@code 0xFFFF}) before conversion to float.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minusU_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFFFF;
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; input shorts are read as signed.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minus_A(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                short pixel = input[src];
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; each int is converted to float
     * before the subtraction.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minus_A(int[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src];
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code input - value} as floats into {@code output}; each long is converted to float
     * before the subtraction.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar subtracted from every input element
     */
    public static void minus_A(long[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                long pixel = input[src];
                output[dst] = (float)pixel - value;
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}; input bytes are read as unsigned (masked
     * with {@code 0xFF}) and the difference is narrowed with a {@code (byte)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minusU_B(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFF;
                output[dst] = (byte)(value - pixel);
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}; input bytes are read as signed and the
     * difference is narrowed with a {@code (byte)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(byte[] input, int inputStart, int inputStride, int value, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src];
                output[dst] = (byte)(value - pixel);
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}; input shorts are read as unsigned (masked
     * with {@code 0xFFFF}) and the difference is narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minusU_B(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFFFF;
                output[dst] = (short)(value - pixel);
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}; input shorts are read as signed and the
     * difference is narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(short[] input, int inputStart, int inputStride, int value, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src];
                output[dst] = (short)(value - pixel);
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}, element by element.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(int[] input, int inputStart, int inputStride, int value, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                output[dst] = value - input[src];
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}, element by element.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(long[] input, int inputStart, int inputStride, long value, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                output[dst] = value - input[src];
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}, element by element.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(float[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                output[dst] = value - input[src];
            }
        });
    }

    /**
     * Writes {@code value - input} into {@code output}, element by element.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(double[] input, int inputStart, int inputStride, double value, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                output[dst] = value - input[src];
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input bytes are read as unsigned (masked with {@code 0xFF}) and the limited result is
     * narrowed with a {@code (byte)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minusU_B(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = value - (input[src] & 0xFF);
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (byte)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input bytes are read as signed and the limited result is narrowed with a {@code (byte)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(byte[] input, int inputStart, int inputStride, int value, int lower, int upper, byte[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = value - input[src];
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (byte)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input shorts are read as unsigned (masked with {@code 0xFFFF}) and the limited result is
     * narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minusU_B(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = value - (input[src] & 0xFFFF);
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (short)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     * Input shorts are read as signed and the limited result is narrowed with a {@code (short)} cast.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(short[] input, int inputStart, int inputStride, int value, int lower, int upper, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = value - input[src];
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = (short)Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(int[] input, int inputStart, int inputStride, int value, int lower, int upper, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int diff = value - input[src];
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(long[] input, int inputStart, int inputStride, long value, long lower, long upper, long[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                long diff = value - input[src];
                // Lower limit first, then upper limit, matching the sequential checks.
                output[dst] = Math.min(Math.max(diff, lower), upper);
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(float[] input, int inputStart, int inputStride, float value, float lower, float upper, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                float diff = value - input[src];
                // Plain comparisons (not Math.min/max) so NaN and signed-zero handling is untouched.
                float raised = diff < lower ? lower : diff;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Writes {@code value - input}, limited by {@code lower} and {@code upper}, into {@code output}.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     * @param lower results below this are replaced by it (applied first)
     * @param upper results above this are replaced by it (applied second)
     */
    public static void minus_B(double[] input, int inputStart, int inputStride, double value, double lower, double upper, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                double diff = value - input[src];
                // Plain comparisons (not Math.min/max) so NaN and signed-zero handling is untouched.
                double raised = diff < lower ? lower : diff;
                output[dst] = raised > upper ? upper : raised;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; input bytes are read as unsigned
     * (masked with {@code 0xFF}) before conversion to float.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minusU_B(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFF;
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; input bytes are read as signed.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(byte[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                byte pixel = input[src];
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; input shorts are read as unsigned
     * (masked with {@code 0xFFFF}) before conversion to float.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minusU_B(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src] & 0xFFFF;
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; input shorts are read as signed.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(short[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                short pixel = input[src];
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; each int is converted to float
     * before the subtraction.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(int[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                int pixel = input[src];
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Writes {@code value - input} as floats into {@code output}; each long is converted to float
     * before the subtraction.
     *
     * <p>Row {@code r} reads from {@code inputStart + r * inputStride} and writes from
     * {@code outputStart + r * outputStride}, covering {@code cols} consecutive elements; rows are
     * issued through {@code BoofConcurrency.loopFor(0, rows, ...)}.
     *
     * @param value scalar from which every input element is subtracted
     */
    public static void minus_B(long[] input, int inputStart, int inputStride, float value, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; ++src, ++dst) {
                long pixel = input[src];
                output[dst] = value - (float)pixel;
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}. Pixels are
     * compared as unsigned 8-bit values (masked with {@code 0xFF}) and the bound is stored with a
     * {@code (byte)} cast.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayU8 img, int min, int max) {
        int h = img.getHeight();
        int w = img.getWidth();
        byte[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                int value = data[index] & 0xFF;
                if (value < min) {
                    data[index] = (byte)min;
                } else if (value > max) {
                    data[index] = (byte)max;
                }
            }
        });
    }

    /**
     * Stores {@code |imgA - imgB|} per pixel into {@code output}. Pixels of both inputs are read
     * as unsigned 8-bit values (masked with {@code 0xFF}); the result is stored with a
     * {@code (byte)} cast.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayU8 imgA, GrayU8 imgB, GrayU8 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                int a = imgA.data[posA] & 0xFF;
                int b = imgB.data[posB] & 0xFF;
                output.data[posOut] = (byte)Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}. Pixels are
     * compared as signed bytes and the bound is stored with a {@code (byte)} cast.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayS8 img, int min, int max) {
        int h = img.getHeight();
        int w = img.getWidth();
        byte[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                byte value = data[index];
                if (value < min) {
                    data[index] = (byte)min;
                } else if (value > max) {
                    data[index] = (byte)max;
                }
            }
        });
    }

    /**
     * Stores {@code |imgA - imgB|} per pixel into {@code output}. Pixels are read as signed bytes;
     * the result is stored with a {@code (byte)} cast.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayS8 imgA, GrayS8 imgB, GrayS8 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                int a = imgA.data[posA];
                int b = imgB.data[posB];
                output.data[posOut] = (byte)Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}. Pixels are
     * compared as unsigned 16-bit values (masked with {@code 0xFFFF}) and the bound is stored with
     * a {@code (short)} cast.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayU16 img, int min, int max) {
        int h = img.getHeight();
        int w = img.getWidth();
        short[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                int value = data[index] & 0xFFFF;
                if (value < min) {
                    data[index] = (short)min;
                } else if (value > max) {
                    data[index] = (short)max;
                }
            }
        });
    }

    /**
     * Stores {@code |imgA - imgB|} per pixel into {@code output}. Pixels of both inputs are read
     * as unsigned 16-bit values (masked with {@code 0xFFFF}); the result is stored with a
     * {@code (short)} cast.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayU16 imgA, GrayU16 imgB, GrayU16 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                int a = imgA.data[posA] & 0xFFFF;
                int b = imgB.data[posB] & 0xFFFF;
                output.data[posOut] = (short)Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}. Pixels are
     * compared as signed shorts and the bound is stored with a {@code (short)} cast.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayS16 img, int min, int max) {
        int h = img.getHeight();
        int w = img.getWidth();
        short[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                short value = data[index];
                if (value < min) {
                    data[index] = (short)min;
                } else if (value > max) {
                    data[index] = (short)max;
                }
            }
        });
    }

    /**
     * Stores {@code |imgA - imgB|} per pixel into {@code output}. Pixels are read as signed shorts;
     * the result is stored with a {@code (short)} cast.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayS16 imgA, GrayS16 imgB, GrayS16 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                int a = imgA.data[posA];
                int b = imgB.data[posB];
                output.data[posOut] = (short)Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayS32 img, int min, int max) {
        int h = img.getHeight();
        int w = img.getWidth();
        int[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                int value = data[index];
                if (value < min) {
                    data[index] = min;
                } else if (value > max) {
                    data[index] = max;
                }
            }
        });
    }

    /**
     * Stores {@code Math.abs(imgA - imgB)} per pixel into {@code output}, using int arithmetic.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayS32 imgA, GrayS32 imgB, GrayS32 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                int a = imgA.data[posA];
                int b = imgB.data[posB];
                output.data[posOut] = Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayS64 img, long min, long max) {
        int h = img.getHeight();
        int w = img.getWidth();
        long[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                long value = data[index];
                if (value < min) {
                    data[index] = min;
                } else if (value > max) {
                    data[index] = max;
                }
            }
        });
    }

    /**
     * Stores {@code Math.abs(imgA - imgB)} per pixel into {@code output}, using long arithmetic.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayS64 imgA, GrayS64 imgB, GrayS64 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                long a = imgA.data[posA];
                long b = imgB.data[posB];
                output.data[posOut] = Math.abs(a - b);
            }
        });
    }

    /**
     * Limits the pixels of {@code img} in place: a pixel below {@code min} is overwritten with
     * {@code min}; otherwise a pixel above {@code max} is overwritten with {@code max}. A pixel for
     * which neither comparison is true (for example NaN) is left unchanged.
     *
     * <p>Each row starts at {@code img.getStartIndex() + y * img.getStride()} and spans
     * {@code img.getWidth()} elements; rows are issued through
     * {@code BoofConcurrency.loopFor(0, img.getHeight(), ...)}.
     *
     * @param img image modified in place
     * @param min replacement for pixels below it
     * @param max replacement for pixels above it
     */
    public static void boundImage(GrayF32 img, float min, float max) {
        int h = img.getHeight();
        int w = img.getWidth();
        float[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start must be assigned before the row end is derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                float value = data[index];
                if (value < min) {
                    data[index] = min;
                } else if (value > max) {
                    data[index] = max;
                }
            }
        });
    }

    /**
     * Stores {@code Math.abs(imgA - imgB)} per pixel into {@code output}, using float arithmetic.
     *
     * <p>Width and height are taken from {@code imgA}; each image is addressed with its own start
     * index and stride. Rows are issued through {@code BoofConcurrency.loopFor}.
     */
    public static void diffAbs(GrayF32 imgA, GrayF32 imgB, GrayF32 output) {
        int height = imgA.getHeight();
        int width = imgA.getWidth();
        BoofConcurrency.loopFor(0, height, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + width; posA < stopA; ++posA, ++posB, ++posOut) {
                float a = imgA.data[posA];
                float b = imgB.data[posB];
                output.data[posOut] = Math.abs(a - b);
            }
        });
    }

    /**
     * Clamps every pixel of {@code img} in place to the range {@code [min, max]}.
     *
     * <p>Values below {@code min} are replaced by {@code min}; values above {@code max} are
     * replaced by {@code max}. A pixel for which both comparisons are false (including NaN) is
     * left untouched. Rows are handed one at a time to {@link BoofConcurrency#loopFor}.</p>
     *
     * @param img image modified in place
     * @param min lower bound written to pixels that are smaller than it
     * @param max upper bound written to pixels that are larger than it
     */
    public static void boundImage(GrayF64 img, double min, double max) {
        int h = img.getHeight();
        int w = img.getWidth();
        double[] data = img.data;
        BoofConcurrency.loopFor(0, h, y -> {
            // The row start has to be known before the end of the row can be derived from it.
            int index = img.getStartIndex() + y * img.getStride();
            int indexEnd = index + w;
            for (; index < indexEnd; ++index) {
                double value = data[index];
                if (value < min) {
                    data[index] = min;
                } else if (value > max) {
                    data[index] = max;
                }
            }
        });
    }

    /**
     * Writes the per-pixel absolute difference {@code |imgA - imgB|} of two 64-bit float images
     * into {@code output}.
     *
     * <p>The processed region is {@code imgA}'s width and height. Each image is addressed through
     * its own start index and stride; no size check is performed here. Rows are handed one at a
     * time to {@link BoofConcurrency#loopFor}.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the result
     */
    public static void diffAbs(GrayF64 imgA, GrayF64 imgB, GrayF64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = Math.abs(imgA.data[posA] - imgB.data[posB]);
            }
        });
    }

    /**
     * Per-pixel sum of two 8-bit images, stored in a 16-bit image.
     *
     * <p>Each input byte is masked with {@code 0xFF} so it is read as a value in 0..255 before
     * the addition. The processed region is {@code imgA}'s width and height; each image is
     * addressed through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayU8 imgA, GrayU8 imgB, GrayU16 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int a = imgA.data[posA] & 0xFF;
                int b = imgB.data[posB] & 0xFF;
                output.data[posOut] = (short)(a + b);
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 8-bit images, stored in a 16-bit image.
     *
     * <p>Each input byte is masked with {@code 0xFF} so it is read as a value in 0..255 before
     * the subtraction. The processed region is {@code imgA}'s width and height; each image is
     * addressed through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayU8 imgA, GrayU8 imgB, GrayI16 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int a = imgA.data[posA] & 0xFF;
                int b = imgB.data[posB] & 0xFF;
                output.data[posOut] = (short)(a - b);
            }
        });
    }

    /**
     * Per-pixel sum of two signed 8-bit images, stored in a signed 16-bit image.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayS8 imgA, GrayS8 imgB, GrayS16 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int sum = imgA.data[posA] + imgB.data[posB];
                output.data[posOut] = (short)sum;
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two signed 8-bit images, stored in a signed
     * 16-bit image.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayS8 imgA, GrayS8 imgB, GrayS16 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int diff = imgA.data[posA] - imgB.data[posB];
                output.data[posOut] = (short)diff;
            }
        });
    }

    /**
     * Per-pixel sum of two 16-bit images, stored in a 32-bit integer image.
     *
     * <p>Each input short is masked with {@code 0xFFFF} so it is read as a value in 0..65535
     * before the addition. The processed region is {@code imgA}'s width and height; each image
     * is addressed through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayU16 imgA, GrayU16 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int a = imgA.data[posA] & 0xFFFF;
                int b = imgB.data[posB] & 0xFFFF;
                output.data[posOut] = a + b;
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 16-bit images, stored in a 32-bit integer
     * image.
     *
     * <p>Each input short is masked with {@code 0xFFFF} so it is read as a value in 0..65535
     * before the subtraction. The processed region is {@code imgA}'s width and height; each image
     * is addressed through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayU16 imgA, GrayU16 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int a = imgA.data[posA] & 0xFFFF;
                int b = imgB.data[posB] & 0xFFFF;
                output.data[posOut] = a - b;
            }
        });
    }

    /**
     * Per-pixel sum of two signed 16-bit images, stored in a 32-bit integer image.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayS16 imgA, GrayS16 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int sum = imgA.data[posA] + imgB.data[posB];
                output.data[posOut] = sum;
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two signed 16-bit images, stored in a 32-bit
     * integer image.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayS16 imgA, GrayS16 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int diff = imgA.data[posA] - imgB.data[posB];
                output.data[posOut] = diff;
            }
        });
    }

    /**
     * Per-pixel sum of two 32-bit integer images, computed with ordinary {@code int} addition.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayS32 imgA, GrayS32 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int sum = imgA.data[posA] + imgB.data[posB];
                output.data[posOut] = sum;
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 32-bit integer images, computed with
     * ordinary {@code int} subtraction.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayS32 imgA, GrayS32 imgB, GrayS32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                int diff = imgA.data[posA] - imgB.data[posB];
                output.data[posOut] = diff;
            }
        });
    }

    /**
     * Per-pixel sum of two 64-bit integer images, computed with ordinary {@code long} addition.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayS64 imgA, GrayS64 imgB, GrayS64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                long sum = imgA.data[posA] + imgB.data[posB];
                output.data[posOut] = sum;
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 64-bit integer images, computed with
     * ordinary {@code long} subtraction.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayS64 imgA, GrayS64 imgB, GrayS64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                long diff = imgA.data[posA] - imgB.data[posB];
                output.data[posOut] = diff;
            }
        });
    }

    /**
     * Per-pixel sum of two 32-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayF32 imgA, GrayF32 imgB, GrayF32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] + imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 32-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayF32 imgA, GrayF32 imgB, GrayF32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] - imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel product of two 32-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first factor
     * @param imgB second factor
     * @param output image that receives the product
     */
    public static void multiply(GrayF32 imgA, GrayF32 imgB, GrayF32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] * imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel quotient {@code imgA / imgB} of two 32-bit float images.
     *
     * <p>Plain float division is used, so a zero divisor is not special-cased here. The
     * processed region is {@code imgA}'s width and height; each image is addressed through its
     * own start index and stride and no size check is performed here.</p>
     *
     * @param imgA dividend
     * @param imgB divisor
     * @param output image that receives the quotient
     */
    public static void divide(GrayF32 imgA, GrayF32 imgB, GrayF32 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] / imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel sum of two 64-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first operand
     * @param imgB second operand
     * @param output image that receives the sum
     */
    public static void add(GrayF64 imgA, GrayF64 imgB, GrayF64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] + imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel difference {@code imgA - imgB} of two 64-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA minuend
     * @param imgB subtrahend
     * @param output image that receives the difference
     */
    public static void subtract(GrayF64 imgA, GrayF64 imgB, GrayF64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] - imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel product of two 64-bit float images.
     *
     * <p>The processed region is {@code imgA}'s width and height; each image is addressed
     * through its own start index and stride and no size check is performed here.</p>
     *
     * @param imgA first factor
     * @param imgB second factor
     * @param output image that receives the product
     */
    public static void multiply(GrayF64 imgA, GrayF64 imgB, GrayF64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] * imgB.data[posB];
            }
        });
    }

    /**
     * Per-pixel quotient {@code imgA / imgB} of two 64-bit float images.
     *
     * <p>Plain double division is used, so a zero divisor is not special-cased here. The
     * processed region is {@code imgA}'s width and height; each image is addressed through its
     * own start index and stride and no size check is performed here.</p>
     *
     * @param imgA dividend
     * @param imgB divisor
     * @param output image that receives the quotient
     */
    public static void divide(GrayF64 imgA, GrayF64 imgB, GrayF64 output) {
        final int numRows = imgA.getHeight();
        final int numCols = imgA.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posA = imgA.getStartIndex() + row * imgA.getStride();
            int posB = imgB.getStartIndex() + row * imgB.getStride();
            int posOut = output.getStartIndex() + row * output.getStride();
            for (int stopA = posA + numCols; posA < stopA; posA++, posB++, posOut++) {
                output.data[posOut] = imgA.data[posA] / imgB.data[posB];
            }
        });
    }

    /**
     * Writes {@code log(val2 + input)} for every element of a strided 2D region.
     *
     * <p>The sum {@code val2 + input} is formed in float arithmetic, the natural logarithm is
     * taken with {@link Math#log(double)}, and the result is narrowed back to float.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param val2 constant added to every element before taking the logarithm
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void log(float[] input, int inputStart, int inputStride, float val2, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                output[dst] = (float)Math.log(val2 + input[src]);
            }
        });
    }

    /**
     * Writes {@code log(val2 + input)} (natural logarithm) for every element of a strided 2D
     * region.
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param val2 constant added to every element before taking the logarithm
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void log(double[] input, int inputStart, int inputStride, double val2, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                output[dst] = Math.log(val2 + input[src]);
            }
        });
    }

    /**
     * Sign-preserving logarithm over a strided 2D region.
     *
     * <p>For a negative element {@code v} the result is {@code -log(val2 - v)}; otherwise it is
     * {@code log(val2 + v)}. The sum or difference is formed in float arithmetic and the final
     * value is narrowed to float.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param val2 constant combined with the magnitude of every element before the logarithm
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void logSign(float[] input, int inputStart, int inputStride, float val2, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                float v = input[src];
                if (v < 0.0f) {
                    output[dst] = (float)(-Math.log(val2 - v));
                } else {
                    output[dst] = (float)Math.log(val2 + v);
                }
            }
        });
    }

    /**
     * Sign-preserving logarithm over a strided 2D region.
     *
     * <p>For a negative element {@code v} the result is {@code -log(val2 - v)}; otherwise it is
     * {@code log(val2 + v)}.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param val2 constant combined with the magnitude of every element before the logarithm
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void logSign(double[] input, int inputStart, int inputStride, double val2, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                double v = input[src];
                if (v < 0.0) {
                    output[dst] = -Math.log(val2 - v);
                } else {
                    output[dst] = Math.log(val2 + v);
                }
            }
        });
    }

    /**
     * Writes the square root of every element of a strided 2D region.
     *
     * <p>The root is computed with {@link Math#sqrt(double)} and narrowed to float.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void sqrt(float[] input, int inputStart, int inputStride, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                output[dst] = (float)Math.sqrt(input[src]);
            }
        });
    }

    /**
     * Writes the square root of every element of a strided 2D region.
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void sqrt(double[] input, int inputStart, int inputStride, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                output[dst] = Math.sqrt(input[src]);
            }
        });
    }

    /**
     * Squares every element of a strided 2D byte region and stores the result as a short.
     *
     * <p>Each byte is masked with {@code 0xFF} so it is read as a value in 0..255; the square
     * (at most 65025) is then narrowed to {@code short}, keeping its low 16 bits.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void pow2(byte[] input, int inputStart, int inputStride, short[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                int value = input[src] & 0xFF;
                int squared = value * value;
                output[dst] = (short)squared;
            }
        });
    }

    /**
     * Squares every element of a strided 2D short region and stores the result as an int.
     *
     * <p>Each short is masked with {@code 0xFFFF} so it is read as a value in 0..65535. The
     * multiplication is done in {@code int}, so for values above 46340 the product exceeds
     * {@code Integer.MAX_VALUE} and wraps around.</p>
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void pow2(short[] input, int inputStart, int inputStride, int[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                int value = input[src] & 0xFFFF;
                output[dst] = value * value;
            }
        });
    }

    /**
     * Squares every element of a strided 2D float region.
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void pow2(float[] input, int inputStart, int inputStride, float[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                float value = input[src];
                output[dst] = value * value;
            }
        });
    }

    /**
     * Squares every element of a strided 2D double region.
     *
     * @param input source array
     * @param inputStart index of the first element of row 0 in {@code input}
     * @param inputStride distance between the starts of consecutive rows in {@code input}
     * @param output destination array
     * @param outputStart index of the first element of row 0 in {@code output}
     * @param outputStride distance between the starts of consecutive rows in {@code output}
     * @param rows number of rows to process
     * @param cols number of elements processed in each row
     */
    public static void pow2(double[] input, int inputStart, int inputStride, double[] output, int outputStart, int outputStride, int rows, int cols) {
        BoofConcurrency.loopFor(0, rows, row -> {
            int src = inputStart + row * inputStride;
            int dst = outputStart + row * outputStride;
            for (int srcStop = src + cols; src < srcStop; src++, dst++) {
                double value = input[src];
                output[dst] = value * value;
            }
        });
    }

    /**
     * Computes {@code sqrt(max(0, pow2 - mean*mean))} per pixel and stores it as a byte.
     *
     * <p>{@code mean} bytes are masked with {@code 0xFF} and {@code pow2} shorts with
     * {@code 0xFFFF} before use. The difference is floored at zero before the square root, and
     * the root is narrowed to {@code byte}. The processed region is {@code mean}'s width and
     * height; no size check is performed here.</p>
     *
     * @param mean per-pixel mean values
     * @param pow2 per-pixel values from which {@code mean*mean} is subtracted (presumably the
     *             mean of the squared pixels; confirm against the caller)
     * @param stdev image that receives the result
     */
    public static void stdev(GrayU8 mean, GrayU16 pow2, GrayU8 stdev) {
        final int numRows = mean.getHeight();
        final int numCols = mean.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posMean = mean.startIndex + row * mean.stride;
            int posPow = pow2.startIndex + row * pow2.stride;
            int posOut = stdev.startIndex + row * stdev.stride;
            for (int stopMean = posMean + numCols; posMean < stopMean; posMean++, posPow++, posOut++) {
                int average = mean.data[posMean] & 0xFF;
                int squares = pow2.data[posPow] & 0xFFFF;
                int variance = Math.max(0, squares - average * average);
                stdev.data[posOut] = (byte)Math.sqrt(variance);
            }
        });
    }

    /**
     * Computes {@code sqrt(max(0, pow2 - mean*mean))} per pixel and stores it as a short.
     *
     * <p>{@code mean} shorts are masked with {@code 0xFFFF} before use. All arithmetic up to the
     * square root is 32-bit {@code int}, so {@code mean*mean} wraps for means above 46340. The
     * difference is floored at zero before the square root, and the root is narrowed to
     * {@code short}. The processed region is {@code mean}'s width and height; no size check is
     * performed here.</p>
     *
     * @param mean per-pixel mean values
     * @param pow2 per-pixel values from which {@code mean*mean} is subtracted (presumably the
     *             mean of the squared pixels; confirm against the caller)
     * @param stdev image that receives the result
     */
    public static void stdev(GrayU16 mean, GrayS32 pow2, GrayU16 stdev) {
        final int numRows = mean.getHeight();
        final int numCols = mean.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posMean = mean.startIndex + row * mean.stride;
            int posPow = pow2.startIndex + row * pow2.stride;
            int posOut = stdev.startIndex + row * stdev.stride;
            for (int stopMean = posMean + numCols; posMean < stopMean; posMean++, posPow++, posOut++) {
                int average = mean.data[posMean] & 0xFFFF;
                int squares = pow2.data[posPow];
                int variance = Math.max(0, squares - average * average);
                stdev.data[posOut] = (short)Math.sqrt(variance);
            }
        });
    }

    /**
     * Computes {@code sqrt(max(0, pow2 - mean*mean))} per pixel in float precision.
     *
     * <p>The difference is floored at zero with {@link Math#max(float, float)} before the square
     * root, and the root is narrowed to float. The processed region is {@code mean}'s width and
     * height; no size check is performed here.</p>
     *
     * @param mean per-pixel mean values
     * @param pow2 per-pixel values from which {@code mean*mean} is subtracted (presumably the
     *             mean of the squared pixels; confirm against the caller)
     * @param stdev image that receives the result
     */
    public static void stdev(GrayF32 mean, GrayF32 pow2, GrayF32 stdev) {
        final int numRows = mean.getHeight();
        final int numCols = mean.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posMean = mean.startIndex + row * mean.stride;
            int posPow = pow2.startIndex + row * pow2.stride;
            int posOut = stdev.startIndex + row * stdev.stride;
            for (int stopMean = posMean + numCols; posMean < stopMean; posMean++, posPow++, posOut++) {
                float average = mean.data[posMean];
                float squares = pow2.data[posPow];
                float variance = Math.max(0.0f, squares - average * average);
                stdev.data[posOut] = (float)Math.sqrt(variance);
            }
        });
    }

    /**
     * Computes {@code sqrt(max(0, pow2 - mean*mean))} per pixel in double precision.
     *
     * <p>The difference is floored at zero with {@link Math#max(double, double)} before the
     * square root. The processed region is {@code mean}'s width and height; no size check is
     * performed here.</p>
     *
     * @param mean per-pixel mean values
     * @param pow2 per-pixel values from which {@code mean*mean} is subtracted (presumably the
     *             mean of the squared pixels; confirm against the caller)
     * @param stdev image that receives the result
     */
    public static void stdev(GrayF64 mean, GrayF64 pow2, GrayF64 stdev) {
        final int numRows = mean.getHeight();
        final int numCols = mean.getWidth();
        BoofConcurrency.loopFor(0, numRows, row -> {
            int posMean = mean.startIndex + row * mean.stride;
            int posPow = pow2.startIndex + row * pow2.stride;
            int posOut = stdev.startIndex + row * stdev.stride;
            for (int stopMean = posMean + numCols; posMean < stopMean; posMean++, posPow++, posOut++) {
                double average = mean.data[posMean];
                double squares = pow2.data[posPow];
                double variance = Math.max(0.0, squares - average * average);
                stdev.data[posOut] = Math.sqrt(variance);
            }
        });
    }
}

