From 84331d6f07701f8f8e25b034ea9757c1e0adfdf9 Mon Sep 17 00:00:00 2001 From: vchamarthi Date: Tue, 14 Apr 2026 11:20:03 -0500 Subject: [PATCH 1/5] asv benchmarks integration from a non-protected branch --- .gitignore | 4 + benchmarks/README.md | 85 +++++++ benchmarks/asv.conf.json | 19 ++ benchmarks/benchmarks/__init__.py | 50 ++++ benchmarks/benchmarks/bench_fft1d.py | 134 ++++++++++ benchmarks/benchmarks/bench_fftnd.py | 208 ++++++++++++++++ benchmarks/benchmarks/bench_memory.py | 121 +++++++++ benchmarks/benchmarks/bench_numpy_fft.py | 232 +++++++++++++++++ benchmarks/benchmarks/bench_scipy_fft.py | 305 +++++++++++++++++++++++ 9 files changed, 1158 insertions(+) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/asv.conf.json create mode 100644 benchmarks/benchmarks/__init__.py create mode 100644 benchmarks/benchmarks/bench_fft1d.py create mode 100644 benchmarks/benchmarks/bench_fftnd.py create mode 100644 benchmarks/benchmarks/bench_memory.py create mode 100644 benchmarks/benchmarks/bench_numpy_fft.py create mode 100644 benchmarks/benchmarks/bench_scipy_fft.py diff --git a/.gitignore b/.gitignore index b7738655..20ecca31 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,7 @@ mkl_fft/_pydfti.c mkl_fft/_pydfti.cpython*.so mkl_fft/_pydfti.*-win_amd64.pyd mkl_fft/src/mklfft.c + +# ASV benchmark artifacts +.asv/ +benchmarks/.asv/ diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..9d9c2bac --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,85 @@ +# mkl_fft ASV Benchmarks + +Performance benchmarks for [mkl_fft](https://github.com/IntelPython/mkl_fft) using +[Airspeed Velocity (ASV)](https://asv.readthedocs.io/en/stable/). 
+ +## Structure + +``` +benchmarks/ +├── asv.conf.json # ASV configuration (CI-only, no env/build settings) +└── benchmarks/ + ├── __init__.py # Thread pinning (MKL_NUM_THREADS) + ├── bench_fft1d.py # mkl_fft root API — 1-D transforms + ├── bench_fftnd.py # mkl_fft root API — 2-D and N-D transforms + ├── bench_numpy_fft.py # mkl_fft.interfaces.numpy_fft — full coverage + ├── bench_scipy_fft.py # mkl_fft.interfaces.scipy_fft — full coverage + └── bench_memory.py # Peak RSS memory benchmarks +``` + +### Coverage + +| File | API | Transforms | +|------|-----|-----------| +| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` — power-of-two and non-power-of-two | +| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | +| `bench_numpy_fft.py` | `mkl_fft.interfaces.numpy_fft` | All exported functions including Hermitian (`hfft`, `ihfft`) | +| `bench_scipy_fft.py` | `mkl_fft.interfaces.scipy_fft` | All exported functions including Hermitian 2-D/N-D (`hfft2`, `hfftn`) | +| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | + +Benchmarks cover float32, float64, complex64, complex128 dtypes, power-of-two +and non-power-of-two sizes, square and non-square/non-cubic shapes. + +## Threading + +`__init__.py` pins `MKL_NUM_THREADS` to **4** when the machine has 4 or more +physical cores, or falls back to **1** (single-threaded) otherwise. This keeps +results comparable across CI machines in the shared pool regardless of their +total core count. Physical cores are read from `/proc/cpuinfo` — hyperthreads +are excluded per MKL recommendation. + +Override by setting `MKL_NUM_THREADS` in the environment before running ASV. + +## Running Locally + +> Benchmarks are designed for CI. Local runs require `mkl_fft` to be installed +> in the active Python environment. Benchmarks that exercise SciPy interface +> (`bench_scipy_fft.py`) also require SciPy: +> +> ```bash +> python -m pip install -e .. 
+> python -m pip install scipy +> ``` + +```bash +cd benchmarks/ + +# Quick smoke-run against the current working tree (no env management) +asv run --python=same --quick --show-stderr HEAD^! + +# Run a specific benchmark file +asv run --python=same --quick --bench bench_fft1d HEAD^! + +# View and publish results +asv publish # generates .asv/html/ +asv preview # serves at http://localhost:8080 +``` + +## CI + +Benchmarks run automatically in Jenkins on the `auto-bench` node via +`benchmarkHelper.performanceTest()` from the shared library. The pipeline uses: + +```bash +asv run --environment existing: --set-commit-hash $COMMIT_SHA +``` + +This bypasses ASV environment management entirely — mkl_fft is pre-installed +into a conda environment by the pipeline before ASV is invoked. + +- **Nightly (prod):** results are published to the benchmark dashboard +- **PR (dev):** `asv compare` output is evaluated for regressions; a 30% slowdown + triggers a failed GitHub commit status + +Results are stored in the `mkl_fft-results` branch of +`intel-innersource/libraries.python.intel.infrastructure.benchmark-dashboards`. diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 00000000..aa661032 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "project": "mkl_fft", + "project_url": "https://github.com/IntelPython/mkl_fft", + "show_commit_url": "https://github.com/IntelPython/mkl_fft/commit/", + "repo": "..", + "branches": [ + "master" + ], + "benchmark_dir": "benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "build_cache_size": 2, + "default_benchmark_timeout": 500, + "regressions_thresholds": { + ".*": 0.3 + } +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 00000000..2a37b3d3 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1,50 @@ +"""ASV benchmarks for mkl_fft. 
"""ASV benchmarks for mkl_fft.

Thread control — design rationale
----------------------------------
Since we do not have a dedicated CI benchmark machine, benchmarks run on a
shared CI pool whose machines vary in core count over time.
Using the full physical core count of each machine would make results
incomparable across runs on different machines.

Strategy:
  - Physical cores >= 4  →  fix MKL_NUM_THREADS = 4
      4 is the lowest common denominator that guarantees multi-threaded MKL
      behavior and is achievable on any modern CI machine.  Results from
      different machines in the pool are therefore directly comparable.
  - Physical cores < 4   →  fall back to MKL_NUM_THREADS = 1 (single-threaded)
      Prevents over-subscription on under-resourced machines and avoids
      misleading comparisons against 4-thread baselines.

A non-empty MKL_NUM_THREADS already present in the environment always wins.

MKL recommendation: use physical cores, not logical (hyperthreaded) CPUs.
"""

import os
import re

_MIN_THREADS = 4  # minimum physical cores required for multi-threaded mode
_CPUINFO_PATH = "/proc/cpuinfo"


def _physical_cores(cpuinfo_path=_CPUINFO_PATH):
    """Return the physical core count parsed from a cpuinfo-style file.

    The count is the first ``cpu cores`` value multiplied by the number of
    distinct ``physical id`` values (at least 1).

    Parameters
    ----------
    cpuinfo_path : str, optional
        File to parse; defaults to ``/proc/cpuinfo``.

    Returns
    -------
    int
        The parsed core count.  When the file cannot be read or contains no
        ``cpu cores`` field, ``os.cpu_count()`` is returned instead (or 1 if
        that is unavailable too).
    """
    try:
        # errors="replace": odd bytes in the file must not abort the parse.
        with open(cpuinfo_path, encoding="utf-8", errors="replace") as f:
            content = f.read()
    except OSError:
        return os.cpu_count() or 1

    match = re.search(r"cpu cores\s*:\s*(\d+)", content)
    if match is None:
        # No per-package core count in this file; use the generic fallback.
        return os.cpu_count() or 1

    sockets = max(
        len(set(re.findall(r"physical id\s*:\s*(\d+)", content))), 1
    )
    return int(match.group(1)) * sockets


def _thread_count(physical=None):
    """Return the MKL thread count to pin, as a string.

    Parameters
    ----------
    physical : int, optional
        Physical core count to base the decision on.  When omitted it is
        obtained from :func:`_physical_cores`.

    Returns
    -------
    str
        ``str(_MIN_THREADS)`` when at least ``_MIN_THREADS`` cores are
        available, otherwise ``"1"``.
    """
    if physical is None:
        physical = _physical_cores()
    return str(_MIN_THREADS) if physical >= _MIN_THREADS else "1"


# ``or`` (rather than a ``get`` default) treats an empty MKL_NUM_THREADS as
# unset and only probes the machine when no override is given.
_THREADS = os.environ.get("MKL_NUM_THREADS") or _thread_count()
os.environ["MKL_NUM_THREADS"] = _THREADS
os.environ.setdefault("OMP_NUM_THREADS", _THREADS)
os.environ.setdefault("OPENBLAS_NUM_THREADS", _THREADS)
"""Benchmarks for 1-D FFT operations using the mkl_fft root API."""

import numpy as np

import mkl_fft

_RNG_SEED = 42

# Parameter axes reused by several suites; each suite copies them into its
# own ``params`` list.
_POW2_LENGTHS = (64, 256, 1024, 4096, 16384, 65536)
_NON_POW2_LENGTHS = (127, 509, 1000, 4001, 10007)
_REAL_DTYPES = ("float32", "float64")


def _make_input(rng, n, dtype):
    """Draw a length-*n* standard-normal vector and cast it to *dtype*.

    For a complex *dtype* a second, independent draw supplies the imaginary
    part, so the array carries non-zero imaginary values.
    """
    target = np.dtype(dtype)
    values = rng.standard_normal(n)
    if target.kind == "c":
        values = values + 1j * rng.standard_normal(n)
    return values.astype(target)


def _rfft_inputs(n, dtype):
    """Return ``(real_signal, half_spectrum)`` for the rfft/irfft suites.

    ``real_signal`` has length *n* and the given real *dtype*;
    ``half_spectrum`` has length ``n // 2 + 1`` and the matching complex
    dtype.  A fresh generator seeded with ``_RNG_SEED`` is used, and the
    real signal is drawn before the spectrum.
    """
    gen = np.random.default_rng(_RNG_SEED)
    half_len = n // 2 + 1
    if dtype == "float32":
        complex_name = "complex64"
    else:
        complex_name = "complex128"
    signal = gen.standard_normal(n).astype(dtype)
    re_part = gen.standard_normal(half_len)
    im_part = gen.standard_normal(half_len)
    return signal, (re_part + 1j * im_part).astype(complex_name)


# ---------------------------------------------------------------------------
# fft / ifft, power-of-two lengths
# ---------------------------------------------------------------------------


class TimeFFT1D:
    """Time ``mkl_fft.fft`` and ``mkl_fft.ifft`` on power-of-two lengths.

    Inputs are real or complex depending on the *dtype* parameter.
    """

    param_names = ["n", "dtype"]
    params = [
        list(_POW2_LENGTHS),
        ["float32", "float64", "complex64", "complex128"],
    ]

    def setup(self, n, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype)

    def time_fft(self, n, dtype):
        mkl_fft.fft(self.x)

    def time_ifft(self, n, dtype):
        mkl_fft.ifft(self.x)


# ---------------------------------------------------------------------------
# rfft / irfft, power-of-two lengths
# ---------------------------------------------------------------------------


class TimeRFFT1D:
    """Time ``mkl_fft.rfft`` and ``mkl_fft.irfft`` on power-of-two lengths."""

    param_names = ["n", "dtype"]
    params = [list(_POW2_LENGTHS), list(_REAL_DTYPES)]

    def setup(self, n, dtype):
        # irfft consumes a half-spectrum of length n // 2 + 1.
        self.x_real, self.x_complex = _rfft_inputs(n, dtype)

    def time_rfft(self, n, dtype):
        mkl_fft.rfft(self.x_real)

    def time_irfft(self, n, dtype):
        mkl_fft.irfft(self.x_complex, n=n)


# ---------------------------------------------------------------------------
# fft / ifft, non-power-of-two lengths
# ---------------------------------------------------------------------------


class TimeFFT1DNonPow2:
    """Time ``mkl_fft.fft`` and ``mkl_fft.ifft`` on non-power-of-two lengths.

    Kept as a separate suite so that regressions specific to these lengths
    show up on their own.
    """

    param_names = ["n", "dtype"]
    params = [
        list(_NON_POW2_LENGTHS),
        ["float64", "complex128", "complex64"],
    ]

    def setup(self, n, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype)

    def time_fft(self, n, dtype):
        mkl_fft.fft(self.x)

    def time_ifft(self, n, dtype):
        mkl_fft.ifft(self.x)


# ---------------------------------------------------------------------------
# rfft / irfft, non-power-of-two lengths
# ---------------------------------------------------------------------------


class TimeRFFT1DNonPow2:
    """Time ``mkl_fft.rfft`` and ``mkl_fft.irfft`` on non-power-of-two lengths."""

    param_names = ["n", "dtype"]
    params = [list(_NON_POW2_LENGTHS), list(_REAL_DTYPES)]

    def setup(self, n, dtype):
        self.x_real, self.x_complex = _rfft_inputs(n, dtype)

    def time_rfft(self, n, dtype):
        mkl_fft.rfft(self.x_real)

    def time_irfft(self, n, dtype):
        mkl_fft.irfft(self.x_complex, n=n)
"""Benchmarks for 2-D and N-D FFT operations using the mkl_fft root API."""

import numpy as np

import mkl_fft

_RNG_SEED = 42

# Dtype axes reused by several suites; each suite copies them into its own
# ``params`` list.
_ALL_DTYPES = ("float32", "float64", "complex64", "complex128")
_REAL_DTYPES = ("float32", "float64")
_DOUBLE_DTYPES = ("float64", "complex128")


def _make_input(rng, shape, dtype):
    """Draw a standard-normal array of *shape* and cast it to *dtype*.

    For a complex *dtype* a second, independent draw supplies the imaginary
    part, so the array carries non-zero imaginary values.
    """
    target = np.dtype(dtype)
    values = rng.standard_normal(shape)
    if target.kind == "c":
        values = values + 1j * rng.standard_normal(shape)
    return values.astype(target)


def _rfft_inputs(shape, dtype):
    """Return ``(real_array, half_spectrum)`` for the real-transform suites.

    ``real_array`` has the full *shape* and the given real *dtype*;
    ``half_spectrum`` has the same leading axes, a last axis of length
    ``shape[-1] // 2 + 1``, and the matching complex dtype.  A fresh
    generator seeded with ``_RNG_SEED`` is used, and the real array is drawn
    before the spectrum.
    """
    gen = np.random.default_rng(_RNG_SEED)
    half_shape = shape[:-1] + (shape[-1] // 2 + 1,)
    if dtype == "float32":
        complex_name = "complex64"
    else:
        complex_name = "complex128"
    real_array = gen.standard_normal(shape).astype(dtype)
    re_part = gen.standard_normal(half_shape)
    im_part = gen.standard_normal(half_shape)
    return real_array, (re_part + 1j * im_part).astype(complex_name)


# ---------------------------------------------------------------------------
# fft2 / ifft2, power-of-two shapes (square and non-square)
# ---------------------------------------------------------------------------


class TimeFFT2D:
    """Time ``mkl_fft.fft2`` and ``mkl_fft.ifft2`` on power-of-two shapes."""

    param_names = ["shape", "dtype"]
    params = [
        [
            (64, 64),
            (128, 128),
            (256, 256),
            (512, 512),
            # non-square
            (256, 128),
            (512, 256),
        ],
        list(_ALL_DTYPES),
    ]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fft2(self, shape, dtype):
        mkl_fft.fft2(self.x)

    def time_ifft2(self, shape, dtype):
        mkl_fft.ifft2(self.x)


# ---------------------------------------------------------------------------
# rfft2 / irfft2
# ---------------------------------------------------------------------------


class TimeRFFT2D:
    """Time ``mkl_fft.rfft2`` and ``mkl_fft.irfft2``."""

    param_names = ["shape", "dtype"]
    params = [
        [(64, 64), (128, 128), (256, 256), (512, 512)],
        list(_REAL_DTYPES),
    ]

    def setup(self, shape, dtype):
        # irfft2 consumes a half-spectrum of shape (M, N // 2 + 1).
        self.x_real, self.x_complex = _rfft_inputs(shape, dtype)

    def time_rfft2(self, shape, dtype):
        mkl_fft.rfft2(self.x_real)

    def time_irfft2(self, shape, dtype):
        mkl_fft.irfft2(self.x_complex, s=shape)


# ---------------------------------------------------------------------------
# fft2 / ifft2, non-power-of-two shapes
# ---------------------------------------------------------------------------


class TimeFFT2DNonPow2:
    """Time ``mkl_fft.fft2`` and ``mkl_fft.ifft2`` on non-power-of-two shapes."""

    param_names = ["shape", "dtype"]
    params = [
        [
            (96, 96),
            (100, 100),
            (270, 270),
            (500, 500),
            # non-square
            (100, 200),
        ],
        list(_DOUBLE_DTYPES),
    ]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fft2(self, shape, dtype):
        mkl_fft.fft2(self.x)

    def time_ifft2(self, shape, dtype):
        mkl_fft.ifft2(self.x)


# ---------------------------------------------------------------------------
# fftn / ifftn, 3-D power-of-two shapes (cubes and one non-cubic shape)
# ---------------------------------------------------------------------------


class TimeFFTnD:
    """Time ``mkl_fft.fftn`` and ``mkl_fft.ifftn`` on 3-D shapes."""

    param_names = ["shape", "dtype"]
    params = [
        [
            (16, 16, 16),
            (32, 32, 32),
            (64, 64, 64),
            # non-cubic
            (32, 64, 128),
        ],
        list(_ALL_DTYPES),
    ]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fftn(self, shape, dtype):
        mkl_fft.fftn(self.x)

    def time_ifftn(self, shape, dtype):
        mkl_fft.ifftn(self.x)


# ---------------------------------------------------------------------------
# rfftn / irfftn
# ---------------------------------------------------------------------------


class TimeRFFTnD:
    """Time ``mkl_fft.rfftn`` and ``mkl_fft.irfftn`` on 3-D shapes."""

    param_names = ["shape", "dtype"]
    params = [
        [(16, 16, 16), (32, 32, 32), (64, 64, 64)],
        list(_REAL_DTYPES),
    ]

    def setup(self, shape, dtype):
        # irfftn consumes a half-spectrum whose last axis is
        # shape[-1] // 2 + 1.
        self.x_real, self.x_complex = _rfft_inputs(shape, dtype)

    def time_rfftn(self, shape, dtype):
        mkl_fft.rfftn(self.x_real)

    def time_irfftn(self, shape, dtype):
        mkl_fft.irfftn(self.x_complex, s=shape)


# ---------------------------------------------------------------------------
# fftn / ifftn, 3-D non-power-of-two shapes
# ---------------------------------------------------------------------------


class TimeFFTnDNonPow2:
    """Time ``mkl_fft.fftn`` and ``mkl_fft.ifftn`` on non-power-of-two shapes."""

    param_names = ["shape", "dtype"]
    params = [
        [
            (24, 24, 24),
            (30, 30, 30),
            (50, 50, 50),
            # non-cubic
            (30, 40, 50),
        ],
        list(_DOUBLE_DTYPES),
    ]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fftn(self, shape, dtype):
        mkl_fft.fftn(self.x)

    def time_ifftn(self, shape, dtype):
        mkl_fft.ifftn(self.x)
"""Peak-memory benchmarks for FFT operations.

Measures peak RSS (resident set size) to detect memory regressions
in the mkl_fft root API across 1-D, 2-D, and 3-D transforms.

ASV's ``peakmem_*`` benchmarks report the high-water mark of the benchmark
process, and that mark includes whatever ``setup`` allocated.  Inputs are
therefore generated directly into their final buffer, so ``setup`` never
needs more memory than the input arrays themselves and the reported peak is
not set by input-construction temporaries.
"""

import numpy as np

import mkl_fft

_RNG_SEED = 42

# Real dtypes that ``Generator.standard_normal`` can write into via ``out=``.
_IN_PLACE_DTYPES = (np.dtype("float32"), np.dtype("float64"))


def _make_input(rng, shape, dtype):
    """Return a standard-normal array of *shape* and *dtype*.

    Parameters
    ----------
    rng : numpy.random.Generator
        Source of random numbers.
    shape : int or tuple of int
        Length (1-D) or full shape of the array.
    dtype : str or numpy.dtype
        Real or complex dtype of the result.

    Returns
    -------
    numpy.ndarray
        For float32/float64 and complex64/complex128 the array is filled in
        place without input-sized temporaries; a complex array is filled
        through a real view of its buffer, so real and imaginary parts are
        both random.  Any other dtype falls back to drawing float64 values
        and casting them.
    """
    dt = np.dtype(dtype)
    s = (shape,) if isinstance(shape, int) else tuple(shape)

    out = np.empty(s, dtype=dt)
    real_dt = out.real.dtype
    if out.ndim and real_dt in _IN_PLACE_DTYPES:
        # A complex buffer viewed as reals has a last axis twice as long,
        # holding interleaved (re, im) values.
        target = out.view(real_dt) if dt.kind == "c" else out
        rng.standard_normal(dtype=real_dt, out=target)
        return out

    # Fallback for dtypes that cannot be filled in place.
    del out
    if dt.kind == "c":
        return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt)
    return rng.standard_normal(s).astype(dt)


# ---------------------------------------------------------------------------
# 1-D fft / ifft
# ---------------------------------------------------------------------------


class PeakMemFFT1D:
    """Peak RSS of ``mkl_fft.fft`` / ``mkl_fft.ifft`` on 1-D input."""

    params = [
        [1024, 16384, 65536, 262144],
        ["float64", "complex128"],
    ]
    param_names = ["n", "dtype"]

    def setup(self, n, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype)

    def peakmem_fft(self, n, dtype):
        mkl_fft.fft(self.x)

    def peakmem_ifft(self, n, dtype):
        mkl_fft.ifft(self.x)


# ---------------------------------------------------------------------------
# 1-D rfft / irfft
# ---------------------------------------------------------------------------


class PeakMemRFFT1D:
    """Peak RSS of ``mkl_fft.rfft`` / ``mkl_fft.irfft`` on 1-D input."""

    params = [
        [1024, 16384, 65536, 262144],
        ["float32", "float64"],
    ]
    param_names = ["n", "dtype"]

    def setup(self, n, dtype):
        rng = np.random.default_rng(_RNG_SEED)
        cdtype = "complex64" if dtype == "float32" else "complex128"
        self.x_real = _make_input(rng, n, dtype)
        # irfft input: complex half-spectrum of length n // 2 + 1
        self.x_complex = _make_input(rng, n // 2 + 1, cdtype)

    def peakmem_rfft(self, n, dtype):
        mkl_fft.rfft(self.x_real)

    def peakmem_irfft(self, n, dtype):
        mkl_fft.irfft(self.x_complex, n=n)


# ---------------------------------------------------------------------------
# 2-D fft2 / ifft2
# ---------------------------------------------------------------------------


class PeakMemFFT2D:
    """Peak RSS of ``mkl_fft.fft2`` / ``mkl_fft.ifft2``."""

    params = [
        [(64, 64), (128, 128), (256, 256), (512, 512)],
        ["float64", "complex128"],
    ]
    param_names = ["shape", "dtype"]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def peakmem_fft2(self, shape, dtype):
        mkl_fft.fft2(self.x)

    def peakmem_ifft2(self, shape, dtype):
        mkl_fft.ifft2(self.x)


# ---------------------------------------------------------------------------
# N-D fftn / ifftn (3-D shapes)
# ---------------------------------------------------------------------------


class PeakMemFFTnD:
    """Peak RSS of ``mkl_fft.fftn`` / ``mkl_fft.ifftn`` on 3-D shapes."""

    params = [
        [(16, 16, 16), (32, 32, 32), (64, 64, 64)],
        ["float64", "complex128"],
    ]
    param_names = ["shape", "dtype"]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def peakmem_fftn(self, shape, dtype):
        mkl_fft.fftn(self.x)

    def peakmem_ifftn(self, shape, dtype):
        mkl_fft.ifftn(self.x)
"""Benchmarks for mkl_fft.interfaces.numpy_fft.

Functions exercised:
  fft    / ifft    — 1-D C2C
  rfft   / irfft   — 1-D R2C / C2R
  hfft   / ihfft   — 1-D Hermitian
  fft2   / ifft2   — 2-D C2C
  rfft2  / irfft2  — 2-D R2C / C2R
  fftn   / ifftn   — N-D C2C
  rfftn  / irfftn  — N-D R2C / C2R
"""

import numpy as np

from mkl_fft.interfaces import numpy_fft

_RNG_SEED = 42

# Parameter axes reused by several suites; each suite copies them into its
# own ``params`` list.
_LENGTHS_1D = (256, 1024, 16384)
_SHAPES_2D = ((64, 64), (256, 256), (512, 512))
_SHAPES_3D = ((16, 16, 16), (32, 32, 32), (64, 64, 64))
_REAL_DTYPES = ("float32", "float64")
_DOUBLE_DTYPES = ("float64", "complex128")


def _make_input(rng, shape, dtype):
    """Draw a standard-normal array of *shape* and cast it to *dtype*.

    *shape* is either an int (1-D length) or a tuple.  For a complex
    *dtype* a second, independent draw supplies the imaginary part.
    """
    target = np.dtype(dtype)
    dims = shape if not isinstance(shape, int) else (shape,)
    values = rng.standard_normal(dims)
    if target.kind == "c":
        values = values + 1j * rng.standard_normal(dims)
    return values.astype(target)


def _complex_name(dtype):
    """Return the complex dtype name paired with the real *dtype* name."""
    if dtype == "float32":
        return "complex64"
    return "complex128"


def _half_shape(shape):
    """Return the half-spectrum size for an int length or a shape tuple."""
    if isinstance(shape, int):
        return shape // 2 + 1
    return shape[:-1] + (shape[-1] // 2 + 1,)


def _rfft_inputs(shape, dtype):
    """Return ``(real_array, half_spectrum)`` for the R2C / C2R suites.

    A fresh generator seeded with ``_RNG_SEED`` is used, and the real array
    is drawn before the spectrum.
    """
    gen = np.random.default_rng(_RNG_SEED)
    half = _half_shape(shape)
    real_array = gen.standard_normal(shape).astype(dtype)
    re_part = gen.standard_normal(half)
    im_part = gen.standard_normal(half)
    spectrum = (re_part + 1j * im_part).astype(_complex_name(dtype))
    return real_array, spectrum


# ---------------------------------------------------------------------------
# fft / ifft — 1-D
# ---------------------------------------------------------------------------


class TimeC2C1D:
    """Time ``numpy_fft.fft`` and ``numpy_fft.ifft`` on 1-D input."""

    param_names = ["n", "dtype"]
    params = [
        list(_LENGTHS_1D),
        ["float32", "float64", "complex64", "complex128"],
    ]

    def setup(self, n, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype)

    def time_fft(self, n, dtype):
        numpy_fft.fft(self.x)

    def time_ifft(self, n, dtype):
        numpy_fft.ifft(self.x)


# ---------------------------------------------------------------------------
# rfft / irfft — 1-D
# ---------------------------------------------------------------------------


class TimeRC1D:
    """Time ``numpy_fft.rfft`` and ``numpy_fft.irfft`` on 1-D input."""

    param_names = ["n", "dtype"]
    params = [list(_LENGTHS_1D), list(_REAL_DTYPES)]

    def setup(self, n, dtype):
        self.x_real, self.x_complex = _rfft_inputs(n, dtype)

    def time_rfft(self, n, dtype):
        numpy_fft.rfft(self.x_real)

    def time_irfft(self, n, dtype):
        numpy_fft.irfft(self.x_complex, n=n)


# ---------------------------------------------------------------------------
# hfft / ihfft — 1-D Hermitian
#   hfft  is called with a complex array of length n//2+1 and n=n
#   ihfft is called with a real array of length n
# ---------------------------------------------------------------------------


class TimeHermitian1D:
    """Time ``numpy_fft.hfft`` and ``numpy_fft.ihfft`` on 1-D input.

    The *dtype* parameter names the real dtype: it is the dtype of the
    ``ihfft`` input, and the ``hfft`` input uses the matching complex dtype.
    """

    param_names = ["n", "dtype"]
    params = [list(_LENGTHS_1D), list(_REAL_DTYPES)]

    def setup(self, n, dtype):
        gen = np.random.default_rng(_RNG_SEED)
        half_len = n // 2 + 1
        # The complex hfft input is drawn first, then the real ihfft input.
        re_part = gen.standard_normal(half_len)
        im_part = gen.standard_normal(half_len)
        self.x_hfft = (re_part + 1j * im_part).astype(_complex_name(dtype))
        self.x_ihfft = gen.standard_normal(n).astype(dtype)

    def time_hfft(self, n, dtype):
        numpy_fft.hfft(self.x_hfft, n=n)

    def time_ihfft(self, n, dtype):
        numpy_fft.ihfft(self.x_ihfft)


# ---------------------------------------------------------------------------
# fft2 / ifft2 — 2-D
# ---------------------------------------------------------------------------


class TimeC2C2D:
    """Time ``numpy_fft.fft2`` and ``numpy_fft.ifft2``."""

    param_names = ["shape", "dtype"]
    params = [list(_SHAPES_2D), list(_DOUBLE_DTYPES)]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fft2(self, shape, dtype):
        numpy_fft.fft2(self.x)

    def time_ifft2(self, shape, dtype):
        numpy_fft.ifft2(self.x)


# ---------------------------------------------------------------------------
# rfft2 / irfft2 — 2-D
# ---------------------------------------------------------------------------


class TimeRC2D:
    """Time ``numpy_fft.rfft2`` and ``numpy_fft.irfft2``."""

    param_names = ["shape", "dtype"]
    params = [list(_SHAPES_2D), list(_REAL_DTYPES)]

    def setup(self, shape, dtype):
        self.x_real, self.x_complex = _rfft_inputs(shape, dtype)

    def time_rfft2(self, shape, dtype):
        numpy_fft.rfft2(self.x_real)

    def time_irfft2(self, shape, dtype):
        numpy_fft.irfft2(self.x_complex, s=shape)


# ---------------------------------------------------------------------------
# fftn / ifftn — N-D
# ---------------------------------------------------------------------------


class TimeCCND:
    """Time ``numpy_fft.fftn`` and ``numpy_fft.ifftn`` on 3-D shapes."""

    param_names = ["shape", "dtype"]
    params = [list(_SHAPES_3D), list(_DOUBLE_DTYPES)]

    def setup(self, shape, dtype):
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)

    def time_fftn(self, shape, dtype):
        numpy_fft.fftn(self.x)

    def time_ifftn(self, shape, dtype):
        numpy_fft.ifftn(self.x)


# ---------------------------------------------------------------------------
# rfftn / irfftn — N-D
# ---------------------------------------------------------------------------


class TimeRCND:
    """Time ``numpy_fft.rfftn`` and ``numpy_fft.irfftn`` on 3-D shapes."""

    param_names = ["shape", "dtype"]
    params = [list(_SHAPES_3D), list(_REAL_DTYPES)]

    def setup(self, shape, dtype):
        self.x_real, self.x_complex = _rfft_inputs(shape, dtype)

    def time_rfftn(self, shape, dtype):
        numpy_fft.rfftn(self.x_real)

    def time_irfftn(self, shape, dtype):
        numpy_fft.irfftn(self.x_complex, s=shape)
b/benchmarks/benchmarks/bench_scipy_fft.py @@ -0,0 +1,305 @@ +"""Benchmarks for mkl_fft.interfaces.scipy_fft. + +Covers every function exported by the interface: + fft / ifft — 1-D C2C + rfft / irfft — 1-D R2C / C2R + hfft / ihfft — 1-D Hermitian + fft2 / ifft2 — 2-D C2C + rfft2 / irfft2 — 2-D R2C / C2R + hfft2 / ihfft2 — 2-D Hermitian (scipy_fft only) + fftn / ifftn — N-D C2C + rfftn / irfftn — N-D R2C / C2R + hfftn / ihfftn — N-D Hermitian (scipy_fft only) +""" + +import numpy as np + +from mkl_fft.interfaces import scipy_fft + +_RNG_SEED = 42 + + +def _make_input(rng, shape, dtype): + """Return an array of *shape* and *dtype*. + + Complex dtypes get non-zero imaginary parts for a realistic signal. + *shape* may be an int (1-D) or a tuple. + """ + dt = np.dtype(dtype) + s = (shape,) if isinstance(shape, int) else shape + if dt.kind == "c": + return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) + return rng.standard_normal(s).astype(dt) + + +# --------------------------------------------------------------------------- +# 1-D complex-to-complex +# --------------------------------------------------------------------------- + + +class TimeC2C1D: + """scipy_fft.fft / ifft — 1-D.""" + + params = [ + [256, 1024, 16384], + ["float32", "float64", "complex64", "complex128"], + ] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype) + + def time_fft(self, n, dtype): + scipy_fft.fft(self.x) + + def time_ifft(self, n, dtype): + scipy_fft.ifft(self.x) + + +# --------------------------------------------------------------------------- +# 1-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class TimeRC1D: + """scipy_fft.rfft / irfft — 1-D.""" + + params = [ + [256, 1024, 16384], + ["float32", "float64"], + ] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype 
= "complex64" if dtype == "float32" else "complex128" + self.x_real = rng.standard_normal(n).astype(dtype) + self.x_complex = ( + rng.standard_normal(n // 2 + 1) + + 1j * rng.standard_normal(n // 2 + 1) + ).astype(cdtype) + + def time_rfft(self, n, dtype): + scipy_fft.rfft(self.x_real) + + def time_irfft(self, n, dtype): + scipy_fft.irfft(self.x_complex, n=n) + + +# --------------------------------------------------------------------------- +# 1-D Hermitian +# hfft: input complex length n//2+1 → output real length n +# ihfft: input real length n → output complex length n//2+1 +# --------------------------------------------------------------------------- + + +class TimeHermitian1D: + """scipy_fft.hfft / ihfft — 1-D Hermitian. + + *dtype* is the **output** dtype of hfft (real); the corresponding + complex input dtype is derived automatically. + """ + + params = [ + [256, 1024, 16384], + ["float32", "float64"], + ] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + self.x_hfft = ( + rng.standard_normal(n // 2 + 1) + + 1j * rng.standard_normal(n // 2 + 1) + ).astype(cdtype) + self.x_ihfft = rng.standard_normal(n).astype(dtype) + + def time_hfft(self, n, dtype): + scipy_fft.hfft(self.x_hfft, n=n) + + def time_ihfft(self, n, dtype): + scipy_fft.ihfft(self.x_ihfft) + + +# --------------------------------------------------------------------------- +# 2-D complex-to-complex +# --------------------------------------------------------------------------- + + +class TimeC2C2D: + """scipy_fft.fft2 / ifft2 — 2-D.""" + + params = [ + [(64, 64), (256, 256), (512, 512)], + ["float64", "complex128"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) + + def time_fft2(self, shape, dtype): + scipy_fft.fft2(self.x) + + def time_ifft2(self, shape, dtype): + 
scipy_fft.ifft2(self.x) + + +# --------------------------------------------------------------------------- +# 2-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class TimeRC2D: + """scipy_fft.rfft2 / irfft2 — 2-D.""" + + params = [ + [(64, 64), (256, 256), (512, 512)], + ["float32", "float64"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + half_shape = (shape[0], shape[1] // 2 + 1) + self.x_real = rng.standard_normal(shape).astype(dtype) + self.x_complex = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) + + def time_rfft2(self, shape, dtype): + scipy_fft.rfft2(self.x_real) + + def time_irfft2(self, shape, dtype): + scipy_fft.irfft2(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# 2-D Hermitian (scipy_fft only — not in numpy_fft interface) +# hfft2: input complex shape (M, N//2+1) → output real shape (M, N) +# ihfft2: input real shape (M, N) → output complex shape (M, N//2+1) +# --------------------------------------------------------------------------- + + +class TimeHermitian2D: + """scipy_fft.hfft2 / ihfft2 — 2-D Hermitian. + + *dtype* is the **output** dtype of hfft2 (real). 
+ """ + + params = [ + [(64, 64), (256, 256), (512, 512)], + ["float32", "float64"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + half_shape = (shape[0], shape[1] // 2 + 1) + self.x_hfft2 = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) + self.x_ihfft2 = rng.standard_normal(shape).astype(dtype) + + def time_hfft2(self, shape, dtype): + scipy_fft.hfft2(self.x_hfft2, s=shape) + + def time_ihfft2(self, shape, dtype): + scipy_fft.ihfft2(self.x_ihfft2) + + +# --------------------------------------------------------------------------- +# N-D complex-to-complex +# --------------------------------------------------------------------------- + + +class TimeCCND: + """scipy_fft.fftn / ifftn — N-D.""" + + params = [ + [(16, 16, 16), (32, 32, 32), (64, 64, 64)], + ["float64", "complex128"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) + + def time_fftn(self, shape, dtype): + scipy_fft.fftn(self.x) + + def time_ifftn(self, shape, dtype): + scipy_fft.ifftn(self.x) + + +# --------------------------------------------------------------------------- +# N-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class TimeRCND: + """scipy_fft.rfftn / irfftn — N-D.""" + + params = [ + [(16, 16, 16), (32, 32, 32), (64, 64, 64)], + ["float32", "float64"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + half_shape = shape[:-1] + (shape[-1] // 2 + 1,) + self.x_real = rng.standard_normal(shape).astype(dtype) + self.x_complex = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) 
+ + def time_rfftn(self, shape, dtype): + scipy_fft.rfftn(self.x_real) + + def time_irfftn(self, shape, dtype): + scipy_fft.irfftn(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# N-D Hermitian (scipy_fft only) +# hfftn: input complex, last axis length s[-1]//2+1 → output real shape s +# ihfftn: input real shape s → output complex, last axis length s[-1]//2+1 +# --------------------------------------------------------------------------- + + +class TimeHermitianND: + """scipy_fft.hfftn / ihfftn — N-D Hermitian. + + *dtype* is the **output** dtype of hfftn (real). + """ + + params = [ + [(16, 16, 16), (32, 32, 32), (64, 64, 64)], + ["float32", "float64"], + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + # hfftn input: last axis has length shape[-1]//2+1 + half_shape = shape[:-1] + (shape[-1] // 2 + 1,) + self.x_hfftn = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) + self.x_ihfftn = rng.standard_normal(shape).astype(dtype) + + def time_hfftn(self, shape, dtype): + scipy_fft.hfftn(self.x_hfftn, s=shape) + + def time_ihfftn(self, shape, dtype): + scipy_fft.ihfftn(self.x_ihfftn) From 2bb1af1a71bcc6533064ffbc7bb1da4ba8aac781 Mon Sep 17 00:00:00 2001 From: vchamarthi Date: Tue, 14 Apr 2026 11:38:30 -0500 Subject: [PATCH 2/5] copilot fixes --- benchmarks/benchmarks/__init__.py | 4 ++-- benchmarks/benchmarks/_utils.py | 16 ++++++++++++++++ benchmarks/benchmarks/bench_fft1d.py | 14 ++------------ benchmarks/benchmarks/bench_fftnd.py | 16 ++-------------- benchmarks/benchmarks/bench_memory.py | 10 ++-------- benchmarks/benchmarks/bench_numpy_fft.py | 17 +++-------------- benchmarks/benchmarks/bench_scipy_fft.py | 17 +++-------------- 7 files changed, 30 insertions(+), 64 deletions(-) create mode 100644 benchmarks/benchmarks/_utils.py diff 
--git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 2a37b3d3..b9630085 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -26,7 +26,7 @@ def _physical_cores(): - """Return physical core count from /proc/cpuinfo; fall back to os.cpu_count().""" + """Return physical core count from /proc/cpuinfo; fall back to 1 (conservative).""" try: with open("/proc/cpuinfo") as f: content = f.read() @@ -36,7 +36,7 @@ def _physical_cores(): ) return cpu_cores * sockets except Exception: - return os.cpu_count() or 1 + return 1 def _thread_count(): diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py new file mode 100644 index 00000000..747bfddc --- /dev/null +++ b/benchmarks/benchmarks/_utils.py @@ -0,0 +1,16 @@ +"""Shared utilities for mkl_fft benchmarks.""" + +import numpy as np + + +def _make_input(rng, shape, dtype): + """Return an array of *shape* and *dtype*. + + Complex dtypes get non-zero imaginary parts for a realistic signal. + *shape* may be an int (1-D) or a tuple. + """ + dt = np.dtype(dtype) + s = (shape,) if isinstance(shape, int) else shape + if dt.kind == "c": + return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) + return rng.standard_normal(s).astype(dt) diff --git a/benchmarks/benchmarks/bench_fft1d.py b/benchmarks/benchmarks/bench_fft1d.py index 62d0c40f..51c3d2fe 100644 --- a/benchmarks/benchmarks/bench_fft1d.py +++ b/benchmarks/benchmarks/bench_fft1d.py @@ -4,19 +4,9 @@ import mkl_fft -_RNG_SEED = 42 - - -def _make_input(rng, n, dtype): - """Return a 1-D array of length *n* with the given *dtype*. +from ._utils import _make_input - Complex dtypes are populated with non-zero imaginary parts so the - benchmark exercises a genuine complex transform path. 
- """ - dt = np.dtype(dtype) - if dt.kind == "c": - return (rng.standard_normal(n) + 1j * rng.standard_normal(n)).astype(dt) - return rng.standard_normal(n).astype(dt) +_RNG_SEED = 42 # --------------------------------------------------------------------------- diff --git a/benchmarks/benchmarks/bench_fftnd.py b/benchmarks/benchmarks/bench_fftnd.py index b5503031..2fa2d224 100644 --- a/benchmarks/benchmarks/bench_fftnd.py +++ b/benchmarks/benchmarks/bench_fftnd.py @@ -4,21 +4,9 @@ import mkl_fft -_RNG_SEED = 42 - +from ._utils import _make_input -def _make_input(rng, shape, dtype): - """Return an array of the given *shape* and *dtype*. - - Complex dtypes are populated with non-zero imaginary parts so the - benchmark exercises a genuine complex transform path. - """ - dt = np.dtype(dtype) - if dt.kind == "c": - return ( - rng.standard_normal(shape) + 1j * rng.standard_normal(shape) - ).astype(dt) - return rng.standard_normal(shape).astype(dt) +_RNG_SEED = 42 # --------------------------------------------------------------------------- diff --git a/benchmarks/benchmarks/bench_memory.py b/benchmarks/benchmarks/bench_memory.py index 1efe3ccd..6d7b1b3d 100644 --- a/benchmarks/benchmarks/bench_memory.py +++ b/benchmarks/benchmarks/bench_memory.py @@ -8,15 +8,9 @@ import mkl_fft -_RNG_SEED = 42 - +from ._utils import _make_input -def _make_input(rng, shape, dtype): - dt = np.dtype(dtype) - s = (shape,) if isinstance(shape, int) else shape - if dt.kind == "c": - return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) - return rng.standard_normal(s).astype(dt) +_RNG_SEED = 42 # --------------------------------------------------------------------------- diff --git a/benchmarks/benchmarks/bench_numpy_fft.py b/benchmarks/benchmarks/bench_numpy_fft.py index f1373d8d..49e32a44 100644 --- a/benchmarks/benchmarks/bench_numpy_fft.py +++ b/benchmarks/benchmarks/bench_numpy_fft.py @@ -14,20 +14,9 @@ from mkl_fft.interfaces import numpy_fft -_RNG_SEED = 42 - - -def 
_make_input(rng, shape, dtype): - """Return an array of *shape* and *dtype*. +from ._utils import _make_input - Complex dtypes get non-zero imaginary parts for a realistic signal. - *shape* may be an int (1-D) or a tuple. - """ - dt = np.dtype(dtype) - s = (shape,) if isinstance(shape, int) else shape - if dt.kind == "c": - return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) - return rng.standard_normal(s).astype(dt) +_RNG_SEED = 42 # --------------------------------------------------------------------------- @@ -182,7 +171,7 @@ def time_irfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeCCND: +class TimeC2CND: """numpy_fft.fftn / ifftn — N-D.""" params = [ diff --git a/benchmarks/benchmarks/bench_scipy_fft.py b/benchmarks/benchmarks/bench_scipy_fft.py index ca79ea18..9024387d 100644 --- a/benchmarks/benchmarks/bench_scipy_fft.py +++ b/benchmarks/benchmarks/bench_scipy_fft.py @@ -16,20 +16,9 @@ from mkl_fft.interfaces import scipy_fft -_RNG_SEED = 42 - - -def _make_input(rng, shape, dtype): - """Return an array of *shape* and *dtype*. +from ._utils import _make_input - Complex dtypes get non-zero imaginary parts for a realistic signal. - *shape* may be an int (1-D) or a tuple. 
- """ - dt = np.dtype(dtype) - s = (shape,) if isinstance(shape, int) else shape - if dt.kind == "c": - return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) - return rng.standard_normal(s).astype(dt) +_RNG_SEED = 42 # --------------------------------------------------------------------------- @@ -218,7 +207,7 @@ def time_ihfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeCCND: +class TimeC2CND: """scipy_fft.fftn / ifftn — N-D.""" params = [ From a0d94630030577c66a8382cb2fbdacf927da4e38 Mon Sep 17 00:00:00 2001 From: vchamarthi Date: Mon, 18 May 2026 20:57:14 -0500 Subject: [PATCH 3/5] Fix PR comments --- benchmarks/README.md | 86 +++---------- benchmarks/benchmarks/__init__.py | 2 - benchmarks/benchmarks/_utils.py | 49 +++++++ benchmarks/benchmarks/bench_fft1d.py | 62 ++------- benchmarks/benchmarks/bench_fftnd.py | 99 ++++---------- benchmarks/benchmarks/bench_memory.py | 59 +++------ benchmarks/benchmarks/bench_numpy_fft.py | 116 ++++------------- benchmarks/benchmarks/bench_scipy_fft.py | 157 +++++------------------ 8 files changed, 175 insertions(+), 455 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 9d9c2bac..8689ee1b 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -3,83 +3,27 @@ Performance benchmarks for [mkl_fft](https://github.com/IntelPython/mkl_fft) using [Airspeed Velocity (ASV)](https://asv.readthedocs.io/en/stable/). 
-## Structure - -``` -benchmarks/ -├── asv.conf.json # ASV configuration (CI-only, no env/build settings) -└── benchmarks/ - ├── __init__.py # Thread pinning (MKL_NUM_THREADS) - ├── bench_fft1d.py # mkl_fft root API — 1-D transforms - ├── bench_fftnd.py # mkl_fft root API — 2-D and N-D transforms - ├── bench_numpy_fft.py # mkl_fft.interfaces.numpy_fft — full coverage - ├── bench_scipy_fft.py # mkl_fft.interfaces.scipy_fft — full coverage - └── bench_memory.py # Peak RSS memory benchmarks -``` - ### Coverage -| File | API | Transforms | -|------|-----|-----------| -| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` — power-of-two and non-power-of-two | -| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | -| `bench_numpy_fft.py` | `mkl_fft.interfaces.numpy_fft` | All exported functions including Hermitian (`hfft`, `ihfft`) | -| `bench_scipy_fft.py` | `mkl_fft.interfaces.scipy_fft` | All exported functions including Hermitian 2-D/N-D (`hfft2`, `hfftn`) | -| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | - -Benchmarks cover float32, float64, complex64, complex128 dtypes, power-of-two -and non-power-of-two sizes, square and non-square/non-cubic shapes. 
+| File | API | Transforms | Dtypes | Sizes/Shapes | +|------|-----|-----------|--------|-------------| +| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` | float32, float64, complex64, complex128 | power-of-two and non-power-of-two | +| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | float32, float64, complex64, complex128 | square and non-square/non-cubic | +| `bench_numpy_fft.py` | `mkl_fft.interfaces.numpy_fft` | All exported functions including Hermitian (`hfft`, `ihfft`) | float32, float64, complex64, complex128 | power-of-two | +| `bench_scipy_fft.py` | `mkl_fft.interfaces.scipy_fft` | All exported functions including Hermitian 2-D/N-D (`hfft2`, `hfftn`) | float32, float64, complex64, complex128 | square and cubic | +| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | float32, float64, complex128 | power-of-two | ## Threading -`__init__.py` pins `MKL_NUM_THREADS` to **4** when the machine has 4 or more -physical cores, or falls back to **1** (single-threaded) otherwise. This keeps -results comparable across CI machines in the shared pool regardless of their -total core count. Physical cores are read from `/proc/cpuinfo` — hyperthreads -are excluded per MKL recommendation. - -Override by setting `MKL_NUM_THREADS` in the environment before running ASV. - -## Running Locally - -> Benchmarks are designed for CI. Local runs require `mkl_fft` to be installed -> in the active Python environment. Benchmarks that exercise SciPy interface -> (`bench_scipy_fft.py`) also require SciPy: -> -> ```bash -> python -m pip install -e .. -> python -m pip install scipy -> ``` - -```bash -cd benchmarks/ - -# Quick smoke-run against the current working tree (no env management) -asv run --python=same --quick --show-stderr HEAD^! - -# Run a specific benchmark file -asv run --python=same --quick --bench bench_fft1d HEAD^! 
- -# View and publish results -asv publish # generates .asv/html/ -asv preview # serves at http://localhost:8080 -``` - -## CI - -Benchmarks run automatically in Jenkins on the `auto-bench` node via -`benchmarkHelper.performanceTest()` from the shared library. The pipeline uses: +Set `MKL_NUM_THREADS` in the environment before running ASV to control the +thread count used by MKL: ```bash -asv run --environment existing: --set-commit-hash $COMMIT_SHA +MKL_NUM_THREADS=8 asv run --python=same --quick HEAD^! ``` -This bypasses ASV environment management entirely — mkl_fft is pre-installed -into a conda environment by the pipeline before ASV is invoked. - -- **Nightly (prod):** results are published to the benchmark dashboard -- **PR (dev):** `asv compare` output is evaluated for regressions; a 30% slowdown - triggers a failed GitHub commit status - -Results are stored in the `mkl_fft-results` branch of -`intel-innersource/libraries.python.intel.infrastructure.benchmark-dashboards`. +If `MKL_NUM_THREADS` is not set, `__init__.py` applies a default: **4** threads +when the machine has 4 or more physical cores, or **1** (single-threaded) +otherwise. This keeps results comparable across CI machines in the shared pool +regardless of their total core count. Physical cores are read from +`/proc/cpuinfo` — hyperthreads are excluded per MKL recommendation. 
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index b9630085..8c294d8c 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -46,5 +46,3 @@ def _thread_count(): _THREADS = os.environ.get("MKL_NUM_THREADS", _thread_count()) os.environ["MKL_NUM_THREADS"] = _THREADS -os.environ.setdefault("OMP_NUM_THREADS", _THREADS) -os.environ.setdefault("OPENBLAS_NUM_THREADS", _THREADS) diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index 747bfddc..f21ff827 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -2,6 +2,8 @@ import numpy as np +_RNG_SEED = 42 + def _make_input(rng, shape, dtype): """Return an array of *shape* and *dtype*. @@ -14,3 +16,50 @@ def _make_input(rng, shape, dtype): if dt.kind == "c": return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) return rng.standard_normal(s).astype(dt) + + +class BenchC2C: + """Base setup for complex-to-complex benchmarks. + + Subclasses define params, param_names, and time_* / peakmem_* methods. + """ + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + self.x = _make_input(rng, shape, dtype) + + +# dtype axes +_DTYPES_ALL = ["float32", "float64", "complex64", "complex128"] +_DTYPES_REAL = ["float32", "float64"] +_DTYPES_REDUCED = ["float64", "complex128"] + +# shape/size axes shared across multiple files +_SHAPES_2D = [(64, 64), (128, 128), (256, 256), (512, 512)] +_SHAPES_2D_IFACE = [(64, 64), (256, 256), (512, 512)] +_SHAPES_3D = [(16, 16, 16), (32, 32, 32), (64, 64, 64)] + + +class BenchR2C: + """Base setup for real-to-complex / complex-to-real and Hermitian benchmarks. + + Prepares: + self.x_real — real array of full shape (rfft / ihfft input) + self.x_complex — complex half-spectrum array (irfft / hfft input) + + Works for 1-D (shape as int) and multi-D (shape as tuple). + Subclasses define params, param_names, and time_* / peakmem_* methods. 
+ """ + + def setup(self, shape, dtype): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + if isinstance(shape, int): + half_shape = shape // 2 + 1 + else: + half_shape = shape[:-1] + (shape[-1] // 2 + 1,) + self.x_real = rng.standard_normal(shape).astype(dtype) + self.x_complex = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) diff --git a/benchmarks/benchmarks/bench_fft1d.py b/benchmarks/benchmarks/bench_fft1d.py index 51c3d2fe..851b905d 100644 --- a/benchmarks/benchmarks/bench_fft1d.py +++ b/benchmarks/benchmarks/bench_fft1d.py @@ -1,12 +1,11 @@ """Benchmarks for 1-D FFT operations using the mkl_fft root API.""" -import numpy as np - import mkl_fft -from ._utils import _make_input +from ._utils import _DTYPES_ALL, _DTYPES_REAL, BenchC2C, BenchR2C -_RNG_SEED = 42 +_SIZES_POW2 = [64, 256, 1024, 4096, 16384, 65536] +_SIZES_NONPOW2 = [127, 509, 1000, 4001, 10007] # --------------------------------------------------------------------------- @@ -14,19 +13,12 @@ # --------------------------------------------------------------------------- -class TimeFFT1D: +class BenchFFT1D(BenchC2C): """Forward and inverse complex FFT — power-of-two sizes.""" - params = [ - [64, 256, 1024, 4096, 16384, 65536], - ["float32", "float64", "complex64", "complex128"], - ] + params = [_SIZES_POW2, _DTYPES_ALL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, n, dtype) - def time_fft(self, n, dtype): mkl_fft.fft(self.x) @@ -39,25 +31,12 @@ def time_ifft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeRFFT1D: +class BenchRFFT1D(BenchR2C): """Forward rfft and inverse irfft — power-of-two sizes.""" - params = [ - [64, 256, 1024, 4096, 16384, 65536], - ["float32", "float64"], - ] + params = [_SIZES_POW2, _DTYPES_REAL] param_names = ["n", "dtype"] - def 
setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - self.x_real = rng.standard_normal(n).astype(dtype) - # irfft input: complex half-spectrum of length n//2+1 - self.x_complex = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - def time_rfft(self, n, dtype): mkl_fft.rfft(self.x_real) @@ -70,23 +49,16 @@ def time_irfft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeFFT1DNonPow2: +class BenchFFT1DNonPow2(BenchC2C): """Forward and inverse complex FFT — non-power-of-two sizes. MKL uses a different code path for non-power-of-two transforms; this suite catches regressions in that path. """ - params = [ - [127, 509, 1000, 4001, 10007], - ["float64", "complex128", "complex64"], - ] + params = [_SIZES_NONPOW2, ["float64", "complex128", "complex64"]] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, n, dtype) - def time_fft(self, n, dtype): mkl_fft.fft(self.x) @@ -99,24 +71,12 @@ def time_ifft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeRFFT1DNonPow2: +class BenchRFFT1DNonPow2(BenchR2C): """Forward rfft and inverse irfft — non-power-of-two sizes.""" - params = [ - [127, 509, 1000, 4001, 10007], - ["float32", "float64"], - ] + params = [_SIZES_NONPOW2, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - self.x_real = rng.standard_normal(n).astype(dtype) - self.x_complex = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - def time_rfft(self, n, dtype): mkl_fft.rfft(self.x_real) diff --git a/benchmarks/benchmarks/bench_fftnd.py b/benchmarks/benchmarks/bench_fftnd.py index 2fa2d224..2a69477e 
100644 --- a/benchmarks/benchmarks/bench_fftnd.py +++ b/benchmarks/benchmarks/bench_fftnd.py @@ -1,39 +1,31 @@ """Benchmarks for 2-D and N-D FFT operations using the mkl_fft root API.""" -import numpy as np - import mkl_fft -from ._utils import _make_input - -_RNG_SEED = 42 - +from ._utils import ( + _DTYPES_ALL, + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) # --------------------------------------------------------------------------- # 2-D complex-to-complex (power-of-two, square + non-square) # --------------------------------------------------------------------------- -class TimeFFT2D: +class BenchFFT2D(BenchC2C): """Forward and inverse 2-D FFT — square and non-square shapes.""" params = [ - [ - (64, 64), - (128, 128), - (256, 256), - (512, 512), - (256, 128), - (512, 256), # non-square - ], - ["float32", "float64", "complex64", "complex128"], + _SHAPES_2D + [(256, 128), (512, 256)], + _DTYPES_ALL, ] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, shape, dtype) - def time_fft2(self, shape, dtype): mkl_fft.fft2(self.x) @@ -46,26 +38,12 @@ def time_ifft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeRFFT2D: +class BenchRFFT2D(BenchR2C): """Forward rfft2 and inverse irfft2.""" - params = [ - [(64, 64), (128, 128), (256, 256), (512, 512)], - ["float32", "float64"], - ] + params = [_SHAPES_2D, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = (shape[0], shape[1] // 2 + 1) - self.x_real = rng.standard_normal(shape).astype(dtype) - # irfft2 input: complex half-spectrum — shape (M, N//2+1) - self.x_complex = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfft2(self, shape, 
dtype): mkl_fft.rfft2(self.x_real) @@ -78,7 +56,7 @@ def time_irfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeFFT2DNonPow2: +class BenchFFT2DNonPow2(BenchC2C): """Forward and inverse 2-D FFT — non-power-of-two sizes.""" params = [ @@ -89,14 +67,10 @@ class TimeFFT2DNonPow2: (500, 500), (100, 200), # non-square non-pow2 ], - ["float64", "complex128"], + _DTYPES_REDUCED, ] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, shape, dtype) - def time_fft2(self, shape, dtype): mkl_fft.fft2(self.x) @@ -109,24 +83,15 @@ def time_ifft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeFFTnD: +class BenchFFTnD(BenchC2C): """Forward and inverse N-D FFT.""" params = [ - [ - (16, 16, 16), - (32, 32, 32), - (64, 64, 64), - (32, 64, 128), # non-cubic - ], - ["float32", "float64", "complex64", "complex128"], + _SHAPES_3D + [(32, 64, 128)], + _DTYPES_ALL, ] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, shape, dtype) - def time_fftn(self, shape, dtype): mkl_fft.fftn(self.x) @@ -139,26 +104,12 @@ def time_ifftn(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeRFFTnD: +class BenchRFFTnD(BenchR2C): """Forward rfftn and inverse irfftn.""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float32", "float64"], - ] + params = [_SHAPES_3D, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - # irfftn input: complex half-spectrum — last axis is shape[-1]//2+1 - half_shape = shape[:-1] + (shape[-1] // 2 + 1,) - self.x_real = rng.standard_normal(shape).astype(dtype) - self.x_complex = ( - 
rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfftn(self, shape, dtype): mkl_fft.rfftn(self.x_real) @@ -171,7 +122,7 @@ def time_irfftn(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeFFTnDNonPow2: +class BenchFFTnDNonPow2(BenchC2C): """Forward and inverse N-D FFT — non-power-of-two sizes.""" params = [ @@ -181,14 +132,10 @@ class TimeFFTnDNonPow2: (50, 50, 50), (30, 40, 50), # non-cubic non-pow2 ], - ["float64", "complex128"], + _DTYPES_REDUCED, ] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - self.x = _make_input(rng, shape, dtype) - def time_fftn(self, shape, dtype): mkl_fft.fftn(self.x) diff --git a/benchmarks/benchmarks/bench_memory.py b/benchmarks/benchmarks/bench_memory.py index 6d7b1b3d..46d7176c 100644 --- a/benchmarks/benchmarks/bench_memory.py +++ b/benchmarks/benchmarks/bench_memory.py @@ -4,13 +4,18 @@ in the mkl_fft root API across 1-D, 2-D, and 3-D transforms. 
""" -import numpy as np - import mkl_fft -from ._utils import _make_input +from ._utils import ( + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) -_RNG_SEED = 42 +_SIZES_1D = [1024, 16384, 65536, 262144] # --------------------------------------------------------------------------- @@ -18,18 +23,12 @@ # --------------------------------------------------------------------------- -class PeakMemFFT1D: +class PeakMemFFT1D(BenchC2C): """Peak RSS for 1-D complex FFT.""" - params = [ - [1024, 16384, 65536, 262144], - ["float64", "complex128"], - ] + params = [_SIZES_1D, _DTYPES_REDUCED] param_names = ["n", "dtype"] - def setup(self, n, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype) - def peakmem_fft(self, n, dtype): mkl_fft.fft(self.x) @@ -42,24 +41,12 @@ def peakmem_ifft(self, n, dtype): # --------------------------------------------------------------------------- -class PeakMemRFFT1D: +class PeakMemRFFT1D(BenchR2C): """Peak RSS for 1-D real FFT (forward and inverse).""" - params = [ - [1024, 16384, 65536, 262144], - ["float32", "float64"], - ] + params = [_SIZES_1D, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - self.x_real = rng.standard_normal(n).astype(dtype) - self.x_complex = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - def peakmem_rfft(self, n, dtype): mkl_fft.rfft(self.x_real) @@ -72,18 +59,12 @@ def peakmem_irfft(self, n, dtype): # --------------------------------------------------------------------------- -class PeakMemFFT2D: +class PeakMemFFT2D(BenchC2C): """Peak RSS for 2-D complex FFT.""" - params = [ - [(64, 64), (128, 128), (256, 256), (512, 512)], - ["float64", "complex128"], - ] + params = [_SHAPES_2D, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = 
_make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def peakmem_fft2(self, shape, dtype): mkl_fft.fft2(self.x) @@ -96,18 +77,12 @@ def peakmem_ifft2(self, shape, dtype): # --------------------------------------------------------------------------- -class PeakMemFFTnD: +class PeakMemFFTnD(BenchC2C): """Peak RSS for N-D complex FFT (3-D shapes).""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float64", "complex128"], - ] + params = [_SHAPES_3D, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def peakmem_fftn(self, shape, dtype): mkl_fft.fftn(self.x) diff --git a/benchmarks/benchmarks/bench_numpy_fft.py b/benchmarks/benchmarks/bench_numpy_fft.py index 49e32a44..5bb7f72b 100644 --- a/benchmarks/benchmarks/bench_numpy_fft.py +++ b/benchmarks/benchmarks/bench_numpy_fft.py @@ -10,13 +10,19 @@ rfftn / irfftn — N-D R2C / C2R """ -import numpy as np - from mkl_fft.interfaces import numpy_fft -from ._utils import _make_input +from ._utils import ( + _DTYPES_ALL, + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D_IFACE, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) -_RNG_SEED = 42 +_SIZES_1D = [256, 1024, 16384] # --------------------------------------------------------------------------- @@ -24,18 +30,12 @@ # --------------------------------------------------------------------------- -class TimeC2C1D: +class BenchC2C1D(BenchC2C): """numpy_fft.fft / ifft — 1-D.""" - params = [ - [256, 1024, 16384], - ["float32", "float64", "complex64", "complex128"], - ] + params = [_SIZES_1D, _DTYPES_ALL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype) - def time_fft(self, n, dtype): numpy_fft.fft(self.x) @@ -48,24 +48,12 @@ def time_ifft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeRC1D: +class BenchRC1D(BenchR2C): 
"""numpy_fft.rfft / irfft — 1-D.""" - params = [ - [256, 1024, 16384], - ["float32", "float64"], - ] + params = [_SIZES_1D, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - self.x_real = rng.standard_normal(n).astype(dtype) - self.x_complex = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - def time_rfft(self, n, dtype): numpy_fft.rfft(self.x_real) @@ -80,35 +68,21 @@ def time_irfft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeHermitian1D: +class BenchHermitian1D(BenchR2C): """numpy_fft.hfft / ihfft — 1-D Hermitian. *dtype* is the **output** dtype of hfft (real); the inverse ihfft takes the same real input and produces the corresponding complex output. """ - params = [ - [256, 1024, 16384], - ["float32", "float64"], - ] + params = [_SIZES_1D, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - # hfft input: complex half-spectrum of length n//2+1 - self.x_hfft = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - # ihfft input: real signal of length n - self.x_ihfft = rng.standard_normal(n).astype(dtype) - def time_hfft(self, n, dtype): - numpy_fft.hfft(self.x_hfft, n=n) + numpy_fft.hfft(self.x_complex, n=n) def time_ihfft(self, n, dtype): - numpy_fft.ihfft(self.x_ihfft) + numpy_fft.ihfft(self.x_real) # --------------------------------------------------------------------------- @@ -116,18 +90,12 @@ def time_ihfft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeC2C2D: +class BenchC2C2D(BenchC2C): """numpy_fft.fft2 / ifft2 — 2-D.""" - params = [ - [(64, 64), (256, 256), (512, 512)], - ["float64", 
"complex128"], - ] + params = [_SHAPES_2D_IFACE, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def time_fft2(self, shape, dtype): numpy_fft.fft2(self.x) @@ -140,25 +108,12 @@ def time_ifft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeRC2D: +class BenchRC2D(BenchR2C): """numpy_fft.rfft2 / irfft2 — 2-D.""" - params = [ - [(64, 64), (256, 256), (512, 512)], - ["float32", "float64"], - ] + params = [_SHAPES_2D_IFACE, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = (shape[0], shape[1] // 2 + 1) - self.x_real = rng.standard_normal(shape).astype(dtype) - self.x_complex = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfft2(self, shape, dtype): numpy_fft.rfft2(self.x_real) @@ -171,18 +126,12 @@ def time_irfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeC2CND: +class BenchC2CND(BenchC2C): """numpy_fft.fftn / ifftn — N-D.""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float64", "complex128"], - ] + params = [_SHAPES_3D, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def time_fftn(self, shape, dtype): numpy_fft.fftn(self.x) @@ -195,25 +144,12 @@ def time_ifftn(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeRCND: +class BenchRCND(BenchR2C): """numpy_fft.rfftn / irfftn — N-D.""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float32", "float64"], - ] + params = [_SHAPES_3D, _DTYPES_REAL] param_names = ["shape", "dtype"] - def 
setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = shape[:-1] + (shape[-1] // 2 + 1,) - self.x_real = rng.standard_normal(shape).astype(dtype) - self.x_complex = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfftn(self, shape, dtype): numpy_fft.rfftn(self.x_real) diff --git a/benchmarks/benchmarks/bench_scipy_fft.py b/benchmarks/benchmarks/bench_scipy_fft.py index 9024387d..e397cde5 100644 --- a/benchmarks/benchmarks/bench_scipy_fft.py +++ b/benchmarks/benchmarks/bench_scipy_fft.py @@ -12,13 +12,19 @@ hfftn / ihfftn — N-D Hermitian (scipy_fft only) """ -import numpy as np - from mkl_fft.interfaces import scipy_fft -from ._utils import _make_input +from ._utils import ( + _DTYPES_ALL, + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D_IFACE, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) -_RNG_SEED = 42 +_SIZES_1D = [256, 1024, 16384] # --------------------------------------------------------------------------- @@ -26,18 +32,12 @@ # --------------------------------------------------------------------------- -class TimeC2C1D: +class BenchC2C1D(BenchC2C): """scipy_fft.fft / ifft — 1-D.""" - params = [ - [256, 1024, 16384], - ["float32", "float64", "complex64", "complex128"], - ] + params = [_SIZES_1D, _DTYPES_ALL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), n, dtype) - def time_fft(self, n, dtype): scipy_fft.fft(self.x) @@ -50,24 +50,12 @@ def time_ifft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeRC1D: +class BenchRC1D(BenchR2C): """scipy_fft.rfft / irfft — 1-D.""" - params = [ - [256, 1024, 16384], - ["float32", "float64"], - ] + params = [_SIZES_1D, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype 
== "float32" else "complex128" - self.x_real = rng.standard_normal(n).astype(dtype) - self.x_complex = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - def time_rfft(self, n, dtype): scipy_fft.rfft(self.x_real) @@ -82,33 +70,21 @@ def time_irfft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeHermitian1D: +class BenchHermitian1D(BenchR2C): """scipy_fft.hfft / ihfft — 1-D Hermitian. *dtype* is the **output** dtype of hfft (real); the corresponding complex input dtype is derived automatically. """ - params = [ - [256, 1024, 16384], - ["float32", "float64"], - ] + params = [_SIZES_1D, _DTYPES_REAL] param_names = ["n", "dtype"] - def setup(self, n, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - self.x_hfft = ( - rng.standard_normal(n // 2 + 1) - + 1j * rng.standard_normal(n // 2 + 1) - ).astype(cdtype) - self.x_ihfft = rng.standard_normal(n).astype(dtype) - def time_hfft(self, n, dtype): - scipy_fft.hfft(self.x_hfft, n=n) + scipy_fft.hfft(self.x_complex, n=n) def time_ihfft(self, n, dtype): - scipy_fft.ihfft(self.x_ihfft) + scipy_fft.ihfft(self.x_real) # --------------------------------------------------------------------------- @@ -116,18 +92,12 @@ def time_ihfft(self, n, dtype): # --------------------------------------------------------------------------- -class TimeC2C2D: +class BenchC2C2D(BenchC2C): """scipy_fft.fft2 / ifft2 — 2-D.""" - params = [ - [(64, 64), (256, 256), (512, 512)], - ["float64", "complex128"], - ] + params = [_SHAPES_2D_IFACE, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def time_fft2(self, shape, dtype): scipy_fft.fft2(self.x) @@ -140,25 +110,12 @@ def time_ifft2(self, shape, dtype): # --------------------------------------------------------------------------- 
-class TimeRC2D: +class BenchRC2D(BenchR2C): """scipy_fft.rfft2 / irfft2 — 2-D.""" - params = [ - [(64, 64), (256, 256), (512, 512)], - ["float32", "float64"], - ] + params = [_SHAPES_2D_IFACE, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = (shape[0], shape[1] // 2 + 1) - self.x_real = rng.standard_normal(shape).astype(dtype) - self.x_complex = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfft2(self, shape, dtype): scipy_fft.rfft2(self.x_real) @@ -173,33 +130,20 @@ def time_irfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeHermitian2D: +class BenchHermitian2D(BenchR2C): """scipy_fft.hfft2 / ihfft2 — 2-D Hermitian. *dtype* is the **output** dtype of hfft2 (real). """ - params = [ - [(64, 64), (256, 256), (512, 512)], - ["float32", "float64"], - ] + params = [_SHAPES_2D_IFACE, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = (shape[0], shape[1] // 2 + 1) - self.x_hfft2 = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - self.x_ihfft2 = rng.standard_normal(shape).astype(dtype) - def time_hfft2(self, shape, dtype): - scipy_fft.hfft2(self.x_hfft2, s=shape) + scipy_fft.hfft2(self.x_complex, s=shape) def time_ihfft2(self, shape, dtype): - scipy_fft.ihfft2(self.x_ihfft2) + scipy_fft.ihfft2(self.x_real) # --------------------------------------------------------------------------- @@ -207,18 +151,12 @@ def time_ihfft2(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeC2CND: +class BenchC2CND(BenchC2C): """scipy_fft.fftn / ifftn — N-D.""" - params 
= [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float64", "complex128"], - ] + params = [_SHAPES_3D, _DTYPES_REDUCED] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype) - def time_fftn(self, shape, dtype): scipy_fft.fftn(self.x) @@ -231,25 +169,12 @@ def time_ifftn(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeRCND: +class BenchRCND(BenchR2C): """scipy_fft.rfftn / irfftn — N-D.""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float32", "float64"], - ] + params = [_SHAPES_3D, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - half_shape = shape[:-1] + (shape[-1] // 2 + 1,) - self.x_real = rng.standard_normal(shape).astype(dtype) - self.x_complex = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - def time_rfftn(self, shape, dtype): scipy_fft.rfftn(self.x_real) @@ -264,31 +189,17 @@ def time_irfftn(self, shape, dtype): # --------------------------------------------------------------------------- -class TimeHermitianND: +class BenchHermitianND(BenchR2C): """scipy_fft.hfftn / ihfftn — N-D Hermitian. *dtype* is the **output** dtype of hfftn (real). 
""" - params = [ - [(16, 16, 16), (32, 32, 32), (64, 64, 64)], - ["float32", "float64"], - ] + params = [_SHAPES_3D, _DTYPES_REAL] param_names = ["shape", "dtype"] - def setup(self, shape, dtype): - rng = np.random.default_rng(_RNG_SEED) - cdtype = "complex64" if dtype == "float32" else "complex128" - # hfftn input: last axis has length shape[-1]//2+1 - half_shape = shape[:-1] + (shape[-1] // 2 + 1,) - self.x_hfftn = ( - rng.standard_normal(half_shape) - + 1j * rng.standard_normal(half_shape) - ).astype(cdtype) - self.x_ihfftn = rng.standard_normal(shape).astype(dtype) - def time_hfftn(self, shape, dtype): - scipy_fft.hfftn(self.x_hfftn, s=shape) + scipy_fft.hfftn(self.x_complex, s=shape) def time_ihfftn(self, shape, dtype): - scipy_fft.ihfftn(self.x_ihfftn) + scipy_fft.ihfftn(self.x_real) From 50e44f17ebf225002e7fbc1314a70975306aeeea Mon Sep 17 00:00:00 2001 From: vchamarthi Date: Wed, 20 May 2026 13:01:27 -0500 Subject: [PATCH 4/5] fix PR comments --- benchmarks/README.md | 32 +++++++++++++++++++++++++++++-- benchmarks/benchmarks/__init__.py | 16 ++++------------ 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 8689ee1b..c4d74126 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -25,5 +25,33 @@ MKL_NUM_THREADS=8 asv run --python=same --quick HEAD^! If `MKL_NUM_THREADS` is not set, `__init__.py` applies a default: **4** threads when the machine has 4 or more physical cores, or **1** (single-threaded) otherwise. This keeps results comparable across CI machines in the shared pool -regardless of their total core count. Physical cores are read from -`/proc/cpuinfo` — hyperthreads are excluded per MKL recommendation. +regardless of their total core count. Physical cores are detected via +`psutil.cpu_count(logical=False)` — hyperthreads are excluded per MKL +recommendation. 
+ +## Running Benchmarks + +Prerequisites: + +```bash +pip install asv psutil +``` + +Run benchmarks against the current environment: + +```bash +asv run --python=same --quick HEAD^! +``` + +Compare two commits: + +```bash +asv continuous --python=same HEAD~1 HEAD +``` + +View results in a browser: + +```bash +asv publish +asv preview +``` diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 8c294d8c..43b2505d 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -20,23 +20,15 @@ """ import os -import re + +import psutil _MIN_THREADS = 4 # minimum physical cores required for multi-threaded mode def _physical_cores(): - """Return physical core count from /proc/cpuinfo; fall back to 1 (conservative).""" - try: - with open("/proc/cpuinfo") as f: - content = f.read() - cpu_cores = int(re.search(r"cpu cores\s*:\s*(\d+)", content).group(1)) - sockets = max( - len(set(re.findall(r"physical id\s*:\s*(\d+)", content))), 1 - ) - return cpu_cores * sockets - except Exception: - return 1 + """Return physical core count; fall back to 1 (conservative).""" + return psutil.cpu_count(logical=False) or 1 def _thread_count(): From a3065805878b17924be6b125ae4fbcd0c4ff0c91 Mon Sep 17 00:00:00 2001 From: vchamarthi Date: Wed, 20 May 2026 13:43:41 -0500 Subject: [PATCH 5/5] PR suggestions --- benchmarks/benchmarks/__init__.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 43b2505d..9b89c10b 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -1,23 +1,4 @@ -"""ASV benchmarks for mkl_fft. - -Thread control — design rationale ----------------------------------- -Since we do not have a dedicated CI benchmark machine, benchmarks run on a shared CI pool -whose machines vary in core count over time. 
-Using the full physical core count of each machine would make results -incomparable across runs on different machines. - -Strategy: - - Physical cores >= 4 → fix MKL_NUM_THREADS = 4 - 4 is the lowest common denominator that guarantees multi-threaded MKL - behavior and is achievable on any modern CI machine. Results from - different machines in the pool are therefore directly comparable. - - Physical cores < 4 → fall back to MKL_NUM_THREADS = 1 (single-threaded) - Prevents over-subscription on under-resourced machines and avoids - misleading comparisons against 4-thread baselines. - -MKL recommendation: use physical cores, not logical (hyperthreaded) CPUs. -""" +"""ASV benchmarks for mkl_fft""" import os