FFT benchmarks
Perform 2D FFT benchmarks using the CUDA and OpenCL backends of pyvkfft, and compare with scikit-cuda (cuFFT) and gpyfft (clFFT) if they are present.
Note 1: this is now more easily done using the ``pyvkfft-benchmark`` command-line script
Note 2: in this example, we are using a fixed batch size for 1D and 2D transforms (e.g. 16 arrays transformed in parallel). This is different from the command-line benchmark, which keeps a fixed total array size (hundreds of MB). This is why the throughput remains low for small sizes, and then becomes larger than the card’s nominal bandwidth because of caching effects, before reaching more normal throughput values.
Note 3: due to a bug, we cannot delete cuFFT plans (doing so corrupts the CUDA context), so the memory usage will continue to grow during execution. Either do not test cuFFT (skcuda) or limit the size to avoid this. The command-line script avoids this by using a separate process for each individual test (and is consequently pretty slow).
[1]:
%matplotlib notebook
# Optional substring of the CUDA device name to select; None selects device 0
gpu_name = None
import os
import platform
import gc
from itertools import permutations

# Each optional backend is probed independently; the has_* flags record
# which ones can be benchmarked on this machine.

# pyvkfft, CUDA backend
try:
    import pycuda.driver as cu_drv
    import pycuda.gpuarray as cua
    from pycuda import curandom
    import pyvkfft.cuda
    from pyvkfft.cuda import VkFFTApp as cuVkFFTApp
    from pyvkfft.base import primes
    has_pyvkfft_cuda = True
except ImportError:
    has_pyvkfft_cuda = False

# pyvkfft, OpenCL backend
try:
    import pyopencl as cl
    import pyopencl.array as cla
    from pyopencl import clrandom
    import pyvkfft.opencl
    from pyvkfft.opencl import VkFFTApp as clVkFFTApp
    from pyvkfft.base import primes
    has_pyvkfft_opencl = True
except ImportError:
    has_pyvkfft_opencl = False

# scikit-cuda (cuFFT). Importing can fail with errors other than ImportError
# (e.g. while initialising the driver or loading the libraries), hence the
# broad handler - but KeyboardInterrupt/SystemExit are no longer swallowed.
try:
    import pycuda.autoinit
    import pycuda.driver as cu_drv
    import pycuda.gpuarray as cua
    from pycuda import curandom
    import skcuda.fft as cu_fft
    has_skcuda = True
except Exception:
    has_skcuda = False

# gpyfft (clFFT). Same rationale as above for the broad handler.
try:
    import pyopencl as cl
    import pyopencl.array as cla
    from pyopencl import clrandom
    import gpyfft
    has_gpyfft = True
except Exception:
    has_gpyfft = False

import matplotlib.pyplot as plt
import numpy as np
import timeit
/home/esrf/favre/miniconda3/envs/pynx-py311-cu11.7/lib/python3.11/site-packages/skcuda/cublas.py:284: UserWarning: creating CUBLAS context to get version number
warnings.warn('creating CUBLAS context to get version number')
[2]:
# Name of the device actually used; shown in the plot title and output filename
gpu_name_real = None

if has_pyvkfft_opencl or has_gpyfft:
    if 'PYOPENCL_CTX' in os.environ:
        # Honour an explicit device choice made through the environment
        cl_ctx = cl.create_some_context()
        gpu_name_real = cl_ctx.devices[0].name
        print("Selected OpenCL device: ", gpu_name_real)
    else:
        cl_ctx = None
        # Find the first OpenCL GPU available and use it
        for p in cl.get_platforms():
            for d in p.get_devices():
                if d.type & cl.device_type.GPU == 0:
                    continue
                gpu_name_real = d.name
                print("Selected OpenCL device: ", d.name)
                cl_ctx = cl.Context(devices=(d,))
                break
            if cl_ctx is not None:
                break
        if cl_ctx is None:
            # Fail here with a clear message rather than in CommandQueue(None)
            raise RuntimeError("No OpenCL GPU device was found")
    cq = cl.CommandQueue(cl_ctx)

if has_pyvkfft_cuda or has_skcuda:
    cu_ctx = None
    if gpu_name is None:
        # No name requested: use the first CUDA device
        d = cu_drv.Device(0)
        gpu_name_real = d.name()
        print("Selected CUDA device: ", d.name())
        cu_ctx = d.make_context()
    else:
        # Use the first CUDA device whose name contains gpu_name (case-insensitive)
        for i in range(cu_drv.Device.count()):
            d = cu_drv.Device(i)
            if gpu_name.lower() in d.name().lower():
                gpu_name_real = d.name()
                print("Selected CUDA device: ", d.name())
                cu_ctx = d.make_context()
                break
        if cu_ctx is None:
            # Otherwise cu_ctx would be missing when the benchmark synchronises
            raise RuntimeError("No CUDA device matching gpu_name=%r was found" % gpu_name)
Selected OpenCL device: NVIDIA A40
Selected CUDA device: NVIDIA A40
[3]:
# ---------------------------------------------------------------------------
# Benchmark parameters
# ---------------------------------------------------------------------------
ndim = 2  # Dimensions for the FFT (1, 2 or 3)
nmax = 3072  # Maximum FFT size (e.g. 512 for 3D, 4096 for 2D,...) - nmax is included
dtype = np.complex64  # Data type
radix_max = 7  # Largest allowed prime factor: use 2 for quick tests or 7 (13 is also possible)
# If None, the first OpenCL platform with a GPU is selected. Otherwise match part of the platform name
# NOTE(review): not read anywhere in this cell - confirm whether device selection should use it.
cl_platform = None
nb_repeat = 3  # Perform nb_repeat tests, keep best time
# number of parallel arrays for 2D (nz, n, n) and 1D (nz, nz, n) transforms
nz = 16


def _throughput(nbytes, nb, ndim, dt):
    """Return the idealised throughput in Gbytes/s.

    Assumes one read and one write of the whole array per transformed axis,
    for nb (FFT, iFFT) couples executed in dt seconds.
    """
    return nbytes * nb * ndim * 2 * 2 / dt / 1024 ** 3


plt.figure(figsize=(9.5, 8))

# One throughput list and one "-dt" (best time for nb couples) list per
# available backend. Insertion order defines the column order of the table.
results = {"n": []}
for backend, available in (("vkFFT.opencl", has_pyvkfft_opencl),
                           ("gpyfft[clFFT]", has_gpyfft),
                           ("vkFFT.cuda", has_pyvkfft_cuda),
                           ("skcuda[cuFFT]", has_skcuda)):
    if available:
        results[backend] = []
        results[backend + "-dt"] = []
backends = [b for b in results if b != "n" and "-dt" not in b]

# cuFFT plans are kept alive on purpose (destroying them corrupts the context)
plans_skcuda = []

if ndim == 1:
    header_results = "%4d x%4d x%4s [%dD]" % (nz, nz, "N", ndim)
elif ndim == 2:
    header_results = "%4d x%4s x%4s [%dD]" % (nz, "N", "N", ndim)
else:
    header_results = "%4s x%4s x%4s [%dD]" % ("N", "N", "N", ndim)
for b in backends:
    header_results += "%17s " % b

# Report the dtype actually benchmarked rather than a hard-coded one
print("Gbytes/s and time given for a couple (FFT, iFFT), dtype=%s" % np.dtype(dtype).name)
print()
print(header_results)

# (result key, colour, marker) in drawing order
plot_styles = (("gpyfft[clFFT]", '#00A000', 'v'),
               ("skcuda[cuFFT]", '#A00000', '^'),
               ("vkFFT.opencl", '#00FF00', 'o'),
               ("vkFFT.cuda", '#FF0000', 'o'))

# Only test sizes whose largest prime factor is <= radix_max
for n in range(16, nmax + 1):
    if max(primes(n)) > radix_max:
        continue
    results["n"].append(n)

    # Estimate number of repeats to last 0.1s with at least 100 GB/s
    nb = int(round(0.1 * 100 / (nz ** (3 - ndim) * n ** ndim * np.dtype(dtype).itemsize
                                * ndim * 2 * 2 / 1024 ** 3)))
    nb = min(max(nb, 1), 1000)

    if ndim == 1:
        sh = nz, nz, n
    elif ndim == 2:
        sh = nz, n, n
    else:
        sh = n, n, n

    # ---------------------------- OpenCL backends ----------------------------
    if has_pyvkfft_opencl or has_gpyfft:
        d = clrandom.rand(cq, shape=sh, dtype=np.float32).astype(dtype)

        if has_pyvkfft_opencl:
            dt = 0
            gbps = 0
            try:
                app = clVkFFTApp(d.shape, d.dtype, queue=cq, ndim=ndim)
                # The timed loops are kept inline so that no extra Python
                # call overhead is added to the measurement.
                for _ in range(nb_repeat):
                    cq.finish()
                    t0 = timeit.default_timer()
                    for _ in range(nb):
                        d = app.ifft(d)
                        d = app.fft(d)
                    cq.finish()
                    dt1 = timeit.default_timer() - t0
                    if dt == 0 or dt1 < dt:
                        dt = dt1
                del app
                gbps = _throughput(d.nbytes, nb, ndim, dt)
            except Exception as ex:
                # A failing size is recorded as 0 Gbytes/s, but no longer silently
                gbps = 0
                print("vkFFT.opencl failed for n=%d: %s" % (n, ex))
            results["vkFFT.opencl"].append(gbps)
            results["vkFFT.opencl-dt"].append(dt)
            gc.collect()

        if has_gpyfft:
            dt = 0
            # Shuffle axes order to find fastest transform: the best time
            # over all axis permutations is kept.
            for axes in permutations([-1, -2, -3][:ndim]):
                gpyfft_plan = gpyfft.FFT(cl_ctx, cq, d, None, axes=axes)
                for _ in range(nb_repeat):
                    cq.finish()
                    t0 = timeit.default_timer()
                    for _ in range(nb):
                        gpyfft_plan.enqueue(forward=True)
                        gpyfft_plan.enqueue(forward=False)
                    cq.finish()
                    dt1 = timeit.default_timer() - t0
                    if dt == 0 or dt1 < dt:
                        dt = dt1
                del gpyfft_plan
            gbps = _throughput(d.nbytes, nb, ndim, dt)
            results["gpyfft[clFFT]"].append(gbps)
            results["gpyfft[clFFT]-dt"].append(dt)

        # Release the device buffer before the CUDA backends allocate theirs
        d.data.release()
        del d
        gc.collect()

    # ----------------------------- CUDA backends -----------------------------
    # The array is needed if *either* CUDA backend is available
    if has_pyvkfft_cuda or has_skcuda:
        d = curandom.rand(shape=sh, dtype=np.float32).astype(dtype)

        if has_pyvkfft_cuda:
            # Initialised before the try so a failing plan creation still
            # records well-defined values for this size
            dt = 0
            gbps = 0
            try:
                app = cuVkFFTApp(d.shape, d.dtype, ndim=ndim)
                for _ in range(nb_repeat):
                    cu_ctx.synchronize()
                    t0 = timeit.default_timer()
                    for _ in range(nb):
                        d = app.ifft(d)
                        d = app.fft(d)
                    cu_ctx.synchronize()
                    dt1 = timeit.default_timer() - t0
                    if dt == 0 or dt1 < dt:
                        dt = dt1
                del app
                gbps = _throughput(d.nbytes, nb, ndim, dt)
            except Exception as ex:
                gbps = 0
                print("vkFFT.cuda failed for n=%d: %s" % (n, ex))
            results["vkFFT.cuda"].append(gbps)
            results["vkFFT.cuda-dt"].append(dt)
            gc.collect()

        if has_skcuda:
            if ndim == 1:
                plan = cu_fft.Plan(n, dtype, dtype, batch=nz * nz)
            elif ndim == 2:
                plan = cu_fft.Plan((n, n), dtype, dtype, batch=nz)
            else:
                plan = cu_fft.Plan((n, n, n), dtype, dtype, batch=1)
            dt = 0
            for _ in range(nb_repeat):
                cu_ctx.synchronize()
                t0 = timeit.default_timer()
                for _ in range(nb):
                    cu_fft.fft(d, d, plan)
                    cu_fft.ifft(d, d, plan)
                cu_ctx.synchronize()
                dt1 = timeit.default_timer() - t0
                if dt == 0 or dt1 < dt:
                    dt = dt1
            gbps = _throughput(d.nbytes, nb, ndim, dt)
            plans_skcuda.append(plan)  # We can't destroy skcuda plans (bug in cufft)
            results["skcuda[cuFFT]"].append(gbps)
            results["skcuda[cuFFT]-dt"].append(dt)

        d.gpudata.free()
        del d
        gc.collect()

    # ------------------------------ text output ------------------------------
    r = "%4d x%4d x %4d " % sh
    for b in backends:
        dt = results[b + '-dt'][-1] / nb  # time for a single (FFT, iFFT) couple
        if dt < 1e-3:
            r += "%7.2f [%6.2f µs]" % (results[b][-1], dt * 1e6)
        elif dt > 1:
            r += "%7.2f [%6.2f  s]" % (results[b][-1], dt)
        else:
            r += "%7.2f [%6.2f ms]" % (results[b][-1], dt * 1000)
    print(r + " [nb=%4d]" % nb)

    # ------------------------- live plot of the results ----------------------
    plt.clf()
    x = results['n']
    for b, color, marker in plot_styles:
        if b in results:
            plt.plot(x, results[b], color=color, marker=marker, markersize=3,
                     linestyle='', label=b)
    plt.legend(loc='lower right', fontsize=10)
    plt.xlabel("FFT size", fontsize=12)
    plt.ylabel("idealised throughput [Gbytes/s]", fontsize=12)
    plt.suptitle("%dD FFT speed [%s, %s, %s]" % (ndim, gpu_name_real, platform.platform(),
                                                 platform.node()), fontsize=12)
    plt.title("'Ideal' throughput assumes one r+w operation per FFT axis", fontsize=10)
    plt.grid(which='both', alpha=0.3)
    plt.xlim(0)
    plt.ylim(0)
    plt.tight_layout()
    # Force refresh
    plt.draw()
    plt.gcf().canvas.draw()
    plt.pause(.001)

# str() guards against gpu_name_real being None (no device name recorded)
plt.savefig('benchmark-%dDFFT-%s-%s-%s.png' % (ndim, str(gpu_name_real).replace(' ', '_'),
                                                platform.platform(), platform.node()))
Gbytes/s and time given for a couple (FFT, iFFT), dtype=complex64
16 x N x N [2D] vkFFT.opencl gpyfft[clFFT] vkFFT.cuda skcuda[cuFFT]
16 x 16 x 16 17.37 [ 14.05 µs] 7.43 [ 32.87 µs] 26.93 [ 9.07 µs] 23.83 [ 10.24 µs] [nb=1000]
16 x 18 x 18 22.08 [ 13.99 µs] 9.27 [ 33.34 µs] 35.83 [ 8.62 µs] 27.95 [ 11.06 µs] [nb=1000]
16 x 20 x 20 27.68 [ 13.78 µs] 11.35 [ 33.62 µs] 44.56 [ 8.56 µs] 35.40 [ 10.77 µs] [nb=1000]
16 x 21 x 21 30.73 [ 13.69 µs] 12.28 [ 34.25 µs] 44.27 [ 9.50 µs] 37.12 [ 11.33 µs] [nb=1000]
16 x 24 x 24 38.59 [ 14.23 µs] 16.41 [ 33.47 µs] 60.57 [ 9.07 µs] 52.16 [ 10.53 µs] [nb=1000]
16 x 25 x 25 41.75 [ 14.28 µs] 17.21 [ 34.63 µs] 65.74 [ 9.07 µs] 58.28 [ 10.23 µs] [nb=1000]
16 x 27 x 27 49.26 [ 14.11 µs] 19.91 [ 34.92 µs] 75.13 [ 9.25 µs] 67.06 [ 10.37 µs] [nb=1000]
16 x 28 x 28 52.65 [ 14.20 µs] 21.57 [ 34.66 µs] 78.34 [ 9.54 µs] 66.11 [ 11.31 µs] [nb=1000]
16 x 30 x 30 61.19 [ 14.03 µs] 25.50 [ 33.66 µs] 87.13 [ 9.85 µs] 73.64 [ 11.66 µs] [nb=1000]
16 x 32 x 32 70.43 [ 13.87 µs] 28.57 [ 34.18 µs] 105.78 [ 9.23 µs] 86.82 [ 11.25 µs] [nb=1000]
16 x 35 x 35 83.83 [ 13.94 µs] 34.04 [ 34.32 µs] 108.79 [ 10.74 µs] 94.36 [ 12.38 µs] [nb=1000]
16 x 36 x 36 88.17 [ 14.02 µs] 35.76 [ 34.56 µs] 124.87 [ 9.90 µs] 114.77 [ 10.77 µs] [nb=1000]
16 x 40 x 40 109.30 [ 13.96 µs] 44.30 [ 34.45 µs] 135.87 [ 11.23 µs] 124.65 [ 12.24 µs] [nb=1000]
16 x 42 x 42 120.19 [ 14.00 µs] 49.04 [ 34.31 µs] 156.20 [ 10.77 µs] 137.46 [ 12.24 µs] [nb=1000]
16 x 45 x 45 139.18 [ 13.88 µs] 56.66 [ 34.08 µs] 148.36 [ 13.02 µs] 153.47 [ 12.58 µs] [nb=1000]
16 x 48 x 48 156.22 [ 14.07 µs] 63.33 [ 34.69 µs] 198.77 [ 11.05 µs] 179.30 [ 12.25 µs] [nb=1000]
16 x 49 x 49 161.70 [ 14.16 µs] 67.16 [ 34.09 µs] 200.96 [ 11.39 µs] 178.01 [ 12.86 µs] [nb=1000]
16 x 50 x 50 171.68 [ 13.89 µs] 69.07 [ 34.52 µs] 194.61 [ 12.25 µs] 184.67 [ 12.91 µs] [nb=1000]
16 x 54 x 54 197.31 [ 14.09 µs] 80.98 [ 34.34 µs] 245.09 [ 11.35 µs] 206.34 [ 13.48 µs] [nb=1000]
16 x 56 x 56 212.83 [ 14.05 µs] 86.92 [ 34.41 µs] 270.91 [ 11.04 µs] 238.15 [ 12.56 µs] [nb=1000]
16 x 60 x 60 245.59 [ 13.98 µs] 99.69 [ 34.44 µs] 266.54 [ 12.88 µs] 266.33 [ 12.89 µs] [nb=1000]
16 x 63 x 63 270.83 [ 13.98 µs] 109.18 [ 34.67 µs] 320.94 [ 11.79 µs] 269.14 [ 14.06 µs] [nb=1000]
16 x 64 x 64 277.46 [ 14.08 µs] 112.67 [ 34.67 µs] 384.08 [ 10.17 µs] 331.09 [ 11.80 µs] [nb=1000]
16 x 70 x 70 333.32 [ 14.02 µs] 121.70 [ 38.40 µs] 371.02 [ 12.60 µs] 330.69 [ 14.13 µs] [nb=1000]
16 x 72 x 72 347.05 [ 14.25 µs] 142.32 [ 34.74 µs] 366.59 [ 13.49 µs] 350.17 [ 14.12 µs] [nb=1000]
16 x 75 x 75 342.13 [ 15.68 µs] 156.06 [ 34.37 µs] 333.35 [ 16.09 µs] 181.28 [ 29.59 µs] [nb=1000]
16 x 80 x 80 440.13 [ 13.87 µs] 177.02 [ 34.48 µs] 461.22 [ 13.23 µs] 409.82 [ 14.89 µs] [nb=1000]
16 x 81 x 81 422.45 [ 14.81 µs] 181.59 [ 34.46 µs] 449.75 [ 13.91 µs] 414.34 [ 15.10 µs] [nb=1000]
16 x 84 x 84 431.07 [ 15.61 µs] 193.87 [ 34.71 µs] 446.61 [ 15.07 µs] 447.18 [ 15.05 µs] [nb=1000]
16 x 90 x 90 497.34 [ 15.53 µs] 221.91 [ 34.81 µs] 489.66 [ 15.78 µs] 413.43 [ 18.68 µs] [nb=1000]
16 x 96 x 96 566.17 [ 15.52 µs] 250.01 [ 35.15 µs] 560.65 [ 15.68 µs] 486.11 [ 18.08 µs] [nb=1000]
16 x 98 x 98 519.54 [ 17.63 µs] 258.84 [ 35.39 µs] 546.88 [ 16.75 µs] 249.31 [ 36.74 µs] [nb=1000]
16 x 100 x 100 607.65 [ 15.69 µs] 280.02 [ 34.06 µs] 639.92 [ 14.90 µs] 546.26 [ 17.46 µs] [nb=1000]
16 x 105 x 105 540.10 [ 19.47 µs] 167.52 [ 62.77 µs] 538.18 [ 19.54 µs] 514.01 [ 20.46 µs] [nb= 951]
16 x 108 x 108 616.07 [ 18.06 µs] 325.69 [ 34.15 µs] 648.63 [ 17.15 µs] 578.83 [ 19.22 µs] [nb= 899]
16 x 112 x 112 677.84 [ 17.65 µs] 340.48 [ 35.14 µs] 709.05 [ 16.87 µs] 657.14 [ 18.20 µs] [nb= 836]
16 x 120 x 120 717.43 [ 19.14 µs] 391.34 [ 35.09 µs] 782.99 [ 17.54 µs] 681.05 [ 20.16 µs] [nb= 728]
16 x 125 x 125 788.68 [ 18.89 µs] 369.43 [ 40.34 µs] 799.42 [ 18.64 µs] 722.87 [ 20.61 µs] [nb= 671]
16 x 126 x 126 722.78 [ 20.95 µs] 345.32 [ 43.85 µs] 759.39 [ 19.94 µs] 645.95 [ 23.44 µs] [nb= 660]
16 x 128 x 128 859.25 [ 18.18 µs] 416.98 [ 37.47 µs] 856.53 [ 18.24 µs] 836.59 [ 18.68 µs] [nb= 640]
16 x 135 x 135 712.28 [ 24.40 µs] 411.73 [ 42.21 µs] 733.55 [ 23.69 µs] 656.74 [ 26.47 µs] [nb= 575]
16 x 140 x 140 857.26 [ 21.80 µs] 358.17 [ 52.19 µs] 902.31 [ 20.72 µs] 766.14 [ 24.40 µs] [nb= 535]
16 x 144 x 144 976.17 [ 20.26 µs] 440.53 [ 44.89 µs]1018.93 [ 19.41 µs] 931.28 [ 21.23 µs] [nb= 506]
16 x 147 x 147 765.43 [ 26.92 µs] 434.85 [ 47.39 µs] 813.13 [ 25.34 µs] 374.65 [ 55.01 µs] [nb= 485]
16 x 150 x 150 944.70 [ 22.71 µs] 430.89 [ 49.80 µs] 985.92 [ 21.76 µs] 736.20 [ 29.15 µs] [nb= 466]
16 x 160 x 160 1049.86 [ 23.25 µs] 492.96 [ 49.53 µs]1105.16 [ 22.09 µs] 926.83 [ 26.34 µs] [nb= 410]
16 x 162 x 162 934.20 [ 26.79 µs] 355.70 [ 70.36 µs] 974.38 [ 25.69 µs] 765.53 [ 32.69 µs] [nb= 400]
16 x 168 x 168 1090.51 [ 24.68 µs] 396.20 [ 67.94 µs]1149.26 [ 23.42 µs] 803.34 [ 33.51 µs] [nb= 372]
16 x 175 x 175 943.61 [ 30.95 µs] 440.78 [ 66.26 µs] 966.92 [ 30.21 µs] 786.20 [ 37.15 µs] [nb= 342]
16 x 180 x 180 1017.56 [ 30.37 µs] 455.85 [ 67.78 µs]1114.08 [ 27.73 µs] 906.71 [ 34.08 µs] [nb= 324]
16 x 189 x 189 938.56 [ 36.30 µs] 377.63 [ 90.21 µs] 978.13 [ 34.83 µs] 828.66 [ 41.11 µs] [nb= 294]
16 x 192 x 192 1202.06 [ 29.25 µs] 385.26 [ 91.25 µs]1221.35 [ 28.78 µs]1018.30 [ 34.52 µs] [nb= 284]
16 x 196 x 196 1118.50 [ 32.75 µs] 383.94 [ 95.42 µs]1223.92 [ 29.93 µs] 999.29 [ 36.66 µs] [nb= 273]
16 x 200 x 200 991.16 [ 38.49 µs] 387.03 [ 98.56 µs]1039.61 [ 36.69 µs]1000.03 [ 38.15 µs] [nb= 262]
16 x 210 x 210 839.00 [ 50.13 µs] 266.28 [157.94 µs] 845.03 [ 49.77 µs] 947.95 [ 44.37 µs] [nb= 238]
16 x 216 x 216 945.92 [ 47.04 µs] 436.13 [102.02 µs] 950.91 [ 46.79 µs] 857.16 [ 51.91 µs] [nb= 225]
16 x 224 x 224 747.69 [ 64.00 µs] 481.54 [ 99.37 µs] 772.77 [ 61.92 µs] 747.68 [ 64.00 µs] [nb= 209]
16 x 225 x 225 796.05 [ 60.65 µs] 362.64 [133.14 µs] 819.09 [ 58.94 µs] 744.66 [ 64.83 µs] [nb= 207]
16 x 240 x 240 653.36 [ 84.08 µs] 503.35 [109.13 µs] 644.44 [ 85.24 µs] 515.99 [106.46 µs] [nb= 182]
16 x 243 x 243 532.06 [105.84 µs] 329.38 [170.97 µs] 529.90 [106.27 µs] 553.53 [101.74 µs] [nb= 178]
16 x 245 x 245 481.78 [118.82 µs] 340.85 [167.94 µs] 480.13 [119.23 µs] 228.43 [250.60 µs] [nb= 175]
16 x 250 x 250 516.05 [115.50 µs] 322.28 [184.95 µs] 519.26 [114.79 µs] 464.75 [128.25 µs] [nb= 168]
16 x 252 x 252 455.43 [132.98 µs] 363.80 [166.47 µs] 454.62 [133.22 µs] 489.95 [123.61 µs] [nb= 165]
16 x 256 x 256 455.62 [137.18 µs] 360.28 [173.48 µs] 539.26 [115.90 µs] 504.39 [123.91 µs] [nb= 160]
16 x 270 x 270 473.46 [146.84 µs] 348.35 [199.58 µs] 471.86 [147.34 µs] 397.18 [175.04 µs] [nb= 144]
16 x 280 x 280 446.95 [167.29 µs] 289.03 [258.68 µs] 442.96 [168.79 µs] 460.40 [162.40 µs] [nb= 134]
16 x 288 x 288 491.52 [160.93 µs] 294.61 [268.50 µs] 498.32 [158.74 µs] 475.06 [166.51 µs] [nb= 126]
16 x 294 x 294 466.36 [176.76 µs] 327.90 [251.39 µs] 466.68 [176.63 µs] 159.38 [517.20 µs] [nb= 121]
16 x 300 x 300 470.35 [182.48 µs] 313.36 [273.90 µs] 471.32 [182.11 µs] 471.74 [181.94 µs] [nb= 117]
16 x 315 x 315 416.85 [227.01 µs] 134.54 [703.34 µs] 417.98 [226.40 µs] 442.27 [213.96 µs] [nb= 106]
16 x 320 x 320 435.06 [224.46 µs] 358.69 [272.26 µs] 429.21 [227.52 µs] 473.13 [206.41 µs] [nb= 102]
16 x 324 x 324 480.68 [208.27 µs] 321.97 [310.94 µs] 481.39 [207.97 µs] 472.57 [211.85 µs] [nb= 100]
16 x 336 x 336 464.09 [232.00 µs] 307.76 [349.84 µs] 463.35 [232.37 µs] 476.69 [225.86 µs] [nb= 93]
16 x 343 x 343 413.13 [271.58 µs] 351.23 [319.45 µs] 415.23 [270.21 µs] 469.14 [239.16 µs] [nb= 89]
16 x 350 x 350 425.37 [274.64 µs] 285.35 [409.40 µs] 423.67 [275.74 µs] 456.04 [256.17 µs] [nb= 86]
16 x 360 x 360 448.57 [275.53 µs] 288.05 [429.07 µs] 449.07 [275.22 µs] 468.74 [263.68 µs] [nb= 81]
16 x 375 x 375 402.62 [333.09 µs] 238.22 [562.97 µs] 408.95 [327.94 µs] 223.25 [600.72 µs] [nb= 75]
16 x 378 x 378 424.27 [321.18 µs] 325.78 [418.27 µs] 424.91 [320.69 µs] 441.49 [308.65 µs] [nb= 73]
16 x 384 x 384 457.72 [307.23 µs] 264.06 [532.55 µs] 457.16 [307.60 µs] 474.51 [296.36 µs] [nb= 71]
16 x 392 x 392 458.17 [319.85 µs] 269.06 [544.65 µs] 458.65 [319.52 µs] 480.94 [304.71 µs] [nb= 68]
16 x 400 x 400 456.98 [333.91 µs] 257.47 [592.64 µs] 457.56 [333.48 µs] 480.03 [317.87 µs] [nb= 66]
16 x 405 x 405 430.97 [362.96 µs] 275.48 [567.83 µs] 433.71 [360.67 µs] 484.01 [323.19 µs] [nb= 64]
16 x 420 x 420 457.77 [367.50 µs] 149.30 [ 1.13 ms] 459.37 [366.21 µs] 471.92 [356.48 µs] [nb= 59]
16 x 432 x 432 461.36 [385.77 µs] 262.55 [677.87 µs] 461.80 [385.40 µs] 485.23 [366.79 µs] [nb= 56]
16 x 441 x 441 364.29 [509.14 µs] 247.65 [748.91 µs] 360.70 [514.20 µs] 431.11 [430.22 µs] [nb= 54]
16 x 448 x 448 466.89 [409.96 µs] 264.33 [724.11 µs] 466.52 [410.28 µs] 481.45 [397.56 µs] [nb= 52]
16 x 450 x 450 464.98 [415.33 µs] 293.83 [657.25 µs] 463.17 [416.95 µs] 457.95 [421.71 µs] [nb= 52]
16 x 480 x 480 456.65 [481.17 µs] 347.30 [632.66 µs] 458.31 [479.43 µs] 479.60 [458.15 µs] [nb= 46]
16 x 486 x 486 449.32 [501.33 µs] 299.56 [751.94 µs] 450.85 [499.62 µs] 487.09 [462.45 µs] [nb= 44]
16 x 490 x 490 417.34 [548.65 µs] 298.64 [766.74 µs] 417.97 [547.83 µs] 455.13 [503.10 µs] [nb= 44]
16 x 500 x 500 464.45 [513.34 µs] 247.80 [962.16 µs] 450.03 [529.79 µs] 243.92 [977.46 µs] [nb= 42]
16 x 504 x 504 467.28 [518.42 µs] 263.23 [920.29 µs] 468.00 [517.63 µs] 480.56 [504.10 µs] [nb= 41]
16 x 512 x 512 464.95 [537.70 µs] 299.22 [835.49 µs] 462.94 [540.03 µs] 488.71 [511.56 µs] [nb= 40]
16 x 525 x 525 397.14 [661.87 µs] 136.24 [ 1.93 ms] 398.96 [658.86 µs] 161.17 [ 1.63 ms] [nb= 38]
16 x 540 x 540 468.98 [592.97 µs] 253.67 [ 1.10 ms] 472.63 [588.40 µs] 479.60 [579.84 µs] [nb= 36]
16 x 560 x 560 474.31 [630.55 µs] 376.80 [793.71 µs] 475.26 [629.28 µs] 482.70 [619.58 µs] [nb= 33]
16 x 567 x 567 389.28 [787.59 µs] 244.83 [ 1.25 ms] 396.56 [773.14 µs] 482.07 [636.00 µs] [nb= 33]
16 x 576 x 576 458.97 [689.38 µs] 229.94 [ 1.38 ms] 464.17 [681.66 µs] 488.97 [647.08 µs] [nb= 32]
16 x 588 x 588 484.85 [680.06 µs] 267.01 [ 1.23 ms] 486.10 [678.31 µs] 485.33 [679.39 µs] [nb= 30]
16 x 600 x 600 450.53 [762.04 µs] 227.06 [ 1.51 ms] 449.97 [762.99 µs] 485.59 [707.02 µs] [nb= 29]
16 x 625 x 625 424.86 [876.83 µs] 286.49 [ 1.30 ms] 423.72 [879.19 µs] 481.49 [773.70 µs] [nb= 27]
16 x 630 x 630 413.33 [915.77 µs] 123.92 [ 3.05 ms] 429.37 [881.55 µs] 446.90 [846.97 µs] [nb= 26]
16 x 640 x 640 454.62 [859.24 µs] 223.61 [ 1.75 ms] 456.78 [855.16 µs] 496.19 [787.25 µs] [nb= 26]
16 x 648 x 648 470.04 [851.95 µs] 231.02 [ 1.73 ms] 472.95 [846.71 µs] 487.32 [821.75 µs] [nb= 25]
16 x 672 x 672 477.09 [902.69 µs] 249.19 [ 1.73 ms] 479.85 [897.50 µs] 492.57 [874.32 µs] [nb= 23]
16 x 675 x 675 364.03 [ 1.19 ms] 236.77 [ 1.84 ms] 363.92 [ 1.19 ms] 419.45 [ 1.04 ms] [nb= 23]
16 x 686 x 686 407.91 [ 1.10 ms] 240.23 [ 1.87 ms] 415.34 [ 1.08 ms] 241.77 [ 1.86 ms] [nb= 22]
16 x 700 x 700 458.50 [ 1.02 ms] 233.83 [ 2.00 ms] 460.51 [ 1.01 ms] 417.62 [ 1.12 ms] [nb= 21]
16 x 720 x 720 457.21 [ 1.08 ms] 206.65 [ 2.39 ms] 459.59 [ 1.08 ms] 485.25 [ 1.02 ms] [nb= 20]
16 x 729 x 729 349.66 [ 1.45 ms] 305.01 [ 1.66 ms] 348.76 [ 1.45 ms] 378.00 [ 1.34 ms] [nb= 20]
16 x 735 x 735 381.74 [ 1.35 ms] 126.07 [ 4.09 ms] 380.20 [ 1.36 ms] 160.66 [ 3.21 ms] [nb= 19]
16 x 750 x 750 472.57 [ 1.14 ms] 205.70 [ 2.61 ms] 473.54 [ 1.13 ms] 474.12 [ 1.13 ms] [nb= 19]
16 x 756 x 756 460.02 [ 1.18 ms] 229.85 [ 2.37 ms] 466.71 [ 1.17 ms] 484.06 [ 1.13 ms] [nb= 18]
16 x 768 x 768 473.89 [ 1.19 ms] 214.99 [ 2.62 ms] 456.49 [ 1.23 ms] 496.11 [ 1.13 ms] [nb= 18]
16 x 784 x 784 472.43 [ 1.24 ms] 216.92 [ 2.70 ms] 473.86 [ 1.24 ms] 489.36 [ 1.20 ms] [nb= 17]
16 x 800 x 800 466.61 [ 1.31 ms] 232.97 [ 2.62 ms] 464.60 [ 1.31 ms] 477.35 [ 1.28 ms] [nb= 16]
16 x 810 x 810 472.88 [ 1.32 ms] 230.09 [ 2.72 ms] 472.90 [ 1.32 ms] 442.94 [ 1.41 ms] [nb= 16]
16 x 840 x 840 459.59 [ 1.46 ms] 148.47 [ 4.53 ms] 461.70 [ 1.46 ms] 483.05 [ 1.39 ms] [nb= 15]
16 x 864 x 864 470.25 [ 1.51 ms] 222.85 [ 3.19 ms] 470.23 [ 1.51 ms] 482.55 [ 1.48 ms] [nb= 14]
16 x 875 x 875 433.65 [ 1.68 ms] 196.18 [ 3.72 ms] 427.18 [ 1.71 ms] 428.86 [ 1.70 ms] [nb= 14]
16 x 882 x 882 452.61 [ 1.64 ms] 218.67 [ 3.39 ms] 451.57 [ 1.64 ms] 159.85 [ 4.64 ms] [nb= 13]
16 x 896 x 896 471.80 [ 1.62 ms] 209.23 [ 3.66 ms] 472.10 [ 1.62 ms] 490.05 [ 1.56 ms] [nb= 13]
16 x 900 x 900 461.07 [ 1.68 ms] 208.75 [ 3.70 ms] 463.44 [ 1.67 ms] 483.46 [ 1.60 ms] [nb= 13]
16 x 945 x 945 383.32 [ 2.22 ms] 129.26 [ 6.59 ms] 385.42 [ 2.21 ms] 464.24 [ 1.83 ms] [nb= 12]
16 x 960 x 960 460.95 [ 1.91 ms] 214.25 [ 4.10 ms] 461.48 [ 1.90 ms] 485.00 [ 1.81 ms] [nb= 11]
16 x 972 x 972 460.40 [ 1.96 ms] 201.12 [ 4.48 ms] 466.29 [ 1.93 ms] 464.10 [ 1.94 ms] [nb= 11]
16 x 980 x 980 480.05 [ 1.91 ms] 208.41 [ 4.39 ms] 482.06 [ 1.90 ms] 484.93 [ 1.89 ms] [nb= 11]
16 x1000 x 1000 463.97 [ 2.06 ms] 209.39 [ 4.55 ms] 465.85 [ 2.05 ms] 489.52 [ 1.95 ms] [nb= 10]
16 x1008 x 1008 473.54 [ 2.05 ms] 211.16 [ 4.59 ms] 472.48 [ 2.05 ms] 375.41 [ 2.58 ms] [nb= 10]
16 x1024 x 1024 463.64 [ 2.16 ms] 221.84 [ 4.51 ms] 481.30 [ 2.08 ms] 501.26 [ 1.99 ms] [nb= 10]
16 x1029 x 1029 419.99 [ 2.40 ms] 148.83 [ 6.78 ms] 420.84 [ 2.40 ms] 234.40 [ 4.31 ms] [nb= 10]
16 x1050 x 1050 425.48 [ 2.47 ms] 102.56 [ 10.25 ms] 427.90 [ 2.46 ms] 430.03 [ 2.45 ms] [nb= 10]
16 x1080 x 1080 462.02 [ 2.41 ms] 196.93 [ 5.65 ms] 462.80 [ 2.40 ms] 253.64 [ 4.39 ms] [nb= 9]
16 x1120 x 1120 467.83 [ 2.56 ms] 211.68 [ 5.65 ms] 467.15 [ 2.56 ms] 153.14 [ 7.81 ms] [nb= 8]
16 x1125 x 1125 394.53 [ 3.06 ms] 166.73 [ 7.24 ms] 395.78 [ 3.05 ms] 478.98 [ 2.52 ms] [nb= 8]
16 x1134 x 1134 427.38 [ 2.87 ms] 164.24 [ 7.47 ms] 430.40 [ 2.85 ms] 480.76 [ 2.55 ms] [nb= 8]
16 x1152 x 1152 466.34 [ 2.71 ms] 183.69 [ 6.89 ms] 466.98 [ 2.71 ms] 469.82 [ 2.69 ms] [nb= 8]
16 x1176 x 1176 474.82 [ 2.78 ms] 182.06 [ 7.24 ms] 478.67 [ 2.76 ms] 475.21 [ 2.78 ms] [nb= 8]
16 x1200 x 1200 458.44 [ 3.00 ms] 158.14 [ 8.68 ms] 458.62 [ 2.99 ms] 455.17 [ 3.02 ms] [nb= 7]
16 x1215 x 1215 405.72 [ 3.47 ms] 163.39 [ 8.62 ms] 403.88 [ 3.49 ms] 405.03 [ 3.48 ms] [nb= 7]
16 x1225 x 1225 481.32 [ 2.97 ms] 178.58 [ 8.01 ms] 479.50 [ 2.98 ms] 234.22 [ 6.11 ms] [nb= 7]
16 x1250 x 1250 418.50 [ 3.56 ms] 171.34 [ 8.70 ms] 419.05 [ 3.56 ms] 482.82 [ 3.09 ms] [nb= 7]
16 x1260 x 1260 448.76 [ 3.37 ms] 88.92 [ 17.03 ms] 448.34 [ 3.38 ms] 480.82 [ 3.15 ms] [nb= 7]
16 x1280 x 1280 451.26 [ 3.46 ms] 161.48 [ 9.68 ms] 453.08 [ 3.45 ms] 452.22 [ 3.46 ms] [nb= 6]
16 x1296 x 1296 480.61 [ 3.33 ms] 170.00 [ 9.42 ms] 482.20 [ 3.32 ms] 339.56 [ 4.72 ms] [nb= 6]
16 x1323 x 1323 377.04 [ 4.43 ms] 159.84 [ 10.44 ms] 374.51 [ 4.46 ms] 475.00 [ 3.51 ms] [nb= 6]
16 x1344 x 1344 484.81 [ 3.55 ms] 119.01 [ 14.47 ms] 489.38 [ 3.52 ms] 484.19 [ 3.56 ms] [nb= 6]
16 x1350 x 1350 421.71 [ 4.12 ms] 164.06 [ 10.59 ms] 422.59 [ 4.11 ms] 424.84 [ 4.09 ms] [nb= 6]
16 x1372 x 1372 445.03 [ 4.03 ms] 164.17 [ 10.93 ms] 448.89 [ 4.00 ms] 292.32 [ 6.14 ms] [nb= 6]
16 x1400 x 1400 470.34 [ 3.97 ms] 135.43 [ 13.80 ms] 470.67 [ 3.97 ms] 458.68 [ 4.08 ms] [nb= 5]
16 x1440 x 1440 439.82 [ 4.50 ms] 139.45 [ 14.18 ms] 438.70 [ 4.51 ms] 463.12 [ 4.27 ms] [nb= 5]
16 x1458 x 1458 474.52 [ 4.27 ms] 200.25 [ 10.12 ms] 475.15 [ 4.27 ms] 477.82 [ 4.24 ms] [nb= 5]
16 x1470 x 1470 401.99 [ 5.13 ms] 92.36 [ 22.31 ms] 408.39 [ 5.05 ms] 471.70 [ 4.37 ms] [nb= 5]
16 x1500 x 1500 433.46 [ 4.95 ms] 113.83 [ 18.85 ms] 434.49 [ 4.94 ms] 430.91 [ 4.98 ms] [nb= 5]
16 x1512 x 1512 469.40 [ 4.64 ms] 179.66 [ 12.14 ms] 473.55 [ 4.60 ms] 472.05 [ 4.62 ms] [nb= 5]
16 x1536 x 1536 336.38 [ 6.69 ms] 157.13 [ 14.32 ms] 420.35 [ 5.35 ms] 468.69 [ 4.80 ms] [nb= 4]
16 x1568 x 1568 336.53 [ 6.97 ms] 159.12 [ 14.74 ms] 480.03 [ 4.88 ms] 341.18 [ 6.87 ms] [nb= 4]
16 x1575 x 1575 302.73 [ 7.81 ms] 165.06 [ 14.33 ms] 463.14 [ 5.11 ms] 250.28 [ 9.45 ms] [nb= 4]
16 x1600 x 1600 336.15 [ 7.26 ms] 125.13 [ 19.51 ms] 475.61 [ 5.13 ms] 476.01 [ 5.13 ms] [nb= 4]
16 x1620 x 1620 328.32 [ 7.62 ms] 125.82 [ 19.89 ms] 469.05 [ 5.34 ms] 469.59 [ 5.33 ms] [nb= 4]
16 x1680 x 1680 335.30 [ 8.03 ms] 134.82 [ 19.97 ms] 470.27 [ 5.72 ms] 467.35 [ 5.76 ms] [nb= 4]
16 x1701 x 1701 305.72 [ 9.03 ms] 113.54 [ 24.30 ms] 457.57 [ 6.03 ms] 306.64 [ 9.00 ms] [nb= 4]
16 x1715 x 1715 305.04 [ 9.20 ms] 101.35 [ 27.68 ms] 450.96 [ 6.22 ms] 236.72 [ 11.85 ms] [nb= 4]
16 x1728 x 1728 335.60 [ 8.49 ms] 141.13 [ 20.18 ms] 473.28 [ 6.02 ms] 343.60 [ 8.29 ms] [nb= 4]
16 x1750 x 1750 319.08 [ 9.15 ms] 129.90 [ 22.48 ms] 455.63 [ 6.41 ms] 164.16 [ 17.79 ms] [nb= 3]
16 x1764 x 1764 329.40 [ 9.01 ms] 132.94 [ 22.32 ms] 438.38 [ 6.77 ms] 432.85 [ 6.86 ms] [nb= 3]
16 x1792 x 1792 337.12 [ 9.08 ms] 159.94 [ 19.15 ms] 475.56 [ 6.44 ms] 332.43 [ 9.21 ms] [nb= 3]
16 x1800 x 1800 331.75 [ 9.31 ms] 99.97 [ 30.91 ms] 462.83 [ 6.68 ms] 460.91 [ 6.70 ms] [nb= 3]
16 x1875 x 1875 301.52 [ 11.12 ms] 101.57 [ 33.01 ms] 419.99 [ 7.98 ms] 324.05 [ 10.35 ms] [nb= 3]
16 x1890 x 1890 319.61 [ 10.66 ms] 85.20 [ 39.98 ms] 458.96 [ 7.42 ms] 366.57 [ 9.29 ms] [nb= 3]
16 x1920 x 1920 337.12 [ 10.43 ms] 111.35 [ 31.57 ms] 463.68 [ 7.58 ms] 259.65 [ 13.54 ms] [nb= 3]
16 x1944 x 1944 333.33 [ 10.81 ms] 131.67 [ 27.37 ms] 467.69 [ 7.71 ms] 333.56 [ 10.80 ms] [nb= 3]
16 x1960 x 1960 332.88 [ 11.01 ms] 107.42 [ 34.11 ms] 460.95 [ 7.95 ms] 355.90 [ 10.29 ms] [nb= 3]
16 x2000 x 2000 333.62 [ 11.43 ms] 104.34 [ 36.56 ms] 455.34 [ 8.38 ms] 222.81 [ 17.12 ms] [nb= 3]
16 x2016 x 2016 335.99 [ 11.54 ms] 102.87 [ 37.68 ms] 463.87 [ 8.36 ms] 307.27 [ 12.61 ms] [nb= 3]
16 x2025 x 2025 293.51 [ 13.32 ms] 105.01 [ 37.24 ms] 421.79 [ 9.27 ms] 324.80 [ 12.04 ms] [nb= 3]
16 x2048 x 2048 253.37 [ 15.79 ms] 153.92 [ 25.99 ms] 479.09 [ 8.35 ms] 480.63 [ 8.32 ms] [nb= 2]
16 x2058 x 2058 321.07 [ 12.58 ms] 93.70 [ 43.11 ms] 429.18 [ 9.41 ms] 433.28 [ 9.32 ms] [nb= 2]
16 x2100 x 2100 327.38 [ 12.85 ms] 76.46 [ 55.00 ms] 436.49 [ 9.64 ms] 418.56 [ 10.05 ms] [nb= 2]
16 x2160 x 2160 334.20 [ 13.31 ms] 94.12 [ 47.27 ms] 449.48 [ 9.90 ms] 309.32 [ 14.38 ms] [nb= 2]
16 x2187 x 2187 301.78 [ 15.11 ms] 130.61 [ 34.92 ms] 419.17 [ 10.88 ms] 417.63 [ 10.92 ms] [nb= 2]
16 x2205 x 2205 297.07 [ 15.61 ms] 91.45 [ 50.70 ms] 410.05 [ 11.31 ms] 428.99 [ 10.81 ms] [nb= 2]
16 x2240 x 2240 332.98 [ 14.37 ms] 101.65 [ 47.07 ms] 452.66 [ 10.57 ms] 400.02 [ 11.96 ms] [nb= 2]
16 x2250 x 2250 318.07 [ 15.18 ms] 88.91 [ 54.30 ms] 410.10 [ 11.77 ms] 405.68 [ 11.90 ms] [nb= 2]
16 x2268 x 2268 331.10 [ 14.82 ms] 104.44 [ 46.97 ms] 443.30 [ 11.07 ms] 289.89 [ 16.92 ms] [nb= 2]
16 x2304 x 2304 336.49 [ 15.05 ms] 111.95 [ 45.22 ms] 456.53 [ 11.09 ms] 436.33 [ 11.60 ms] [nb= 2]
16 x2352 x 2352 335.38 [ 15.73 ms] 94.37 [ 55.90 ms] 436.33 [ 12.09 ms] 404.67 [ 13.04 ms] [nb= 2]
16 x2400 x 2400 331.52 [ 16.57 ms] 95.05 [ 57.79 ms] 434.29 [ 12.65 ms] 429.58 [ 12.79 ms] [nb= 2]
16 x2401 x 2401 296.94 [ 18.51 ms] 93.88 [ 58.56 ms] 401.97 [ 13.68 ms] 324.07 [ 16.96 ms] [nb= 2]
16 x2430 x 2430 320.60 [ 17.56 ms] 96.51 [ 58.35 ms] 414.31 [ 13.59 ms] 434.20 [ 12.97 ms] [nb= 2]
16 x2450 x 2450 312.62 [ 18.31 ms] 130.68 [ 43.80 ms] 425.27 [ 13.46 ms] 429.26 [ 13.34 ms] [nb= 2]
16 x2500 x 2500 318.23 [ 18.73 ms] 94.76 [ 62.90 ms] 431.42 [ 13.82 ms] 443.60 [ 13.44 ms] [nb= 2]
16 x2520 x 2520 331.55 [ 18.27 ms] 123.93 [ 48.87 ms] 435.57 [ 13.90 ms] 405.85 [ 14.92 ms] [nb= 2]
16 x2560 x 2560 338.19 [ 18.48 ms] 95.37 [ 65.54 ms] 430.70 [ 14.51 ms] 224.63 [ 27.82 ms] [nb= 2]
16 x2592 x 2592 334.79 [ 19.14 ms] 100.65 [ 63.66 ms] 437.05 [ 14.66 ms] 219.92 [ 29.13 ms] [nb= 2]
16 x2625 x 2625 303.64 [ 21.64 ms] 130.96 [ 50.18 ms] 388.86 [ 16.90 ms] 415.21 [ 15.83 ms] [nb= 2]
16 x2646 x 2646 322.12 [ 20.73 ms] 77.84 [ 85.77 ms] 413.50 [ 16.15 ms] 429.86 [ 15.53 ms] [nb= 1]
16 x2688 x 2688 336.35 [ 20.49 ms] 90.01 [ 76.55 ms] 437.98 [ 15.73 ms] 387.28 [ 17.79 ms] [nb= 1]
16 x2700 x 2700 324.08 [ 21.45 ms] 85.56 [ 81.26 ms] 425.73 [ 16.33 ms] 284.92 [ 24.40 ms] [nb= 1]
16 x2744 x 2744 332.12 [ 21.62 ms] 103.39 [ 69.45 ms] 436.37 [ 16.46 ms] 449.67 [ 15.97 ms] [nb= 1]
16 x2800 x 2800 335.37 [ 22.29 ms] 107.44 [ 69.59 ms] 408.93 [ 18.28 ms] 261.91 [ 28.55 ms] [nb= 1]
16 x2835 x 2835 300.28 [ 25.53 ms] 133.85 [ 57.27 ms] 381.55 [ 20.09 ms] 374.40 [ 20.47 ms] [nb= 1]
16 x2880 x 2880 337.41 [ 23.44 ms] 85.09 [ 92.97 ms] 440.19 [ 17.97 ms] 387.76 [ 20.40 ms] [nb= 1]
16 x2916 x 2916 330.85 [ 24.51 ms] 91.32 [ 88.80 ms] 415.56 [ 19.51 ms] 295.19 [ 27.47 ms] [nb= 1]
16 x2940 x 2940 328.67 [ 25.08 ms] 132.72 [ 62.11 ms] 410.95 [ 20.06 ms] 424.60 [ 19.41 ms] [nb= 1]
16 x3000 x 3000 330.08 [ 26.00 ms] 77.80 [110.33 ms] 428.07 [ 20.05 ms] 413.89 [ 20.74 ms] [nb= 1]
16 x3024 x 3024 336.70 [ 25.90 ms] 81.54 [106.95 ms] 408.02 [ 21.37 ms] 386.46 [ 22.57 ms] [nb= 1]
16 x3072 x 3072 336.18 [ 26.77 ms] 95.30 [ 94.44 ms] 387.19 [ 23.24 ms] 411.01 [ 21.90 ms] [nb= 1]
[ ]: