Timing versus DRR size: benchmarking DRR module construction as the detector height grows from 100 to 500 pixels.

import numpy as np
import torch

from diffdrr.drr import DRR
from diffdrr.data import load_example_ct
from diffdrr.visualization import plot_drr
# Load the example CT scan: the voxel volume and its physical spacing (mm)
volume, spacing = load_example_ct()

# Place the isocenter at the geometric center of the volume:
# half of the physical extent (shape * spacing) along each axis.
isocenter = np.array(volume.shape) * np.array(spacing) / 2
bx, by, bz = isocenter
detector_kwargs = dict(
    sdr=0.1,          # source-to-detector radius
    theta=np.pi,      # detector rotation angles (radians)
    phi=0,
    gamma=np.pi / 2,
    bx=bx,            # isocenter coordinates
    by=by,
    bz=bz,
)
height = 100

# Build the DRR module (100x100 detector, 4 mm pixels) on GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
drr = DRR(volume, spacing, height=height, delx=4.0).to(device)

# Drop the module AND return the freed blocks held by PyTorch's caching
# allocator to the driver, so successive benchmark runs at larger heights
# don't accumulate reserved GPU memory and eventually OOM.
del drr
if torch.cuda.is_available():
    torch.cuda.empty_cache()
8.73 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
height = 200

# Build the DRR module (200x200 detector, 4 mm pixels) on GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
drr = DRR(volume, spacing, height=height, delx=4.0).to(device)

# Drop the module AND release the caching allocator's freed blocks back to
# the driver; otherwise each timing run leaves reserved GPU memory behind
# and the larger detector sizes run out of memory.
del drr
if torch.cuda.is_available():
    torch.cuda.empty_cache()
28.7 ms ± 97.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
height = 300

# Build the DRR module (300x300 detector, 4 mm pixels) on GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
drr = DRR(volume, spacing, height=height, delx=4.0).to(device)

# Drop the module AND hand cached GPU blocks back to the driver so this
# run's memory does not stay reserved during the next, larger benchmark.
del drr
if torch.cuda.is_available():
    torch.cuda.empty_cache()
62.1 ms ± 81.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
height = 400

# Build the DRR module (400x400 detector, 4 mm pixels) on GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
drr = DRR(volume, spacing, height=height, delx=4.0).to(device)

# Drop the module AND empty the caching allocator so the reserved memory
# from this run is actually available to the height=500 run that follows.
del drr
if torch.cuda.is_available():
    torch.cuda.empty_cache()
109 ms ± 78.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
height = 500

# Build the DRR module (500x500 detector, 4 mm pixels) on GPU if available.
# NOTE(review): this is the run that previously died with CUDA OutOfMemory
# ("4.39 GiB already allocated") because the earlier runs' freed tensors
# were still held by PyTorch's caching allocator. Emptying the cache after
# each run (done here and in the prior cells) mitigates that; if it still
# OOMs, the 500x500 DRR genuinely exceeds this GPU's capacity.
device = "cuda" if torch.cuda.is_available() else "cpu"
drr = DRR(volume, spacing, height=height, delx=4.0).to(device)

del drr
if torch.cuda.is_available():
    torch.cuda.empty_cache()
OutOfMemoryError: CUDA out of memory. Tried to allocate 1.22 GiB (GPU 0; 10.76 GiB total capacity; 4.39 GiB already allocated; 1.05 GiB free; 7.13 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF