Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,21 @@ impl CudaBuilder {
self
}

/// Turn on the whole bundle of fast-math approximations at once (the
/// counterpart of passing `--use_fast_math` to NVCC).
///
/// Calling this sets each of `ftz`, `fast_sqrt`, `fast_div` and
/// `fma_contraction` to `true`. It only assigns those four fields, so any
/// of them can be changed again by a later call on the builder.
///
/// Note: performance is gained at the expense of IEEE 754 compliance.
/// Single-precision division and square root will have up to 2 ULP error,
/// and denormal values will be flushed to zero.
pub fn fast_math(mut self) -> Self {
    // The four fields are disjoint, so they can be borrowed mutably side by
    // side and switched on in a single pass.
    for flag in [
        &mut self.ftz,
        &mut self.fast_sqrt,
        &mut self.fast_div,
        &mut self.fma_contraction,
    ] {
        *flag = true;
    }
    self
}

/// Use a fast approximation for single-precision floating point square root.
pub fn fast_sqrt(mut self, fast_sqrt: bool) -> Self {
self.fast_sqrt = fast_sqrt;
Expand Down