Lightweight version of the torch.Stream context manager, which avoids current_stream and device lookups.
Source code in vllm/v1/worker/gpu/async_utils.py
@contextlib.contextmanager
def stream(to_stream: torch.cuda.Stream, from_stream: torch.cuda.Stream):
    """Lightweight version of the ``torch.cuda.stream`` context manager
    which avoids ``current_stream`` and device lookups.

    The caller supplies both streams explicitly, so no query of the
    currently active stream or device is performed here — that is the
    point of this helper.

    Args:
        to_stream: Stream made current for the duration of the ``with``
            block.
        from_stream: Stream restored as current on exit. The caller is
            expected to pass the previously current stream.

    Yields:
        None. Code inside the ``with`` block runs with ``to_stream``
        current.
    """
    try:
        torch.cuda.set_stream(to_stream)
        yield
    finally:
        # Always restore, even if the body (or set_stream itself) raised.
        torch.cuda.set_stream(from_stream)