Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ grant the users the right to the use of patent under the requirement of Apache 2

============================================================================

This product includes source code derived from the DataDog/toto project:

Toto – Time Series Optimized Transformer for Observability
Copyright 2025 Datadog, Inc.
Licensed under the Apache License, Version 2.0
https://github.com/DataDog/toto

============================================================================

Apache Commons Collections
Copyright 2001-2019 The Apache Software Foundation

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ public class AINodeTestUtils {
new AbstractMap.SimpleEntry<>(
"chronos2", new FakeModelInfo("chronos2", "t5", "builtin", "active")),
new AbstractMap.SimpleEntry<>(
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")))
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")),
new AbstractMap.SimpleEntry<>(
"toto", new FakeModelInfo("toto", "toto", "builtin", "active")))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

public static final Map<String, FakeModelInfo> BUILTIN_MODEL_MAP;
Expand Down
1 change: 1 addition & 0 deletions iotdb-core/ainode/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ poetry.lock
# generated by pyinstaller
/dist/
/build/

8 changes: 5 additions & 3 deletions iotdb-core/ainode/build_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,26 +423,28 @@ def verify_poetry_env():
[str(poetry_exe), "lock"],
cwd=str(script_dir),
env=venv_env,
check=True,
check=False,
capture_output=True,
text=True,
)
if result.stdout:
print(result.stdout)
if result.stderr:
print(result.stderr)
if result.returncode != 0:
print(f"ERROR: poetry lock failed with exit code {result.returncode}")
sys.exit(1)
verify_poetry_env() # Verify after lock

accelerator = detect_accelerator()
print(f"Selected accelerator: {accelerator}")

print("Running poetry install...")
subprocess.run(
[str(poetry_exe), "lock"],
[str(poetry_exe), "install", "--no-root"],
cwd=str(script_dir),
env=venv_env,
check=True,
capture_output=True,
text=True,
)
verify_poetry_env() # Verify before install
Expand Down
13 changes: 13 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/model_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,17 @@ def __repr__(self):
},
transformers_registered=True,
),
"toto": ModelInfo(
model_id="toto",
category=ModelCategory.BUILTIN,
state=ModelStates.INACTIVE,
model_type="toto",
pipeline_cls="pipeline_toto.TotoPipeline",
repo_id="Datadog/Toto-Open-Base-1.0",
auto_map={
"AutoConfig": "configuration_toto.TotoConfig",
"AutoModelForCausalLM": "modeling_toto.TotoForPrediction",
},
transformers_registered=True,
),
}
17 changes: 17 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/toto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

from typing import List, Optional

from transformers import PretrainedConfig


class TotoConfig(PretrainedConfig):
    """
    Hugging Face style configuration holder for the Toto forecasting model.

    Toto (Time Series Optimized Transformer for Observability) is a foundation
    model for multivariate time series forecasting developed by Datadog. It uses
    a decoder-only architecture with per-variate patch-based causal scaling,
    proportional time-variate factorized attention, and a Student-T mixture
    prediction head.

    Each constructor argument is stored on the instance under the same name.
    ``output_distribution_classes`` and ``output_distribution_kwargs`` fall back
    to built-in defaults whenever a falsy value (``None`` or an empty
    container) is supplied. Any extra keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    Reference: https://github.com/DataDog/toto
    """

    model_type = "toto"

    def __init__(
        self,
        patch_size: int = 32,
        stride: int = 32,
        embed_dim: int = 1024,
        num_layers: int = 18,
        num_heads: int = 16,
        mlp_hidden_dim: int = 2816,
        dropout: float = 0.0,
        spacewise_every_n_layers: int = 3,
        scaler_cls: str = "per_variate_causal",
        output_distribution_classes: Optional[List[str]] = None,
        output_distribution_kwargs: Optional[dict] = None,
        spacewise_first: bool = True,
        use_memory_efficient_attention: bool = True,
        stabilize_with_global: bool = True,
        scale_factor_exponent: float = 10.0,
        **kwargs,
    ):
        # Resolve the optional distribution settings first; a fresh default
        # object is created per instance so nothing mutable is shared.
        if not output_distribution_classes:
            output_distribution_classes = ["student_t_mixture"]
        if not output_distribution_kwargs:
            # k_components=5 is the default used by Datadog/Toto-Open-Base-1.0
            output_distribution_kwargs = {"k_components": 5}

        # Patching and backbone dimensions.
        self.patch_size = patch_size
        self.stride = stride
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.mlp_hidden_dim = mlp_hidden_dim
        self.dropout = dropout

        # Attention layout.
        self.spacewise_every_n_layers = spacewise_every_n_layers
        self.spacewise_first = spacewise_first
        self.use_memory_efficient_attention = use_memory_efficient_attention

        # Input scaling.
        self.scaler_cls = scaler_cls
        self.stabilize_with_global = stabilize_with_global
        self.scale_factor_exponent = scale_factor_exponent

        # Output head.
        self.output_distribution_classes = output_distribution_classes
        self.output_distribution_kwargs = output_distribution_kwargs

        # The base class consumes the remaining generic options.
        super().__init__(**kwargs)
20 changes: 20 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/toto/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This file includes code derived from DataDog/toto
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
# Copyright 2025 Datadog, Inc.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This file includes code derived from DataDog/toto
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
# Copyright 2025 Datadog, Inc.
127 changes: 127 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/toto/data/util/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This file includes code derived from DataDog/toto
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
# Copyright 2025 Datadog, Inc.

from functools import reduce
from typing import NamedTuple

import numpy as np
import torch
import torch.utils.data
from einops import repeat
from jaxtyping import Bool, Float, Int, Shaped


def pad_array(
    values: Shaped[torch.Tensor, "*batch variates series_len"],  # noqa: F722
    patch_stride: int,
) -> Shaped[torch.Tensor, "*batch variates padded_length"]:  # noqa: F722
    """
    Left-pad ``values`` with zeros along its last axis so that the resulting
    length is a multiple of ``patch_stride``.

    NumPy arrays are converted to tensors before padding. Only 2-D and 3-D
    inputs are accepted; any other rank raises ``ValueError``. The result is a
    newly allocated tensor with the dtype and device of the input, with the
    original data right-aligned.
    """
    tensor = torch.from_numpy(values) if isinstance(values, np.ndarray) else values

    original_len = tensor.shape[-1]
    target_len = int(np.ceil(original_len / patch_stride) * patch_stride)

    if tensor.ndim not in (2, 3):
        raise ValueError(f"Unsupported number of dimensions: {tensor.ndim}")

    # new_zeros inherits dtype and device from the input tensor.
    result = tensor.new_zeros((*tensor.shape[:-1], target_len))
    # Right-align the data; everything to its left stays zero.
    result[..., -original_len:] = tensor
    return result


def pad_id_mask(
    id_mask: Int[torch.Tensor, "*batch variates series_len"],  # noqa: F722
    patch_stride: int,
) -> Int[torch.Tensor, "*batch variates padded_length"]:  # noqa: F722
    """
    Left-pad an id mask along its last axis so that the resulting length is a
    multiple of ``patch_stride``.

    The padding is filled by repeating the first (left-most) entry of each row
    rather than with zeros. Only 2-D and 3-D masks are accepted; any other rank
    raises ``ValueError``.
    """
    # einops pattern used to broadcast the left edge, keyed by input rank.
    repeat_patterns = {
        2: "variates -> variates padding_amount",
        3: "batch variates -> batch variates padding_amount",
    }

    current_len = id_mask.shape[-1]
    target_len = int(np.ceil(current_len / patch_stride) * patch_stride)
    missing = target_len - current_len
    first_column: Int[torch.Tensor, "*batch variates"] = id_mask[..., 0]  # noqa: F722

    pattern = repeat_patterns.get(id_mask.ndim)
    if pattern is None:
        raise ValueError(f"Unsupported number of dimensions: {id_mask.ndim}")

    left_fill = repeat(first_column, pattern, padding_amount=missing)
    # For the supported ranks the time axis is always the last one.
    return torch.cat([left_fill, id_mask], dim=-1)


class MaskedTimeseries(NamedTuple):
    """
    Immutable bundle of the tensors describing a batch of time series,
    together with a count of exogenous variables.

    Expected tensor shapes are given by the field annotations.
    """

    # Series values.
    series: Float[torch.Tensor, "*batch variates series_len"]  # noqa: F722
    # Boolean mask aligned with ``series``.
    padding_mask: Bool[torch.Tensor, "*batch variates series_len"]  # noqa: F722
    # Integer ids; the last axis may be broadcast (size 1) per the annotation.
    id_mask: Int[torch.Tensor, "*batch variates #series_len"]  # noqa: F722
    # Per-step timestamps, in seconds.
    timestamp_seconds: Int[torch.Tensor, "*batch variates series_len"]  # noqa: F722
    # Per-variate sampling interval, in seconds.
    time_interval_seconds: Int[torch.Tensor, "*batch variates"]  # noqa: F722
    # Number of exogenous variables; defaults to none.
    num_exogenous_variables: int = 0

    def to(self, device: torch.device) -> "MaskedTimeseries":
        """Return a new ``MaskedTimeseries`` with every tensor field moved to ``device``."""
        tensor_fields = (
            "series",
            "padding_mask",
            "id_mask",
            "timestamp_seconds",
            "time_interval_seconds",
        )
        relocated = {name: getattr(self, name).to(device) for name in tensor_fields}
        # The exogenous-variable count is a plain int and is carried over as is.
        return MaskedTimeseries(
            num_exogenous_variables=self.num_exogenous_variables,
            **relocated,
        )


def is_extreme_value(t: torch.Tensor) -> torch.Tensor:
    """
    Build a boolean mask marking the "extreme" elements of ``t``.

    An element is extreme when it is infinite, NaN, or when its absolute value
    is at least half of the largest value representable in ``t``'s dtype
    (``torch.finfo`` for floating-point tensors, ``torch.iinfo`` otherwise).
    """
    dtype_info = torch.finfo(t.dtype) if torch.is_floating_point(t) else torch.iinfo(t.dtype)
    magnitude_limit = dtype_info.max / 2

    non_finite = torch.logical_or(torch.isinf(t), torch.isnan(t))
    too_large = t.abs() >= magnitude_limit
    return torch.logical_or(non_finite, too_large)


def replace_extreme_values(t: torch.Tensor, replacement: float = 0.0) -> torch.Tensor:
    """
    Return a tensor equal to ``t`` except that every element flagged by
    ``is_extreme_value`` is swapped for ``replacement``.

    The replacement is materialised as a scalar tensor with ``t``'s dtype and
    device, so the result keeps the dtype of the input.
    """
    extreme_mask = is_extreme_value(t)
    fill_value = torch.tensor(replacement, dtype=t.dtype, device=t.device)
    return torch.where(extreme_mask, fill_value, t)
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This file includes code derived from DataDog/toto
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
# Copyright 2025 Datadog, Inc.
Loading