Skip to content

Data and I/O

Data handling utilities and model artifact download helpers.

Data module

project_name.data

QM9Dataset

Bases: Dataset

QM9 dataset wrapper from torch_geometric.

Source code in src/project_name/data.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class QM9Dataset(Dataset):
    """Thin ``Dataset`` wrapper around torch_geometric's QM9 dataset."""

    def __init__(self, data_path: Path) -> None:
        """Set up the wrapper and load QM9 immediately.

        Args:
            data_path: Directory that holds (or will hold) the QM9 files.
        """
        root = Path(data_path)
        self.data_path = root
        # _load_dataset() reads self.data_path, so it must be assigned first.
        self.dataset = self._load_dataset()

    def _load_dataset(self) -> QM9:
        """Build the underlying QM9 object rooted at ``self.data_path``.

        Makes sure the ``raw`` sub-directory exists, then hands the root
        directory to ``QM9``, which is expected to fetch the data itself
        when it is not already on disk. Progress is reported via ``print``.

        Returns:
            The QM9 dataset instance from torch_geometric.
        """
        (self.data_path / "raw").mkdir(parents=True, exist_ok=True)

        print("Loading QM9 dataset (downloading if not already present)...")
        qm9 = QM9(root=str(self.data_path))
        print(f"Dataset ready at {self.data_path}")
        return qm9

    def __len__(self) -> int:
        """Number of samples in the wrapped dataset."""
        size = len(self.dataset)
        return size

    def __getitem__(self, index: int):
        """Fetch the sample stored at ``index`` in the wrapped dataset."""
        sample = self.dataset[index]
        return sample

    def preprocess(self, output_folder: Path) -> None:
        """Preprocess the raw data and write it to ``output_folder``.

        Currently a placeholder: the method has no body beyond this
        docstring, so calling it does nothing and returns ``None``.
        """

__getitem__

__getitem__(index: int)

Return a given sample from the dataset.

Source code in src/project_name/data.py
39
40
41
def __getitem__(self, index: int):
    """Fetch the sample stored at ``index`` in the wrapped dataset."""
    sample = self.dataset[index]
    return sample

__init__

__init__(data_path: Path) -> None

Initialize the QM9 dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data_path` | `Path` | Path to the data directory where QM9 will be stored. | required |

Source code in src/project_name/data.py
10
11
12
13
14
15
16
17
def __init__(self, data_path: Path) -> None:
    """Set up the wrapper and load QM9 immediately.

    Args:
        data_path: Directory that holds (or will hold) the QM9 files.
    """
    root = Path(data_path)
    self.data_path = root
    # _load_dataset() reads self.data_path, so it must be assigned first.
    self.dataset = self._load_dataset()

__len__

__len__() -> int

Return the length of the dataset.

Source code in src/project_name/data.py
35
36
37
def __len__(self) -> int:
    """Number of samples in the wrapped dataset."""
    size = len(self.dataset)
    return size

preprocess

preprocess(output_folder: Path) -> None

Preprocess the raw data and save it to the output folder.

Source code in src/project_name/data.py
43
44
def preprocess(self, output_folder: Path) -> None:
    """Preprocess the raw data and write it to ``output_folder``.

    Currently a placeholder: the method has no body beyond this
    docstring, so calling it does nothing and returns ``None``.
    """

Model download

project_name.download_model