Source code for basicgym.envs.simulator.function

# Copyright (c) 2023, Haruka Kiyohara, Ren Kishimoto, HAKUHODO Technologies Inc., and Hanjuku-kaso Co., Ltd. All rights reserved.
# Licensed under the Apache 2.0 License.

"""Mathematical Functions used in Synthetic System ."""
from dataclasses import dataclass
from typing import Optional

import numpy as np
from sklearn.utils import check_scalar, check_random_state

from .base import BaseRewardFunction
from .base import BaseStateTransitionFunction
from ...utils import sigmoid


[docs]@dataclass class StateTransitionFunction(BaseStateTransitionFunction): """Class to define the state transition function. Bases: :class:`basicgym.BaseStateTransitionFunction` Imported as: :class:`basicgym.envs.simulator.StateTransitionFunction` Tip ------- Use :class:`BaseStateTransitionFunction` to define a custom StateTransitionFunction. Parameters ------- state_dim: int Dimension of the state. action_dim: int Dimension of the action (context). random_state: int, default=None (>= 0) Random state. """ state_dim: int action_dim: int random_state: Optional[int] = None def __post_init__(self): check_scalar( self.state_dim, name="state_dim", target_type=int, min_val=1, ) check_scalar( self.action_dim, name="action_dim", target_type=int, min_val=1, ) self.random_ = check_random_state(self.random_state) self.state_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.state_dim, self.state_dim) ) self.action_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.state_dim, self.action_dim) ) self.state_action_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.state_dim, self.action_dim) )
[docs] def step( self, state: np.ndarray, action: np.ndarray, ) -> np.ndarray: """Update the state based on the presented action. Parameters ------- state: array-like of shape (state_dim, ) Current state. action: array-like of shape (action_dim, ) Indicating the action chosen by the agent. Return ------- state: array-like of shape (state_dim, ) Next state. """ state = state / self.state_dim action = action / self.action_dim state = ( self.state_coef @ state + self.action_coef @ action + (self.state_action_coef @ action).T @ state ) state = state / np.linalg.norm(state, ord=2) return state
[docs]@dataclass class RewardFunction(BaseRewardFunction): """Class to define the reward function. Bases: :class:`basicgym.BaseRewardFunction` Imported as: :class:`basicgym.envs.simulator.RewardFunction` Tip ------- Use :class:`BaseRewardFunction` to define a custom RewardFunction. Parameters ------- state_dim: int Dimension of the state. action_dim: int Dimension of the action (context). reward_type: {"continuous", "binary"}, default="continuous" Reward type. reward_std: float, default=0.0 (>=0) Noise level of the reward. Applicable only when reward_type is "continuous". random_state: int, default=None (>= 0) Random state. """ state_dim: int action_dim: int reward_type: str = "continuous" # "binary" reward_std: float = 0.0 random_state: Optional[int] = None def __post_init__(self): check_scalar( self.state_dim, name="state_dim", target_type=int, min_val=1, ) check_scalar( self.action_dim, name="action_dim", target_type=int, min_val=1, ) check_scalar(self.reward_std, name="reward_std", target_type=float, min_val=0.0) if self.reward_type not in ["continuous", "binary"]: raise ValueError( f'reward_type must be either "continuous" or "binary", but {self.reward_type} is given' ) self.random_ = check_random_state(self.random_state) self.state_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.state_dim,) ) self.action_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.action_dim,) ) self.state_action_coef = self.random_.normal( loc=0.0, scale=1.0, size=(self.state_dim, self.action_dim) )
[docs] def mean_reward_function( self, state: np.ndarray, action: np.ndarray, ) -> float: """Linear expected immediate reward function. Parameters ------- state: array-like of shape (state_dim, ) State in the RL environment. action: array-like of shape (action_dim, ) Indicating the action chosen by the agent. Return ------- mean_reward_function: float Expected immediate reward function conditioned on the state and action. """ state = state / self.state_dim action = action / self.action_dim logit = ( self.state_coef.T @ state + self.action_coef.T @ action + state.T @ self.state_action_coef @ action ) mean_reward_function = ( logit if self.reward_type == "continuous" else sigmoid(logit) ) return mean_reward_function