PPO
PPOHparams
    
              Bases: HParams
anneal_lr = struct.field(pytree_node=False, default=True)
  
      class-attribute
      instance-attribute
  
    Whether to anneal the learning rate linearly to 0 at the end of training.
budget = struct.field(pytree_node=False, default=1000000)
  
      class-attribute
      instance-attribute
  
    Number of environment frames to train for.
clip_eps = 0.2
  
      class-attribute
      instance-attribute
  
    PPO clip parameter.
clip_value_loss = struct.field(pytree_node=False, default=True)
  
      class-attribute
      instance-attribute
  
    Whether to clip the value loss in the PPO loss.
ent_coef = 0.01
  
      class-attribute
      instance-attribute
  
    Entropy coefficient in the total loss.
gae_lambda = 0.95
  
      class-attribute
      instance-attribute
  
    Lambda parameter of the TD(lambda) return used for generalised advantage estimation (GAE).
lr = 0.00025
  
      class-attribute
      instance-attribute
  
    Starting learning rate.
max_grad_norm = 0.5
  
      class-attribute
      instance-attribute
  
    Maximum gradient norm for clipping.
normalise_advantage = struct.field(pytree_node=False, default=True)
  
      class-attribute
      instance-attribute
  
    Whether to normalise the advantages in the PPO loss.
num_envs = struct.field(pytree_node=False, default=16)
  
      class-attribute
      instance-attribute
  
    Number of parallel environments to run.
num_epochs = struct.field(pytree_node=False, default=1)
  
      class-attribute
      instance-attribute
  
    Number of epochs to train for on the collected data in each update.
num_minibatches = struct.field(pytree_node=False, default=8)
  
      class-attribute
      instance-attribute
  
    Number of minibatches to split the data into for training.
num_steps = struct.field(pytree_node=False, default=128)
  
      class-attribute
      instance-attribute
  
    Number of steps to run in each environment per update.
vf_coef = 0.5
  
      class-attribute
      instance-attribute
  
    Value function coefficient in the total loss.