I'm building an actor-critic reinforcement learning algorithm to solve environments. I want to use a single encoder to learn a representation of the environment state.
When I share the encoder between the actor and the critic, the network doesn't learn anything:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Encoder(nn.Module):
    def __init__(self, state_dim):
        super(Encoder, self).__init__()
        self.l1 = nn.Linear(state_dim, 512)

    def forward(self, state):
        a = F.relu(self.l1(state))
        return a


class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()
        self.l1 = nn.Linear(state_dim, 128)
        self.l3 = nn.Linear(128, action_dim)
        self.max_action = max_action

    def forward(self, state):
        a = F.relu(self.l1(state))
        a = torch.tanh(self.l3(a)) * self.max_action
        return a


class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()
        self.l1 = nn.Linear(state_dim + action_dim, 128)
        self.l3 = nn.Linear(128, 1)

    def forward(self, state, action):
        state_action = torch.cat([state, action], 1)
        q = F.relu(self.l1(state_action))
        q = self.l3(q)
        return q
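For context, the shared encoder feeds both heads roughly like this (a simplified sketch, not my exact agent code; it assumes the Actor and Critic are constructed with the encoder's 512-dim output as their input size):

# Simplified sketch of the shared-encoder wiring (assumption: the encoded
# feature is what gets passed as "state" to the Actor and Critic above).
encoder = Encoder(state_dim)
actor = Actor(512, action_dim, max_action)
critic = Critic(512, action_dim)

def select_action(state):
    z = encoder(state)        # shared representation
    return actor(z)

def q_value(state, action):
    z = encoder(state)        # the critic reuses the same encoder
    return critic(z, action)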
However, when I use a separate encoder for the actor and another one for the critic, it learns properly:

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()
        self.l1 = nn.Linear(state_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, action_dim)
        self.max_action = max_action

    def forward(self, state):
        a = F.relu(self.l1(state))
        a = F.relu(self.l2(a))
        a = torch.tanh(self.l3(a)) * self.max_action
        return a


class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()
        self.l1 = nn.Linear(state_dim + action_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, 1)

    def forward(self, state, action):
        state_action = torch.cat([state, action], 1)
        q = F.relu(self.l1(state_action))
        q = F.relu(self.l2(q))
        q = self.l3(q)
        return q

I'm pretty sure it's because of the optimizers. In the shared-encoder version, I define them as follows:
self.actor_optimizer = optim.Adam(list(self.actor.parameters()) + list(self.encoder.parameters()))
self.critic_optimizer = optim.Adam(list(self.critic.parameters()) + list(self.encoder.parameters()))

In the separate-encoder version, it's just:
self.actor_optimizer = optim.Adam(self.actor.parameters())
self.critic_optimizer = optim.Adam(self.critic.parameters())

There have to be two optimizers because of the actor-critic algorithm, in which the actor's loss is the value predicted by the critic.
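For reference, my update step looks roughly like this (a simplified sketch, not my exact training code; it assumes DDPG-style losses, with the replay buffer and target networks omitted):

# Simplified sketch of one training step with the shared encoder.
# Assumption: target_q is the TD target, computed from target networks (not shown).
def train_step(self, state, action, target_q):
    # Critic update: regress Q(s, a) towards the TD target.
    z = self.encoder(state)
    critic_loss = F.mse_loss(self.critic(z, action), target_q)
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # Actor update: the actor loss is the negative Q-value of the actor's action,
    # so gradients flow back through the critic and the shared encoder.
    z = self.encoder(state)
    actor_loss = -self.critic(z, self.actor(z)).mean()
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step()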
How can I combine the two optimizers so that the shared encoder is optimized correctly?