Add rlpd tricks

This commit is contained in:
Adil Zouitine
2025-01-15 15:49:24 +01:00
committed by Michel Aractingi
parent 2fd78879f6
commit 46827fb002
2 changed files with 170 additions and 7 deletions

View File

@@ -266,7 +266,8 @@ class SACPolicy(
# critics subsample size
min_q, _ = q_targets.min(dim=0) # Get values from min operation
- min_q = min_q - (temperature * next_log_probs)
+ if self.config.use_backup_entropy:
+     min_q = min_q - (temperature * next_log_probs)
td_target = rewards + (1 - done) * self.config.discount * min_q