Enhance SAC configuration and replay buffer with asynchronous prefetching support

- Add an async_prefetch flag (default: False) to SACConfig to enable asynchronous prefetching for the replay buffers.
- Implement a get_iterator method in ReplayBuffer to support asynchronous prefetching of batches.
- Update learner_server to use the new iterator for both online and offline sampling, improving training efficiency.
This commit is contained in:
AdilZouitine
2025-04-03 14:23:50 +00:00
committed by Michel Aractingi
parent 70130b9841
commit 7c2c67fc3c
3 changed files with 132 additions and 482 deletions

View File

@@ -42,8 +42,6 @@ class CriticNetworkConfig:
final_activation: str | None = None
@dataclass
class ActorNetworkConfig:
hidden_dims: list[int] = field(default_factory=lambda: [256, 256])
@@ -94,6 +92,7 @@ class SACConfig(PreTrainedConfig):
online_env_seed: Seed for the online environment.
online_buffer_capacity: Capacity of the online replay buffer.
offline_buffer_capacity: Capacity of the offline replay buffer.
async_prefetch: Whether to use asynchronous prefetching for the buffers.
online_step_before_learning: Number of steps before learning starts.
policy_update_freq: Frequency of policy updates.
discount: Discount factor for the SAC algorithm.
@@ -154,6 +153,7 @@ class SACConfig(PreTrainedConfig):
online_env_seed: int = 10000
online_buffer_capacity: int = 100000
offline_buffer_capacity: int = 100000
async_prefetch: bool = False
online_step_before_learning: int = 100
policy_update_freq: int = 1