GitHub Repository: shivamshrirao/diffusers
Path: blob/main/examples/rl/run_diffuser_locomotion.py
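"""Plan locomotion actions with diffusers' experimental ValueGuidedRLPipeline.

Loads a pretrained value-guided diffusion planner for the D4RL hopper-medium-v2
task, plans one action per environment step, executes it, and tracks the total
reward and the D4RL normalized score.
"""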
# Importing d4rl registers its offline-RL environments with gym.
import d4rl  # noqa
import gym
import tqdm
from diffusers.experimental import ValueGuidedRLPipeline
# Sampling and guidance hyperparameters for the value-guided planner; kept here
# for reference, since the pipeline call below only passes planning_horizon.
config = dict(
    n_samples=64,
    horizon=32,
    num_inference_steps=20,
    n_guide_steps=2,  # can set to 0 for faster sampling, does not use value network
    scale_grad_by_std=True,
    scale=0.1,
    eta=0.0,
    t_grad_cutoff=2,
    device="cpu",
)
if __name__ == "__main__":
    env_name = "hopper-medium-v2"
    env = gym.make(env_name)
    # Load the pretrained value-guided diffusion planner for hopper-medium-v2.
    pipeline = ValueGuidedRLPipeline.from_pretrained(
        "bglick13/hopper-medium-v2-value-function-hor32",
        env=env,
    )
    env.seed(0)
    obs = env.reset()
    total_reward = 0
    total_score = 0
    T = 1000
    rollout = [obs.copy()]
    try:
        for t in tqdm.tqdm(range(T)):
            # call the policy
            denorm_actions = pipeline(obs, planning_horizon=32)

            # execute action in environment
            next_observation, reward, terminal, _ = env.step(denorm_actions)
            score = env.get_normalized_score(total_reward)

            # update return
            total_reward += reward
            total_score += score
            print(
                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
                f" {total_score}"
            )

            # save observations for rendering
            rollout.append(next_observation.copy())

            obs = next_observation
    except KeyboardInterrupt:
        pass

    print(f"Total reward: {total_reward}")
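    # NOTE: `rollout` holds the observation trajectory collected above; it is not
    # rendered here and can be saved or visualized separately if desired.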