# Install the package found in the current directory in editable (development) mode.
pip install -e .





Copied!







# Install pymdp with its `nb` extra, but only when the `google.colab` module is
# already loaded (taken here as the sign of a Colab runtime).
import sys
if "google.colab" in sys.modules:
    # IPython magic, not plain Python; -q asks pip for quiet output.
    %pip install "inferactively-pymdp[nb]" -q

# Install pymdp with its `nb` extra, but only when the `google.colab` module is
# already loaded (taken here as the sign of a Colab runtime).
import sys
if "google.colab" in sys.modules:
    # IPython magic, not plain Python; -q asks pip for quiet output.
    %pip install "inferactively-pymdp[nb]" -q





Copied!







# Install pymdp with its `nb` extra, but only when the `google.colab` module is
# already loaded (taken here as the sign of a Colab runtime).
import sys
if "google.colab" in sys.modules:
    # IPython magic, not plain Python; -q asks pip for quiet output.
    %pip install "inferactively-pymdp[nb]" -q

# Install pymdp with its `nb` extra, but only when the `google.colab` module is
# already loaded (taken here as the sign of a Colab runtime).
import sys
if "google.colab" in sys.modules:
    # IPython magic, not plain Python; -q asks pip for quiet output.
    %pip install "inferactively-pymdp[nb]" -q





Copied!







# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# execution, so edits to the installed package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

from jax import numpy as jnp, random as jr

from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)

from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.utils import list_array_uniform

# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# execution, so edits to the installed package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

from jax import numpy as jnp, random as jr

from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)

from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.utils import list_array_uniform





Copied!







# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# execution, so edits to the installed package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

from jax import numpy as jnp, random as jr

from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)

from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.utils import list_array_uniform

# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# execution, so edits to the installed package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

from jax import numpy as jnp, random as jr

from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)

from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.utils import list_array_uniform





Copied!







# Build the T-maze environment and render its initial state.
# get_maze_matrix() is called with no arguments, i.e. with whatever defaults it defines.
M = get_maze_matrix()
# Fixed seed (0) so the cell gives the same result on every run.
key = jr.PRNGKey(0)

# Split the key: `subkey` goes to parse_maze, the refreshed `key` goes to reset() below.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# reset() returns a pair, unpacked here as the initial observation and initial state.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

# Build the T-maze environment and render its initial state.
# get_maze_matrix() is called with no arguments, i.e. with whatever defaults it defines.
M = get_maze_matrix()
# Fixed seed (0) so the cell gives the same result on every run.
key = jr.PRNGKey(0)

# Split the key: `subkey` goes to parse_maze, the refreshed `key` goes to reset() below.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# reset() returns a pair, unpacked here as the initial observation and initial state.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

<Figure size 640x480 with 0 Axes>





Copied!







# Build the T-maze environment and render its initial state.
# get_maze_matrix() is called with no arguments, i.e. with whatever defaults it defines.
M = get_maze_matrix()
# Fixed seed (0) so the cell gives the same result on every run.
key = jr.PRNGKey(0)

# Split the key: `subkey` goes to parse_maze, the refreshed `key` goes to reset() below.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# reset() returns a pair, unpacked here as the initial observation and initial state.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

# Build the T-maze environment and render its initial state.
# get_maze_matrix() is called with no arguments, i.e. with whatever defaults it defines.
M = get_maze_matrix()
# Fixed seed (0) so the cell gives the same result on every run.
key = jr.PRNGKey(0)

# Split the key: `subkey` goes to parse_maze, the refreshed `key` goes to reset() below.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# reset() returns a pair, unpacked here as the initial observation and initial state.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

<Figure size 640x480 with 0 Axes>





Copied!







# Take the A and B tensors, plus their dependency lists, directly from the environment.
A, B = tmaze_env.A, tmaze_env.B
A_dependencies, B_dependencies = tmaze_env.A_dependencies, tmaze_env.B_dependencies

# One all-zero preference vector per entry of A, with length a.shape[0].
# [position], [cue], [reward]
C = [jnp.zeros(a.shape[0]) for a in A]

# Index -1 selects the last entry of C; only that entry is modified below.
rewarding_modality = -1

# Set entry 1 of that vector to +1.0 and entry 2 to -3.0.
# NOTE(review): presumably indices 1 and 2 are the reward and punishment outcomes — confirm against the env.
C[rewarding_modality] = C[rewarding_modality].at[1].set(1.0)
C[rewarding_modality] = C[rewarding_modality].at[2].set(-3.0)

# One array per entry of B, sized by b.shape[0].
# NOTE(review): presumably uniform distributions, going by the helper's name — confirm.
D = list_array_uniform([b.shape[0] for b in B])

# make 9 different agents to simulate in parallel
batch_size = 9


# Build the batched agent: E, pA and pB are left as None and both learning flags are off.
agent = Agent(
    A, B, C, D, 
    E=None,
    pA=None,
    pB=None,
    policy_len=5,
    A_dependencies=A_dependencies, 
    B_dependencies=B_dependencies,
    use_utility=True,
    use_states_info_gain=True,
    sampling_mode='full',
    action_selection='stochastic',
    gamma=4.0,
    batch_size=batch_size,
    learn_A=False,
    learn_B=False
)

# Take the A and B tensors, plus their dependency lists, directly from the environment.
A, B = tmaze_env.A, tmaze_env.B
A_dependencies, B_dependencies = tmaze_env.A_dependencies, tmaze_env.B_dependencies

# One all-zero preference vector per entry of A, with length a.shape[0].
# [position], [cue], [reward]
C = [jnp.zeros(a.shape[0]) for a in A]

# Index -1 selects the last entry of C; only that entry is modified below.
rewarding_modality = -1

# Set entry 1 of that vector to +1.0 and entry 2 to -3.0.
# NOTE(review): presumably indices 1 and 2 are the reward and punishment outcomes — confirm against the env.
C[rewarding_modality] = C[rewarding_modality].at[1].set(1.0)
C[rewarding_modality] = C[rewarding_modality].at[2].set(-3.0)

# One array per entry of B, sized by b.shape[0].
# NOTE(review): presumably uniform distributions, going by the helper's name — confirm.
D = list_array_uniform([b.shape[0] for b in B])

# make 9 different agents to simulate in parallel
batch_size = 9


# Build the batched agent: E, pA and pB are left as None and both learning flags are off.
agent = Agent(
    A, B, C, D, 
    E=None,
    pA=None,
    pB=None,
    policy_len=5,
    A_dependencies=A_dependencies, 
    B_dependencies=B_dependencies,
    use_utility=True,
    use_states_info_gain=True,
    sampling_mode='full',
    action_selection='stochastic',
    gamma=4.0,
    batch_size=batch_size,
    learn_A=False,
    learn_B=False
)





Copied!







# Take the A and B tensors, plus their dependency lists, directly from the environment.
A, B = tmaze_env.A, tmaze_env.B
A_dependencies, B_dependencies = tmaze_env.A_dependencies, tmaze_env.B_dependencies

# One all-zero preference vector per entry of A, with length a.shape[0].
# [position], [cue], [reward]
C = [jnp.zeros(a.shape[0]) for a in A]

# Index -1 selects the last entry of C; only that entry is modified below.
rewarding_modality = -1

# Set entry 1 of that vector to +1.0 and entry 2 to -3.0.
# NOTE(review): presumably indices 1 and 2 are the reward and punishment outcomes — confirm against the env.
C[rewarding_modality] = C[rewarding_modality].at[1].set(1.0)
C[rewarding_modality] = C[rewarding_modality].at[2].set(-3.0)

# One array per entry of B, sized by b.shape[0].
# NOTE(review): presumably uniform distributions, going by the helper's name — confirm.
D = list_array_uniform([b.shape[0] for b in B])

# make 9 different agents to simulate in parallel
batch_size = 9


# Build the batched agent: E, pA and pB are left as None and both learning flags are off.
agent = Agent(
    A, B, C, D, 
    E=None,
    pA=None,
    pB=None,
    policy_len=5,
    A_dependencies=A_dependencies, 
    B_dependencies=B_dependencies,
    use_utility=True,
    use_states_info_gain=True,
    sampling_mode='full',
    action_selection='stochastic',
    gamma=4.0,
    batch_size=batch_size,
    learn_A=False,
    learn_B=False
)

# Take the A and B tensors, plus their dependency lists, directly from the environment.
A, B = tmaze_env.A, tmaze_env.B
A_dependencies, B_dependencies = tmaze_env.A_dependencies, tmaze_env.B_dependencies

# One all-zero preference vector per entry of A, with length a.shape[0].
# [position], [cue], [reward]
C = [jnp.zeros(a.shape[0]) for a in A]

# Index -1 selects the last entry of C; only that entry is modified below.
rewarding_modality = -1

# Set entry 1 of that vector to +1.0 and entry 2 to -3.0.
# NOTE(review): presumably indices 1 and 2 are the reward and punishment outcomes — confirm against the env.
C[rewarding_modality] = C[rewarding_modality].at[1].set(1.0)
C[rewarding_modality] = C[rewarding_modality].at[2].set(-3.0)

# One array per entry of B, sized by b.shape[0].
# NOTE(review): presumably uniform distributions, going by the helper's name — confirm.
D = list_array_uniform([b.shape[0] for b in B])

# make 9 different agents to simulate in parallel
batch_size = 9


# Build the batched agent: E, pA and pB are left as None and both learning flags are off.
agent = Agent(
    A, B, C, D, 
    E=None,
    pA=None,
    pB=None,
    policy_len=5,
    A_dependencies=A_dependencies, 
    B_dependencies=B_dependencies,
    use_utility=True,
    use_states_info_gain=True,
    sampling_mode='full',
    action_selection='stochastic',
    gamma=4.0,
    batch_size=batch_size,
    learn_A=False,
    learn_B=False
)





Copied!







# Report the shape of every tensor in each component of the agent's model.
# Iterating over the lists (rather than indexing 0..6 / 0..3 by hand) keeps the
# cell valid when the environment has a different number of modalities or factors.
# Generator expressions are used so no loop variable leaks into the notebook namespace.
print("A tensors")
print(*(a.shape for a in agent.A), sep="\n")
print()
print("B tensors")
print(*(b.shape for b in agent.B), sep="\n")
print()
print("C tensors")
print(*(c.shape for c in agent.C), sep="\n")
print()
print("D tensors")
print(*(d.shape for d in agent.D), sep="\n")
print()
print("A and B dependencies")
print(agent.A_dependencies)
print(agent.B_dependencies)

# Report the shape of every tensor in each component of the agent's model.
# Iterating over the lists (rather than indexing 0..6 / 0..3 by hand) keeps the
# cell valid when the environment has a different number of modalities or factors.
# Generator expressions are used so no loop variable leaks into the notebook namespace.
print("A tensors")
print(*(a.shape for a in agent.A), sep="\n")
print()
print("B tensors")
print(*(b.shape for b in agent.B), sep="\n")
print()
print("C tensors")
print(*(c.shape for c in agent.C), sep="\n")
print()
print("D tensors")
print(*(d.shape for d in agent.D), sep="\n")
print()
print("A and B dependencies")
print(agent.A_dependencies)
print(agent.B_dependencies)

A tensors
(9, 25, 25)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)

B tensors
(9, 25, 25, 5)
(9, 2, 2, 1)
(9, 2, 2, 1)
(9, 2, 2, 1)

C tensors
(9, 25)
(9, 3)
(9, 3)
(9, 3)
(9, 3)
(9, 3)
(9, 3)

D tensors
(9, 25)
(9, 2)
(9, 2)
(9, 2)

A and B dependencies
[[0], [0, 1], [0, 2], [0, 3], [0, 1], [0, 2], [0, 3]]
[[0], [1], [2], [3]]





Copied!







# Report the shape of every tensor in each component of the agent's model.
# Iterating over the lists (rather than indexing 0..6 / 0..3 by hand) keeps the
# cell valid when the environment has a different number of modalities or factors.
# Generator expressions are used so no loop variable leaks into the notebook namespace.
print("A tensors")
print(*(a.shape for a in agent.A), sep="\n")
print()
print("B tensors")
print(*(b.shape for b in agent.B), sep="\n")
print()
print("C tensors")
print(*(c.shape for c in agent.C), sep="\n")
print()
print("D tensors")
print(*(d.shape for d in agent.D), sep="\n")
print()
print("A and B dependencies")
print(agent.A_dependencies)
print(agent.B_dependencies)

# Report the shape of every tensor in each component of the agent's model.
# Iterating over the lists (rather than indexing 0..6 / 0..3 by hand) keeps the
# cell valid when the environment has a different number of modalities or factors.
# Generator expressions are used so no loop variable leaks into the notebook namespace.
print("A tensors")
print(*(a.shape for a in agent.A), sep="\n")
print()
print("B tensors")
print(*(b.shape for b in agent.B), sep="\n")
print()
print("C tensors")
print(*(c.shape for c in agent.C), sep="\n")
print()
print("D tensors")
print(*(d.shape for d in agent.D), sep="\n")
print()
print("A and B dependencies")
print(agent.A_dependencies)
print(agent.B_dependencies)

A tensors
(9, 25, 25)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)
(9, 3, 25, 2)

B tensors
(9, 25, 25, 5)
(9, 2, 2, 1)
(9, 2, 2, 1)
(9, 2, 2, 1)

C tensors
(9, 25)
(9, 3)
(9, 3)
(9, 3)
(9, 3)
(9, 3)
(9, 3)

D tensors
(9, 25)
(9, 2)
(9, 2)
(9, 2)

A and B dependencies
[[0], [0, 1], [0, 2], [0, 3], [0, 1], [0, 2], [0, 3]]
[[0], [1], [2], [3]]





Copied!







# Seed the PRNG used for the rollout.
key = jr.PRNGKey(1)
# Number of timesteps to simulate.
T = 10
# Run the agent in the environment; only the second return value (`info`) is kept.
_, info = rollout(agent, tmaze_env, num_timesteps=T, rng_key=key)

# Seed the PRNG used for the rollout.
key = jr.PRNGKey(1)
# Number of timesteps to simulate.
T = 10
# Run the agent in the environment; only the second return value (`info`) is kept.
_, info = rollout(agent, tmaze_env, num_timesteps=T, rng_key=key)





Copied!







# Seed the PRNG used for the rollout.
key = jr.PRNGKey(1)
# Number of timesteps to simulate.
T = 10
# Run the agent in the environment; only the second return value (`info`) is kept.
_, info = rollout(agent, tmaze_env, num_timesteps=T, rng_key=key)

# Seed the PRNG used for the rollout.
key = jr.PRNGKey(1)
# Number of timesteps to simulate.
T = 10
# Run the agent in the environment; only the second return value (`info`) is kept.
_, info = rollout(agent, tmaze_env, num_timesteps=T, rng_key=key)





Copied!







# Render one RGB frame for each of the T timesteps of the rollout.
images = []
for t in range(T):
    # Take index t along the second axis of every array in info['env_state'].
    # NOTE(review): presumably axis 0 is the batch and axis 1 is time — confirm against rollout().
    env_state_t = [s[:, t] for s in info['env_state']]
    images.append(tmaze_env.render(states=env_state_t, mode="rgb_array"))

# Render one RGB frame for each of the T timesteps of the rollout.
images = []
for t in range(T):
    # Take index t along the second axis of every array in info['env_state'].
    # NOTE(review): presumably axis 0 is the batch and axis 1 is time — confirm against rollout().
    env_state_t = [s[:, t] for s in info['env_state']]
    images.append(tmaze_env.render(states=env_state_t, mode="rgb_array"))

<Figure size 640x480 with 0 Axes>





Copied!







# Render one RGB frame for each of the T timesteps of the rollout.
images = []
for t in range(T):
    # Take index t along the second axis of every array in info['env_state'].
    # NOTE(review): presumably axis 0 is the batch and axis 1 is time — confirm against rollout().
    env_state_t = [s[:, t] for s in info['env_state']]
    images.append(tmaze_env.render(states=env_state_t, mode="rgb_array"))

# Render one RGB frame for each of the T timesteps of the rollout.
images = []
for t in range(T):
    # Take index t along the second axis of every array in info['env_state'].
    # NOTE(review): presumably axis 0 is the batch and axis 1 is time — confirm against rollout().
    env_state_t = [s[:, t] for s in info['env_state']]
    images.append(tmaze_env.render(states=env_state_t, mode="rgb_array"))

<Figure size 640x480 with 0 Axes>





Copied!







# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Build a matplotlib animation that steps through ``images``.

    Args:
        images: Indexable, sized sequence of frames accepted by
            ``Axes.imshow``; ``images[0]`` is drawn first and
            ``len(images)`` frames are shown.
        savefile: Optional output path. When not ``None`` the animation is
            written there with ``Animation.save``.
        interval: Delay between frames in milliseconds.

    Returns:
        The ``FuncAnimation`` object. Its figure has already been closed
        with ``plt.close``.

    Raises:
        Whatever ``images[0]`` raises when ``images`` is empty
        (``IndexError`` for a list), and anything ``ani.save`` raises.
    """
    # Make a bigger figure (pick whatever looks good)
    fig = plt.figure(figsize=(6, 6), dpi=150)

    # Close the figure on every exit path so that an empty ``images`` or a
    # failing ``ani.save`` does not leave it registered with pyplot.
    try:
        # Axes that fills the entire figure
        ax = fig.add_axes([0, 0, 1, 1])
        ax.set_axis_off()

        im = ax.imshow(images[0], animated=True)

        def update(k):
            # Swap the pixel data in place; blitting needs the changed artists back.
            im.set_data(images[k])
            return (im,)

        ani = animation.FuncAnimation(
            fig, update, frames=len(images), interval=interval, blit=True, repeat_delay=1000
        )

        if savefile is not None:
            ani.save(savefile)
    finally:
        plt.close(fig)

    return ani

# Build the animation from the rendered frames and embed it in the notebook as an HTML5 video.
# NOTE(review): to_html5_video() needs a movie writer (ffmpeg by default) to be installed — confirm in the target environment.
ani = animate(images)
HTML(ani.to_html5_video())

# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Build a matplotlib animation that steps through ``images``.

    Args:
        images: Indexable, sized sequence of frames accepted by
            ``Axes.imshow``; ``images[0]`` is drawn first and
            ``len(images)`` frames are shown.
        savefile: Optional output path. When not ``None`` the animation is
            written there with ``Animation.save``.
        interval: Delay between frames in milliseconds.

    Returns:
        The ``FuncAnimation`` object. Its figure has already been closed
        with ``plt.close``.

    Raises:
        Whatever ``images[0]`` raises when ``images`` is empty
        (``IndexError`` for a list), and anything ``ani.save`` raises.
    """
    # Make a bigger figure (pick whatever looks good)
    fig = plt.figure(figsize=(6, 6), dpi=150)

    # Close the figure on every exit path so that an empty ``images`` or a
    # failing ``ani.save`` does not leave it registered with pyplot.
    try:
        # Axes that fills the entire figure
        ax = fig.add_axes([0, 0, 1, 1])
        ax.set_axis_off()

        im = ax.imshow(images[0], animated=True)

        def update(k):
            # Swap the pixel data in place; blitting needs the changed artists back.
            im.set_data(images[k])
            return (im,)

        ani = animation.FuncAnimation(
            fig, update, frames=len(images), interval=interval, blit=True, repeat_delay=1000
        )

        if savefile is not None:
            ani.save(savefile)
    finally:
        plt.close(fig)

    return ani

# Build the animation from the rendered frames and embed it in the notebook as an HTML5 video.
# NOTE(review): to_html5_video() needs a movie writer (ffmpeg by default) to be installed — confirm in the target environment.
ani = animate(images)
HTML(ani.to_html5_video())





Copied!







# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Build a matplotlib animation that steps through ``images``.

    Args:
        images: Indexable, sized sequence of frames accepted by
            ``Axes.imshow``; ``images[0]`` is drawn first and
            ``len(images)`` frames are shown.
        savefile: Optional output path. When not ``None`` the animation is
            written there with ``Animation.save``.
        interval: Delay between frames in milliseconds.

    Returns:
        The ``FuncAnimation`` object. Its figure has already been closed
        with ``plt.close``.

    Raises:
        Whatever ``images[0]`` raises when ``images`` is empty
        (``IndexError`` for a list), and anything ``ani.save`` raises.
    """
    # Make a bigger figure (pick whatever looks good)
    fig = plt.figure(figsize=(6, 6), dpi=150)

    # Close the figure on every exit path so that an empty ``images`` or a
    # failing ``ani.save`` does not leave it registered with pyplot.
    try:
        # Axes that fills the entire figure
        ax = fig.add_axes([0, 0, 1, 1])
        ax.set_axis_off()

        im = ax.imshow(images[0], animated=True)

        def update(k):
            # Swap the pixel data in place; blitting needs the changed artists back.
            im.set_data(images[k])
            return (im,)

        ani = animation.FuncAnimation(
            fig, update, frames=len(images), interval=interval, blit=True, repeat_delay=1000
        )

        if savefile is not None:
            ani.save(savefile)
    finally:
        plt.close(fig)

    return ani

# Build the animation from the rendered frames and embed it in the notebook as an HTML5 video.
# NOTE(review): to_html5_video() needs a movie writer (ffmpeg by default) to be installed — confirm in the target environment.
ani = animate(images)
HTML(ani.to_html5_video())

# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Build a matplotlib animation that steps through ``images``.

    Args:
        images: Indexable, sized sequence of frames accepted by
            ``Axes.imshow``; ``images[0]`` is drawn first and
            ``len(images)`` frames are shown.
        savefile: Optional output path. When not ``None`` the animation is
            written there with ``Animation.save``.
        interval: Delay between frames in milliseconds.

    Returns:
        The ``FuncAnimation`` object. Its figure has already been closed
        with ``plt.close``.

    Raises:
        Whatever ``images[0]`` raises when ``images`` is empty
        (``IndexError`` for a list), and anything ``ani.save`` raises.
    """
    # Make a bigger figure (pick whatever looks good)
    fig = plt.figure(figsize=(6, 6), dpi=150)

    # Close the figure on every exit path so that an empty ``images`` or a
    # failing ``ani.save`` does not leave it registered with pyplot.
    try:
        # Axes that fills the entire figure
        ax = fig.add_axes([0, 0, 1, 1])
        ax.set_axis_off()

        im = ax.imshow(images[0], animated=True)

        def update(k):
            # Swap the pixel data in place; blitting needs the changed artists back.
            im.set_data(images[k])
            return (im,)

        ani = animation.FuncAnimation(
            fig, update, frames=len(images), interval=interval, blit=True, repeat_delay=1000
        )

        if savefile is not None:
            ani.save(savefile)
    finally:
        plt.close(fig)

    return ani

# Build the animation from the rendered frames and embed it in the notebook as an HTML5 video.
# NOTE(review): to_html5_video() needs a movie writer (ffmpeg by default) to be installed — confirm in the target environment.
ani = animate(images)
HTML(ani.to_html5_video())

T-Maze Task with Distractors

Initial Setup

Creating the Environment

Creating the Agent

Roll out an agent episode