Copied!







# When running inside Google Colab (detected via the loaded module table),
# quietly install the "inferactively-pymdp" distribution with the %pip magic.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q

# When running inside Google Colab (detected via the loaded module table),
# quietly install the "inferactively-pymdp" distribution with the %pip magic.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q





Copied!







# When running inside Google Colab (detected via the loaded module table),
# quietly install the "inferactively-pymdp" distribution with the %pip magic.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q

# When running inside Google Colab (detected via the loaded module table),
# quietly install the "inferactively-pymdp" distribution with the %pip magic.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q





Copied!







%load_ext autoreload
%autoreload 2

import numpy as np 
import jax.numpy as jnp
import jax.tree_util as jtu
import mctx
from jax import random as jr


from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)
from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.planning.mcts import mcts_policy_search
from pymdp.utils import list_array_zeros, list_array_uniform

from copy import copy

key = jr.PRNGKey(0)

%load_ext autoreload
%autoreload 2

import numpy as np 
import jax.numpy as jnp
import jax.tree_util as jtu
import mctx
from jax import random as jr


from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)
from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.planning.mcts import mcts_policy_search
from pymdp.utils import list_array_zeros, list_array_uniform

from copy import copy

key = jr.PRNGKey(0)





Copied!







%load_ext autoreload
%autoreload 2

import numpy as np 
import jax.numpy as jnp
import jax.tree_util as jtu
import mctx
from jax import random as jr


from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)
from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.planning.mcts import mcts_policy_search
from pymdp.utils import list_array_zeros, list_array_uniform

from copy import copy

key = jr.PRNGKey(0)

%load_ext autoreload
%autoreload 2

import numpy as np 
import jax.numpy as jnp
import jax.tree_util as jtu
import mctx
from jax import random as jr


from pymdp.envs.generalized_tmaze import (
    GeneralizedTMazeEnv, parse_maze, get_maze_matrix 
)
from pymdp.envs.rollout import rollout
from pymdp.agent import Agent
from pymdp.planning.mcts import mcts_policy_search
from pymdp.utils import list_array_zeros, list_array_uniform

from copy import copy

key = jr.PRNGKey(0)





Copied!







# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Turn a sequence of image frames into a matplotlib animation.

    Args:
        images: Indexable sequence of frames accepted by ``imshow``. The
            first frame seeds the display, so at least one is required.
        savefile: Optional path; when not ``None`` the animation is also
            written there with ``FuncAnimation.save``.
        interval: Delay between frames, forwarded to ``FuncAnimation``.

    Returns:
        The ``FuncAnimation`` object; the backing figure is already closed.
    """
    canvas = plt.figure(figsize=(6, 6), dpi=150)

    # One borderless axes stretched over the whole canvas.
    frame_axes = canvas.add_axes([0, 0, 1, 1])
    frame_axes.set_axis_off()

    artist = frame_axes.imshow(images[0], animated=True)

    def show_frame(index):
        # Swap the pixel data in place and hand back the changed artist,
        # as required when blitting is enabled.
        artist.set_data(images[index])
        return (artist,)

    movie = animation.FuncAnimation(
        canvas,
        show_frame,
        frames=len(images),
        interval=interval,
        blit=True,
        repeat_delay=1000,
    )

    if savefile is not None:
        movie.save(savefile)

    # Close the figure once the animation object exists (presumably so the
    # notebook does not additionally display a static figure).
    plt.close(canvas)
    return movie

# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Turn a sequence of image frames into a matplotlib animation.

    Args:
        images: Indexable sequence of frames accepted by ``imshow``. The
            first frame seeds the display, so at least one is required.
        savefile: Optional path; when not ``None`` the animation is also
            written there with ``FuncAnimation.save``.
        interval: Delay between frames, forwarded to ``FuncAnimation``.

    Returns:
        The ``FuncAnimation`` object; the backing figure is already closed.
    """
    canvas = plt.figure(figsize=(6, 6), dpi=150)

    # One borderless axes stretched over the whole canvas.
    frame_axes = canvas.add_axes([0, 0, 1, 1])
    frame_axes.set_axis_off()

    artist = frame_axes.imshow(images[0], animated=True)

    def show_frame(index):
        # Swap the pixel data in place and hand back the changed artist,
        # as required when blitting is enabled.
        artist.set_data(images[index])
        return (artist,)

    movie = animation.FuncAnimation(
        canvas,
        show_frame,
        frames=len(images),
        interval=interval,
        blit=True,
        repeat_delay=1000,
    )

    if savefile is not None:
        movie.save(savefile)

    # Close the figure once the animation object exists (presumably so the
    # notebook does not additionally display a static figure).
    plt.close(canvas)
    return movie





Copied!







# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Turn a sequence of image frames into a matplotlib animation.

    Args:
        images: Indexable sequence of frames accepted by ``imshow``. The
            first frame seeds the display, so at least one is required.
        savefile: Optional path; when not ``None`` the animation is also
            written there with ``FuncAnimation.save``.
        interval: Delay between frames, forwarded to ``FuncAnimation``.

    Returns:
        The ``FuncAnimation`` object; the backing figure is already closed.
    """
    canvas = plt.figure(figsize=(6, 6), dpi=150)

    # One borderless axes stretched over the whole canvas.
    frame_axes = canvas.add_axes([0, 0, 1, 1])
    frame_axes.set_axis_off()

    artist = frame_axes.imshow(images[0], animated=True)

    def show_frame(index):
        # Swap the pixel data in place and hand back the changed artist,
        # as required when blitting is enabled.
        artist.set_data(images[index])
        return (artist,)

    movie = animation.FuncAnimation(
        canvas,
        show_frame,
        frames=len(images),
        interval=interval,
        blit=True,
        repeat_delay=1000,
    )

    if savefile is not None:
        movie.save(savefile)

    # Close the figure once the animation object exists (presumably so the
    # notebook does not additionally display a static figure).
    plt.close(canvas)
    return movie

# make animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


def animate(images, savefile=None, interval=1000):
    """Turn a sequence of image frames into a matplotlib animation.

    Args:
        images: Indexable sequence of frames accepted by ``imshow``. The
            first frame seeds the display, so at least one is required.
        savefile: Optional path; when not ``None`` the animation is also
            written there with ``FuncAnimation.save``.
        interval: Delay between frames, forwarded to ``FuncAnimation``.

    Returns:
        The ``FuncAnimation`` object; the backing figure is already closed.
    """
    canvas = plt.figure(figsize=(6, 6), dpi=150)

    # One borderless axes stretched over the whole canvas.
    frame_axes = canvas.add_axes([0, 0, 1, 1])
    frame_axes.set_axis_off()

    artist = frame_axes.imshow(images[0], animated=True)

    def show_frame(index):
        # Swap the pixel data in place and hand back the changed artist,
        # as required when blitting is enabled.
        artist.set_data(images[index])
        return (artist,)

    movie = animation.FuncAnimation(
        canvas,
        show_frame,
        frames=len(images),
        interval=interval,
        blit=True,
        repeat_delay=1000,
    )

    if savefile is not None:
        movie.save(savefile)

    # Close the figure once the animation object exists (presumably so the
    # notebook does not additionally display a static figure).
    plt.close(canvas)
    return movie





Copied!







# Build the maze layout and reseed the PRNG so this cell is reproducible.
M = get_maze_matrix()
key = jr.PRNGKey(0)

# Derive a dedicated subkey for parsing the maze; `key` is replaced by the
# other half of the split.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# NOTE(review): `key` is consumed by reset() here without being split again,
# and the same `key` is later passed to rollout(rng_key=key) — confirm that
# reusing the key is intended.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

# Build the maze layout and reseed the PRNG so this cell is reproducible.
M = get_maze_matrix()
key = jr.PRNGKey(0)

# Derive a dedicated subkey for parsing the maze; `key` is replaced by the
# other half of the split.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# NOTE(review): `key` is consumed by reset() here without being split again,
# and the same `key` is later passed to rollout(rng_key=key) — confirm that
# reusing the key is intended.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

<Figure size 640x480 with 0 Axes>





Copied!







# Build the maze layout and reseed the PRNG so this cell is reproducible.
M = get_maze_matrix()
key = jr.PRNGKey(0)

# Derive a dedicated subkey for parsing the maze; `key` is replaced by the
# other half of the split.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# NOTE(review): `key` is consumed by reset() here without being split again,
# and the same `key` is later passed to rollout(rng_key=key) — confirm that
# reusing the key is intended.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

# Build the maze layout and reseed the PRNG so this cell is reproducible.
M = get_maze_matrix()
key = jr.PRNGKey(0)

# Derive a dedicated subkey for parsing the maze; `key` is replaced by the
# other half of the split.
key, subkey = jr.split(key)
env_info_m = parse_maze(M, subkey)

tmaze_env = GeneralizedTMazeEnv(env_info_m)

# NOTE(review): `key` is consumed by reset() here without being split again,
# and the same `key` is later passed to rollout(rng_key=key) — confirm that
# reusing the key is intended.
init_obs, init_state = tmaze_env.reset(key)
tmaze_env.render(states=init_state, mode="human")

<Figure size 640x480 with 0 Axes>





Copied!







def make_aif_agent(tmaze_env, batch_size=5):
    """Construct an ``Agent`` whose model is copied from ``tmaze_env``.

    Args:
        tmaze_env: Environment exposing ``A``, ``B``, ``A_dependencies`` and
            ``B_dependencies``.
        batch_size: Forwarded unchanged to ``Agent``.

    Returns:
        An ``Agent`` with one-step policies, stochastic action selection and
        learning of ``A`` and ``B`` switched off.
    """
    likelihoods = tmaze_env.A
    transitions = tmaze_env.B
    likelihood_deps = tmaze_env.A_dependencies
    transition_deps = tmaze_env.B_dependencies

    # One all-zero preference vector per modality: [position], [cue], [reward]
    C = list_array_zeros([lik.shape[0] for lik in likelihoods])

    # Only the last modality is given preferences: entry 1 is set to +1 and
    # entry 2 to -2.
    reward_idx = -1
    C[reward_idx] = C[reward_idx].at[1].set(1.0).at[2].set(-2.0)

    # Initial-state arrays sized by each transition array's leading dimension.
    D = list_array_uniform([trans.shape[0] for trans in transitions])

    return Agent(
        likelihoods,
        transitions,
        C,
        D,
        E=None,
        pA=None,
        pB=None,
        policy_len=1,
        A_dependencies=likelihood_deps,
        B_dependencies=transition_deps,
        use_utility=True,
        use_states_info_gain=True,
        sampling_mode='full',
        action_selection='stochastic',
        gamma=4.0,
        batch_size=batch_size,
        learn_A=False,
        learn_B=False,
    )

def make_aif_agent(tmaze_env, batch_size=5):
    """Construct an ``Agent`` whose model is copied from ``tmaze_env``.

    Args:
        tmaze_env: Environment exposing ``A``, ``B``, ``A_dependencies`` and
            ``B_dependencies``.
        batch_size: Forwarded unchanged to ``Agent``.

    Returns:
        An ``Agent`` with one-step policies, stochastic action selection and
        learning of ``A`` and ``B`` switched off.
    """
    likelihoods = tmaze_env.A
    transitions = tmaze_env.B
    likelihood_deps = tmaze_env.A_dependencies
    transition_deps = tmaze_env.B_dependencies

    # One all-zero preference vector per modality: [position], [cue], [reward]
    C = list_array_zeros([lik.shape[0] for lik in likelihoods])

    # Only the last modality is given preferences: entry 1 is set to +1 and
    # entry 2 to -2.
    reward_idx = -1
    C[reward_idx] = C[reward_idx].at[1].set(1.0).at[2].set(-2.0)

    # Initial-state arrays sized by each transition array's leading dimension.
    D = list_array_uniform([trans.shape[0] for trans in transitions])

    return Agent(
        likelihoods,
        transitions,
        C,
        D,
        E=None,
        pA=None,
        pB=None,
        policy_len=1,
        A_dependencies=likelihood_deps,
        B_dependencies=transition_deps,
        use_utility=True,
        use_states_info_gain=True,
        sampling_mode='full',
        action_selection='stochastic',
        gamma=4.0,
        batch_size=batch_size,
        learn_A=False,
        learn_B=False,
    )





Copied!







def make_aif_agent(tmaze_env, batch_size=5):
    """Construct an ``Agent`` whose model is copied from ``tmaze_env``.

    Args:
        tmaze_env: Environment exposing ``A``, ``B``, ``A_dependencies`` and
            ``B_dependencies``.
        batch_size: Forwarded unchanged to ``Agent``.

    Returns:
        An ``Agent`` with one-step policies, stochastic action selection and
        learning of ``A`` and ``B`` switched off.
    """
    likelihoods = tmaze_env.A
    transitions = tmaze_env.B
    likelihood_deps = tmaze_env.A_dependencies
    transition_deps = tmaze_env.B_dependencies

    # One all-zero preference vector per modality: [position], [cue], [reward]
    C = list_array_zeros([lik.shape[0] for lik in likelihoods])

    # Only the last modality is given preferences: entry 1 is set to +1 and
    # entry 2 to -2.
    reward_idx = -1
    C[reward_idx] = C[reward_idx].at[1].set(1.0).at[2].set(-2.0)

    # Initial-state arrays sized by each transition array's leading dimension.
    D = list_array_uniform([trans.shape[0] for trans in transitions])

    return Agent(
        likelihoods,
        transitions,
        C,
        D,
        E=None,
        pA=None,
        pB=None,
        policy_len=1,
        A_dependencies=likelihood_deps,
        B_dependencies=transition_deps,
        use_utility=True,
        use_states_info_gain=True,
        sampling_mode='full',
        action_selection='stochastic',
        gamma=4.0,
        batch_size=batch_size,
        learn_A=False,
        learn_B=False,
    )

def make_aif_agent(tmaze_env, batch_size=5):
    """Construct an ``Agent`` whose model is copied from ``tmaze_env``.

    Args:
        tmaze_env: Environment exposing ``A``, ``B``, ``A_dependencies`` and
            ``B_dependencies``.
        batch_size: Forwarded unchanged to ``Agent``.

    Returns:
        An ``Agent`` with one-step policies, stochastic action selection and
        learning of ``A`` and ``B`` switched off.
    """
    likelihoods = tmaze_env.A
    transitions = tmaze_env.B
    likelihood_deps = tmaze_env.A_dependencies
    transition_deps = tmaze_env.B_dependencies

    # One all-zero preference vector per modality: [position], [cue], [reward]
    C = list_array_zeros([lik.shape[0] for lik in likelihoods])

    # Only the last modality is given preferences: entry 1 is set to +1 and
    # entry 2 to -2.
    reward_idx = -1
    C[reward_idx] = C[reward_idx].at[1].set(1.0).at[2].set(-2.0)

    # Initial-state arrays sized by each transition array's leading dimension.
    D = list_array_uniform([trans.shape[0] for trans in transitions])

    return Agent(
        likelihoods,
        transitions,
        C,
        D,
        E=None,
        pA=None,
        pB=None,
        policy_len=1,
        A_dependencies=likelihood_deps,
        B_dependencies=transition_deps,
        use_utility=True,
        use_states_info_gain=True,
        sampling_mode='full',
        action_selection='stochastic',
        gamma=4.0,
        batch_size=batch_size,
        learn_A=False,
        learn_B=False,
    )





Copied!







# Roll the agent out in the maze, choosing actions with an MCTS policy
# search built on mctx's Gumbel MuZero algorithm.
timesteps = 6
batch_size = 9
agent = make_aif_agent(tmaze_env, batch_size=batch_size)
_, info = rollout(
    agent,
    tmaze_env,
    num_timesteps=timesteps,
    rng_key=key,
    policy_search=mcts_policy_search(
        search_algo=mctx.gumbel_muzero_policy,
        max_depth=3,
        num_simulations=1024,
    ),
)

# Roll the agent out in the maze, choosing actions with an MCTS policy
# search built on mctx's Gumbel MuZero algorithm.
timesteps = 6
batch_size = 9
agent = make_aif_agent(tmaze_env, batch_size=batch_size)
_, info = rollout(
    agent,
    tmaze_env,
    num_timesteps=timesteps,
    rng_key=key,
    policy_search=mcts_policy_search(
        search_algo=mctx.gumbel_muzero_policy,
        max_depth=3,
        num_simulations=1024,
    ),
)





Copied!







# Roll the agent out in the maze, choosing actions with an MCTS policy
# search built on mctx's Gumbel MuZero algorithm.
timesteps = 6
batch_size = 9
agent = make_aif_agent(tmaze_env, batch_size=batch_size)
_, info = rollout(
    agent,
    tmaze_env,
    num_timesteps=timesteps,
    rng_key=key,
    policy_search=mcts_policy_search(
        search_algo=mctx.gumbel_muzero_policy,
        max_depth=3,
        num_simulations=1024,
    ),
)

# Roll the agent out in the maze, choosing actions with an MCTS policy
# search built on mctx's Gumbel MuZero algorithm.
timesteps = 6
batch_size = 9
agent = make_aif_agent(tmaze_env, batch_size=batch_size)
_, info = rollout(
    agent,
    tmaze_env,
    num_timesteps=timesteps,
    rng_key=key,
    policy_search=mcts_policy_search(
        search_algo=mctx.gumbel_muzero_policy,
        max_depth=3,
        num_simulations=1024,
    ),
)





Copied!







# List the fields recorded in the rollout's `info` result.
info.keys()

# List the fields recorded in the rollout's `info` result.
info.keys()

dict_keys(['action', 'action_weights', 'empirical_prior', 'env_state', 'observation', 'qpi', 'qs', 'search_tree'])





Copied!







# List the fields recorded in the rollout's `info` result.
info.keys()

# List the fields recorded in the rollout's `info` result.
info.keys()

dict_keys(['action', 'action_weights', 'empirical_prior', 'env_state', 'observation', 'qpi', 'qs', 'search_tree'])





Copied!







# Print the recorded field names and the array shape of every entry in
# info["env_state"].
print("Info keys:", list(info.keys()))
print("Env state shape:", [s.shape for s in info["env_state"]])

# Print the recorded field names and the array shape of every entry in
# info["env_state"].
print("Info keys:", list(info.keys()))
print("Env state shape:", [s.shape for s in info["env_state"]])

Info keys: ['action', 'action_weights', 'empirical_prior', 'env_state', 'observation', 'qpi', 'qs', 'search_tree']
Env state shape: [(9, 7), (9, 7), (9, 7), (9, 7)]





Copied!







# Print the recorded field names and the array shape of every entry in
# info["env_state"].
print("Info keys:", list(info.keys()))
print("Env state shape:", [s.shape for s in info["env_state"]])

# Print the recorded field names and the array shape of every entry in
# info["env_state"].
print("Info keys:", list(info.keys()))
print("Env state shape:", [s.shape for s in info["env_state"]])

Info keys: ['action', 'action_weights', 'empirical_prior', 'env_state', 'observation', 'qpi', 'qs', 'search_tree']
Env state shape: [(9, 7), (9, 7), (9, 7), (9, 7)]





Copied!







# Render one RGB frame per timestep by slicing index `step` out of the second
# axis of every env-state array.
# NOTE(review): the printed env-state shape is (9, 7) while only
# range(timesteps) = 6 slices are rendered — confirm whether the last
# recorded state should be included as well.
images = [
    tmaze_env.render(
        states=[state[:, step] for state in info['env_state']],
        mode="rgb_array",
    )
    for step in range(timesteps)
]

# Render one RGB frame per timestep by slicing index `step` out of the second
# axis of every env-state array.
# NOTE(review): the printed env-state shape is (9, 7) while only
# range(timesteps) = 6 slices are rendered — confirm whether the last
# recorded state should be included as well.
images = [
    tmaze_env.render(
        states=[state[:, step] for state in info['env_state']],
        mode="rgb_array",
    )
    for step in range(timesteps)
]

<Figure size 640x480 with 0 Axes>





Copied!







# Render one RGB frame per timestep by slicing index `step` out of the second
# axis of every env-state array.
# NOTE(review): the printed env-state shape is (9, 7) while only
# range(timesteps) = 6 slices are rendered — confirm whether the last
# recorded state should be included as well.
images = [
    tmaze_env.render(
        states=[state[:, step] for state in info['env_state']],
        mode="rgb_array",
    )
    for step in range(timesteps)
]

# Render one RGB frame per timestep by slicing index `step` out of the second
# axis of every env-state array.
# NOTE(review): the printed env-state shape is (9, 7) while only
# range(timesteps) = 6 slices are rendered — confirm whether the last
# recorded state should be included as well.
images = [
    tmaze_env.render(
        states=[state[:, step] for state in info['env_state']],
        mode="rgb_array",
    )
    for step in range(timesteps)
]

<Figure size 640x480 with 0 Axes>





Copied!







# Assemble the rendered frames into an animation and embed it in the
# notebook output as an HTML5 video.
ani = animate(images)
HTML(ani.to_html5_video())

# Assemble the rendered frames into an animation and embed it in the
# notebook output as an HTML5 video.
ani = animate(images)
HTML(ani.to_html5_video())





Copied!







# Assemble the rendered frames into an animation and embed it in the
# notebook output as an HTML5 video.
ani = animate(images)
HTML(ani.to_html5_video())

# Assemble the rendered frames into an animation and embed it in the
# notebook output as an HTML5 video.
ani = animate(images)
HTML(ani.to_html5_video())

Sophisticated inference with Monte Carlo Tree Search

Sophisticated inference

Monte Carlo Tree Search

Discounting and sampling outcomes

Grid world (generalized TMaze) generative process

Create the agent

MCTS based policy search

Run active inference