Copied!







# When the notebook is running inside Google Colab (detected by the
# "google.colab" module being loaded), install the pymdp package quietly.
# NOTE: `%pip` is an IPython line magic, so this cell only runs in a
# notebook/IPython session, not as a plain Python script.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q

# When the notebook is running inside Google Colab (detected by the
# "google.colab" module being loaded), install the pymdp package quietly.
# NOTE: `%pip` is an IPython line magic, so this cell only runs in a
# notebook/IPython session, not as a plain Python script.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q





Copied!







# When the notebook is running inside Google Colab (detected by the
# "google.colab" module being loaded), install the pymdp package quietly.
# NOTE: `%pip` is an IPython line magic, so this cell only runs in a
# notebook/IPython session, not as a plain Python script.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q

# When the notebook is running inside Google Colab (detected by the
# "google.colab" module being loaded), install the pymdp package quietly.
# NOTE: `%pip` is an IPython line magic, so this cell only runs in a
# notebook/IPython session, not as a plain Python script.
import sys
if "google.colab" in sys.modules:
    %pip install "inferactively-pymdp" -q





Copied!







from pprint import pprint
import itertools
import numpy as np
from jax import numpy as jnp, random as jr
from jax import tree_util as jtu

from pymdp.agent import Agent
from pymdp import distribution

from pprint import pprint
import itertools
import numpy as np
from jax import numpy as jnp, random as jr
from jax import tree_util as jtu

from pymdp.agent import Agent
from pymdp import distribution





Copied!







from pprint import pprint
import itertools
import numpy as np
from jax import numpy as jnp, random as jr
from jax import tree_util as jtu

from pymdp.agent import Agent
from pymdp import distribution

from pprint import pprint
import itertools
import numpy as np
from jax import numpy as jnp, random as jr
from jax import tree_util as jtu

from pymdp.agent import Agent
from pymdp import distribution





Copied!







# Model in which control factors are shared between state factors: `s1` is
# driven jointly by `c1` and `c2`, while `s2` is driven by `c1` alone.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": ["c1"],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it.
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

# Model in which control factors are shared between state factors: `s1` is
# driven jointly by `c1` and `c2`, while `s2` is driven by `c1` alone.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": ["c1"],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it.
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

A_dependencies [[0]]
B_dependencies [[0], [1]]
B_action_dependencies [[0, 1], [0]]
original control dims [2, 3]
flattened control dims [6, 2]
original B shapes [(4, 4, 2, 3), (4, 4, 2)]
flattened B shapes [(1, 4, 4, 6), (1, 4, 4, 2)]
B normalized [Array(True, dtype=bool), Array(True, dtype=bool)]
B flat normalized [Array(True, dtype=bool), Array(True, dtype=bool)]


prior
[Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32),
 Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32)]
post
[Array([[[0., 0., 1., 0.]]], dtype=float32),
 Array([[[0.  , 0.25, 0.25, 0.5 ]]], dtype=float32)]
action
Array([[0, 0]], dtype=int32)
action_multi
Array([[0, 0]], dtype=int32)
action_reconstruct
Array([[0, 0]], dtype=int32)





Copied!







# Model in which control factors are shared between state factors: `s1` is
# driven jointly by `c1` and `c2`, while `s2` is driven by `c1` alone.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": ["c1"],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it.
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

# Model in which control factors are shared between state factors: `s1` is
# driven jointly by `c1` and `c2`, while `s2` is driven by `c1` alone.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": ["c1"],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it.
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

A_dependencies [[0]]
B_dependencies [[0], [1]]
B_action_dependencies [[0, 1], [0]]
original control dims [2, 3]
flattened control dims [6, 2]
original B shapes [(4, 4, 2, 3), (4, 4, 2)]
flattened B shapes [(1, 4, 4, 6), (1, 4, 4, 2)]
B normalized [Array(True, dtype=bool), Array(True, dtype=bool)]
B flat normalized [Array(True, dtype=bool), Array(True, dtype=bool)]


prior
[Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32),
 Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32)]
post
[Array([[[0., 0., 1., 0.]]], dtype=float32),
 Array([[[0.  , 0.25, 0.25, 0.5 ]]], dtype=float32)]
action
Array([[0, 0]], dtype=int32)
action_multi
Array([[0, 0]], dtype=int32)
action_reconstruct
Array([[0, 0]], dtype=int32)





Copied!







# Model in which one state factor has no controls at all: `s1` is driven
# jointly by `c1` and `c2`, while `s2` lists no controlling factor.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": [],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it
# (an empty list for the uncontrolled factor).
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
# itertools.product() with no arguments yields a single empty tuple, so the
# uncontrolled factor is still filled once, with no action indices appended.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

# Model in which one state factor has no controls at all: `s1` is driven
# jointly by `c1` and `c2`, while `s2` lists no controlling factor.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": [],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it
# (an empty list for the uncontrolled factor).
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
# itertools.product() with no arguments yields a single empty tuple, so the
# uncontrolled factor is still filled once, with no action indices appended.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

A_dependencies [[0]]
B_dependencies [[0], [1]]
B_action_dependencies [[0, 1], []]
original control dims [2, 3]
flattened control dims [6, 1]
original B shapes [(4, 4, 2, 3), (4, 4)]
flattened B shapes [(1, 4, 4, 6), (1, 4, 4, 1)]
B normalized [Array(True, dtype=bool), Array(True, dtype=bool)]
B flat normalized [Array(True, dtype=bool), Array(True, dtype=bool)]


prior
[Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32),
 Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32)]
post
[Array([[[0., 0., 1., 0.]]], dtype=float32),
 Array([[[0.  , 0.25, 0.25, 0.5 ]]], dtype=float32)]
action
Array([[0, 0]], dtype=int32)
action_multi
Array([[0, 0]], dtype=int32)
action_reconstruct
Array([[0, 0]], dtype=int32)





Copied!







# Model in which one state factor has no controls at all: `s1` is driven
# jointly by `c1` and `c2`, while `s2` lists no controlling factor.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": [],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it
# (an empty list for the uncontrolled factor).
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
# itertools.product() with no arguments yields a single empty tuple, so the
# uncontrolled factor is still filled once, with no action indices appended.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

# Model in which one state factor has no controls at all: `s1` is driven
# jointly by `c1` and `c2`, while `s2` lists no controlling factor.
model_description = {
    "observations": {
        "o1": {"elements": ["A", "B", "C", "D"], "depends_on": ["s1"]},
    },
    "controls": {
        "c1": {"elements": ["up", "down"]},
        "c2": {"elements": ["left", "right", "stay"]},
    },
    "states": {
        "s1": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s1"],
            "controlled_by": ["c1", "c2"],
        },
        "s2": {
            "elements": ["A", "B", "C", "D"],
            "depends_on": ["s2"],
            "controlled_by": [],
        },
    },
}

control_specs = model_description["controls"]
state_specs = model_description["states"]

# Position of every control factor, in declaration order.
control_position = {name: idx for idx, name in enumerate(control_specs)}

# For each state factor, the indices of the control factors that drive it
# (an empty list for the uncontrolled factor).
B_action_dependencies = [
    [control_position[name] for name in spec["controlled_by"]]
    for spec in state_specs.values()
]
# Number of actions available in each control factor.
num_controls = [len(spec["elements"]) for spec in control_specs.values()]

model = distribution.compile_model(model_description)

# Likelihood: each observation label is paired with the identically named
# state of `s1`.
for label in ("A", "B", "C", "D"):
    model.A["o1"][label, label] = 1.0

# Transitions, written as (first index, second index) pairs, set to 1 for
# every combination of actions of the controls that drive the factor.
# itertools.product() with no arguments yields a single empty tuple, so the
# uncontrolled factor is still filled once, with no action indices appended.
transitions = (("B", "A"), ("C", "B"), ("D", "C"), ("D", "D"))
for factor, spec in enumerate(state_specs.values()):
    action_labels = [control_specs[name]["elements"] for name in spec["controlled_by"]]
    for combo in itertools.product(*action_labels):
        for first, second in transitions:
            model.B[factor][(first, second, *combo)] = 1.0

agent = Agent(
    model.A,
    model.B,
    B_action_dependencies=B_action_dependencies,
    num_controls=num_controls,
)

# Dummy history: pick a random policy and a random observation, both drawn
# through jax.random from a fixed seed so the cell is reproducible.
action_key, obs_key = jr.split(jr.PRNGKey(0))
policy_idx = jr.choice(action_key, len(agent.policies))
action = agent.policies[policy_idx]
observation = [
    jr.randint(obs_key, (1, 1), 0, num_outcomes)
    for num_outcomes in agent.num_obs
]
# Prepend a leading axis of size 1 to every entry of agent.D.
qs_hist = jtu.tree_map(lambda q: jnp.expand_dims(q, 0), agent.D)

prior = agent.update_empirical_prior(action, qs_hist)
qs = agent.infer_states(observations=observation, empirical_prior=prior)

q_pi, G = agent.infer_policies(qs)
action = agent.sample_action(q_pi)
# Round-trip the sampled action through the multi-action decode/encode pair.
action_multi = agent.decode_multi_actions(action)
action_reconstruct = agent.encode_multi_actions(action_multi)

# One-line diagnostics: dependency structure, control dimensions, B shapes
# and normalization checks, before and after the agent's flattening.
summary = [
    ("A_dependencies", agent.A_dependencies),
    ("B_dependencies", agent.B_dependencies),
    ("B_action_dependencies", agent.B_action_dependencies),
    ("original control dims", agent.num_controls_multi),
    ("flattened control dims", agent.num_controls),
    ("original B shapes", [b.data.shape for b in model.B]),
    ("flattened B shapes", [b.shape for b in agent.B]),
    ("B normalized", [jnp.isclose(b.data.sum(0), 1.0).all() for b in model.B]),
    ("B flat normalized", [jnp.isclose(b.sum(1), 1.0).all() for b in agent.B]),
]
for label, value in summary:
    print(label, value)

print("\n")
# Pretty-printed beliefs (rounded to two decimals) and actions.
details = [
    ("prior", [p.round(2) for p in prior]),
    ("post", [p.round(2) for p in qs]),
    ("action", action),
    ("action_multi", action_multi),
    ("action_reconstruct", action_reconstruct),
]
for label, value in details:
    print(label)
    pprint(value)

A_dependencies [[0]]
B_dependencies [[0], [1]]
B_action_dependencies [[0, 1], []]
original control dims [2, 3]
flattened control dims [6, 1]
original B shapes [(4, 4, 2, 3), (4, 4)]
flattened B shapes [(1, 4, 4, 6), (1, 4, 4, 1)]
B normalized [Array(True, dtype=bool), Array(True, dtype=bool)]
B flat normalized [Array(True, dtype=bool), Array(True, dtype=bool)]


prior
[Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32),
 Array([[0.  , 0.25, 0.25, 0.5 ]], dtype=float32)]
post
[Array([[[0., 0., 1., 0.]]], dtype=float32),
 Array([[[0.  , 0.25, 0.25, 0.5 ]]], dtype=float32)]
action
Array([[0, 0]], dtype=int32)
action_multi
Array([[0, 0]], dtype=int32)
action_reconstruct
Array([[0, 0]], dtype=int32)

Complex action dependencies

Multiple action dependencies

No action dependency