Transition Network Analysis (TNA) Tutorial¶


Introduction¶

Transition Network Analysis (TNA) represents a novel methodological approach that captures the temporal and relational dynamics of unfolding processes. The core principle involves representing transition matrices between events as graphs, enabling researchers to leverage graph theory and network analysis comprehensively.

TNA functions as a sophisticated combination of process mining and network analysis. Where process mining typically generates sequential maps, TNA represents these through network analysis — but with considerably greater analytical depth. The method applies network analysis to capture structure, time, and relationships holistically. Compared to traditional process mining models, TNA incorporates network measures at node, edge, and graph levels, revealing which events hold importance through centrality measures, which transitions prove central, and which processes demonstrate greater connectivity. The method extends beyond standard network analysis by clustering sub-networks into different network constellations representing typical temporal event patterns — often called tactics.

A distinctive innovation involves statistical validation techniques unavailable in conventional approaches. These include edge verification through bootstrapping, network comparison via permutation testing, and centrality verification through case-dropping methods. These statistical techniques introduce rigor and validation at each analytical step, enabling researchers to verify which edges demonstrate replicability and confirm that inferences remain valid rather than chance artifacts.

Why TNA?¶

Learning operates as a complex dynamic system — a collection of interconnected components interacting across time where interactions can enhance, impede, amplify, or reinforce each other. These dynamic interactions generate emergent behaviors that resist full understanding through analyzing individual components in isolation. Such interactions frequently produce processes exceeding the simple sum of their parts, exhibiting non-linear dynamics.

For example, motivation catalyzes achievement, which subsequently catalyzes enhanced engagement, enjoyment, and motivation. These interdependencies, feedback loops, and non-linear dynamics create inherent complexity requiring modeling methods transcending traditional linear approaches. TNA, functioning as a dynamic probabilistic model, addresses these limitations by capturing uncertainties through directional probabilities between learning events. The method accommodates the non-linear, evolving character of learning processes while capturing the constellations and emergent patterns defining or shaping learning processes.

The Building Blocks of TNA¶

TNA's foundational elements are the transitions between events that make up a process. A transition represents a conditional relationship between one occurrence and another — from A to B (a contingency). TNA models transitions in sequential data to compute transition probabilities between events. The resulting transition matrix becomes a weighted directed network where weights represent transition probabilities and edge direction indicates the direction of the transition.

  • Nodes (V) represent different learning events — watching videos, taking quizzes, submitting assignments — or alternatively, states, dialogue moves, collaborative roles, motivation states, or any event representable as sequence units.
  • Edges (E) represent transitions between activities, displaying direction from one activity to the next.
  • Weights (W) represent transitioning probabilities between events or states.
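The mapping from sequences to a weighted network can be sketched with plain pandas, independently of the tna package; the toy sequence below is hypothetical:

```python
import pandas as pd

# Toy event sequence; each consecutive pair (A -> B) is one observed transition
seq = ["plan", "discuss", "consensus", "plan", "discuss", "plan", "consensus"]

# Count transitions between consecutive events
pairs = pd.DataFrame({"from": seq[:-1], "to": seq[1:]})
counts = pd.crosstab(pairs["from"], pairs["to"])

# Row-normalize the counts: each row becomes a probability distribution
probs = counts.div(counts.sum(axis=1), axis=0).fillna(0.0)
print(probs.round(2))
```

The resulting row-stochastic matrix is exactly the kind of weighted directed network TNA analyzes: entry (i, j) is the probability of moving from state i to state j.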

This tutorial demonstrates the complete TNA workflow using the Python tna package — from data preparation through model building, visualization, pruning, pattern detection, centrality analysis, community detection, bootstrapping, and group comparison. This tutorial replicates the R TNA tutorial by Saqr & Lopez-Pernas (2025) using the Python implementation.

1. Installation & Setup¶

TNA can analyze any sequence-representable data with transitions or changes across time — learning event sequences, states, phases, roles, dialogue moves, or interactions. This data can originate from time-stamped learning management system data, coded interaction data, event-log data, or ordered event data.

Install the tna package and import the required libraries:

In [1]:
# Install tna package (uncomment for Google Colab)
# !pip install tna-py

import tna
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.dpi'] = 150

print(f"TNA version: {tna.__version__}")
TNA version: 0.1.0

2. Getting Started with Long-Format Data¶

TNA works with sequential event data. The tna package accepts sequence data in several formats: a wide DataFrame where rows represent sequences and columns represent timepoints, a transition matrix, or long-format event data that gets reshaped using prepare_data().

The built-in dataset contains coded collaborative regulation behaviors from learning sessions, with columns for action, actor, and time. Let's start by loading the long-format dataset:

In [2]:
# Load the built-in dataset of coded collaborative regulation behaviors
group_regulation_long = tna.load_group_regulation_long()
print(f"Shape: {group_regulation_long.shape}")
group_regulation_long.head(10)
Shape: (27533, 6)
Out[2]:
Actor Achiever Group Course Time Action
0 1 High 1.0 A 2025-01-01 08:27:07.712698221 cohesion
1 1 High 1.0 A 2025-01-01 08:35:20.712698221 consensus
2 1 High 1.0 A 2025-01-01 08:42:18.712698221 discuss
3 1 High 1.0 A 2025-01-01 08:50:00.712698221 synthesis
4 1 High 1.0 A 2025-01-01 08:52:25.712698221 adapt
5 1 High 1.0 A 2025-01-01 08:57:31.712698221 consensus
6 1 High 1.0 A 2025-01-01 08:58:04.712698221 plan
7 1 High 1.0 A 2025-01-01 09:05:00.712698221 consensus
8 2 High 1.0 A 2025-01-01 08:27:33.712698221 plan
9 2 High 1.0 A 2025-01-01 08:33:45.712698221 emotion

Each row is a single event with columns:

  • Action: The behavioral state (becomes a network node)
  • Actor: Participant ID (one sequence per actor)
  • Time: Timestamp (for ordering and session splitting)
  • Achiever: Achievement group (High/Low, used later for group comparison)
  • Group: Group identifier
  • Course: Course identifier

3. Understanding prepare_data()¶

The prepare_data() function converts long-format event logs into sequences suitable for TNA. It handles session splitting (based on time gaps), ordering, and reshaping. To generate individual sequences for each actor, you must specify both the actor and action columns.

When timestamps are provided via the time column, events happening less than 15 minutes apart are grouped in the same sequence, while events occurring after a longer gap mark the start of a new sequence (session). You can customize this gap using the time_threshold argument (in minutes).
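The session-splitting rule above can be sketched with plain pandas (this mirrors the gap logic conceptually; it is not the prepare_data() implementation, and the timestamps are hypothetical):

```python
import pandas as pd

# Hypothetical timestamps for one actor; a gap over 15 minutes starts a new session
times = pd.to_datetime([
    "2025-01-01 08:00", "2025-01-01 08:05", "2025-01-01 08:10",
    "2025-01-01 09:00", "2025-01-01 09:03",
])

# A new session begins wherever the gap to the previous event exceeds the threshold
new_session = times.to_series().diff() > pd.Timedelta(minutes=15)
session_id = new_session.cumsum()
print(session_id.tolist())  # [0, 0, 0, 1, 1]
```

Here the 50-minute gap before 09:00 splits the five events into two sessions, matching the default time_threshold behavior described above.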

An important advantage of using prepare_data() prior to constructing the TNA model is that you get to keep other variables of the data (metadata) and use them in your analysis. For instance, you can use group_tna() to create a TNA model by achievement group by passing the result of prepare_data() and indicating the name of the grouping column.

In [3]:
# Convert long-format event log into sequences for TNA
prepared_data = tna.prepare_data(
    group_regulation_long,
    action="Action",   # column with behavioral states (become network nodes)
    actor="Actor",     # column with participant IDs (one sequence per actor)
    time="Time"        # column with timestamps (for ordering and session splitting)
)
prepared_data
Out[3]:
TNAData(sessions=2000, actions=9, actors=2000)
In [4]:
# View the wide-format sequence data (rows = sequences, columns = positions)
print("Sequence data shape:", prepared_data.sequence_data.shape)
prepared_data.sequence_data.head()
Sequence data shape: (2000, 26)
Out[4]:
action_1 action_2 action_3 action_4 action_5 action_6 action_7 action_8 action_9 action_10 ... action_17 action_18 action_19 action_20 action_21 action_22 action_23 action_24 action_25 action_26
.session_id
1000_1 discuss discuss consensus plan cohesion consensus discuss consensus plan plan ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1001_1 cohesion consensus plan plan monitor plan consensus discuss consensus plan ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1002_1 discuss adapt cohesion consensus discuss emotion cohesion coregulate discuss discuss ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1003_1 discuss emotion cohesion consensus coregulate coregulate plan plan consensus coregulate ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1004_1 cohesion plan consensus plan consensus discuss discuss synthesis consensus discuss ... cohesion coregulate consensus consensus coregulate discuss NaN NaN NaN NaN

5 rows × 26 columns

In [5]:
# View the preserved metadata (e.g., Achiever group) for each sequence
prepared_data.meta_data.head()
Out[5]:
Actor Achiever Group Course Time
.session_id
1000_1 1000 High 100.0 B 2025-01-01 09:12:00.562642574
1001_1 1001 Low 101.0 B 2025-01-01 09:18:40.756721020
1002_1 1002 Low 101.0 B 2025-01-01 09:18:53.756721020
1003_1 1003 Low 101.0 B 2025-01-01 09:18:05.756721020
1004_1 1004 Low 101.0 B 2025-01-01 09:22:26.756721020

Alternative Input Formats¶

In addition to long-format data processed via prepare_data(), TNA models can be built directly from:

  • Wide-format data: A DataFrame where each row is a sequence and each column represents a time step. This is the most straightforward format when sequences are already aligned.
  • Pre-computed transition matrices: A square DataFrame or NumPy array where entry (i, j) represents the transition probability or frequency from state i to state j.

These alternative inputs provide flexibility for researchers who already have their data in processed formats:

In [6]:
# Wide-format data (rows = sequences, columns = time steps)
group_regulation = tna.load_group_regulation()
print("Wide-format shape:", group_regulation.shape)
group_regulation.head()
Wide-format shape: (2000, 26)
Out[6]:
T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 ... T17 T18 T19 T20 T21 T22 T23 T24 T25 T26
0 cohesion consensus discuss synthesis adapt consensus plan consensus NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 plan emotion consensus discuss synthesis adapt emotion NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 consensus coregulate monitor consensus plan emotion consensus monitor consensus coregulate ... plan plan consensus monitor consensus plan emotion plan consensus discuss
3 monitor emotion plan discuss synthesis consensus discuss cohesion consensus plan ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 discuss emotion cohesion NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 26 columns

In [7]:
# Pre-computed transition matrix
mat = np.array([
    [0.1, 0.6, 0.3],
    [0.4, 0.2, 0.4],
    [0.3, 0.3, 0.4]
])
labels = ["A", "B", "C"]
model_from_matrix = tna.tna(pd.DataFrame(mat, index=labels, columns=labels))
print(model_from_matrix)
TNA Model
  Type: relative
  States: ['A', 'B', 'C']
  Scaling: none

Transition Matrix:
     A    B    C
A  0.1  0.6  0.3
B  0.4  0.2  0.4
C  0.3  0.3  0.4

Initial Probabilities:
       prob
A  0.333333
B  0.333333
C  0.333333

Importing One-Hot Encoded Data¶

Some datasets encode states as binary (0/1) indicator columns rather than categorical labels — for example, coded observation data where each column indicates whether a particular behavior was present in each time interval. The import_onehot() function converts this one-hot format into wide-format sequence data suitable for tna().

The function supports windowing to group multiple time intervals together:

  • window_size: Number of rows per window (default: 1, each row becomes one time step)
  • window_type: 'tumbling' (non-overlapping chunks) or 'sliding' (step-by-1 overlap)
  • aggregate: If True, collapse each window to the first active state per column (reduces width)

When actor or session columns are provided, windowing is applied within each group, producing one row per actor/session with all windows concatenated.
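The difference between the two window types can be illustrated with plain Python (a conceptual sketch, not the import_onehot() internals):

```python
# Tumbling vs. sliding windows over a toy sequence of 6 time intervals
seq = list(range(6))
window_size = 2

# Tumbling: non-overlapping chunks
tumbling = [seq[i:i + window_size] for i in range(0, len(seq), window_size)]

# Sliding: step-by-1 overlapping windows
sliding = [seq[i:i + window_size] for i in range(len(seq) - window_size + 1)]

print(tumbling)  # [[0, 1], [2, 3], [4, 5]]
print(sliding)   # [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]]
```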

In [8]:
# Create example one-hot encoded data (e.g., coded classroom observations)
onehot_data = pd.DataFrame({
    "actor": ["s1"] * 6 + ["s2"] * 6,
    "Reading":  [1, 0, 0, 1, 0, 0,  0, 1, 0, 0, 1, 0],
    "Writing":  [0, 1, 0, 0, 1, 0,  1, 0, 0, 1, 0, 0],
    "Discuss":  [0, 0, 1, 0, 0, 1,  0, 0, 1, 0, 0, 1],
})
print("One-hot input:")
print(onehot_data)

# Convert to wide-format sequences (one row per actor)
states = ["Reading", "Writing", "Discuss"]
wide_seq = tna.import_onehot(onehot_data, cols=states, actor="actor")
print("\nWide-format output:")
wide_seq
One-hot input:
   actor  Reading  Writing  Discuss
0     s1        1        0        0
1     s1        0        1        0
2     s1        0        0        1
3     s1        1        0        0
4     s1        0        1        0
5     s1        0        0        1
6     s2        0        1        0
7     s2        1        0        0
8     s2        0        0        1
9     s2        0        1        0
10    s2        1        0        0
11    s2        0        0        1

Wide-format output:
Out[8]:
W0_T1 W0_T2 W0_T3 W1_T1 W1_T2 W1_T3 W2_T1 W2_T2 W2_T3 W3_T1 W3_T2 W3_T3 W4_T1 W4_T2 W4_T3 W5_T1 W5_T2 W5_T3
0 Reading NaN NaN NaN Writing NaN NaN NaN Discuss Reading NaN NaN NaN Writing NaN NaN NaN Discuss
1 NaN Writing NaN Reading NaN NaN NaN NaN Discuss NaN Writing NaN Reading NaN NaN NaN NaN Discuss

4. Building the TNA Model¶

TNA analysis begins with building the primary TNA object (the model), which contains all information necessary for further analysis — plotting, centrality estimation, or comparison. Models are estimated with the tna() function, which fits a Markov model to the data: initial probabilities derive directly from the observed first states of the sequences, and transition probabilities derive from the observed transition frequencies.

The resulting model contains:

  • Initial Probabilities (inits): Define the likelihood of starting in a particular state at the beginning of the process (the first time point, before transitions). In educational contexts, initial probability represents the probability that students begin in specific states (such as "engaged" or "motivated") before activities or interventions occur. These probabilities provide a process snapshot showing student starting positions.

  • Transition Probabilities (weights): Describe state-to-state movement likelihoods at each process step. Transition probabilities capture how students transition, move, or follow between different learning states or events. Each row of the transition matrix sums to 1, representing a complete probability distribution over next states.

  • Labels (labels): Provide descriptive network node names, enhancing analysis interpretability. Labels automatically derive from the data categories.

  • Data (data): The sequence data used to build the model, stored internally for further analysis (permutation testing, bootstrapping, etc.).
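As a minimal illustration of how initial probabilities arise from wide-format data (plain pandas, with hypothetical sequences, not the tna() implementation):

```python
import pandas as pd

# Hypothetical wide-format sequences (rows = sequences, columns = time steps)
seqs = pd.DataFrame([
    ["plan", "discuss", "consensus"],
    ["discuss", "plan", None],
    ["plan", "consensus", "plan"],
], columns=["T1", "T2", "T3"])

# Initial probabilities: the distribution of first states across sequences
inits = seqs["T1"].value_counts(normalize=True)
print(inits)
```

Two of the three sequences start with "plan", so its initial probability is 2/3 — the same logic that produces the inits vector of the fitted model.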

In [9]:
# Build the TNA model from the prepared sequence data
model = tna.tna(prepared_data)
print(model)
TNA Model
  Type: relative
  States: ['adapt', 'cohesion', 'consensus', 'coregulate', 'discuss', 'emotion', 'monitor', 'plan', 'synthesis']
  Scaling: none

Transition Matrix:
               adapt  cohesion  consensus  coregulate   discuss   emotion   monitor      plan  synthesis
adapt       0.000000  0.273084   0.477407    0.021611  0.058939  0.119843  0.033399  0.015717   0.000000
cohesion    0.002950  0.027139   0.497935    0.119174  0.059587  0.115634  0.033038  0.141003   0.003540
consensus   0.004740  0.014852   0.082003    0.187707  0.188023  0.072681  0.046611  0.395797   0.007584
coregulate  0.016244  0.036041   0.134518    0.023350  0.273604  0.172081  0.086294  0.239086   0.018782
discuss     0.071374  0.047583   0.321185    0.084282  0.194887  0.105796  0.022273  0.011643   0.140977
emotion     0.002467  0.325344   0.320409    0.034191  0.101868  0.076842  0.036306  0.099753   0.002820
monitor     0.011165  0.055827   0.159107    0.057920  0.375436  0.090719  0.018144  0.215632   0.016050
plan        0.000975  0.025175   0.290401    0.017216  0.067890  0.146825  0.075524  0.374208   0.001787
synthesis   0.234663  0.033742   0.466258    0.044479  0.062883  0.070552  0.012270  0.075153   0.000000

Initial Probabilities:
              prob
adapt       0.0115
cohesion    0.0605
consensus   0.2140
coregulate  0.0190
discuss     0.1755
emotion     0.1515
monitor     0.1440
plan        0.2045
synthesis   0.0195
In [10]:
# Inspect the transition probability matrix
weights_df = model.to_dataframe()
weights_df.round(3)
Out[10]:
adapt cohesion consensus coregulate discuss emotion monitor plan synthesis
adapt 0.000 0.273 0.477 0.022 0.059 0.120 0.033 0.016 0.000
cohesion 0.003 0.027 0.498 0.119 0.060 0.116 0.033 0.141 0.004
consensus 0.005 0.015 0.082 0.188 0.188 0.073 0.047 0.396 0.008
coregulate 0.016 0.036 0.135 0.023 0.274 0.172 0.086 0.239 0.019
discuss 0.071 0.048 0.321 0.084 0.195 0.106 0.022 0.012 0.141
emotion 0.002 0.325 0.320 0.034 0.102 0.077 0.036 0.100 0.003
monitor 0.011 0.056 0.159 0.058 0.375 0.091 0.018 0.216 0.016
plan 0.001 0.025 0.290 0.017 0.068 0.147 0.076 0.374 0.002
synthesis 0.235 0.034 0.466 0.044 0.063 0.071 0.012 0.075 0.000
In [11]:
# Inspect initial probabilities
init_df = pd.Series(model.inits, index=model.labels, name="Initial Probability")
init_df.round(3)
Out[11]:
adapt         0.012
cohesion      0.060
consensus     0.214
coregulate    0.019
discuss       0.176
emotion       0.152
monitor       0.144
plan          0.204
synthesis     0.020
Name: Initial Probability, dtype: float64
In [12]:
# Model summary
model.summary()
Out[12]:
{'n_states': 9,
 'type': 'relative',
 'scaling': [],
 'n_edges': np.int64(78),
 'density': np.float64(0.9629629629629629),
 'mean_weight': np.float64(0.1153846153846154),
 'max_weight': np.float64(0.49793510324483775),
 'has_self_loops': np.True_}

5. Visualizations¶

TNA model visualization offers a bird's-eye view of learning processes, capturing the overall structure, how events connect, which patterns matter, and how events relate over time. TNA provides powerful visualization features with several enhancements for comparing and exploring networks.

5.1 Transition Network Plot¶

The network plot represents a directed weighted network where each node (state, event, or learning activity) appears as a colored circle. Node-to-node arrows represent weighted transition probabilities with direction showing transition routes. Loops represent identical state repetition probabilities. Edge width and opacity reflect transition probability — thicker, more opaque edges indicate stronger transitions.

The plot_network() function provides two key parameters for managing visual complexity:

  • minimum: Hides edges below this weight entirely, removing visual clutter. Note that these small probabilities remain in the model for all subsequent computations — this is purely a visual filter.
  • cut: Fades edges below this weight (reduced opacity) but still shows them, allowing researchers to see the full network while emphasizing stronger transitions.
In [13]:
# minimum: hide edges below 0.05; cut: fade edges below 0.1
tna.plot_network(model, minimum=0.05, cut=0.1)
plt.show()
[Figure: transition network plot with minimum=0.05 and cut=0.1]

5.2 Histogram of Edge Weights¶

Examining the distribution of transition probabilities helps researchers understand the overall structure of the network — whether transitions are uniformly distributed or concentrated among a few strong connections. This informs decisions about pruning thresholds and helps identify the natural "backbone" of the network:

In [14]:
tna.plot_histogram(model)
plt.show()
[Figure: histogram of edge weights]

5.3 Frequency Distribution of States¶

The frequency distribution shows how often each state appears across all sequences. This helps identify which states dominate the process overall and provides context for interpreting the transition network:

In [15]:
# Bar chart of how often each state appears across all sequences
tna.plot_frequencies(model)
plt.show()
[Figure: bar chart of state frequencies]

5.4 Mosaic Plot¶

The mosaic (marimekko) plot visualizes the transition matrix as a contingency table. Tile widths are proportional to column totals (incoming transitions) and tile heights are proportional to row proportions (outgoing transitions). Colors represent adjusted standardized residuals from a chi-squared test — blue tiles indicate more transitions than expected, red tiles indicate fewer. This requires a frequency model built with ftna():

In [16]:
# Build frequency model and plot mosaic
fmodel = tna.ftna(prepared_data)
tna.plot_mosaic(fmodel)
plt.show()
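The residual logic behind the mosaic colors can be sketched with plain NumPy (a conceptual sketch with a hypothetical toy frequency table, not plot_mosaic()'s internals):

```python
import numpy as np

# Toy transition frequency table (rows = source state, columns = target state)
obs = np.array([
    [10., 30., 20.],
    [25., 5., 10.],
    [15., 20., 15.],
])
total = obs.sum()
row = obs.sum(axis=1, keepdims=True)
col = obs.sum(axis=0, keepdims=True)
expected = row * col / total  # expected counts under independence

# Adjusted standardized residuals: positive = more transitions than expected
resid = (obs - expected) / np.sqrt(expected * (1 - row / total) * (1 - col / total))
print(resid.round(2))
```

A large positive residual (a blue tile) marks a transition that occurs more often than the chi-squared independence model predicts; a large negative one (red) marks a rarer-than-expected transition.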

6. Pruning¶

Transition networks commonly appear fully connected or saturated — nearly every node connects to every other node with some probability. Mechanisms are therefore needed to retrieve the network's core or backbone structure by making it sparse. Sparsity enhances interpretability by removing overly complex structure, making important components and relationships easier to identify. It also separates signal from noise, removing small noisy edges that obscure meaningful patterns and allowing researchers to focus on important interactions.

While researchers can use the minimum argument in plot_network() to visually hide small edges, those small probabilities remain in the model for all subsequent computations. Researchers who want to actually remove negligible-weight edges from the model can use the prune() function, which retains only strong, meaningful connections.

The prune() function implements threshold-based pruning: edges below a specified threshold value are set to zero (default threshold is 0.05). This provides a clean model where only meaningful transitions remain for downstream analysis.
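The underlying operation is simple to sketch with NumPy (a conceptual sketch on a hypothetical matrix, not the prune() implementation):

```python
import numpy as np

# Toy transition matrix; threshold-based pruning zeroes edges below the cutoff
W = np.array([
    [0.00, 0.60, 0.03],
    [0.40, 0.02, 0.12],
    [0.04, 0.30, 0.00],
])
pruned = np.where(W >= 0.05, W, 0.0)
print(int((pruned > 0).sum()))  # 4 edges survive the 0.05 threshold
```

Unlike the minimum argument of plot_network(), which only hides edges visually, this zeroing actually removes the weak edges from all downstream computations.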

Pruning with TNA can also be accomplished through bootstrapping (demonstrated in the bootstrapping section below), which offers a statistically grounded approach to identifying and eliminating small and uncertain edges.

In [17]:
# Prune: remove edges with weight below 0.05
pruned = tna.prune(model, threshold=0.05)

print(f"Original edges: {model.summary()['n_edges']}")
print(f"Pruned edges:   {pruned.summary()['n_edges']}")
Original edges: 78
Pruned edges:   46
In [18]:
# Plot the pruned network
tna.plot_network(pruned, cut=0.1)
plt.show()
[Figure: pruned transition network]

7. Patterns: Cliques¶

Patterns help researchers understand behavior, identify significant structures, and describe processes in detail. Patterns form the fundamental building blocks of the structure and dynamics of learning processes. They furnish insights into behavior and learner strategies during studying or interaction with learning materials. Furthermore, capturing repeated, consistent patterns enables theory building and generalizable inferences.

TNA supports identifying several types of n-clique patterns. Network cliques are subsets of graph nodes where every pair of nodes connects directly through edges. In network terms, cliques represent tightly knit communities, closely related entities, or interdependent nodes that shape how learning unfolds.

The cliques() function identifies n-cliques from TNA models. Its arguments include:

  • size: The clique size to search for (size=2 finds dyads, size=3 finds triads, etc.)
  • threshold: The minimum edge weight required for an edge to participate in a clique

Dyads represent TNA's simplest patterns — transitions between two nodes. Mutual dyads (bidirectional) with high edge weights indicate strong interdependence through recurrent occurrence. For instance, consistently moving from reading materials to quiz-taking indicates strong self-evaluative strategies.

Triads capture more complex three-node relationships. In TNA, three-node cliques where each connects to the others in either direction indicate strong interdependent node subgroups forming a process core. Triads represent higher-order learning behavior dependencies.

We search for cliques of size 2, 3, and 4 with decreasing thresholds (larger cliques are rarer, so lower thresholds are needed):
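The dyad case can be sketched with plain NumPy (a conceptual sketch on a hypothetical 3-state matrix, not the cliques() implementation): a size-2 clique requires both directed edges between a pair of states to exceed the threshold.

```python
import numpy as np
from itertools import combinations

labels = ["A", "B", "C"]
W = np.array([
    [0.00, 0.30, 0.02],
    [0.20, 0.00, 0.40],
    [0.01, 0.15, 0.00],
])
threshold = 0.1

# A dyad requires both directed edges between a pair to exceed the threshold
dyads = [
    (labels[i], labels[j])
    for i, j in combinations(range(len(labels)), 2)
    if W[i, j] >= threshold and W[j, i] >= threshold
]
print(dyads)  # [('A', 'B'), ('B', 'C')]
```

A and C are not a dyad here because the edges between them (0.02 and 0.01) fall below the threshold, even though each node participates in another dyad.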

In [19]:
# Find cliques of size 2, 3, and 4 with decreasing thresholds
cliques_of_two   = tna.cliques(model, size=2, threshold=0.1)   # dyads
cliques_of_three = tna.cliques(model, size=3, threshold=0.05)  # triads
cliques_of_four  = tna.cliques(model, size=4, threshold=0.03)  # quads
In [20]:
print(cliques_of_two)
Cliques of size 2 (threshold=0.1)
Number of cliques found: 5

Clique 1: cohesion, emotion
  cohesion: 0.027  0.116
  emotion: 0.325  0.077

Clique 2: consensus, coregulate
  consensus: 0.082  0.188
  coregulate: 0.135  0.023

Clique 3: consensus, discuss
  consensus: 0.082  0.188
  discuss: 0.321  0.195

Clique 4: consensus, plan
  consensus: 0.082  0.396
  plan: 0.290  0.374

Clique 5: discuss, emotion
  discuss: 0.195  0.106
  emotion: 0.102  0.077

In [21]:
print(cliques_of_three)
Cliques of size 3 (threshold=0.05)
Number of cliques found: 3

Clique 1: consensus, coregulate, discuss
  consensus: 0.082  0.188  0.188
  coregulate: 0.135  0.023  0.274
  discuss: 0.321  0.084  0.195

Clique 2: consensus, discuss, emotion
  consensus: 0.082  0.188  0.073
  discuss: 0.321  0.195  0.106
  emotion: 0.320  0.102  0.077

Clique 3: consensus, emotion, plan
  consensus: 0.082  0.073  0.396
  emotion: 0.320  0.077  0.100
  plan: 0.290  0.147  0.374

In [22]:
print(cliques_of_four)
Cliques of size 4 (threshold=0.03)
Number of cliques found: 5

Clique 1: cohesion, coregulate, discuss, emotion
  cohesion: 0.027  0.119  0.060  0.116
  coregulate: 0.036  0.023  0.274  0.172
  discuss: 0.048  0.084  0.195  0.106
  emotion: 0.325  0.034  0.102  0.077

Clique 2: cohesion, coregulate, emotion, monitor
  cohesion: 0.027  0.119  0.116  0.033
  coregulate: 0.036  0.023  0.172  0.086
  emotion: 0.325  0.034  0.077  0.036
  monitor: 0.056  0.058  0.091  0.018

Clique 3: consensus, coregulate, discuss, emotion
  consensus: 0.082  0.188  0.188  0.073
  coregulate: 0.135  0.023  0.274  0.172
  discuss: 0.321  0.084  0.195  0.106
  emotion: 0.320  0.034  0.102  0.077

Clique 4: consensus, coregulate, emotion, monitor
  consensus: 0.082  0.188  0.073  0.047
  coregulate: 0.135  0.023  0.172  0.086
  emotion: 0.320  0.034  0.077  0.036
  monitor: 0.159  0.058  0.091  0.018

Clique 5: consensus, emotion, monitor, plan
  consensus: 0.082  0.073  0.047  0.396
  emotion: 0.320  0.077  0.036  0.100
  monitor: 0.159  0.091  0.018  0.216
  plan: 0.290  0.147  0.076  0.374

8. Centralities¶

Centrality measures quantify the role or importance of states or events in processes. With centrality measures, researchers can rank events by their value in bridging interactions (betweenness centrality) or receiving the most transitions (in-strength centrality). Centrality measures reveal which behaviors or cognitive states prove central to learning processes — as frequent transition destinations, starting points for various actions, bridges between learning activities, or keys to spreading phenomena. Using centrality measures, researchers can identify important events to target for intervention or improvement.

Importantly, raw or absolute centrality measure values lack inherent meaning in TNA. Relative values matter instead, allowing node ranking and relative importance identification within networks.

8.1 Node-Level Centrality Measures¶

The centralities() function computes centrality measures using directed probabilistic process algorithms. By default, it removes loops from calculations (changeable via loops=True). Removing loops means all centrality computations proceed without considering self-transitioning or same-state repetition.

Available measures include:

  • OutStrength / InStrength: Sum of outgoing/incoming transition probabilities. In networks where self-loops are removed, out-strength reflects how readily a state is left — higher values indicate a greater likelihood of transitioning away (i.e., lower state stability).
  • Closeness / InCloseness: How quickly a state can reach (or be reached from) all other states.
  • Betweenness: How often a state lies on shortest paths between other states, measuring its bridging role.
  • BetweennessRSP: Betweenness based on randomized shortest paths — more appropriate for probabilistic networks.
  • Diffusion: Measures how efficiently information or influence spreads from a state.
  • Clustering: Local clustering coefficient reflecting the interconnectedness of a state's neighbors.
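The strength measures are straightforward to sketch with NumPy (a conceptual sketch using the small example matrix from earlier, not the centralities() implementation):

```python
import numpy as np

# Toy transition matrix (the 3-state example used earlier in the tutorial)
W = np.array([
    [0.1, 0.6, 0.3],
    [0.4, 0.2, 0.4],
    [0.3, 0.3, 0.4],
])
np.fill_diagonal(W, 0.0)  # drop self-loops, mirroring the default loops behavior

out_strength = W.sum(axis=1)  # total probability of leaving each state
in_strength = W.sum(axis=0)   # total probability of entering each state
print(out_strength)  # [0.9 0.8 0.6]
print(in_strength)   # [0.7 0.9 0.7]
```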
In [23]:
# Compute all centrality measures for each state
centrality_df = tna.centralities(model)
centrality_df.round(4)
Out[23]:
OutStrength InStrength ClosenessIn ClosenessOut Closeness Betweenness BetweennessRSP Diffusion Clustering
adapt 1.0000 0.3446 0.0083 0.0152 0.0248 1.0 1.0 5.5863 0.3370
cohesion 0.9729 0.8116 0.0138 0.0124 0.0265 0.0 19.0 5.2086 0.2996
consensus 0.9180 2.6672 0.0351 0.0125 0.0383 30.0 103.0 4.6597 0.1608
coregulate 0.9766 0.5666 0.0155 0.0150 0.0210 0.0 27.0 5.1479 0.3058
discuss 0.8051 1.1882 0.0196 0.0131 0.0271 16.0 53.0 4.6276 0.2397
emotion 0.9232 0.8941 0.0141 0.0121 0.0231 5.0 36.0 5.0699 0.2905
monitor 0.9819 0.3457 0.0076 0.0137 0.0193 0.0 11.0 5.1568 0.2889
plan 0.6258 1.1938 0.0274 0.0115 0.0274 9.0 61.0 3.4875 0.2875
synthesis 1.0000 0.1915 0.0100 0.0158 0.0243 7.0 3.0 5.5825 0.3586
In [24]:
# Plot centralities as faceted bar charts
tna.plot_centralities(centrality_df)
plt.show()
[Figure: faceted bar charts of centrality measures]

8.2 Edge-Level Measures: Edge Betweenness¶

In TNA, edge centrality measures quantify the importance of transitions between events — rather than the events themselves — furnishing insights into particular transitions' criticality for process flow. Edge betweenness centrality reflects how frequently a transition bridges other transitions in the network.

Edge centrality measures help researchers understand not only which nodes are important but which transitions guide learning processes. For instance, a transition from "planning" to "task execution" might have high edge betweenness, indicating it serves as a critical bridge in the learning process.

The betweenness_network() function creates a new TNA model where edge weights are replaced with their betweenness centrality values:

In [25]:
# Compute edge betweenness for all transitions
edge_betweenness = tna.betweenness_network(model)

# Show the betweenness values
edge_betweenness.to_dataframe().round(3)
Out[25]:
adapt cohesion consensus coregulate discuss emotion monitor plan synthesis
adapt 0.0 2.0 6.0 0.0 0.0 1.0 0.0 0.0 0.0
cohesion 0.0 0.0 7.0 0.0 0.0 1.0 0.0 0.0 0.0
consensus 0.0 0.0 0.0 8.0 15.0 0.0 0.0 15.0 0.0
coregulate 0.0 0.0 0.0 0.0 4.0 2.0 1.0 1.0 0.0
discuss 0.0 0.0 7.0 0.0 0.0 2.0 0.0 0.0 15.0
emotion 0.0 6.0 7.0 0.0 0.0 0.0 0.0 0.0 0.0
monitor 0.0 0.0 0.0 0.0 5.0 2.0 0.0 1.0 0.0
plan 0.0 0.0 5.0 0.0 0.0 5.0 7.0 0.0 0.0
synthesis 9.0 0.0 6.0 0.0 0.0 0.0 0.0 0.0 0.0
In [26]:
# Plot edge betweenness network
tna.plot_network(edge_betweenness, cut=0.1, title="Edge Betweenness Network")
plt.show()
[Figure: edge betweenness network]

8.3 Centrality Stability¶

Centrality stability assessment determines whether centrality rankings remain consistent when cases are progressively dropped from the data. The estimate_cs() function implements the case-dropping bootstrap approach: it repeatedly drops increasing proportions of cases (10% to 90%) and recalculates centralities, measuring rank-order correlation with the original. The CS coefficient represents the maximum proportion of cases that can be dropped while maintaining a correlation above 0.7 with at least 95% certainty. CS values above 0.5 indicate stable centrality rankings, while values below 0.25 suggest instability:

In [27]:
# Centrality stability: case-dropping bootstrap
cs_result = tna.estimate_cs(model, iter=200, seed=42)
print("CS coefficients:", cs_result.cs_coefficients)
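The case-dropping logic can be sketched independently of tna. The snippet below is illustrative only: it uses state frequencies as a stand-in for centralities and compares rank orders before and after dropping a share of the cases.

```python
import numpy as np

# Illustrative sketch of the case-dropping idea (not tna's implementation):
# drop a share of cases, recompute a per-state statistic, and check the
# rank-order correlation with the full-sample values.
rng = np.random.default_rng(42)
p = np.arange(1, 10) / 45.0                      # 9 states with distinct frequencies
cases = rng.choice(9, size=(500, 20), p=p)       # 500 toy sequences of length 20
full = np.bincount(cases.ravel(), minlength=9)   # stand-in for a centrality vector

def rank_corr(a, b):
    # correlation between the rank orders of two vectors
    ra = np.argsort(np.argsort(a))
    rb = np.argsort(np.argsort(b))
    return np.corrcoef(ra, rb)[0, 1]

keep = rng.choice(500, size=450, replace=False)  # drop 10% of the cases
sub = np.bincount(cases[keep].ravel(), minlength=9)
r = rank_corr(full, sub)  # stays near 1 when rankings are stable
```

Repeating this across drop proportions and bootstrap iterations, and recording how far one can go before the correlation falls below 0.7, yields the CS coefficient described above.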

9. Community Detection¶

Communities are groups of nodes that are more closely related or densely interconnected with one another than with the rest of the network. In TNA, communities group states or events that frequently transition between one another or share similar dynamics. They represent cohesive sequences or successions of activities that are likely to co-occur, revealing typical pathways or recurring behaviors.

Unlike cliques, which have fixed or predefined structures (e.g., 2-cliques or 3-cliques), communities are derived from the data based on connectivity patterns, making them more descriptive of real-world structures. Community identification uncovers latent or hidden clusters of related interaction or behavior during learning. Identifying these clusters provides insight into collaboration and learning effectiveness, common regulatory practices, or interaction patterns.

Furthermore, identifying communities of behaviors or events can contribute to theory building and to understanding learning. Communities distill densely connected behaviors into simplified, meaningful structures, suggesting the presence of underlying constructs or behavioral mechanisms.

The communities() function supports several detection algorithms suited for transition networks (typically small, weighted, and directed):

  • Leading Eigenvector (leading_eigen): Uses the leading eigenvector of the modularity matrix to partition nodes. This is the default method.
  • Fast Greedy (fast_greedy): Optimizes modularity by iteratively merging communities.
  • Louvain (louvain): A multi-level modularity optimization algorithm.
  • Label Propagation (label_prop): Each node adopts the most common community among its neighbors.
  • Edge Betweenness (edge_betweenness): Iteratively removes high-betweenness edges to reveal communities.
In [28]:
# Detect communities using the default algorithm (leading eigenvector)
comms = tna.communities(model)
print(comms)
Community Detection Results

  leading_eigen: 2 communities

Assignments:
            leading_eigen
adapt                   0
cohesion                0
consensus               0
coregulate              1
discuss                 1
emotion                 0
monitor                 1
plan                    1
synthesis               0
In [29]:
# Plot communities: nodes colored by community assignment
tna.plot_communities(comms, cut=0.1)
plt.show()
In [30]:
# Try multiple community detection methods
comms_multi = tna.communities(model, methods=["leading_eigen", "louvain", "fast_greedy"])
print(comms_multi)
Community Detection Results

  leading_eigen: 2 communities
  louvain: 2 communities
  fast_greedy: 2 communities

Assignments:
            leading_eigen  louvain  fast_greedy
adapt                   0        0            0
cohesion                0        0            0
consensus               0        0            0
coregulate              1        1            1
discuss                 1        1            1
emotion                 0        0            0
monitor                 1        1            1
plan                    1        0            0
synthesis               0        0            0

10. Bootstrapping¶

10.1 Why Bootstrap?¶

Bootstrapping is a robust validation technique for assessing the accuracy and stability of edge weights and, by extension, entire models. Through bootstrapping, researchers can verify each edge, determine its statistical significance, and obtain confidence intervals for transition probabilities. Most network and process mining research employs descriptive methods; model validation and tests of statistical significance are largely absent from the literature. Validated models enable researchers to assess robustness and reproducibility, ensuring that insights do not arise from chance and are therefore more likely to generalize.

Bootstrapping is a resampling technique that repeatedly draws samples from the original dataset with replacement and estimates a model for each sample (usually hundreds or thousands of times). It requires no strong assumptions about the data distribution, making it suitable for process data, which often does not adhere to any specific distribution. Because sampling is done with replacement, each sample may include multiple copies of some observations while excluding others, which makes it possible to assess the variability of parameter estimates. Edges that appear consistently across most of the estimated models can be considered stable and significant.

Another key advantage of bootstrapping is that it effectively prunes dense networks. A common challenge in probabilistic networks like TNA is that they are completely connected: every possible transition between nodes exists to some degree. Bootstrapping mitigates this by identifying and eliminating small, uncertain edges, effectively retrieving the network backbone. The resulting simplified network is easier to interpret and more likely to generalize.
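The underlying resampling logic can be sketched for a single edge. This is illustrative only: the data are made up, and tna's bootstrap_tna() operates on whole sequence sets rather than one edge in isolation.

```python
import numpy as np

# Illustrative sketch: bootstrap one edge's weight and prune it if its
# confidence interval reaches below a minimum threshold.
rng = np.random.default_rng(265)
edge_weights = rng.beta(2, 5, size=100)  # stand-in: the edge's weight per sequence
n_boot = 1000
boot_means = np.array([
    rng.choice(edge_weights, size=edge_weights.size, replace=True).mean()
    for _ in range(n_boot)
])
ci_lower, ci_upper = np.percentile(boot_means, [2.5, 97.5])
significant = ci_lower > 0.05  # keep the edge only if it is clearly above the threshold
```

Applying this per edge and zeroing the non-significant ones is what produces the pruned "backbone" network described above.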

The bootstrap_tna() function calculates confidence intervals and p-values for each edge weight. It defaults to 1000 bootstrap iterations (via the iter argument). The level argument sets the significance threshold (e.g., 0.05): edges whose bootstrap p-value falls below this level are deemed statistically significant.

In [31]:
# Resample sequences 1000 times and assess edge stability
np.random.seed(265)  # for reproducibility
boot = tna.bootstrap_tna(model, iter=1000, level=0.05, seed=265)

10.2 Results¶

The bootstrap result contains several elements:

  • weights_sig: A matrix showing only statistically significant transitions (non-significant weights set to zero)
  • weights_mean: Mean transition matrix across all bootstrap samples
  • weights_sd: Standard deviation matrix across all bootstrap samples
  • ci_lower / ci_upper: Bootstrap confidence interval bounds for each transition
  • p_values: Bootstrap p-value matrix for each transition

The summary() method returns a convenient DataFrame with all of these statistics per edge:

In [32]:
# Extract the bootstrap summary table
boot_df = boot.summary()
boot_df.head(10)
Out[32]:
from to weight p_value sig cr_lower cr_upper ci_lower ci_upper
0 cohesion adapt 0.002950 0.480519 False 0.002212 0.003687 0.000594 0.005700
1 consensus adapt 0.004740 0.173826 False 0.003555 0.005925 0.003130 0.006352
2 coregulate adapt 0.016244 0.156843 False 0.012183 0.020305 0.010810 0.022272
3 discuss adapt 0.071374 0.000999 True 0.053531 0.089218 0.063041 0.079214
4 emotion adapt 0.002467 0.577423 False 0.001851 0.003084 0.000720 0.004458
5 monitor adapt 0.011165 0.287712 False 0.008374 0.013957 0.006206 0.016417
6 plan adapt 0.000975 0.550450 False 0.000731 0.001218 0.000320 0.001848
7 synthesis adapt 0.234663 0.000999 True 0.175997 0.293328 0.202432 0.267998
8 adapt cohesion 0.273084 0.001998 True 0.204813 0.341356 0.236137 0.310988
9 cohesion cohesion 0.027139 0.108891 False 0.020354 0.033923 0.019429 0.035850
In [33]:
# Keep only edges that survived the bootstrap and sort by weight
sig_edges = boot_df[boot_df["sig"] == True].sort_values("weight", ascending=False)
print(f"{len(sig_edges)} out of {len(boot_df)} edges are significant")
sig_edges.head(15)
51 out of 78 edges are significant
Out[33]:
from to weight p_value sig cr_lower cr_upper ci_lower ci_upper
18 cohesion consensus 0.497935 0.000999 True 0.373451 0.622419 0.475015 0.521243
17 adapt consensus 0.477407 0.000999 True 0.358055 0.596758 0.431948 0.520174
25 synthesis consensus 0.466258 0.000999 True 0.349693 0.582822 0.426488 0.505009
64 consensus plan 0.395797 0.000999 True 0.296848 0.494746 0.383257 0.407910
41 monitor discuss 0.375436 0.000999 True 0.281577 0.469295 0.352214 0.401393
69 plan plan 0.374208 0.000999 True 0.280656 0.467760 0.362409 0.386589
13 emotion cohesion 0.325344 0.000999 True 0.244008 0.406680 0.308078 0.342700
21 discuss consensus 0.321185 0.000999 True 0.240888 0.401481 0.306922 0.336784
22 emotion consensus 0.320409 0.000999 True 0.240307 0.400511 0.302499 0.337725
24 plan consensus 0.290401 0.000999 True 0.217801 0.363001 0.278729 0.301506
38 coregulate discuss 0.273604 0.000999 True 0.205203 0.342005 0.254195 0.291943
8 adapt cohesion 0.273084 0.001998 True 0.204813 0.341356 0.236137 0.310988
65 coregulate plan 0.239086 0.000999 True 0.179315 0.298858 0.218761 0.258977
7 synthesis adapt 0.234663 0.000999 True 0.175997 0.293328 0.202432 0.267998
68 monitor plan 0.215632 0.000999 True 0.161724 0.269539 0.194958 0.236826

10.3 Bootstrapped Network¶

The bootstrapped model (boot.model) contains only statistically significant edges — those that survived the bootstrap validation. Plotting this model shows the validated network backbone, which is more likely to generalize to new data:

In [34]:
# Plot the bootstrapped network (only significant edges)
tna.plot_network(boot.model, cut=0.1, title="Bootstrapped Network (significant edges)")
plt.show()

11. Sequence Plots¶

Sequence plots provide a direct visualization of the raw sequential data before it is aggregated into a transition network. These visualizations help researchers understand the variety and structure of individual sequences.

Two plot types are available:

  • Index plot: Each row represents one sequence, with colors indicating the state at each position. This reveals the diversity and patterns in individual trajectories — whether sequences are highly varied or follow common templates.
  • Distribution plot: Shows the proportion of each state at each sequence position, revealing how the state distribution evolves over time. This helps identify whether certain states dominate at the beginning or end of sequences.
In [35]:
# Each row is one sequence; colors represent states at each position
tna.plot_sequences(prepared_data, max_sequences=200)
plt.show()
In [36]:
# Proportion of each state at each sequence position
tna.plot_sequences(prepared_data, plot_type="distribution")
plt.show()

12. Group Models¶

Researchers frequently encounter predefined conditions such as high versus low achievers, different course types, or gender groups. Comparing such groups has commonly been done visually, by juxtaposing process models or sequence models. While visual comparison may reveal differences, it cannot establish statistical significance: exactly where the differences are significant, and where they are not, remains unclear.

TNA addresses this by enabling rigorous systematic group comparison. The group_tna() function builds separate TNA models for each level of a grouping variable. The metadata preserved by prepare_data() (e.g., the Achiever column) can be used directly as the grouping variable — no manual data splitting needed.

All standard TNA functions (centralities(), prune(), communities(), cliques(), plot_network()) work seamlessly with group models, automatically applying per-group and returning combined results. This enables researchers to examine how transition dynamics differ across subgroups without writing any group-splitting code.

In [37]:
# Build group models directly from the prepared data using the Achiever metadata column
group_model = tna.group_tna(prepared_data, group="Achiever")
print(group_model)
print()

# Summary statistics per group
group_model.summary()
GroupTNA with 2 groups:
  High: 9 states, 76 edges
  Low: 9 states, 75 edges

Out[37]:
n_states type scaling n_edges density mean_weight max_weight has_self_loops
group
High 9 relative [] 76 0.938272 0.118421 0.575540 True
Low 9 relative [] 75 0.925926 0.120000 0.461957 True
In [38]:
# Access individual models using dict-style indexing
print(group_model["High"])
print()
print("Group names:", group_model.names())
TNA Model
  Type: relative
  States: ['adapt', 'cohesion', 'consensus', 'coregulate', 'discuss', 'emotion', 'monitor', 'plan', 'synthesis']
  Scaling: none

Transition Matrix:
               adapt  cohesion  consensus  coregulate   discuss   emotion   monitor      plan  synthesis
adapt       0.000000  0.262411   0.517730    0.000000  0.035461  0.141844  0.028369  0.014184   0.000000
cohesion    0.005330  0.043710   0.536247    0.081023  0.040512  0.118337  0.017058  0.151386   0.006397
consensus   0.004127  0.019752   0.083432    0.170991  0.232606  0.081368  0.035377  0.364387   0.007960
coregulate  0.022371  0.035794   0.108501    0.013423  0.234899  0.203579  0.096197  0.266219   0.019016
discuss     0.023964  0.061907   0.424863    0.072391  0.169246  0.112332  0.016475  0.012481   0.106340
emotion     0.003226  0.325806   0.336129    0.023226  0.121935  0.062581  0.031613  0.090323   0.005161
monitor     0.011058  0.048973   0.159558    0.050553  0.369668  0.096367  0.018957  0.225908   0.018957
plan        0.001383  0.031466   0.293914    0.023859  0.060166  0.182227  0.075726  0.327801   0.003458
synthesis   0.143885  0.028777   0.575540    0.014388  0.028777  0.064748  0.000000  0.143885   0.000000

Initial Probabilities:
             prob
adapt       0.012
cohesion    0.082
consensus   0.212
coregulate  0.005
discuss     0.180
emotion     0.169
monitor     0.129
plan        0.188
synthesis   0.023

Group names: ['High', 'Low']
In [39]:
# Plot all group networks side by side (automatic multi-panel)
tna.plot_network(group_model, minimum=0.05, cut=0.1)
plt.show()
In [40]:
# Prune all groups at once — returns a new GroupTNA with pruned models
pruned_group = tna.prune(group_model, threshold=0.05)
print(pruned_group)

# Compare edge counts
for name in group_model:
    orig = group_model[name].summary()["n_edges"]
    prun = pruned_group[name].summary()["n_edges"]
    print(f"  {name}: {orig} → {prun} edges")
GroupTNA with 2 groups:
  High: 9 states, 42 edges
  Low: 9 states, 48 edges
  High: 76 → 42 edges
  Low: 75 → 48 edges
In [41]:
# Centralities across groups — returns a single DataFrame with a 'group' column
group_cent = tna.centralities(group_model, measures=["OutStrength", "InStrength", "Betweenness"])
group_cent
Out[41]:
group OutStrength InStrength Betweenness
adapt High 1.000000 0.215346 1.0
cohesion High 0.956290 0.814888 0.0
consensus High 0.916568 2.952482 30.0
coregulate High 0.986577 0.436432 0.0
discuss High 0.830754 1.124025 16.0
emotion High 0.937419 1.000801 5.0
monitor High 0.981043 0.300815 0.0
plan High 0.672199 1.268773 11.0
synthesis High 1.000000 0.167289 7.0
adapt Low 1.000000 0.452279 4.0
cohesion Low 0.993395 0.798538 0.0
consensus Low 0.919646 2.415674 25.0
coregulate Low 0.968401 0.689185 3.0
discuss Low 0.778747 1.214121 14.0
emotion Low 0.905983 0.807917 3.0
monitor Low 0.982500 0.392745 0.0
plan Low 0.584686 1.146514 8.0
synthesis Low 1.000000 0.216385 0.0
In [42]:
# Communities per group
group_comms = tna.communities(group_model)
for name, result in group_comms.items():
    print(f"{name}: {result.counts}")
    print(result.assignments)
    print()
High: {'leading_eigen': 2}
            leading_eigen
adapt                   1
cohesion                1
consensus               1
coregulate              0
discuss                 0
emotion                 1
monitor                 0
plan                    0
synthesis               1

Low: {'leading_eigen': 2}
            leading_eigen
adapt                   0
cohesion                0
consensus               0
coregulate              1
discuss                 1
emotion                 1
monitor                 1
plan                    1
synthesis               0

12.1 Permutation Testing Between Groups¶

To address the limitations of simple visual comparison, TNA employs rigorous permutation-based approaches for determining whether observed differences between group models are statistically significant. Permutation tests involve repeatedly shuffling the data between groups and generating a distribution of differences under the null hypothesis. For each edge, the test provides p-values helping researchers identify statistically significant differences. This rigorous approach ensures TNA insights reflect true underlying differences rather than chance artifacts.

The permutation_test() function compares two TNA models by shuffling sequences between groups for a specified number of iterations (iter), creating a null distribution of edge-weight differences. Edges where the observed difference exceeds the permutation distribution are flagged as statistically significant.
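The permutation logic can be sketched for a single edge. The snippet below is illustrative only (made-up data, one edge); permutation_test() applies this idea per edge across whole models.

```python
import numpy as np

# Illustrative sketch of a permutation test on one edge: pool the two
# groups' per-sequence weights, repeatedly reshuffle the group labels,
# and compare the observed difference to the resulting null distribution.
rng = np.random.default_rng(42)
high = rng.normal(0.50, 0.05, 60)  # stand-in per-sequence edge weights, group 1
low = rng.normal(0.35, 0.05, 60)   # stand-in per-sequence edge weights, group 2
observed = high.mean() - low.mean()

pooled = np.concatenate([high, low])
null = np.empty(1000)
for i in range(1000):
    rng.shuffle(pooled)                              # shuffle group labels
    null[i] = pooled[:60].mean() - pooled[60:].mean()

p_value = (np.abs(null) >= abs(observed)).mean()     # two-sided p-value
```

An edge is flagged as significantly different when the observed difference is extreme relative to this shuffled null distribution.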

Access individual group models with dict-style indexing to compare specific groups:

In [43]:
# Permutation test: compare High vs Low achievers
perm_result = tna.permutation_test(
    group_model["High"], group_model["Low"],
    iter=500, seed=42, level=0.05
)

# Show significant edge differences
sig_perm = perm_result.edges["stats"][
    perm_result.edges["stats"]["p_value"] < 0.05
].sort_values("p_value")

print(f"{len(sig_perm)} significant edge differences found")
sig_perm
42 significant edge differences found
Out[43]:
edge_name diff_true effect_size p_value
38 consensus -> discuss 0.096072 9.800758 0.001996
76 discuss -> synthesis -0.070251 -6.280752 0.001996
71 synthesis -> plan 0.119821 5.892463 0.001996
70 plan -> plan -0.087513 -6.955485 0.001996
65 consensus -> plan -0.067687 -5.392796 0.001996
56 consensus -> monitor -0.024207 -4.640464 0.001996
55 cohesion -> monitor -0.035783 -3.962377 0.001996
52 plan -> emotion 0.066760 7.404535 0.001996
48 coregulate -> emotion 0.057669 3.479972 0.001996
41 emotion -> discuss 0.044235 3.682664 0.001996
40 discuss -> discuss -0.052006 -4.281572 0.001996
39 coregulate -> discuss -0.070863 -3.543739 0.001996
37 cohesion -> discuss -0.042712 -3.940997 0.001996
32 emotion -> coregulate -0.024171 -3.693955 0.001996
79 plan -> synthesis 0.003152 2.898779 0.001996
10 cohesion -> cohesion 0.037105 4.679806 0.001996
13 discuss -> cohesion 0.029053 4.234649 0.001996
19 cohesion -> consensus 0.085785 3.377973 0.001996
22 discuss -> consensus 0.210284 14.505543 0.001996
26 synthesis -> consensus 0.190513 4.781482 0.001996
8 synthesis -> adapt -0.158254 -4.827859 0.001996
11 consensus -> cohesion 0.010559 3.786004 0.001996
28 cohesion -> coregulate -0.085423 -5.210336 0.001996
29 consensus -> coregulate -0.036023 -3.396109 0.001996
4 discuss -> adapt -0.096159 -11.276056 0.001996
35 synthesis -> coregulate -0.052456 -3.108636 0.003992
44 synthesis -> discuss -0.059458 -3.182909 0.003992
47 consensus -> emotion 0.018719 2.944978 0.003992
21 coregulate -> consensus -0.047633 -3.175563 0.003992
30 coregulate -> coregulate -0.018176 -2.675244 0.003992
34 plan -> coregulate 0.012527 3.697559 0.003992
16 plan -> cohesion 0.011864 2.942219 0.003992
50 emotion -> emotion -0.031436 -2.888892 0.005988
66 coregulate -> plan 0.049676 2.613874 0.009980
31 discuss -> coregulate -0.024118 -2.733761 0.009980
77 emotion -> synthesis 0.005161 2.661137 0.009980
62 synthesis -> monitor -0.021390 -2.597739 0.011976
58 discuss -> monitor -0.011759 -2.517596 0.013972
43 plan -> discuss -0.014566 -2.266660 0.019960
27 adapt -> coregulate -0.029891 -2.360321 0.023952
73 cohesion -> synthesis 0.006397 2.195816 0.033932
3 coregulate -> adapt 0.011219 1.999853 0.049900

12.2 Difference Network¶

The plot_compare() function visualizes the difference between two TNA models as a network. Green edges indicate transitions that are stronger in the first model; red edges indicate transitions that are stronger in the second. Edge width is proportional to the absolute difference, and node colors reflect differences in initial probabilities:

In [44]:
# Difference network: High vs Low achievers
tna.plot_compare(group_model["High"], group_model["Low"])
plt.show()

12.3 Comparing Sequence Patterns¶

Beyond edge-level permutation tests and difference networks, TNA offers a complementary approach for group comparison that operates at the subsequence level. The compare_sequences() function extracts n-gram patterns — contiguous subsequences of length 1 (unigrams), 2 (bigrams), 3 (trigrams), and so on — from each group's raw sequences and compares their frequencies.

This approach answers a different question than permutation testing. Where permutation tests ask "do these two groups differ in their transition probabilities?", sequence pattern comparison asks "do these two groups differ in the specific behavioral sequences they produce?" For example, a bigram "plan->consensus" captures a concrete two-step behavior, while the transition probability plan->consensus is an aggregate over all sequences.

The sub parameter controls which subsequence lengths to examine (default: 1 through 5), and min_freq filters out rare patterns that appear fewer than a specified number of times in any group. When test=True, a permutation test shuffles group labels to assess whether the observed frequency differences are statistically significant, with p-values adjusted for multiple comparisons (Bonferroni by default).
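The n-gram extraction itself is simple to sketch. This is illustrative only; compare_sequences() performs the equivalent per group and tabulates pattern frequencies.

```python
from collections import Counter

def ngrams(seq, n):
    # contiguous subsequences of length n, joined in the tutorial's "a->b" style
    return ["->".join(seq[i:i + n]) for i in range(len(seq) - n + 1)]

seq = ["plan", "consensus", "discuss", "consensus"]
unigrams = ngrams(seq, 1)          # the states themselves
bigrams = Counter(ngrams(seq, 2))  # three bigrams, each occurring once
```

Counting these per group and comparing proportions yields tables like the ones shown below.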

In [45]:
# Compare subsequence patterns between High and Low achievers
cs_result = tna.compare_sequences(group_model)
print(f"Total patterns found: {len(cs_result)}")
print(f"Columns: {list(cs_result.columns)}")
print()

# Show unigrams (single-state frequencies)
unigrams = cs_result[~cs_result['pattern'].str.contains('->')]
print(f"Unigram patterns ({len(unigrams)} states):")
print(unigrams.to_string(index=False))
print()

# Show top bigrams by frequency difference
bigrams = cs_result[cs_result['pattern'].str.count('->') == 1].copy()
bigrams['prop_diff'] = (bigrams['prop_High'] - bigrams['prop_Low']).abs()
print(f"Top 10 bigrams by proportion difference ({len(bigrams)} total):")
bigrams.nlargest(10, 'prop_diff')[['pattern', 'freq_High', 'freq_Low', 'prop_High', 'prop_Low']]
Total patterns found: 918
Columns: ['pattern', 'freq_High', 'freq_Low', 'prop_High', 'prop_Low']

Unigram patterns (9 states):
   pattern  freq_High  freq_Low  prop_High  prop_Low
     adapt        155       399   0.011297  0.028888
  cohesion       1018       821   0.074193  0.059441
 consensus       3651      3146   0.266088  0.227773
coregulate        959      1174   0.069893  0.084999
   discuss       2166      2101   0.157860  0.152114
   emotion       1686      1389   0.122877  0.100565
   monitor        668       848   0.048684  0.061396
      plan       3102      3521   0.226077  0.254923
 synthesis        316       413   0.023030  0.029902

Top 10 bigrams by proportion difference (67 total):
Out[45]:
pattern freq_High freq_Low prop_High prop_Low
40 discuss->consensus 851 418 0.066897 0.032626
69 plan->plan 948 1356 0.074522 0.105838
24 consensus->discuss 789 401 0.062023 0.031299
38 discuss->adapt 48 234 0.003773 0.018264
14 cohesion->consensus 503 341 0.039541 0.026616
67 plan->emotion 527 377 0.041428 0.029426
48 emotion->consensus 521 388 0.040956 0.030284
46 discuss->synthesis 213 344 0.016744 0.026850
33 coregulate->discuss 210 329 0.016508 0.025679
10 adapt->consensus 73 170 0.005739 0.013269
In [46]:
# Permutation test for statistical significance (100 iterations for speed)
cs_test = tna.compare_sequences(group_model, test=True, iter_=100, seed=42)

# Show patterns with smallest p-values
print("Patterns with smallest adjusted p-values:")
cs_test.head(15)[['pattern', 'freq_High', 'freq_Low', 'prop_High', 'prop_Low', 'effect_size', 'p_value']]
Patterns with smallest adjusted p-values:
Out[46]:
pattern freq_High freq_Low prop_High prop_Low effect_size p_value
0 adapt 155 399 0.011297 0.028888 15.838830 0.089109
1 cohesion 1018 821 0.074193 0.059441 7.453087 0.089109
2 consensus 3651 3146 0.266088 0.227773 14.827639 0.089109
3 coregulate 959 1174 0.069893 0.084999 6.394814 0.089109
4 emotion 1686 1389 0.122877 0.100565 11.488188 0.089109
5 monitor 668 848 0.048684 0.061396 6.937058 0.089109
6 plan 3102 3521 0.226077 0.254923 7.138419 0.089109
7 synthesis 316 413 0.023030 0.029902 4.255809 0.089109
8 adapt->cohesion 37 102 0.002909 0.007961 8.969993 0.772277
9 adapt->consensus 73 170 0.005739 0.013269 8.197910 0.772277
10 adapt->discuss 5 25 0.000393 0.001951 5.433557 0.772277
11 cohesion->cohesion 41 5 0.003223 0.000390 8.096168 0.772277
12 cohesion->consensus 503 341 0.039541 0.026616 9.826334 0.772277
13 cohesion->monitor 16 40 0.001258 0.003122 4.703406 0.772277
14 cohesion->plan 142 97 0.011163 0.007571 3.205518 0.772277

13. Sequence Clustering (Tactics)¶

13.1 Why Cluster Sequences?¶

The analyses presented so far — transition networks, centralities, communities, bootstrapping, and group comparison — all operate at the network level, characterizing the aggregate dynamics of how states connect and transition. However, within any dataset, individual sequences often exhibit substantial heterogeneity. Not all learners follow the same behavioral patterns. Some may consistently cycle between planning and execution, while others predominantly engage in monitoring with occasional social interactions.

Sequence clustering addresses this heterogeneity by grouping individual sequences (learners, sessions, or actors) into clusters of similar behavioral trajectories — often called tactics or strategies in educational research. This complements network-level analysis by revealing the diversity of approaches within a population. Where the overall TNA model shows the average transition structure, tactics reveal the distinct behavioral patterns that compose that average.

This distinction from community detection (Section 9) is important. Communities group states that frequently co-transition within the network — identifying which behaviors tend to co-occur. Tactics group entire sequences — identifying which learners behave similarly across their full trajectory. A learner's tactic reflects their overall strategy, while communities reflect structural relationships among behaviors.

Identifying tactics serves several research purposes:

  • Typology development: Discovering naturally occurring behavioral patterns supports theory building about learning strategies, self-regulation approaches, or collaborative styles.
  • Intervention targeting: Different tactics may require different interventions. Learners who predominantly monitor but rarely plan may benefit from different support than those who cycle rapidly between all states.
  • Outcome prediction: Tactics can predict learning outcomes — some behavioral patterns may consistently associate with higher or lower achievement.
  • Group comparison via TNA: Perhaps most powerfully, discovered tactics can serve as grouping variables for further TNA analysis, building separate transition networks per tactic to examine how transition dynamics differ across behavioral strategies.

13.2 Distance Metrics¶

Sequence clustering requires measuring how similar or different two sequences are. The cluster_sequences() function supports four distance metrics, each capturing different aspects of sequence similarity:

  • Hamming distance ('hamming'): Counts the number of positions where two sequences differ. Fast and intuitive, but requires sequences of equal length and treats all positions equally. Best suited for aligned sequences where positional correspondence matters — for example, comparing what students did at time step 1, time step 2, etc.

  • Levenshtein distance ('lv'): The minimum number of insertions, deletions, and substitutions needed to transform one sequence into another. Handles unequal-length sequences naturally. Appropriate when the exact temporal alignment is less important than the overall ordering of events.

  • Optimal String Alignment ('osa'): Extends Levenshtein distance by also allowing adjacent transpositions (swapping two neighboring elements). Useful when near-swaps should be considered minor differences — for example, if planning-then-executing versus executing-then-planning represents a small rather than large behavioral difference.

  • Longest Common Subsequence ('lcs'): Measures distance as the length difference after removing the longest shared subsequence. Focuses on what two sequences have in common regardless of position. Particularly appropriate when the presence of certain behavioral patterns matters more than their exact timing.

For most TNA applications with aligned sequences from prepare_data(), Hamming distance provides a good default. For sequences of varying length or when positional alignment is uncertain, Levenshtein or LCS distances are more appropriate.
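Two of these metrics can be sketched in a few lines. The implementations below are illustrative only; cluster_sequences() uses its own implementations internally.

```python
def hamming(a, b):
    # number of positions where two equal-length sequences differ
    if len(a) != len(b):
        raise ValueError("Hamming distance requires equal-length sequences")
    return sum(x != y for x, y in zip(a, b))

def levenshtein(a, b):
    # minimum number of insertions, deletions, and substitutions,
    # computed with a rolling single-row dynamic program
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        curr = [i]
        for j, y in enumerate(b, 1):
            curr.append(min(prev[j] + 1,            # deletion
                            curr[j - 1] + 1,        # insertion
                            prev[j - 1] + (x != y)))  # substitution
        prev = curr
    return prev[-1]

hamming(["plan", "discuss", "monitor"], ["plan", "monitor", "monitor"])  # 1
levenshtein(["plan", "discuss"], ["plan", "monitor", "discuss"])         # 1
```

Note how Levenshtein handles the unequal-length pair that Hamming would reject: a single insertion of "monitor" transforms one sequence into the other.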

In [47]:
# Cluster sequences into 3 tactics using PAM with Hamming distance
clust = tna.cluster_sequences(prepared_data, k=3, dissimilarity="hamming", method="pam")
print(clust)
print(f"\nCluster sizes: {clust.sizes}")
print(f"Silhouette score: {clust.silhouette:.4f}")
ClusterResult(k=3, method='pam', dissimilarity='hamming', silhouette=0.1904)

Cluster sizes: [893 713 394]
Silhouette score: 0.1904

13.3 Clustering Methods¶

Two families of clustering methods are available:

Partitioning Around Medoids (PAM) (method='pam') is the default and generally recommended method. PAM identifies medoid sequences — actual sequences from the data that best represent each cluster center. Unlike k-means (which uses abstract centroids), PAM's medoids are real, interpretable sequences that researchers can examine directly. PAM is also more robust to outliers than centroid-based methods.

Hierarchical clustering builds a tree-like dendrogram by iteratively merging the most similar sequences or clusters. The tree is then cut at the desired number of clusters. Several linkage methods control how inter-cluster distances are computed:

  • 'complete' — maximum distance between any pair across clusters (produces compact, spherical clusters)
  • 'average' — mean distance between all pairs across clusters (balanced approach)
  • 'ward.D' / 'ward.D2' — minimizes within-cluster variance (tends to produce equal-sized clusters)
  • 'single' — minimum distance between any pair across clusters (can produce elongated, chain-like clusters)

The choice of method and distance metric can substantially affect results. Comparing multiple configurations and evaluating silhouette scores helps identify the most meaningful clustering for your data.

In [48]:
# Compare distance metrics (using first 200 sequences for slower metrics)
subset = prepared_data.sequence_data.iloc[:200]
print("Distance metric comparison (PAM, k=3, n=200):")
for metric in ["hamming", "lcs", "osa"]:
    result = tna.cluster_sequences(subset, k=3, dissimilarity=metric)
    print(f"  {metric:>7s}: sizes={result.sizes}, silhouette={result.silhouette:.4f}")

print("\nLinkage method comparison (Hamming, k=3, full data):")
for method in ["pam", "complete", "average", "ward.D2"]:
    result = tna.cluster_sequences(prepared_data, k=3, method=method)
    print(f"  {method:>8s}: sizes={result.sizes}, silhouette={result.silhouette:.4f}")
Distance metric comparison (PAM, k=3, n=200):
  hamming: sizes=[77 53 70], silhouette=0.1372
      lcs: sizes=[74 80 46], silhouette=0.2048
      osa: sizes=[78 76 46], silhouette=0.1496

Linkage method comparison (Hamming, k=3, full data):
       pam: sizes=[893 713 394], silhouette=0.1904
  complete: sizes=[1639  108  253], silhouette=0.2831
   average: sizes=[1768  230    2], silhouette=0.3064
   ward.D2: sizes=[1130  377  493], silhouette=0.2354

13.4 Choosing the Number of Clusters¶

Selecting the appropriate number of clusters is a critical decision. The silhouette score measures how well each sequence fits its assigned cluster compared to the nearest alternative cluster. Values range from -1 (poor fit) to +1 (excellent fit), with higher mean silhouette scores indicating better-separated, more cohesive clusters.

A practical approach is to compute silhouette scores for a range of k values and select the k that maximizes the score — or the k where the score plateaus, indicating diminishing returns from additional clusters. Domain knowledge should also inform the decision: the identified clusters should be interpretable and meaningful in the research context.

In [49]:
# Sweep k values and compare silhouette scores
print("Silhouette scores for different k values:")
for k in range(2, 6):
    result = tna.cluster_sequences(prepared_data, k=k)
    print(f"  k={k}: silhouette={result.silhouette:.4f}, sizes={result.sizes}")
Silhouette scores for different k values:
  k=2: silhouette=0.1839, sizes=[ 982 1018]
  k=3: silhouette=0.1904, sizes=[893 713 394]
  k=4: silhouette=0.1161, sizes=[606 455 351 588]
  k=5: silhouette=0.1277, sizes=[330 475 220 657 318]

13.5 Building TNA Models per Tactic¶

The most powerful application of sequence clustering in TNA is using the discovered tactics as grouping variables to build separate transition networks per cluster. This reveals how transition dynamics differ across behavioral strategies — for example, whether learners in a "monitoring-heavy" tactic show different transition patterns than those in a "planning-heavy" tactic.

The workflow is straightforward: cluster the sequences, assign each sequence to its tactic, and use group_tna() with the tactic labels as the grouping variable. All standard TNA functions — centralities, pruning, communities, bootstrapping, permutation testing — then work seamlessly on the tactic-based group model.
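Conceptually, what happens per tactic is that transitions are counted within each group of sequences and row-normalized into a relative transition matrix. The toy sketch below illustrates this idea with pandas; it is not the package's implementation, which additionally handles state ordering, missing values, and alternative scaling options.

```python
import numpy as np
import pandas as pd

def transition_matrix(sequences, states):
    """Row-normalized (relative) transition matrix from a list of sequences."""
    idx = {s: i for i, s in enumerate(states)}
    counts = np.zeros((len(states), len(states)))
    for seq in sequences:
        for a, b in zip(seq, seq[1:]):   # consecutive event pairs
            counts[idx[a], idx[b]] += 1
    rows = counts.sum(axis=1, keepdims=True)
    probs = np.divide(counts, rows, out=np.zeros_like(counts), where=rows > 0)
    return pd.DataFrame(probs, index=states, columns=states)

# Hypothetical toy sequences, labeled with a tactic each
data = {
    "Tactic 1": [["plan", "discuss", "consensus"], ["plan", "consensus", "plan"]],
    "Tactic 2": [["monitor", "monitor", "adapt"]],
}
states = ["plan", "discuss", "consensus", "monitor", "adapt"]
for tactic, seqs in data.items():
    print(tactic)
    print(transition_matrix(seqs, states).round(2))
```

Building one such matrix per tactic and comparing them is exactly what the grouped model enables, with all downstream TNA machinery attached.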

In [50]:
# Step 1: Cluster sequences into tactics
clust = tna.cluster_sequences(prepared_data, k=3, dissimilarity="hamming", method="pam")

# Step 2: Add tactic labels to the sequence data
tactic_data = prepared_data.sequence_data.copy()
tactic_data["Tactic"] = [f"Tactic {c}" for c in clust.assignments]

# Step 3: Build a TNA model for each tactic
tactic_model = tna.group_tna(tactic_data, group="Tactic")
print(tactic_model)
print()
tactic_model.summary()
GroupTNA with 3 groups:
  Tactic 1: 9 states, 78 edges
  Tactic 2: 9 states, 78 edges
  Tactic 3: 9 states, 76 edges

Out[50]:
group     n_states  type      scaling  n_edges  density   mean_weight  max_weight  has_self_loops
Tactic 1  9         relative  []       78       0.962963  0.115385     0.476440    True
Tactic 2  9         relative  []       78       0.962963  0.115385     0.492308    True
Tactic 3  9         relative  []       76       0.938272  0.118421     0.542373    True
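The summary values can be sanity-checked by hand. With self-loops allowed, a 9-state directed network has 9 × 9 = 81 possible edges, so density is n_edges / 81; and because relative scaling makes each of the 9 rows sum to 1, the total edge weight is 9 and the mean weight is 9 / n_edges. This interpretation is an inference from the table values, not a quote of the package's definitions, but it reproduces them exactly:

```python
n_states = 9
for tactic, n_edges in [("Tactic 1", 78), ("Tactic 2", 78), ("Tactic 3", 76)]:
    density = n_edges / n_states**2   # self-loop slots count as possible edges
    mean_weight = n_states / n_edges  # rows of a relative matrix each sum to 1
    print(f"{tactic}: density={density:.6f}, mean_weight={mean_weight:.6f}")
# Tactic 1: density=0.962963, mean_weight=0.115385
# Tactic 2: density=0.962963, mean_weight=0.115385
# Tactic 3: density=0.938272, mean_weight=0.118421
```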
In [51]:
# Compare transition networks across tactics
tna.plot_network(tactic_model, minimum=0.05, cut=0.1)
plt.show()
[Figure: side-by-side transition networks for Tactic 1, Tactic 2, and Tactic 3]
In [52]:
# Centralities per tactic — which states are central in each behavioral strategy?
tactic_cent = tna.centralities(tactic_model, measures=["OutStrength", "InStrength", "Betweenness"])
tactic_cent
Out[52]:
state      group     OutStrength  InStrength  Betweenness
adapt Tactic 1 1.000000 0.398774 1.0
cohesion Tactic 1 0.979058 0.893311 0.0
consensus Tactic 1 0.917638 2.569952 30.0
coregulate Tactic 1 0.979112 0.572448 0.0
discuss Tactic 1 0.802787 1.273517 16.0
emotion Tactic 1 0.928025 0.991482 5.0
monitor Tactic 1 0.971989 0.362192 0.0
plan Tactic 1 0.650228 0.974802 9.0
synthesis Tactic 1 1.000000 0.192359 7.0
adapt Tactic 2 1.000000 0.330303 1.0
cohesion Tactic 2 0.966667 0.817223 0.0
consensus Tactic 2 0.929478 2.590660 30.0
coregulate Tactic 2 0.978070 0.582732 0.0
discuss Tactic 2 0.794239 1.160634 16.0
emotion Tactic 2 0.913286 0.862658 5.0
monitor Tactic 2 0.985600 0.330461 0.0
plan Tactic 2 0.606618 1.320666 11.0
synthesis Tactic 2 1.000000 0.178620 7.0
adapt Tactic 3 1.000000 0.326030 1.0
cohesion Tactic 3 0.977486 0.746947 0.0
consensus Tactic 3 0.903445 2.831315 30.0
coregulate Tactic 3 0.973333 0.542749 6.0
discuss Tactic 3 0.820805 1.175419 13.0
emotion Tactic 3 0.933118 0.877462 5.0
monitor Tactic 3 0.984479 0.354562 0.0
plan Tactic 3 0.640580 1.167881 3.0
synthesis Tactic 3 1.000000 0.210879 7.0

14. Complete Workflow at a Glance¶

The following code summarizes the full TNA analysis pipeline. This can serve as a template for your own analyses:

import tna
import pandas as pd

# 1. Load and prepare data
my_data = pd.read_csv("your_data.csv")
prepared = tna.prepare_data(my_data, action="event", actor="user_id", time="timestamp")

# 2. Build model
model = tna.tna(prepared)

# 3. Visualize
tna.plot_network(model, minimum=0.05, cut=0.1)
tna.plot_histogram(model)
tna.plot_frequencies(model)

# 4. Prune
pruned = tna.prune(model, threshold=0.05)
tna.plot_network(pruned, cut=0.1)

# 5. Cliques
print(tna.cliques(model, size=2, threshold=0.1))
print(tna.cliques(model, size=3, threshold=0.05))

# 6. Centralities
tna.plot_centralities(tna.centralities(model))
tna.plot_network(tna.betweenness_network(model), cut=0.1)

# 7. Communities
tna.plot_communities(tna.communities(model), cut=0.1)

# 8. Bootstrap
boot = tna.bootstrap_tna(model, iter=1000, level=0.05, seed=265)
tna.plot_network(boot.model, cut=0.1)

# 9. Sequences
tna.plot_sequences(prepared)

# 10. Group models (from metadata column)
gm = tna.group_tna(prepared, group="achievement")
tna.plot_network(gm)                    # side-by-side networks
tna.centralities(gm)                    # centralities with group column
tna.prune(gm, threshold=0.05)           # prune each group
tna.permutation_test(gm["A"], gm["B"])  # compare two groups
tna.compare_sequences(gm)               # compare sequence patterns
tna.compare_sequences(gm, test=True)    # with permutation test

# 11. Sequence clustering (tactics)
clust = tna.cluster_sequences(prepared, k=3, dissimilarity="hamming")
tactic_data = prepared.sequence_data.copy()
tactic_data["Tactic"] = [f"Tactic {c}" for c in clust.assignments]
tactic_model = tna.group_tna(tactic_data, group="Tactic")
tna.plot_network(tactic_model)          # per-tactic networks

# 12. Import one-hot encoded data
wide = tna.import_onehot(onehot_df, cols=["State_A", "State_B"], actor="id")
model_oh = tna.tna(wide)

For more information, see the TNA package documentation and the R TNA tutorial by Saqr & López-Pernas.