Welch's t-test for independent samples

2024-03-27 16:12

Carl Thomé

Tags:

test
welch

In [1]:

import numpy as np
import pandas as pd
import scipy.stats as stats


def compute_t_test(
    loc1=0.7, scale1=0.4, size1=9,
    loc2=0.5, scale2=0.4, size2=30,
    rng=None,
):
    """Simulate two independent normal samples and run Welch's t-test on them.

    Parameters
    ----------
    loc1, scale1, size1 : float, float, int
        Mean, standard deviation and number of observations of the first
        simulated group (defaults: 0.7, 0.4, 9).
    loc2, scale2, size2 : float, float, int
        Same for the second simulated group (defaults: 0.5, 0.4, 30).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's legacy global generator
        is used, so a call without arguments samples exactly as before and
        is still controlled by ``np.random.seed``.

    Returns
    -------
    dict
        ``"Degree of freedom 1"`` / ``"Degree of freedom 2"``: per-group
        ``size - 1`` (these are NOT the Welch-Satterthwaite degrees of
        freedom the test uses internally); ``"T-test"``: the t statistic;
        ``"p-value"``: the p-value reported by SciPy.

    Raises
    ------
    ValueError
        If either group has fewer than two observations, because the sample
        standard deviation (``ddof=1``) is undefined in that case.
    """
    if size1 < 2 or size2 < 2:
        raise ValueError(
            "each group needs at least 2 observations, got "
            f"size1={size1}, size2={size2}"
        )

    # Create synthetic data for testing. `np.random` exposes the same
    # `normal(loc, scale, size)` call as a Generator/RandomState instance.
    sampler = np.random if rng is None else rng
    group1 = sampler.normal(loc1, scale1, size=size1)
    group2 = sampler.normal(loc2, scale2, size=size2)

    # Descriptive statistics of each group (unbiased standard deviation).
    mean1, mean2 = np.mean(group1), np.mean(group2)
    std1 = np.std(group1, ddof=1)
    std2 = np.std(group2, ddof=1)

    # T-test for the means of two independent samples from descriptive
    # statistics. The null hypothesis is that both samples have identical
    # expected values; equal_var=False selects Welch's variant, which does not
    # assume equal population variances.
    t_test, p_value = stats.ttest_ind_from_stats(
        mean1=mean1, std1=std1, nobs1=size1,
        mean2=mean2, std2=std2, nobs2=size2,
        equal_var=False,
    )

    # Collect results.
    return {
        "Degree of freedom 1": size1 - 1,
        "Degree of freedom 2": size2 - 1,
        "T-test": t_test,
        "p-value": p_value,
    }


# Seed NumPy's legacy global RNG, which compute_t_test samples from when
# called without arguments, so this single-run table is reproducible under
# Restart Kernel -> Run All.
np.random.seed(0)

scores = compute_t_test()
# The dict holds scalars only, so pandas needs an explicit one-row index.
pd.DataFrame(scores, index=[0])

Out[1]:

	Degree of freedom 1	Degree of freedom 2	T-test	p-value
0	8	29	1.556304	0.136493

In [2]:

# Time one call of compute_t_test(), i.e. drawing both synthetic groups and
# running the t-test on their summary statistics.
%timeit compute_t_test()

186 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [3]:

# Monte Carlo estimate of how often the test rejects the null hypothesis at
# level `alpha` for the two groups simulated by compute_t_test(). Because the
# groups are drawn with different true means, this fraction is the empirical
# power of the test for that setup.
# Re-seed here: the number of draws consumed by the %timeit cell above is not
# fixed, so the global RNG state at this point would otherwise vary per run.
np.random.seed(0)
alpha = 0.05  # significance level
m = 10000  # number of simulated experiments
n = 0  # number of experiments with p-value < alpha
for _ in range(m):
    scores = compute_t_test()
    n += int(scores["p-value"] < alpha)
n / m

Out[3]:

0.2266

Comments