{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOPlCR4Xyzt0objFeCNDNUM",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/carlthome/0ca8e2624654358c3e9442dd9d089a1d/welch-s-t-test-for-independent-samples.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81
        },
        "id": "ifVSETFJpJyL",
        "outputId": "30b0616c-71a0-4271-de36-6c99a6bc98d6"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   Degree of freedom 1  Degree of freedom 2    T-test   p-value\n",
              "0                    8                   29  1.556304  0.136493"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-53db22ef-667e-460f-b7ac-c4f7c3a48695\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Degree of freedom 1</th>\n",
              "      <th>Degree of freedom 2</th>\n",
              "      <th>T-test</th>\n",
              "      <th>p-value</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>8</td>\n",
              "      <td>29</td>\n",
              "      <td>1.556304</td>\n",
              "      <td>0.136493</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-53db22ef-667e-460f-b7ac-c4f7c3a48695')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-53db22ef-667e-460f-b7ac-c4f7c3a48695 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-53db22ef-667e-460f-b7ac-c4f7c3a48695');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "summary": "{\n  \"name\": \"pd\",\n  \"rows\": 1,\n  \"fields\": [\n    {\n      \"column\": \"Degree of freedom 1\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": null,\n        \"min\": 8,\n        \"max\": 8,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          8\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Degree of freedom 2\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": null,\n        \"min\": 29,\n        \"max\": 29,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          29\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"T-test\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": null,\n        \"min\": 1.556304247842829,\n        \"max\": 1.556304247842829,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          1.556304247842829\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"p-value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": null,\n        \"min\": 0.13649272437710608,\n        \"max\": 0.13649272437710608,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          0.13649272437710608\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 1
        }
      ],
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import scipy.stats as stats\n",
        "\n",
        "\n",
        "def compute_t_test():\n",
        "    # Create synthetic data for testing.\n",
        "    group1 = np.random.normal(0.7, 0.4, size=9)\n",
        "    group2 = np.random.normal(0.5, 0.4, size=30)\n",
        "\n",
        "    # Compute mean, variance and size of groups.\n",
        "    variance1 = np.var(group1, ddof=1)\n",
        "    variance2 = np.var(group2, ddof=1)\n",
        "\n",
        "    df1 = len(group1) - 1\n",
        "    df2 = len(group2) - 1\n",
        "\n",
        "    mean1 = np.mean(group1)\n",
        "    mean2 = np.mean(group2)\n",
        "\n",
        "    size1 = len(group1)\n",
        "    size2 = len(group2)\n",
        "\n",
        "    # T-test for means of two independent samples from descriptive statistics.\n",
        "    # This is a test for the null hypothesis that two independent samples have identical average (expected) values.\n",
        "    t_test, p_value = stats.ttest_ind_from_stats(\n",
        "        mean1=mean1, std1=np.sqrt(variance1), nobs1=size1,\n",
        "        mean2=mean2, std2=np.sqrt(variance2), nobs2=size2,\n",
        "        equal_var=False,\n",
        "    )\n",
        "\n",
        "    # Collect results.\n",
        "    scores = {\n",
        "        \"Degree of freedom 1\": df1,\n",
        "        \"Degree of freedom 2\": df2,\n",
        "        \"T-test\": t_test,\n",
        "        \"p-value\": p_value,\n",
        "    }\n",
        "    return scores\n",
        "\n",
        "\n",
        "scores = compute_t_test()\n",
        "pd.DataFrame(scores, index=[0])"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit compute_t_test()"
      ],
      "metadata": {
        "id": "9kb8JmaP_WwW",
        "outputId": "3bc961e6-6d53-4791-d595-013f7976157a",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "186 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "n = 0\n",
        "m = 10000\n",
        "for _ in range(m):\n",
        "    scores = compute_t_test()\n",
        "    n += int(scores[\"p-value\"] < 0.05)\n",
        "n/m"
      ],
      "metadata": {
        "id": "JtlHWmpS9xW7",
        "outputId": "5834083e-a686-402a-911e-04abf7957f38",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.2266"
            ]
          },
          "metadata": {},
          "execution_count": 3
        }
      ]
    }
  ]
}