{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyOPlCR4Xyzt0objFeCNDNUM", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 81 }, "id": "ifVSETFJpJyL", "outputId": "30b0616c-71a0-4271-de36-6c99a6bc98d6" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Degree of freedom 1 Degree of freedom 2 T-test p-value\n", "0 8 29 1.556304 0.136493" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Degree of freedom 1Degree of freedom 2T-testp-value
08291.5563040.136493
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"pd\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"Degree of freedom 1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 8,\n \"max\": 8,\n \"num_unique_values\": 1,\n \"samples\": [\n 8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Degree of freedom 2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 29,\n \"max\": 29,\n \"num_unique_values\": 1,\n \"samples\": [\n 29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"T-test\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 1.556304247842829,\n \"max\": 1.556304247842829,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.556304247842829\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"p-value\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.13649272437710608,\n \"max\": 0.13649272437710608,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.13649272437710608\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 1 } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as stats\n", "\n", "\n", "def compute_t_test():\n", " # Create synthetic data for testing.\n", " group1 = np.random.normal(0.7, 0.4, size=9)\n", " group2 = np.random.normal(0.5, 0.4, size=30)\n", "\n", " # Compute mean, variance and size of groups.\n", " variance1 = np.var(group1, ddof=1)\n", " variance2 = np.var(group2, ddof=1)\n", "\n", " df1 = len(group1) - 1\n", " df2 = len(group2) - 1\n", "\n", " mean1 = np.mean(group1)\n", " mean2 = np.mean(group2)\n", "\n", " size1 = len(group1)\n", " size2 = len(group2)\n", "\n", " # T-test for means of two independent samples from descriptive statistics.\n", " # This is a test for the null hypothesis that two independent samples have identical average (expected) values.\n", " t_test, p_value = stats.ttest_ind_from_stats(\n", " mean1=mean1, std1=np.sqrt(variance1), nobs1=size1,\n", " mean2=mean2, std2=np.sqrt(variance2), nobs2=size2,\n", " equal_var=False,\n", " )\n", "\n", " # Collect results.\n", " scores = {\n", " \"Degree of freedom 1\": df1,\n", " \"Degree of freedom 2\": df2,\n", " \"T-test\": t_test,\n", " \"p-value\": p_value,\n", " }\n", " return scores\n", "\n", "\n", "scores = compute_t_test()\n", "pd.DataFrame(scores, index=[0])" ] }, { "cell_type": "code", "source": [ "%timeit compute_t_test()" ], "metadata": { "id": "9kb8JmaP_WwW", "outputId": "3bc961e6-6d53-4791-d595-013f7976157a", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "186 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" ] } ] }, { "cell_type": "code", "source": [ "n = 0\n", "m = 10000\n", "for _ in range(m):\n", " scores = compute_t_test()\n", " n += int(scores[\"p-value\"] < 0.05)\n", "n/m" ], "metadata": { "id": "JtlHWmpS9xW7", "outputId": "5834083e-a686-402a-911e-04abf7957f38", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.2266" ] }, "metadata": {}, "execution_count": 3 } ] } ] }