{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Multiple Linear Regression\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preliminaries\n", "As before, we need to start by:\n", "1. Loading the Pandas and Statsmodels libraries\n", "2. Reading the data from a CSV file\n", "3. Fixing the column names using Pandas' `rename()` method\n", "4. Converting the \"AirEntrain\" column to a categorical variable" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | No | \n", "Cement | \n", "Slag | \n", "FlyAsh | \n", "Water | \n", "SP | \n", "CoarseAgg | \n", "FineAgg | \n", "AirEntrain | \n", "Strength | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "273.0 | \n", "82.0 | \n", "105.0 | \n", "210.0 | \n", "9.0 | \n", "904.0 | \n", "680.0 | \n", "No | \n", "34.990 | \n", "
1 | \n", "2 | \n", "163.0 | \n", "149.0 | \n", "191.0 | \n", "180.0 | \n", "12.0 | \n", "843.0 | \n", "746.0 | \n", "Yes | \n", "32.272 | \n", "
2 | \n", "3 | \n", "162.0 | \n", "148.0 | \n", "191.0 | \n", "179.0 | \n", "16.0 | \n", "840.0 | \n", "743.0 | \n", "Yes | \n", "35.450 | \n", "
Dep. Variable: | Strength | R-squared: | 0.827 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.812 | \n", "
Method: | Least Squares | F-statistic: | 56.21 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 1.68e-32 | \n", "
Time: | 07:11:31 | Log-Likelihood: | -284.49 | \n", "
No. Observations: | 103 | AIC: | 587.0 | \n", "
Df Residuals: | 94 | BIC: | 610.7 | \n", "
Df Model: | 8 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
const | 115.2834 | 142.786 | 0.807 | 0.421 | -168.222 | 398.789 | \n", "
No | -0.0077 | 0.021 | -0.372 | 0.711 | -0.049 | 0.033 | \n", "
Cement | 0.0826 | 0.047 | 1.758 | 0.082 | -0.011 | 0.176 | \n", "
Slag | -0.0225 | 0.065 | -0.346 | 0.730 | -0.152 | 0.107 | \n", "
FlyAsh | 0.0668 | 0.048 | 1.380 | 0.171 | -0.029 | 0.163 | \n", "
Water | -0.2165 | 0.142 | -1.520 | 0.132 | -0.499 | 0.066 | \n", "
SP | 0.2518 | 0.213 | 1.181 | 0.241 | -0.172 | 0.675 | \n", "
CoarseAgg | -0.0479 | 0.056 | -0.857 | 0.393 | -0.159 | 0.063 | \n", "
FineAgg | -0.0356 | 0.057 | -0.622 | 0.536 | -0.149 | 0.078 | \n", "
Omnibus: | 2.168 | Durbin-Watson: | 1.715 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.338 | Jarque-Bera (JB): | 2.183 | \n", "
Skew: | -0.309 | Prob(JB): | 0.336 | \n", "
Kurtosis: | 2.644 | Cond. No. | 4.36e+05 | \n", "
\n", " | No | \n", "Yes | \n", "
---|---|---|
0 | \n", "1 | \n", "0 | \n", "
1 | \n", "0 | \n", "1 | \n", "
2 | \n", "0 | \n", "1 | \n", "
3 | \n", "1 | \n", "0 | \n", "
4 | \n", "1 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "
98 | \n", "1 | \n", "0 | \n", "
99 | \n", "1 | \n", "0 | \n", "
100 | \n", "0 | \n", "1 | \n", "
101 | \n", "0 | \n", "1 | \n", "
102 | \n", "1 | \n", "0 | \n", "
103 rows × 2 columns
\n", "\n", " | AirEntrain_Yes | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "1 | \n", "
2 | \n", "1 | \n", "
\n", " | AirEntrain_Yes | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "1 | \n", "
2 | \n", "1 | \n", "
\n", " | const | \n", "No | \n", "Cement | \n", "Slag | \n", "FlyAsh | \n", "Water | \n", "SP | \n", "CoarseAgg | \n", "FineAgg | \n", "AirEntrain_Yes | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1.0 | \n", "1 | \n", "273.0 | \n", "82.0 | \n", "105.0 | \n", "210.0 | \n", "9.0 | \n", "904.0 | \n", "680.0 | \n", "0 | \n", "
1 | \n", "1.0 | \n", "2 | \n", "163.0 | \n", "149.0 | \n", "191.0 | \n", "180.0 | \n", "12.0 | \n", "843.0 | \n", "746.0 | \n", "1 | \n", "
2 | \n", "1.0 | \n", "3 | \n", "162.0 | \n", "148.0 | \n", "191.0 | \n", "179.0 | \n", "16.0 | \n", "840.0 | \n", "743.0 | \n", "1 | \n", "
3 | \n", "1.0 | \n", "4 | \n", "162.0 | \n", "148.0 | \n", "190.0 | \n", "179.0 | \n", "19.0 | \n", "838.0 | \n", "741.0 | \n", "0 | \n", "
4 | \n", "1.0 | \n", "5 | \n", "154.0 | \n", "112.0 | \n", "144.0 | \n", "220.0 | \n", "10.0 | \n", "923.0 | \n", "658.0 | \n", "0 | \n", "
Dep. Variable: | Strength | R-squared: | 0.924 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.916 | \n", "
Method: | Least Squares | F-statistic: | 125.1 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 5.83e-48 | \n", "
Time: | 07:11:31 | Log-Likelihood: | -242.38 | \n", "
No. Observations: | 103 | AIC: | 504.8 | \n", "
Df Residuals: | 93 | BIC: | 531.1 | \n", "
Df Model: | 9 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
const | 41.5005 | 95.617 | 0.434 | 0.665 | -148.375 | 231.376 | \n", "
No | -0.0173 | 0.014 | -1.251 | 0.214 | -0.045 | 0.010 | \n", "
Cement | 0.0962 | 0.031 | 3.063 | 0.003 | 0.034 | 0.159 | \n", "
Slag | 0.0157 | 0.044 | 0.359 | 0.720 | -0.071 | 0.102 | \n", "
FlyAsh | 0.0869 | 0.032 | 2.684 | 0.009 | 0.023 | 0.151 | \n", "
Water | -0.1380 | 0.095 | -1.446 | 0.151 | -0.328 | 0.051 | \n", "
SP | 0.1902 | 0.143 | 1.334 | 0.186 | -0.093 | 0.473 | \n", "
CoarseAgg | -0.0160 | 0.037 | -0.428 | 0.669 | -0.090 | 0.058 | \n", "
FineAgg | -0.0021 | 0.038 | -0.053 | 0.957 | -0.078 | 0.074 | \n", "
AirEntrain_Yes | -6.0683 | 0.559 | -10.848 | 0.000 | -7.179 | -4.957 | \n", "
Omnibus: | 4.217 | Durbin-Watson: | 1.637 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.121 | Jarque-Bera (JB): | 3.635 | \n", "
Skew: | 0.351 | Prob(JB): | 0.162 | \n", "
Kurtosis: | 3.594 | Cond. No. | 4.37e+05 | \n", "
Dep. Variable: | Strength | R-squared: | 0.924 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.916 | \n", "
Method: | Least Squares | F-statistic: | 125.1 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 5.83e-48 | \n", "
Time: | 07:11:31 | Log-Likelihood: | -242.38 | \n", "
No. Observations: | 103 | AIC: | 504.8 | \n", "
Df Residuals: | 93 | BIC: | 531.1 | \n", "
Df Model: | 9 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
Intercept | 41.5005 | 95.617 | 0.434 | 0.665 | -148.375 | 231.376 | \n", "
AirEntrain[T.Yes] | -6.0683 | 0.559 | -10.848 | 0.000 | -7.179 | -4.957 | \n", "
No | -0.0173 | 0.014 | -1.251 | 0.214 | -0.045 | 0.010 | \n", "
Cement | 0.0962 | 0.031 | 3.063 | 0.003 | 0.034 | 0.159 | \n", "
Slag | 0.0157 | 0.044 | 0.359 | 0.720 | -0.071 | 0.102 | \n", "
Water | -0.1380 | 0.095 | -1.446 | 0.151 | -0.328 | 0.051 | \n", "
CoarseAgg | -0.0160 | 0.037 | -0.428 | 0.669 | -0.090 | 0.058 | \n", "
FlyAsh | 0.0869 | 0.032 | 2.684 | 0.009 | 0.023 | 0.151 | \n", "
SP | 0.1902 | 0.143 | 1.334 | 0.186 | -0.093 | 0.473 | \n", "
FineAgg | -0.0021 | 0.038 | -0.053 | 0.957 | -0.078 | 0.074 | \n", "
Omnibus: | 4.217 | Durbin-Watson: | 1.637 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.121 | Jarque-Bera (JB): | 3.635 | \n", "
Skew: | 0.351 | Prob(JB): | 0.162 | \n", "
Kurtosis: | 3.594 | Cond. No. | 4.37e+05 | \n", "
\n", " | No | \n", "Cement | \n", "Slag | \n", "FlyAsh | \n", "Water | \n", "SP | \n", "CoarseAgg | \n", "FineAgg | \n", "Strength | \n", "
---|---|---|---|---|---|---|---|---|---|
No | \n", "1.00 | \n", "-0.03 | \n", "-0.08 | \n", "0.34 | \n", "-0.14 | \n", "-0.33 | \n", "0.22 | \n", "-0.31 | \n", "0.19 | \n", "
Cement | \n", "-0.03 | \n", "1.00 | \n", "-0.24 | \n", "-0.49 | \n", "0.22 | \n", "-0.11 | \n", "-0.31 | \n", "0.06 | \n", "0.46 | \n", "
Slag | \n", "-0.08 | \n", "-0.24 | \n", "1.00 | \n", "-0.32 | \n", "-0.03 | \n", "0.31 | \n", "-0.22 | \n", "-0.18 | \n", "-0.33 | \n", "
FlyAsh | \n", "0.34 | \n", "-0.49 | \n", "-0.32 | \n", "1.00 | \n", "-0.24 | \n", "-0.14 | \n", "0.17 | \n", "-0.28 | \n", "0.41 | \n", "
Water | \n", "-0.14 | \n", "0.22 | \n", "-0.03 | \n", "-0.24 | \n", "1.00 | \n", "-0.16 | \n", "-0.60 | \n", "0.11 | \n", "-0.22 | \n", "
SP | \n", "-0.33 | \n", "-0.11 | \n", "0.31 | \n", "-0.14 | \n", "-0.16 | \n", "1.00 | \n", "-0.10 | \n", "0.06 | \n", "-0.02 | \n", "
CoarseAgg | \n", "0.22 | \n", "-0.31 | \n", "-0.22 | \n", "0.17 | \n", "-0.60 | \n", "-0.10 | \n", "1.00 | \n", "-0.49 | \n", "-0.15 | \n", "
FineAgg | \n", "-0.31 | \n", "0.06 | \n", "-0.18 | \n", "-0.28 | \n", "0.11 | \n", "0.06 | \n", "-0.49 | \n", "1.00 | \n", "-0.17 | \n", "
Strength | \n", "0.19 | \n", "0.46 | \n", "-0.33 | \n", "0.41 | \n", "-0.22 | \n", "-0.02 | \n", "-0.15 | \n", "-0.17 | \n", "1.00 | \n", "
Dep. Variable: | Strength | R-squared: | 0.924 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.917 | \n", "
Method: | Least Squares | F-statistic: | 142.2 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 4.73e-49 | \n", "
Time: | 07:12:11 | Log-Likelihood: | -242.38 | \n", "
No. Observations: | 103 | AIC: | 502.8 | \n", "
Df Residuals: | 94 | BIC: | 526.5 | \n", "
Df Model: | 8 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
const | 36.4097 | 8.674 | 4.197 | 0.000 | 19.186 | 53.633 | \n", "
No | -0.0178 | 0.011 | -1.674 | 0.097 | -0.039 | 0.003 | \n", "
Cement | 0.0978 | 0.005 | 18.070 | 0.000 | 0.087 | 0.109 | \n", "
Slag | 0.0180 | 0.006 | 2.819 | 0.006 | 0.005 | 0.031 | \n", "
FlyAsh | 0.0887 | 0.005 | 17.367 | 0.000 | 0.079 | 0.099 | \n", "
Water | -0.1330 | 0.019 | -7.131 | 0.000 | -0.170 | -0.096 | \n", "
SP | 0.1950 | 0.109 | 1.791 | 0.077 | -0.021 | 0.411 | \n", "
CoarseAgg | -0.0141 | 0.005 | -2.964 | 0.004 | -0.023 | -0.005 | \n", "
AirEntrain_Yes | -6.0707 | 0.555 | -10.946 | 0.000 | -7.172 | -4.970 | \n", "
Omnibus: | 4.255 | Durbin-Watson: | 1.637 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.119 | Jarque-Bera (JB): | 3.680 | \n", "
Skew: | 0.352 | Prob(JB): | 0.159 | \n", "
Kurtosis: | 3.601 | Cond. No. | 3.15e+04 | \n", "
Dep. Variable: | Strength | R-squared: | 0.924 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.917 | \n", "
Method: | Least Squares | F-statistic: | 142.2 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 4.73e-49 | \n", "
Time: | 07:12:11 | Log-Likelihood: | -242.38 | \n", "
No. Observations: | 103 | AIC: | 502.8 | \n", "
Df Residuals: | 94 | BIC: | 526.5 | \n", "
Df Model: | 8 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
Intercept | 36.4097 | 8.674 | 4.197 | 0.000 | 19.186 | 53.633 | \n", "
AirEntrain[T.Yes] | -6.0707 | 0.555 | -10.946 | 0.000 | -7.172 | -4.970 | \n", "
No | -0.0178 | 0.011 | -1.674 | 0.097 | -0.039 | 0.003 | \n", "
Cement | 0.0978 | 0.005 | 18.070 | 0.000 | 0.087 | 0.109 | \n", "
Slag | 0.0180 | 0.006 | 2.819 | 0.006 | 0.005 | 0.031 | \n", "
Water | -0.1330 | 0.019 | -7.131 | 0.000 | -0.170 | -0.096 | \n", "
CoarseAgg | -0.0141 | 0.005 | -2.964 | 0.004 | -0.023 | -0.005 | \n", "
FlyAsh | 0.0887 | 0.005 | 17.367 | 0.000 | 0.079 | 0.099 | \n", "
SP | 0.1950 | 0.109 | 1.791 | 0.077 | -0.021 | 0.411 | \n", "
Omnibus: | 4.255 | Durbin-Watson: | 1.637 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.119 | Jarque-Bera (JB): | 3.680 | \n", "
Skew: | 0.352 | Prob(JB): | 0.159 | \n", "
Kurtosis: | 3.601 | Cond. No. | 3.15e+04 | \n", "
\n", " | mean | \n", "std dev | \n", "
---|---|---|
const | \n", "1.0 | \n", "0.0 | \n", "
No | \n", "-0.0 | \n", "1.0 | \n", "
Cement | \n", "0.0 | \n", "1.0 | \n", "
Slag | \n", "0.0 | \n", "1.0 | \n", "
FlyAsh | \n", "-0.0 | \n", "1.0 | \n", "
Water | \n", "-0.0 | \n", "1.0 | \n", "
SP | \n", "0.0 | \n", "1.0 | \n", "
CoarseAgg | \n", "0.0 | \n", "1.0 | \n", "
AirEntrain_Yes | \n", "-0.0 | \n", "1.0 | \n", "
Dep. Variable: | Strength | R-squared: | 0.924 | \n", "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.917 | \n", "
Method: | Least Squares | F-statistic: | 142.2 | \n", "
Date: | Fri, 12 Nov 2021 | Prob (F-statistic): | 4.73e-49 | \n", "
Time: | 07:12:12 | Log-Likelihood: | -13.650 | \n", "
No. Observations: | 103 | AIC: | 45.30 | \n", "
Df Residuals: | 94 | BIC: | 69.01 | \n", "
Df Model: | 8 | \n", " | |
Covariance Type: | nonrobust | \n", " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", "|
---|---|---|---|---|---|---|
const | 1.665e-16 | 0.028 | 5.84e-15 | 1.000 | -0.057 | 0.057 | \n", "
No | -0.0575 | 0.034 | -1.674 | 0.097 | -0.126 | 0.011 | \n", "
Cement | 0.8336 | 0.046 | 18.070 | 0.000 | 0.742 | 0.925 | \n", "
Slag | 0.1175 | 0.042 | 2.819 | 0.006 | 0.035 | 0.200 | \n", "
FlyAsh | 0.8180 | 0.047 | 17.367 | 0.000 | 0.724 | 0.911 | \n", "
Water | -0.2903 | 0.041 | -7.131 | 0.000 | -0.371 | -0.209 | \n", "
SP | 0.0591 | 0.033 | 1.791 | 0.077 | -0.006 | 0.125 | \n", "
CoarseAgg | -0.1342 | 0.045 | -2.964 | 0.004 | -0.224 | -0.044 | \n", "
AirEntrain_Yes | -0.3282 | 0.030 | -10.946 | 0.000 | -0.388 | -0.269 | \n", "
Omnibus: | 4.255 | Durbin-Watson: | 1.637 | \n", "
---|---|---|---|
Prob(Omnibus): | 0.119 | Jarque-Bera (JB): | 3.680 | \n", "
Skew: | 0.352 | Prob(JB): | 0.159 | \n", "
Kurtosis: | 3.601 | Cond. No. | 4.11 | \n", "
\n", " | No | \n", "Cement | \n", "Slag | \n", "FlyAsh | \n", "Water | \n", "SP | \n", "CoarseAgg | \n", "FineAgg | \n", "AirEntrain_Yes | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "273.0 | \n", "82.0 | \n", "105.0 | \n", "210.0 | \n", "9.0 | \n", "904.0 | \n", "680.0 | \n", "0 | \n", "
1 | \n", "2 | \n", "163.0 | \n", "149.0 | \n", "191.0 | \n", "180.0 | \n", "12.0 | \n", "843.0 | \n", "746.0 | \n", "1 | \n", "
2 | \n", "3 | \n", "162.0 | \n", "148.0 | \n", "191.0 | \n", "179.0 | \n", "16.0 | \n", "840.0 | \n", "743.0 | \n", "1 | \n", "
\n", " | variable | \n", "coefficient | \n", "
---|---|---|
0 | \n", "No | \n", "-0.017346 | \n", "
1 | \n", "Cement | \n", "0.096195 | \n", "
2 | \n", "Slag | \n", "0.015681 | \n", "
3 | \n", "FlyAsh | \n", "0.086950 | \n", "
4 | \n", "Water | \n", "-0.138011 | \n", "
5 | \n", "SP | \n", "0.190158 | \n", "
6 | \n", "CoarseAgg | \n", "-0.016038 | \n", "
7 | \n", "FineAgg | \n", "-0.002053 | \n", "
8 | \n", "AirEntrain_Yes | \n", "-6.068252 | \n", "
\n", " | feature_idx | \n", "cv_scores | \n", "avg_score | \n", "feature_names | \n", "ci_bound | \n", "std_dev | \n", "std_err | \n", "
---|---|---|---|---|---|---|---|
9 | \n", "(0, 1, 2, 3, 4, 5, 6, 7, 8) | \n", "[0.5998230505243064, 0.8609825359671671, 0.776... | \n", "0.482171 | \n", "(No, Cement, Slag, FlyAsh, Water, SP, CoarseAg... | \n", "0.249123 | \n", "0.656901 | \n", "0.121983 | \n", "
8 | \n", "(0, 1, 2, 3, 5, 6, 7, 8) | \n", "[0.7198251274370655, 0.8714981991943249, 0.754... | \n", "0.538054 | \n", "(No, Cement, Slag, FlyAsh, SP, CoarseAgg, Fine... | \n", "0.174763 | \n", "0.460825 | \n", "0.085573 | \n", "
7 | \n", "(1, 2, 3, 5, 6, 7, 8) | \n", "[0.36909818696153174, 0.8029635284896339, 0.88... | \n", "0.489133 | \n", "(Cement, Slag, FlyAsh, SP, CoarseAgg, FineAgg,... | \n", "0.193566 | \n", "0.510403 | \n", "0.09478 | \n", "
6 | \n", "(1, 2, 3, 6, 7, 8) | \n", "[-0.8836565141299193, 0.706855501290562, 0.793... | \n", "0.383902 | \n", "(Cement, Slag, FlyAsh, CoarseAgg, FineAgg, Air... | \n", "0.234757 | \n", "0.619018 | \n", "0.114949 | \n", "
5 | \n", "(1, 2, 3, 7, 8) | \n", "[-1.069902143989971, 0.7714511447380333, 0.721... | \n", "0.203047 | \n", "(Cement, Slag, FlyAsh, FineAgg, AirEntrain_Yes) | \n", "0.312601 | \n", "0.824281 | \n", "0.153065 | \n", "
4 | \n", "(1, 2, 3, 8) | \n", "[-1.4776700426094087, 0.7791381638911108, 0.97... | \n", "0.1767 | \n", "(Cement, Slag, FlyAsh, AirEntrain_Yes) | \n", "0.364705 | \n", "0.961673 | \n", "0.178578 | \n", "
3 | \n", "(1, 3, 8) | \n", "[-2.8515869804653056, 0.7005985979007634, 0.85... | \n", "-0.118087 | \n", "(Cement, FlyAsh, AirEntrain_Yes) | \n", "0.610482 | \n", "1.609749 | \n", "0.298923 | \n", "
2 | \n", "(1, 3) | \n", "[-3.062125555844749, 0.2731746363441382, 0.902... | \n", "-0.732105 | \n", "(Cement, FlyAsh) | \n", "0.797207 | \n", "2.102115 | \n", "0.390353 | \n", "
1 | \n", "(1,) | \n", "[-3.449038765590842, 0.011736536060135583, 0.5... | \n", "-3.406733 | \n", "(Cement,) | \n", "2.042631 | \n", "5.386109 | \n", "1.000175 | \n", "