# Simple Linear Regression

## Import Libraries

In [1]:
# import libraries
import pandas as pd
from statsmodels.formula.api import ols

## Load and Verify Dataset

In [2]:
# Read the education/income CSV into a dataframe, rounding every numeric
# value to one decimal place as it is loaded.
df = (
    pd.read_csv('data/edincome.csv')
    .round(decimals=1)
)

In [3]:
# Sanity-check the load by previewing the first five rows.
df.head(n=5)

Unnamed: 0,Education,Income
0,10.0,32.1
1,10.4,36.5
2,10.7,23.9
3,11.1,52.3
4,11.4,30.2


## Run Regression

In [4]:
# Fit an ordinary least squares model with Income as the response and
# Education as the single predictor (an intercept is added by the formula).
slr = ols(formula='Income ~ Education', data=df).fit()

## Review Results and Evaluate Model

In [5]:
# Print the fitted coefficients: the intercept and the slope on Education.
print(slr.params)

Intercept   -23.176365
Education     5.574237
dtype: float64


In [6]:
# Display the full regression report: fit statistics, the coefficient table
# with standard errors and confidence intervals, and residual diagnostics.
slr.summary()

0,1,2,3
Dep. Variable:,Income,R-squared:,0.878
Model:,OLS,Adj. R-squared:,0.875
Method:,Least Squares,F-statistic:,238.4
Date:,"Mon, 06 Dec 2021",Prob (F-statistic):,1.17e-16
Time:,12:57:46,Log-Likelihood:,-119.61
No. Observations:,35,AIC:,243.2
Df Residuals:,33,BIC:,246.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-23.1764,5.918,-3.917,0.000,-35.216,-11.137
Education,5.5742,0.361,15.440,0.000,4.840,6.309

0,1,2,3
Omnibus:,2.854,Durbin-Watson:,2.535
Prob(Omnibus):,0.24,Jarque-Bera (JB):,1.726
Skew:,0.502,Prob(JB):,0.422
Kurtosis:,3.42,Cond. No.,75.8


In [7]:
# Print the unrounded R-squared (the summary table shows it to 3 decimals).
print(slr.rsquared)

0.8784032808796992


In [8]:
# NOTE(review): mse_model is the regression (explained) mean square, i.e. the
# explained sum of squares divided by Df Model. It is the numerator of the
# F-statistic, not the mean squared error of the residuals. If the goal is to
# report the model's prediction error, slr.mse_resid is the attribute to use
# -- confirm intent.
print(slr.mse_model)

13766.191657863852


## Generate Predictions

In [9]:
# Education values to score with the fitted model, held in a one-column
# dataframe whose column name matches the predictor in the formula.
data = dict(Education=[12, 16, 18])
df_predict = pd.DataFrame(data=data).round(decimals=1)

In [10]:
# Score the new Education values and store the predictions, rounded to one
# decimal place, in a new Income column next to the inputs.
df_predict['Income'] = slr.predict(exog=df_predict).round(decimals=1)

In [11]:
# Display the Education inputs alongside their predicted Income.
df_predict

Unnamed: 0,Education,Income
0,12,43.7
1,16,66.0
2,18,77.2
