Simple Linear Regression¶
Import Libraries¶
# import libraries
import pandas as pd                        # data handling
from statsmodels.formula.api import ols    # formula interface for ordinary least squares
Load and Verify Dataset¶
# load dataset into a dataframe, rounding values to one decimal place
df = pd.read_csv('data/edincome.csv').round(1)
# verify first few records
df.head()
|   | Education | Income |
|---|---|---|
| 0 | 10.0 | 32.1 |
| 1 | 10.4 | 36.5 |
| 2 | 10.7 | 23.9 |
| 3 | 11.1 | 52.3 |
| 4 | 11.4 | 30.2 |
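Before fitting, a quick scatter plot can confirm that the relationship between the two columns is roughly linear. This is an optional check, a sketch rather than part of the original notebook, and it assumes matplotlib is installed (it is not imported above).

# optional visual check of the Education/Income relationship (assumes matplotlib is available)
import matplotlib.pyplot as plt
plt.scatter(df['Education'], df['Income'])
plt.xlabel('Education')
plt.ylabel('Income')
plt.title('Income vs. Education')
plt.show()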
Run Regression¶
# fit the simple linear regression: Income as a linear function of Education
slr = ols('Income ~ Education', data=df).fit()
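For comparison, the same model can be fit without the formula interface by adding an explicit intercept column. This is a sketch of the equivalent array-based call in statsmodels.api, not part of the original notebook; the coefficients should match slr.params.

# equivalent fit via the array interface (sketch)
import statsmodels.api as sm
X = sm.add_constant(df['Education'])    # prepend a constant column for the intercept
slr_alt = sm.OLS(df['Income'], X).fit()
print(slr_alt.params)                   # should match the formula-based fit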
Review Results and Evaluate Model¶
print(slr.params)
Intercept -23.176365
Education 5.574237
dtype: float64
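These parameters define the fitted line Income ≈ -23.18 + 5.57 × Education, so each additional unit of Education is associated with roughly 5.57 more units of Income. As a quick sanity check (a sketch, not part of the original notebook), a prediction can be reproduced by hand from the coefficients:

# manual prediction from the fitted coefficients: intercept + slope * Education
intercept, slope = slr.params['Intercept'], slr.params['Education']
print(round(intercept + slope * 12, 1))   # ≈ 43.7, matching slr.predict() below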
slr.summary()
| Dep. Variable: | Income | R-squared: | 0.878 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.875 |
| Method: | Least Squares | F-statistic: | 238.4 |
| Date: | Mon, 27 Dec 2021 | Prob (F-statistic): | 1.17e-16 |
| Time: | 16:35:48 | Log-Likelihood: | -119.61 |
| No. Observations: | 35 | AIC: | 243.2 |
| Df Residuals: | 33 | BIC: | 246.3 |
| Df Model: | 1 | | |
| Covariance Type: | nonrobust | | |

| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| Intercept | -23.1764 | 5.918 | -3.917 | 0.000 | -35.216 | -11.137 |
| Education | 5.5742 | 0.361 | 15.440 | 0.000 | 4.840 | 6.309 |

| Omnibus: | 2.854 | Durbin-Watson: | 2.535 |
|---|---|---|---|
| Prob(Omnibus): | 0.240 | Jarque-Bera (JB): | 1.726 |
| Skew: | 0.502 | Prob(JB): | 0.422 |
| Kurtosis: | 3.420 | Cond. No. | 75.8 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# coefficient of determination (R-squared)
print(slr.rsquared)
0.8784032808796992
# mean square attributed to the model (explained sum of squares / model df)
print(slr.mse_model)
13766.191657863852
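An R-squared of roughly 0.88 indicates that Education accounts for about 88% of the variance in Income. Note that mse_model is the mean square attributed to the regression; for a sense of typical prediction error, the residual mean square is usually more relevant. A short sketch using attributes of the same results object (numpy assumed available):

# residual mean square and root mean squared error (sketch)
import numpy as np
print(slr.mse_resid)            # SSE / residual degrees of freedom
print(np.sqrt(slr.mse_resid))   # RMSE, in the same units as Income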
Generate Predictions¶
# predict new points
data = {'Education': [12,16,18]}
df_predict = pd.DataFrame(data).round(1)
df_predict['Income'] = slr.predict(df_predict).round(1)
df_predict
|   | Education | Income |
|---|---|---|
| 0 | 12 | 43.7 |
| 1 | 16 | 66.0 |
| 2 | 18 | 77.2 |
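Point predictions alone do not convey uncertainty. As a sketch (not part of the original notebook), the fitted model can also report confidence and prediction intervals for the new points via get_prediction:

# interval estimates for the new Education values (sketch)
pred = slr.get_prediction(df_predict[['Education']])
print(pred.summary_frame(alpha=0.05).round(1))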