새싹/TIL

[핀테커스] 231019 Bayes Theorem - 실습

jykim23 2023. 10. 20. 16:57

 

 

import pandas as pd

def bayesian_table(table, prior, likelihood):
    """Run one Bayesian update on *table* in place and return it.

    The columns ``prior``, ``likelihood``, ``joint`` and ``posterior`` are
    written to (or overwritten in) *table*; each row is one hypothesis.

    Args:
        table: DataFrame indexed by hypothesis. It is mutated, not copied.
        prior: Prior used on the first update only. When *table* already
            has a ``posterior`` column, that column becomes the new prior
            and this argument is ignored.
        likelihood: Likelihood of the observation under each hypothesis,
            assigned to the ``likelihood`` column.

    Returns:
        The same *table* object, after printing it to stdout.
    """
    # Chain successive updates: yesterday's posterior is today's prior.
    already_updated = 'posterior' in table.columns
    table['prior'] = table['posterior'] if already_updated else prior
    table['likelihood'] = likelihood

    joint = table['prior'] * table['likelihood']
    table['joint'] = joint
    # Dividing by the total joint mass normalizes the posterior to sum to 1.
    table['posterior'] = joint / joint.sum()

    print(table)
    return table
    
    
# Shared inputs for both exercises. The scalar prior is broadcast to every
# row; each likelihood tuple holds one value per row in index order
# (JarA, JarB).
prior = 0.5
likelihood_W = (0.9, 0.2)  # W  (presumably a white draw -- confirm)
likelihood_B = (0.1, 0.8)  # B  (presumably a black draw -- confirm)

# One table per exercise so the chained posteriors do not mix.
ex3_table = pd.DataFrame(index=['JarA', 'JarB'])
ex4_table = pd.DataFrame(index=['JarA', 'JarB'])

# ex3: two successive updates, both with the B likelihood.
for observed in (likelihood_B, likelihood_B):
    bayesian_table(ex3_table, prior, observed)
print(ex3_table)

# ex4: update with the B likelihood first, then with the W likelihood.
for observed in (likelihood_B, likelihood_W):
    bayesian_table(ex4_table, prior, observed)
print(ex4_table)



# Load the PlayTennis dataset (relative path, resolved against the current
# working directory).
data = pd.read_csv('1019_PlayTennis.csv')

feature_set = data.columns
data_size = data.shape[0]


# Relative frequency of every distinct value, one column per feature.
# value_counts() sorts by count (descending), so row i holds the share of
# the i-th most frequent value of that feature; the row labels are
# positions, not category names. Values are strings formatted to six
# decimals.
#
# The per-feature Series are collected first and the DataFrame is built
# once from the dict, so its index is the union of all Series indexes.
# Assigning columns one by one into an empty DataFrame would instead fix
# the index from the first feature and silently drop the extra rows of any
# later feature that has more distinct values.
feature_columns = {}
for feature in feature_set:
    counts = data[feature].value_counts()
    feature_columns[feature] = pd.Series(
        [f'{count/data_size:.6f}' for count in counts]
    )
feature_table = pd.DataFrame(feature_columns)
# Sample contents of feature_table, as recorded by the original author:
#     Outlook Temperature  Humidity      Wind Play Tennis
# 0  0.357143    0.428571  0.500000  0.571429    0.642857
# 1  0.357143    0.285714  0.500000  0.428571    0.357143
# 2  0.285714    0.285714       NaN       NaN         NaN

# Show the rows for each class label separately.
print(data[data['Play Tennis']=='Yes'])
print(data[data['Play Tennis']=='No'])