Process of Python
Hello, dear friend, you can consult us at any time if you have any questions, add WeChat: daixieit
Process of Python
Question 1:
import pandas as pd
from scipy.stats import skew
#import matplotlib.pyplot as plt
from google.colab import files
# Ask for the workbook to be uploaded before it is read below.
files.upload()
# Reading Excel files
excel_file_Q1 = "HW2-----1.xlsx"
# part1: daily returns, read from the sheet at zero-based position 2
sheet_name_Q1_1 = 2
data_frame_Q1_1 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_1)
# Simple open-to-close return. The gain is measured relative to the starting
# (opening) price; dividing by the closing price understates gains and
# overstates losses.
data_frame_Q1_1['Daily Return'] = (
    (data_frame_Q1_1['Close*'] - data_frame_Q1_1["Open"]) / data_frame_Q1_1["Open"]
)
print(data_frame_Q1_1["Daily Return"].describe())
print("Skewness of Daily Return: ", skew(data_frame_Q1_1['Daily Return']))
print(data_frame_Q1_1.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')
# part2: weekly returns, read from the sheet at zero-based position 1
sheet_name_Q1_2 = 1
data_frame_Q1_2 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_2)
# Simple open-to-close return. The gain is measured relative to the starting
# (opening) price, not the closing price.
data_frame_Q1_2['Weekly Return'] = (
    (data_frame_Q1_2['Close*'] - data_frame_Q1_2["Open"]) / data_frame_Q1_2["Open"]
)
print(data_frame_Q1_2["Weekly Return"].describe())
# Print the skewness of weekly returns
print("Skewness of Weekly Return: ", skew(data_frame_Q1_2['Weekly Return']))
print(data_frame_Q1_2.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')
# part3: monthly returns, read from the sheet at zero-based position 0
sheet_name_Q1_3 = 0
data_frame_Q1_3 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_3)
# Simple open-to-close return. The gain is measured relative to the starting
# (opening) price, not the closing price.
data_frame_Q1_3['Monthly Return'] = (
    (data_frame_Q1_3['Close*'] - data_frame_Q1_3["Open"]) / data_frame_Q1_3["Open"]
)
# Standardized monthly returns (z-scores): centre on the sample mean and scale
# by the sample standard deviation. Storing the bare standard deviation here
# would fill the column with one repeated constant, which makes any later
# binning of this column meaningless.
monthly_return_Q1_3 = data_frame_Q1_3['Monthly Return']
data_frame_Q1_3['Std. Monthly Return'] = (
    (monthly_return_Q1_3 - monthly_return_Q1_3.mean()) / monthly_return_Q1_3.std(ddof=1)
)
print(data_frame_Q1_3["Monthly Return"].describe())
# Print the skewness of monthly returns
print("Skewness of Monthly Return: ", skew(data_frame_Q1_3['Monthly Return']))
print(data_frame_Q1_3.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')
# part4: annual returns, read from the sheet at zero-based position 3
sheet_name_Q1_4 = 3
data_frame_Q1_4 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_4)
# Simple open-to-close return. The gain is measured relative to the starting
# (opening) price, not the closing price.
data_frame_Q1_4['Annual Return'] = (
    (data_frame_Q1_4['Close*'] - data_frame_Q1_4["Open"]) / data_frame_Q1_4["Open"]
)
print(data_frame_Q1_4["Annual Return"].describe())
# Print the skewness of annual returns
print("Skewness of Annual Return: ", skew(data_frame_Q1_4['Annual Return']))
print(data_frame_Q1_4.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')
# part5: frequency table of the standardized monthly returns.
# The bin edges are in units of standard deviations, so the values being
# binned must be z-scores. They are computed here directly from the
# 'Monthly Return' column so the table does not depend on what any other
# column happens to contain.
bins_Q1_5 = [-3, -2, -1, 0, 1, 2, 3]
monthly_return_Q1_5 = data_frame_Q1_3['Monthly Return']
standardized_return_Q1_5 = (
    (monthly_return_Q1_5 - monthly_return_Q1_5.mean()) / monthly_return_Q1_5.std(ddof=1)
)
# NOTE: observations more than 3 standard deviations from the mean fall
# outside every bin and are therefore not counted in the table.
frequency_table_Q1_5_things = pd.cut(
    standardized_return_Q1_5, bins_Q1_5, include_lowest=True
).value_counts(sort=False)
frequency_table_Q1_5 = pd.DataFrame({
    'Range': frequency_table_Q1_5_things.index,
    'Frequency': frequency_table_Q1_5_things.values,
})
print(frequency_table_Q1_5)
Question 2:
import pandas as pd
from scipy.stats import skew
import numpy as np
import matplotlib.pyplot as plt
# Grouped claim data: one label and one observed count per claim-size class.
data = {
    'Size of Claim($)': ["0-50", "50-100", "100-200", "200-400", '400-800', 'Over 800'],
    'Frequency': [1278, 1496, 1190, 952, 762, 203],
}
claim_sizes = ['0-50', '50-100', '100-200', '200-400', '400-800', 'over 800']
frequencies = [1278, 1496, 1190, 952, 762, 203]
# Table form of the same data, with each class's share of all claims.
data_frame_Q2 = pd.DataFrame(data).assign(
    **{'Frequency Ratio': lambda frame: frame['Frequency'] / frame['Frequency'].sum()}
)
# Running totals of the class counts; the last entry is the total claim count.
cumulative_frequencies = np.asarray(frequencies).cumsum()
total_claims = cumulative_frequencies[-1]
# Cumulative share of claims up to and including each class.
cumulative_relative_frequencies = cumulative_frequencies / total_claims
# Step plot of the cumulative relative frequencies against the class labels.
axes_Q2 = plt.gca()
axes_Q2.step(claim_sizes, cumulative_relative_frequencies, where='post')
axes_Q2.set_title('Empirical distribution function Fn(x)')
axes_Q2.set_xlabel('Claim amount($)')
axes_Q2.set_ylabel('Fn(x)')
axes_Q2.grid(True)
plt.show()
import numpy as np
from scipy.stats import skew
# Uses claim_sizes and cumulative_relative_frequencies defined earlier.
# cumulative_relative_frequencies[i] is the empirical CDF at the UPPER bound
# of class i, so F(400) and F(800) can be read off directly. 500 lies inside
# the 400-800 class, so F(500) is obtained by linear interpolation within that
# class (claims assumed evenly spread across the class).
index_400 = claim_sizes.index('200-400')  # class whose upper bound is 400
index_500 = claim_sizes.index('400-800')  # class that contains 500
index_800 = claim_sizes.index('400-800')  # class whose upper bound is 800
cdf_at_400 = cumulative_relative_frequencies[index_400]
cdf_at_800 = cumulative_relative_frequencies[index_800]
# Fraction of the 400-800 class that lies below 500.
share_of_class_below_500 = (500 - 400) / (800 - 400)
#Calculating the probability of having a claim less than 500
probability_less_than_500 = cdf_at_400 + share_of_class_below_500 * (cdf_at_800 - cdf_at_400)
print("Probability of having a claim less than 500:", probability_less_than_500)
#Calculating the probability of a loss between $500 and $800: F(800) - F(500)
probability_500_800 = cdf_at_800 - probability_less_than_500
print("Probability of a loss between $500 and $800:", probability_500_800)
#Calculating the probability of a loss greater than $800 given the loss is greater than $500:
# P(L > 800) / P(L > 500)
probability_gt_800_given_gt_500 = (1 - cdf_at_800) / (1 - probability_less_than_500)
print("Probability of a loss greater than $800 given the loss is greater than $500:", probability_gt_800_given_gt_500)
#Calculating mean, variance, standard deviation, and skewness of the grouped claims
# One representative value per class (class midpoints; 800 stands in for the
# open-ended top class).
values = [25, 75, 150, 300, 600, 800]
# Each representative value must be weighted by how many claims fell in its
# class (the `frequencies` list defined with the claim data). Treating the six
# values as six equally likely observations ignores the data entirely.
class_values_Q2 = np.asarray(values, dtype=float)
class_weights_Q2 = np.asarray(frequencies, dtype=float)
mean = np.average(class_values_Q2, weights=class_weights_Q2)
variance = np.average((class_values_Q2 - mean) ** 2, weights=class_weights_Q2)
std_dev = np.sqrt(variance)
# Third standardized moment, frequency-weighted.
skewness = np.average(((class_values_Q2 - mean) / std_dev) ** 3, weights=class_weights_Q2)
print("Mean:", mean)
print("Variance:", variance)
print("Standard deviation:", std_dev)
print("Skewness:", skewness)
# Uses data_frame_Q2 (with its 'Frequency Ratio' column) defined earlier.
# Rows 3, 4 and 5 are the classes above 200, represented by 300, 600 and 800.
tail_values_Q2 = np.array([300.0, 600.0, 800.0])
tail_ratios_Q2 = data_frame_Q2['Frequency Ratio'].loc[3:5].to_numpy()
# P(L > 200). Conditional moments must be divided by this; without the
# division the sums are only partial (unconditional) moments.
probability_gt_200 = tail_ratios_Q2.sum()
#Calculating E(L|L>200)
mean_Q2_6 = (tail_values_Q2 * tail_ratios_Q2).sum() / probability_gt_200
print("E(L|L>200)=", mean_Q2_6)
#Calculating Var(L|L>200)
var_Q2_7 = (tail_ratios_Q2 * (tail_values_Q2 - mean_Q2_6) ** 2).sum() / probability_gt_200
print('Var(L|L>200)=', var_Q2_7)
Question 3:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
from google.colab import files
# Ask for the workbook to be uploaded before it is read below.
files.upload()
excel_file_Q3 = "hwdata1_RIM523.xlsx"
sheet_name_Q3 = "Sheet1"
# Only the second spreadsheet column is read; header=9 tells pandas which
# (zero-based) row holds the column names.
data_frame_Q3 = pd.read_excel(excel_file_Q3, sheet_name=sheet_name_Q3, usecols=[1], header=9)
#data_frame_Q3.columns
print('mean =', data_frame_Q3["Loss Amount ($)"].mean())
print('standard deviation =', data_frame_Q3["Loss Amount ($)"].std(ddof=1))
print('skewness =', data_frame_Q3["Loss Amount ($)"].skew())
# Sort the losses in ascending order so a running sum of the point masses
# gives the empirical CDF.
sorted_data_frame_Q3 = data_frame_Q3.sort_values("Loss Amount ($)")
sorted_data_frame_Q3 = sorted_data_frame_Q3.reset_index(drop=True)
# Calculate PMF and CDF: each observation carries mass 1/n. n is taken from
# the data actually loaded rather than hard-coded, so the CDF ends at 1 for
# any sample size.
sorted_data_frame_Q3['EMF.PMF'] = 1 / len(sorted_data_frame_Q3)
sorted_data_frame_Q3['EMF.CDF'] = sorted_data_frame_Q3['EMF.PMF'].cumsum()
# Plotting empirical functions
plt.plot(sorted_data_frame_Q3["Loss Amount ($)"], sorted_data_frame_Q3['EMF.CDF'], label='Empirical Data', marker='o')
plt.ylabel('CDF')
plt.xlabel('Loss')
plt.legend()
plt.title("Empirical Function")
plt.show()
#Part3: moments and CDF recomputed from a 20-class frequency table
frequency_table_Q3_things = pd.cut(
    sorted_data_frame_Q3["Loss Amount ($)"], bins=20, include_lowest=True
).value_counts(sort=False)
frequency_table_Q3 = pd.DataFrame({
    'Range': frequency_table_Q3_things.index,
    'Frequency': frequency_table_Q3_things.values,
})
# Class midpoints, converted to plain floats once so every later step (and the
# plot) works with numeric values.
frequency_table_Q3['MidValue'] = frequency_table_Q3['Range'].apply(lambda x: x.mid).astype(float)
# Relative frequencies use the number of observations actually binned instead
# of a hard-coded sample size, so they always sum to 1.
frequency_table_Q3['Frequency Ratio'] = (
    frequency_table_Q3['Frequency'] / frequency_table_Q3['Frequency'].sum()
)
frequency_table_Q3['Mean'] = frequency_table_Q3['Frequency Ratio'] * frequency_table_Q3['MidValue']
mean = frequency_table_Q3['Mean'].sum()
frequency_table_Q3['Variance'] = (
    frequency_table_Q3['Frequency Ratio'] * (frequency_table_Q3['MidValue'] - mean) ** 2
)
std = frequency_table_Q3['Variance'].sum() ** 0.5
frequency_table_Q3["Before Skew"] = ((frequency_table_Q3['MidValue'] - mean) / std) ** 3
frequency_table_Q3['skew'] = frequency_table_Q3['Frequency Ratio'] * frequency_table_Q3["Before Skew"]
# Stored under its own name: assigning the result to `skew` would overwrite
# the scipy.stats `skew` function imported at the top of this question.
skewness_Q3 = frequency_table_Q3['skew'].sum()
print("mean of frequency concept=", mean)
print("standard deviation of frequency concept=", std)
print("skewness of frequency concept=", skewness_Q3)
# Overlay the CDF implied by the frequency table on the empirical CDF.
plt.plot(sorted_data_frame_Q3["Loss Amount ($)"], sorted_data_frame_Q3['EMF.CDF'], label='Empirical Data', marker='o')
plt.ylabel('CDF')
plt.xlabel('Loss')
plt.title("Empirical Function")
frequency_table_Q3['Frequency Ratio.CDF'] = frequency_table_Q3['Frequency Ratio'].cumsum()
plt.plot(frequency_table_Q3['MidValue'], frequency_table_Q3['Frequency Ratio.CDF'], label='CDF from frequency table', marker='o')
plt.legend()
plt.show()
Question 4:
# Portfolio set-up: two groups of risks, each risk producing a loss of
# `loss_amount` with its group's probability.
num_type1_risks, type1_loss_probability = 100, 0.15
num_type2_risks, type2_loss_probability = 100, 0.05
loss_amount = 1

# Expected loss per group: count * probability * loss size.
mean_type1, mean_type2 = (
    count * probability * loss_amount
    for count, probability in (
        (num_type1_risks, type1_loss_probability),
        (num_type2_risks, type2_loss_probability),
    )
)
mean_total_loss = sum((mean_type1, mean_type2))

# Variance per group: count * p * (1 - p) * loss size squared; the group
# variances are added to give the total.
variance_type1, variance_type2 = (
    count * probability * (1 - probability) * (loss_amount ** 2)
    for count, probability in (
        (num_type1_risks, type1_loss_probability),
        (num_type2_risks, type2_loss_probability),
    )
)
variance_total_loss = sum((variance_type1, variance_type2))

# Report both totals.
print(f"Mean of total loss: ${mean_total_loss}")
print(f"Variance of total loss: ${variance_total_loss}")
Question 5:
import numpy as np
import matplotlib.pyplot as plt
def sample_pmf(k):
    """Draw ``k`` independent samples from the PMF p(i) = i/55, i = 1..10.

    Args:
        k: Number of samples to draw.

    Returns:
        A NumPy array of length ``k`` whose entries are integers in 1..10.
    """
    values = np.arange(1, 11)  # Possible values i = 1, 2, ..., 10
    # 1 + 2 + ... + 10 = 55, so these probabilities sum to 1.
    probabilities = values / 55
    samples = np.random.choice(values, size=k, p=probabilities)
    return samples
# Values of k to sample from the PMF
k_values = [50, 500, 5000]
# One unit-wide bin centred on each possible value 1..10. Fixed edges keep the
# bars aligned with the integers even when a small sample happens not to
# contain 1 or 10 (automatic bins are derived from the sample's min and max).
bin_edges_Q5 = np.arange(0.5, 11.5, 1.0)
# Generate samples and plot one histogram per sample size
for k in k_values:
    samples = sample_pmf(k)
    plt.hist(samples, bins=bin_edges_Q5, edgecolor='black', alpha=0.7)
    plt.title(f'Histogram of {k} Samples from PMF')
    plt.xlabel('Values')
    plt.ylabel('Frequency')
    plt.show()
Question 6:
import numpy as np
import matplotlib.pyplot as plt
def binomial_pmf(n, p):
    """Return the Binomial(n, p) probability mass function as an array.

    Args:
        n: Number of trials (non-negative integer).
        p: Success probability of a single trial.

    Returns:
        A NumPy array of length ``n + 1``; entry ``i`` is P(X = i).
    """
    # Standard-library comb is used directly: the ``np.math`` alias is
    # deprecated and no longer exists in NumPy 2.x.
    from math import comb

    pmf = np.array([comb(n, i) * (p**i) * ((1-p)**(n-i)) for i in range(n + 1)])
    return pmf
# Two binomial components with the same number of trials and different
# success probabilities.
n = 20
p1 = 0.1
p2 = 0.4
pmf_p1 = binomial_pmf(n, p1)
pmf_p2 = binomial_pmf(n, p2)
X_values = np.arange(0, n+1)
# PMF of X: a mixture putting weight 0.1 on the p1 component and 0.9 on the
# p2 component. The weights sum to 1, so the mixture is itself a valid PMF.
probabilities = 0.1 * pmf_p1 + 0.9 * pmf_p2
# Simulate 1000 values of X
X_samples = np.random.choice(X_values, size=1000, p=probabilities)
# Plot histogram with one unit-wide bin centred on each integer 0..n.
# (n + 1 equal bins over [0, n] would be 20/21 wide and drift off the integers.)
bin_edges_Q6 = np.arange(-0.5, n + 1.5, 1.0)
plt.hist(X_samples, bins=bin_edges_Q6, edgecolor='black', alpha=0.7)
plt.title('Histogram of 1000 Samples of X')
plt.xlabel('Values of X')
plt.ylabel('Frequency')
plt.show()
2023-11-07