Hello, dear friend, you can consult us at any time if you have any questions, add WeChat: daixieit

Python Solution Code

Question 1:

import pandas as pd

from scipy.stats import skew

#import matplotlib.pyplot as plt

from google.colab import files

# Colab-only step: upload the Excel workbook into the notebook session so the
# pd.read_excel calls below can open it by file name.
# NOTE(review): the uploaded file's name has to match excel_file_Q1 - confirm.
files.upload()

# Reading Excel files
# Every part of Question 1 reads its sheet from this one workbook.
excel_file_Q1 = "HW2-----1.xlsx"

#part1
# Sheets are selected by zero-based position; position 2 is used for the
# daily figures.
sheet_name_Q1_1 = 2
data_frame_Q1_1 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_1)

# Calculate daily rate of return.
# A simple return is measured against the price at the START of the period,
# so the open-to-close change is divided by the opening price (dividing by
# the close would understate gains and overstate losses).
data_frame_Q1_1['Daily Return'] = (
    (data_frame_Q1_1['Close*'] - data_frame_Q1_1['Open'])
    / data_frame_Q1_1['Open']
)

# Summary statistics, skewness and the last rows as a quick sanity check.
print(data_frame_Q1_1["Daily Return"].describe())
print("Skewness of Daily Return: ", skew(data_frame_Q1_1['Daily Return']))
print(data_frame_Q1_1.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')

#part2
# Sheets are selected by zero-based position; position 1 is used for the
# weekly figures.
sheet_name_Q1_2 = 1
data_frame_Q1_2 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_2)

# Calculating weekly returns.
# A simple return is measured against the price at the START of the period,
# so the open-to-close change is divided by the opening price, not the close.
data_frame_Q1_2['Weekly Return'] = (
    (data_frame_Q1_2['Close*'] - data_frame_Q1_2['Open'])
    / data_frame_Q1_2['Open']
)

print(data_frame_Q1_2["Weekly Return"].describe())

# Print the skewness of weekly returns
print("Skewness of Weekly Return: ", skew(data_frame_Q1_2['Weekly Return']))
print(data_frame_Q1_2.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')

#part3
# Sheets are selected by zero-based position; position 0 is used for the
# monthly figures.
sheet_name_Q1_3 = 0
data_frame_Q1_3 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_3)

# Calculate monthly return.
# A simple return is measured against the price at the START of the period,
# so the open-to-close change is divided by the opening price, not the close.
data_frame_Q1_3['Monthly Return'] = (
    (data_frame_Q1_3['Close*'] - data_frame_Q1_3['Open'])
    / data_frame_Q1_3['Open']
)

# Standardise the monthly returns (z-scores): subtract the sample mean and
# divide by the sample standard deviation (ddof=1). Storing the bare standard
# deviation here would fill the column with one repeated number, which makes
# the frequency table that later bins this column over [-3, 3] meaningless.
monthly_return_Q1_3 = data_frame_Q1_3['Monthly Return']
data_frame_Q1_3['Std. Monthly Return'] = (
    (monthly_return_Q1_3 - monthly_return_Q1_3.mean())
    / monthly_return_Q1_3.std(ddof=1)
)

print(data_frame_Q1_3["Monthly Return"].describe())

# Print the skewness of monthly returns
print("Skewness of Monthly Return: ", skew(data_frame_Q1_3['Monthly Return']))
print(data_frame_Q1_3.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')

#part4
# Sheets are selected by zero-based position; position 3 is used for the
# annual figures.
sheet_name_Q1_4 = 3
data_frame_Q1_4 = pd.read_excel(excel_file_Q1, sheet_name=sheet_name_Q1_4)

# Calculation of annual rate of return.
# A simple return is measured against the price at the START of the period,
# so the open-to-close change is divided by the opening price, not the close.
data_frame_Q1_4['Annual Return'] = (
    (data_frame_Q1_4['Close*'] - data_frame_Q1_4['Open'])
    / data_frame_Q1_4['Open']
)

print(data_frame_Q1_4["Annual Return"].describe())

# Print the skewness of annual returns
print("Skewness of Annual Return: ", skew(data_frame_Q1_4['Annual Return']))
print(data_frame_Q1_4.tail())
print('\n------------------------------------------------------------------------------------------------------------------------\n')

#part5
# Frequency table of the 'Std. Monthly Return' column over six unit-wide
# classes running from -3 to 3.
bins_Q1_5 = [-3, -2, -1, 0, 1, 2, 3]

# include_lowest=True closes the first class on the left, so a value of
# exactly -3 is still counted; sort=False keeps the classes in bin order
# rather than ordering them by count.
binned_Q1_5 = pd.cut(
    data_frame_Q1_3['Std. Monthly Return'],
    bins_Q1_5,
    include_lowest=True,
)
frequency_table_Q1_5_things = binned_Q1_5.value_counts(sort=False)

# Re-shape the counts into a two-column table: class interval and frequency.
frequency_table_Q1_5 = pd.DataFrame({
    'Range': frequency_table_Q1_5_things.index,
    'Frequency': frequency_table_Q1_5_things.values,
})
print(frequency_table_Q1_5)

Question 2:

import pandas as pd

from scipy.stats import skew

import numpy as np

import matplotlib.pyplot as plt

# Grouped claim data: one entry per claim-size class.
data = {
    'Size of Claim($)': ["0-50", "50-100", "100-200", "200-400", '400-800', 'Over 800'],
    'Frequency': [1278, 1496, 1190, 952, 762, 203],
}

# The same classes and counts as plain lists. Note that the last label is
# spelled in lower case here ('over 800'); later look-ups use this spelling.
claim_sizes = ['0-50', '50-100', '100-200', '200-400', '400-800', 'over 800']
frequencies = [1278, 1496, 1190, 952, 762, 203]

data_frame_Q2 = pd.DataFrame(data)

# Share of all claims that fall in each class.
class_counts_Q2 = data_frame_Q2['Frequency']
data_frame_Q2['Frequency Ratio'] = class_counts_Q2 / class_counts_Q2.sum()

# Running totals of the class counts, then the same totals as a fraction of
# all claims (the empirical distribution function at each class's upper end).
total_claims = np.sum(frequencies)
cumulative_frequencies = np.cumsum(frequencies)
cumulative_relative_frequencies = cumulative_frequencies / total_claims

# Step plot of the cumulative relative frequencies, one step per claim-size
# class. where='post' holds each value constant to the right until the next
# class label is reached.
plt.step(claim_sizes, cumulative_relative_frequencies, where='post')

# Labelling and grid, then render the figure.
plt.xlabel('Claim amount($)')
plt.ylabel('Fn(x)')
plt.title('Empirical distribution function Fn(x)')
plt.grid(True)
plt.show()

import numpy as np

from scipy.stats import skew

#Assuming we have the claim_sizes and cumulative_relative_frequencies data predefined.
# cumulative_relative_frequencies[i] is the empirical CDF at the UPPER end of
# class i, so it is only known at the class boundaries 50, 100, 200, 400, 800.
# $500 lies inside the 400-800 class, so Fn(500) has to be interpolated
# linearly between Fn(400) and Fn(800) (claims assumed evenly spread within
# a class); reading it straight off the table would give Fn(400) instead.

index_400 = claim_sizes.index('200-400')   # class ending at 400 -> Fn(400)
index_500 = claim_sizes.index('400-800')   # class containing 500, ending at 800 -> Fn(800)
index_800 = claim_sizes.index('over 800')  # open-ended top class; not needed below, kept so the name stays defined

cdf_at_400 = cumulative_relative_frequencies[index_400]
cdf_at_800 = cumulative_relative_frequencies[index_500]
cdf_at_500 = cdf_at_400 + (500 - 400) / (800 - 400) * (cdf_at_800 - cdf_at_400)

#Calculating the probability of having a claim less than 500
probability_less_than_500 = cdf_at_500
print("Probability of having a claim less than 500:", probability_less_than_500)

#Calculating the probability of a loss between $500 and $800
# Fn(800) - Fn(500); subtracting Fn(800) from 1 would give P(L > 800) instead.
probability_500_800 = cdf_at_800 - cdf_at_500
print("Probability of a loss between $500 and $800:", probability_500_800)

#Calculating the probability of a loss greater than $800 given the loss is greater than $500
# P(L > 800 | L > 500) = P(L > 800) / P(L > 500)
probability_gt_800_given_gt_500 = (1 - cdf_at_800) / (1 - cdf_at_500)
print("Probability of a loss greater than $800 given the loss is greater than $500:", probability_gt_800_given_gt_500)

#Calculating mean, variance, standard deviation, and skewness
# Each class is represented by a single value: its midpoint, with 800 standing
# in for the open-ended top class.
values = [25, 75, 150, 300, 600, 800]

# The classes hold very different numbers of claims, so every moment must be
# weighted by the class's share of all claims. Taking np.mean / np.var / skew
# of the six representative values alone would treat each class as one
# equally likely observation and ignore the frequencies entirely.
class_values_Q2 = np.asarray(values, dtype=float)
class_weights_Q2 = np.asarray(frequencies, dtype=float) / np.sum(frequencies)

mean = np.sum(class_weights_Q2 * class_values_Q2)
variance = np.sum(class_weights_Q2 * (class_values_Q2 - mean) ** 2)
std_dev = np.sqrt(variance)
# Third standardised central moment of the grouped distribution.
skewness = np.sum(class_weights_Q2 * (class_values_Q2 - mean) ** 3) / std_dev ** 3

print("Mean:", mean)
print("Variance:", variance)
print("Standard deviation:", std_dev)
print("Skewness:", skewness)

#Assuming we have data_frame_Q2 defined for the following calculations
# The classes above $200 are rows 3, 4 and 5, represented by 300, 600 and 800.
tail_values_Q2 = [300, 600, 800]
tail_ratios_Q2 = [data_frame_Q2['Frequency Ratio'][row] for row in (3, 4, 5)]

# Conditioning on L > 200 means dividing each tail probability by P(L > 200),
# so that the conditional probabilities sum to 1. Using the unconditional
# ratios directly understates the conditional mean and distorts the variance.
prob_gt_200_Q2 = sum(tail_ratios_Q2)
conditional_probs_Q2 = [ratio / prob_gt_200_Q2 for ratio in tail_ratios_Q2]

#Calculating E(L|L>200)
mean_Q2_6 = sum(
    value * prob for value, prob in zip(tail_values_Q2, conditional_probs_Q2)
)
print("E(L|L>200)=", mean_Q2_6)

#Calculating Var(L|L>200)
var_Q2_7 = sum(
    prob * (value - mean_Q2_6) ** 2
    for value, prob in zip(tail_values_Q2, conditional_probs_Q2)
)
print('Var(L|L>200)=', var_Q2_7)

Question 3:

import numpy as np

import pandas as pd

from scipy.stats import skew, kurtosis

import matplotlib.pyplot as plt

from google.colab import files

# Colab-only step: upload the Excel workbook into the notebook session so the
# pd.read_excel call below can open it by file name.
# NOTE(review): the uploaded file's name has to match excel_file_Q3 - confirm.
files.upload()

# Workbook and sheet holding the loss data for Question 3.
excel_file_Q3 = "hwdata1_RIM523.xlsx"
sheet_name_Q3 = "Sheet1"

# header=9 takes the column names from the row at zero-based position 9;
# usecols=[1] loads only the second column of the sheet.
data_frame_Q3 = pd.read_excel(
    excel_file_Q3,
    sheet_name=sheet_name_Q3,
    usecols=[1],
    header=9,
)

#data_frame_Q3.columns

# Sample statistics straight from the raw observations (std uses ddof=1).
loss_amounts_Q3 = data_frame_Q3["Loss Amount ($)"]
print('mean =', loss_amounts_Q3.mean())
print('standard deviation =', loss_amounts_Q3.std(ddof=1))
print('skewness =', loss_amounts_Q3.skew())

# Sort the observations by "Loss Amount ($)" and renumber the rows so that
# row order equals rank order.
sorted_data_frame_Q3 = (
    data_frame_Q3.sort_values("Loss Amount ($)").reset_index(drop=True)
)

# Calculate PMF and CDF
# Each observation carries probability 1/n, where n is the number of loss
# amounts actually present in the data (instead of a hard-coded 530, which
# silently breaks as soon as the data set changes size).
n_obs_Q3 = sorted_data_frame_Q3["Loss Amount ($)"].count()
sorted_data_frame_Q3['EMF.PMF'] = 1 / n_obs_Q3
sorted_data_frame_Q3['EMF.CDF'] = sorted_data_frame_Q3['EMF.PMF'].cumsum()

# Plotting empirical functions
plt.plot(
    sorted_data_frame_Q3["Loss Amount ($)"],
    sorted_data_frame_Q3['EMF.CDF'],
    label='Empirical Data',
    marker='o',
)
plt.ylabel('CDF')
plt.xlabel('Loss')
plt.legend()
plt.title("Empirical Function")
plt.show()

#Part3
# Group the losses into 20 equal-width classes and recompute the moments from
# the grouped data, treating every observation in a class as if it sat at the
# class midpoint.
loss_classes_Q3 = pd.cut(
    sorted_data_frame_Q3["Loss Amount ($)"], bins=20, include_lowest=True
)
frequency_table_Q3_things = loss_classes_Q3.value_counts(sort=False)

frequency_table_Q3 = pd.DataFrame({
    'Range': frequency_table_Q3_things.index,
    'Frequency': frequency_table_Q3_things.values,
})
frequency_table_Q3['MidValue'] = frequency_table_Q3['Range'].apply(lambda x: x.mid)

# Relative frequency of each class. The divisor is the number of observations
# that were actually binned rather than a hard-coded 530, so the ratios always
# sum to 1 whatever the size of the data set.
n_binned_Q3 = frequency_table_Q3['Frequency'].sum()
frequency_table_Q3['Frequency Ratio'] = frequency_table_Q3['Frequency'] / n_binned_Q3

ratio_Q3 = frequency_table_Q3['Frequency Ratio'].astype(float)
mid_value_Q3 = frequency_table_Q3['MidValue'].astype(float)

# Mean: sum of midpoint * relative frequency.
frequency_table_Q3['Mean'] = ratio_Q3 * mid_value_Q3
mean = frequency_table_Q3['Mean'].sum()

# Standard deviation: root of the frequency-weighted squared deviations.
frequency_table_Q3['Variance'] = ratio_Q3 * ((mid_value_Q3 - mean) ** 2)
std = frequency_table_Q3['Variance'].sum() ** 0.5

# Skewness: frequency-weighted cube of the standardised deviations.
frequency_table_Q3["Before Skew"] = ((mid_value_Q3 - mean) / std) ** 3
frequency_table_Q3['skew'] = ratio_Q3 * frequency_table_Q3["Before Skew"].astype(float)

# Stored as skewness_Q3 (not `skew`) so the scipy.stats.skew function imported
# at the top of this section is not overwritten by a number.
skewness_Q3 = frequency_table_Q3['skew'].sum()

print("mean of frequency concept=",mean)
print("standard deviation of frequency concept=",std)
print("skewness of frequency concept=", skewness_Q3)

# Cumulative relative frequency per class, for comparison with the
# observation-level empirical CDF.
frequency_table_Q3['Frequency Ratio.CDF'] = frequency_table_Q3['Frequency Ratio'].cumsum()

# First curve: the empirical CDF of the individual observations.
plt.plot(
    sorted_data_frame_Q3["Loss Amount ($)"],
    sorted_data_frame_Q3['EMF.CDF'],
    label='Empirical Data',
    marker='o',
)
plt.ylabel('CDF')
plt.xlabel('Loss')
plt.title("Empirical Function")

# Second curve: the grouped CDF, drawn at the class midpoints.
plt.plot(
    frequency_table_Q3['MidValue'],
    frequency_table_Q3['Frequency Ratio.CDF'],
    label='CDF from frequency table',
    marker='o',
)
plt.legend()
plt.show()

Question 4:

# Constants
# Two groups of risks; each risk either produces a loss of `loss_amount`
# with the group's loss probability, or no loss at all.
num_type1_risks = 100
type1_loss_probability = 0.15
num_type2_risks = 100
type2_loss_probability = 0.05
loss_amount = 1


def _group_loss_moments(count, probability, amount):
    """Return (mean, variance) of one group's total loss: n*p*L and n*p*(1-p)*L**2."""
    group_mean = count * probability * amount
    group_variance = count * probability * (1 - probability) * (amount ** 2)
    return group_mean, group_variance


# Per-group mean and variance
mean_type1, variance_type1 = _group_loss_moments(
    num_type1_risks, type1_loss_probability, loss_amount
)
mean_type2, variance_type2 = _group_loss_moments(
    num_type2_risks, type2_loss_probability, loss_amount
)

# Totals: means add, and the variances are added as well (which treats the
# two groups as independent).
mean_total_loss = mean_type1 + mean_type2
variance_total_loss = variance_type1 + variance_type2

# Print the results
print(f"Mean of total loss: ${mean_total_loss}")
print(f"Variance of total loss: ${variance_total_loss}")

Question 5:

import numpy as np

import matplotlib.pyplot as plt

def sample_pmf(k):
    """Draw k random samples from the PMF p(i) = i/55 on i = 1, 2, ..., 10.

    Args:
        k: Number of samples to draw.

    Returns:
        NumPy array of length k holding the sampled values (integers 1..10).
    """
    support = np.arange(1, 11)  # Possible values i=1,2,...,10
    weights = np.arange(1, 11) / 55  # p = i/55; 1 + 2 + ... + 10 = 55, so these sum to 1
    return np.random.choice(support, size=k, p=weights)

# Values of k to sample from the PMF
k_values = [50, 500, 5000]

# One bin per possible outcome 1..10, centred on the integer: the edges are
# 0.5, 1.5, ..., 10.5. With bins=10 matplotlib spreads ten bins over the
# observed min..max instead, so the bars do not line up with the integers and,
# whenever a small sample happens to miss the outcome 1, nine distinct values
# are spread over ten bins and one empty bin shows up as a false gap.
outcome_bin_edges = np.arange(0.5, 11.5, 1.0)

# Generate samples and plot histograms
for k in k_values:
    samples = sample_pmf(k)

    plt.hist(samples, bins=outcome_bin_edges, edgecolor='black', alpha=0.7)
    plt.title(f'Histogram of {k} Samples from PMF')
    plt.xlabel('Values')
    plt.ylabel('Frequency')
    plt.show()

Question 6:

import numpy as np

import matplotlib.pyplot as plt

def binomial_pmf(n, p):
    """Return the Binomial(n, p) probability mass function on 0, 1, ..., n.

    Args:
        n: Number of trials (non-negative integer).
        p: Success probability of a single trial.

    Returns:
        NumPy array of length n + 1 whose entry i is
        C(n, i) * p**i * (1 - p)**(n - i).
    """
    # math.comb is used directly: the np.math alias the code relied on before
    # was removed in NumPy 2.0, where np.math.comb raises AttributeError.
    from math import comb

    return np.array(
        [comb(n, i) * (p ** i) * ((1 - p) ** (n - i)) for i in range(n + 1)]
    )

# Two binomial distributions on the same number of trials, with success
# probabilities p1 and p2.
n, p1, p2 = 20, 0.1, 0.4

pmf_p1 = binomial_pmf(n, p1)
pmf_p2 = binomial_pmf(n, p2)

# Possible outcomes 0..n and the PMF of X: a 0.1 / 0.9 weighted blend of the
# two binomial PMFs.
X_values = np.arange(n + 1)
probabilities = 0.1 * pmf_p1 + 0.9 * pmf_p2

# Simulate 1000 values of X
X_samples = np.random.choice(X_values, size=1000, p=probabilities)

# Plot histogram
plt.hist(X_samples, bins=n+1, range=[0, n], edgecolor='black', alpha=0.7)
plt.xlabel('Values of X')
plt.ylabel('Frequency')
plt.title('Histogram of 1000 Samples of X')
plt.show()