########################################################################################################
#-------------------------------------------------------------------------------------------------------
# Author : A. John Woodill
# Date : 04/07/2015
# Filename : Fama-French-Replication.R
# Code : Fama-French '92 Replication
# Sections :
# (1) CRSP Data setup
# (2) Table 1 pre-beta, post-beta, post-beta (ln(ME))
# (3)
# (**) CRSP and Compustat Data Merge)
# (**) Data Wrangling
#-------------------------------------------------------------------------------------------------------
########################################################################################################
rm(list=ls(all=TRUE))
library(readr) # For reading the data
library(dplyr) # Data wrangling
#-----------------------------------
# (1) CRSP Data setup
#-----------------------------------
setwd("/run/media/john/1TB/Projects/Fama-French Replicatoin/")
crsp <- read_csv(("Crsp.csv"))
# crsp2 <- read.csv("Crsp.csv", stringsAsFactors = FALSE)
# Convert colnames to lower case
colnames(crsp) <- tolower(colnames(crsp))
# Fix missing fyears
crsp$fyear <- substr(crsp$date, 1, 4)
# Add Month
crsp$month <- substr(crsp$date, 5, 6)
# Only keep those stocks with returns at the end of June
# crsp <- crsp %>%
# group_by(permco, fyear) %>%
# mutate(month = substr(date, 5, 6),
# has_June = any(month == "06"))
#
# crsp <- filter(crsp, has_June == TRUE)
# Only keep those stocks with returns at the end of December
# crsp <- crsp %>%
# group_by(permco, fyear) %>%
# mutate(month = substr(date, 5, 6),
# has_Dec = any(month == "12"))
#
# crsp <- filter(crsp, has_Dec == TRUE)
# Calculate Market Equity (ME) : ME = prc*shrout
crsp$me <- (abs(crsp$prc)*crsp$shrout)/1000
crsp <- filter(crsp, me > 0) # Ensure has me
# Remove all obs outside July 1962 - Dec 1990 ( Because of lag need to include 1962)
crsp <- filter(crsp, date >= 19620700 & date <= 19901231)
# Keep only share code 10, 11
crsp <- filter(crsp, shrcd == 10 | shrcd == 11)
# Remove ret with C as value
crsp <- filter(crsp, ret != "C")
crsp$ret <- as.numeric(crsp$ret)
# Write to full sample
write_csv(crsp, "crsp_92_data.csv")
#---------------------------------------------------------
# (2) Table 1 pre-beta, post-beta, post-beta (ln(ME))
#---------------------------------------------------------
library(dplyr)
library(readr)
setwd("/run/media/john/1TB/Projects/Fama-French Replicatoin/")
crsp <- read_csv("crsp_92_data.csv")
# Select only columns needed
crsp <- select(crsp, permco,date, ret, vwretd, ewretd, fyear, month, me)
# Get 10% decile ME for each June and assign to portfolio
crsp$fyear <- as.integer(crsp$fyear)
crsp$month <- as.integer(crsp$month)
# Build portfolios based on ME
crsp$portfo=cut(crsp$me, breaks=quantile(crsp$me,probs=seq(0,1,1/10),na.rm=T),labels=F)
# Lag ewretd
crsp <- crsp %>%
group_by(permco) %>%
arrange(desc(date)) %>%
mutate(lagewretd = lag(ewretd))
# Remove initial lagged variables
crsp <- filter(crsp, lagewretd != "NA")
## convert fyear to a proper number and then exploit for sorting
crsp <- crsp %>%
mutate(fyear = fyear %>% as.integer) %>%
arrange(fyear, month)
## figure out cumulative months available for each year (for each permco)
years <- crsp %>%
group_by(permco, fyear) %>%
summarize(n = n()) %>%
mutate(n_cum = cumsum(n))
# function to get coefficients
# (further optimization should probably focus on improving this function)
get_coefs <- function(.permco, .fyear, .n_cum){
if(.n_cum < 24) {
data_frame(`(Intercept)` = NA_real_, ewretd = NA_real_, lagewretd = NA_real_)
} else {
my_dat <- crsp %>%
filter(permco == .permco, fyear <= .fyear) %>%
mutate(rn = row_number(desc(date)))
lm(ret ~ ewretd + lagewretd, my_dat, subset = rn < 61) %>%
coef %>%
as.list %>%
as_data_frame
}
}
# dplyr option (Takes ~ 2 hours)
models_dplyr <- years %>%
group_by(fyear, permco) %>%
do(get_coefs(.$permco, .$fyear, .$n_cum))
# Remove NA's
models_dplyr <- filter(models_dplyr, ewretd != "NA" | lagewretd != "NA")
models_dplyr$sum <- models_dplyr$ewretd + models_dplyr$lagewretd
# Write out to save
write.csv(models_dplyr, "prerank_betas.csv")
# Read prerank
models_dplyr <- read_csv("prerank_betas.csv")
# Merge with crsp data set
merge <- select(crsp, permco, portfo, me, ret, fyear, month)
prerank <- inner_join(models_dplyr, merge, by = "permco")
# Sum ewretd and lagewretd to get pre-beta
prerank_betas <- prerank %>%
group_by(permco) %>%
summarize(pre_beta = mean(sum), ret = mean(ret), me = mean(me), ewr = mean(ewretd))
# Rank pre-betas and me
prerank_betas$beta_rank=cut(prerank_betas$pre_beta, breaks=quantile(prerank_betas$pre_beta, probs=seq(0,1,1/10), na.rm=T),labels=F)
prerank_betas$portfo=cut(prerank_betas$me, breaks=quantile(prerank_betas$me,probs=seq(0,1,1/10),na.rm=T),labels=F)
prerank_betas <- filter(prerank_betas, beta_rank != "NA" & portfo != "NA")
# Build data frame for pre-ranking betas
df <- prerank_betas %>%
group_by(portfo, beta_rank) %>%
summarise(mer = mean(ewr))
df <- prerank_betas %>%
group_by(beta_rank) %>%
summarise(mer = mean(ewr))
df
table1a <- read_csv("/home/john/Dropbox/UHM/Classes/Fin 701 - International Finance Theory/Replication/Table1_A.csv")
table1a
# Table 1B - Post Ranking Betas
# Function to get coef
get_postcoefs <- function(portfo){
my_dat <- prerank_betas %>%
filter(portfo == portfo) %>%
lm(ret ~ ewr, my_dat) %>%
coef %>%
as.list %>%
as_data_frame
}
postrank <- prerank_betas %>%
group_by(portfo) %>%
do(get_postcoefs(.$portfo))
postrank <- prerank_betas
postbetas <- data.frame(LowB = numeric(),
B2 = numeric(),
B3 = numeric(),
B4 = numeric(),
B5 = numeric(),
B6 = numeric(),
B7 = numeric(),
B8 = numeric(),
B9 = numeric(),
B10 = numeric())
size <- data.frame(LowB = numeric(),
B2 = numeric(),
B3 = numeric(),
B4 = numeric(),
B5 = numeric(),
B6 = numeric(),
B7 = numeric(),
B8 = numeric(),
B9 = numeric(),
B10 = numeric())
# Get Post Rank Betas
for (i in unique(postrank$portfo)){
for (j in unique(postrank$beta_rank)) {
frame <- filter(postrank, portfo == i & beta_rank == j)
postbetas[[i,j]] <- as.numeric(lm(ret ~ ewr, data = frame)$coefficients[2]*100)
}
}
# All post rank betas for portfolio rank
for (i in unique(postrank$portfo)){
frame <- filter(postrank, portfo == i)
betas[[i]] <- lm(ret ~ ewr, data = frame)$coefficients[2]
}
# Get all post rank betas for beta rank
for (i in unique(postrank$beta_rank)){
frame <- filter(postrank, beta_rank == i)
betas[[i]] <- lm(ret ~ ewr, data = frame)$coefficients[2]
}
# Get all betas for portfo data frame
for (i in unique(postrank$portfo)){
frame <- filter(postrank, portfo == i & beta_rank == j)
size[[i,j]] <- mean(log(frame$me))
}
# Get all betas for beta data frame
for (i in unique(postrank$beta_rank)){
frame <- filter(postrank, portfo == i & beta_rank == j)
size[[i,j]] <- mean(log(frame$me))
}
}
portsize <- postrank %>%
group_by(portfo) %>%
summarize(sizeme = mean(log(me)))
portsize <- postrank %>%
group_by(beta_rank) %>%
summarize(sizeme = mean(log(me)))
### NOT FINISHED
#----------------------------------------
# (**) CRSP and Compustat Data Merge
#----------------------------------------
setwd
Fama-French-Replication.R_三因子模型_
5星 · 超过95%的资源 51 浏览量
2021-10-02
16:30:51
上传
评论 2
收藏 4KB ZIP 举报
呼啸庄主
- 粉丝: 74
- 资源: 4702
评论9