Author

Hongyi Xu

Published

July 3, 2024

Modified

July 14, 2024

> Back to research

S1. Objective

This file follows file “DealScan_Data_Renegotiation.html” and file “DealScan_Loan_Path.qmd”. The aim of this file is to extract the loan path and I also further clean the file Link_table_2024.RData to gather all pair of identifiers for a unique firm over its life-cycle.

Here is a notes on the WRDS Financial Covenants Table. The document there provides names and definitions on each financial covenants terms in the new DealScan data.

Codes and variables in S3. Connect and build the loan paths are updated relative to file “DealScan_Loan_Path.qmd”.

S2. Firm’s Life-cycle Identifier

In order to identify the loan path (or deal path), I first need to identify the life-cycle of a firm’s identifiers, including gvkey, Borrower_Id, borrowercompanyid, etc. So, I start with the link table created in the first section > HTML Manual.

Show the code
if (!file.exists("dt.Link_table_2024.RData")) {
  ## import dataset 
  load("Link_table_2024.RData")
  
  ## get the new company identifiers from network analysis 
  Link_table.newgroups <- Link_table %>% 
    group_or(id1 = "gvkey", id2 = "borrowercompanyid")
  ## join back to the original dataset 
  dt.Link_table <- Link_table %>%
    mutate(facilitystartdate = as.Date(facilitystartdate),
           `LoanConnector Company ID` = as.integer(`LoanConnector Company ID`)
           ) %>% 
    left_join(Link_table.newgroups, by = c("gvkey", "borrowercompanyid")) %>% 
    select(starts_with("facility"), gvkey_group1, everything(.)) 
  
  ## export the updated link table 
  save(dt.Link_table, file = "dt.Link_table_2024.RData")
} else {
  load("dt.Link_table_2024.RData")
} 

## a preview 
dt.Link_table %>% 
  arrange(facilitystartdate) %>%
  filter(gvkey_group1 == 22) %>% 
  gt() %>% 
  tab_header(title = "New Link table for Aeroflex Inc. (previously known as ARX Inc.)", 
             subtitle = html('<span class="red-star">★</span>Now the variable `gvkey_group1` is used to identify all loan level observations from the same firm.')
             ) %>% 
  tab_options(table.font.size = 10, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(10)),
    locations = cells_body()
  ) %>% 
  data_color(
    columns = c("gvkey", "borrowercompanyid"),
    method = "factor",
    palette = "viridis", 
    reverse = TRUE
    ) %>% 
  cols_hide(columns = matches("score|match") ) # hide columns
New Link table for Aeroflex Inc. (previously known as ARX Inc.)
Now the variable `gvkey_group1` is used to identify all loan level observations from the same firm.
facilityid facilitystartdate gvkey_group1 gvkey borrowercompanyid ds_company cs_company ds_ticker cs_ticker Company Name LoanConnector Company ID
5171 1989-04-24 22 1056 2463 ARX Inc ARX INC NA ARX ARX Inc 31224
10226 1991-10-10 22 1056 2463 ARX Inc ARX INC NA ARX ARX Inc 31224
10227 1991-10-10 22 1056 2463 ARX Inc ARX INC NA ARX ARX Inc 31224
27968 1994-04-11 22 1056 2463 ARX Inc ARX INC NA ARX ARX Inc 31224
27969 1994-04-11 22 1056 2463 ARX Inc ARX INC NA ARX ARX Inc 31224
62943 1996-03-15 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARX Aeroflex Inc 8532
62944 1996-03-15 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARX Aeroflex Inc 8532
67177 1999-02-25 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARX Aeroflex Inc 8532
67178 1999-02-25 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARX Aeroflex Inc 8532
67179 1999-02-25 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARX Aeroflex Inc 8532
145204 2003-02-14 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
145320 2003-02-14 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
194507 2006-03-21 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
214917 2007-04-13 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
214918 2007-04-13 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
214919 2007-04-13 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
219384 2007-08-15 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
221951 2007-08-15 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
223289 2007-08-15 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
269533 2011-05-09 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
269534 2011-05-09 22 1056 30274 Aeroflex Inc AEROFLEX INC NA ARXX Aeroflex Inc 8532
303619 2013-05-28 22 184639 30274 Aeroflex Inc AEROFLEX HOLDING CORP NA ARX Aeroflex Inc 8532
304052 2013-05-28 22 184639 30274 Aeroflex Inc AEROFLEX HOLDING CORP NA ARX Aeroflex Inc 8532
  • An example from the web to use network analysis for more granular grouping assignments.
Show the code
# Load the necessary libraries
library(dplyr)
library(igraph)

# Manually generating data
Names <- c(rep('James,Gordon',6)
          ,rep('Amanda',3)
          ,rep('Gordon,Amanda',8)
          ,rep('Edward,Gordon,Amanda',3)
          ,rep('Anna',3)
          ,rep('Anna,Leonard',3)
          )
Initial_Group <- rep(1:6,c(6,3,8,3,3,3))
Final_Group <- rep(c('A','B'),c(20,6))
data <- data.frame(Names,Initial_Group,Final_Group)

data %>%
  select(Names)%>%
  distinct() %>%
  separate(Names, c('first', 'second'), extra = 'merge', fill = 'right')%>%
  separate_rows(second) %>%
  mutate(second = coalesce(second, as.character(cumsum(is.na(second))))) %>%
  graph_from_data_frame()%>%
  components() %>%
  getElement('membership') %>%
  imap(~str_detect(data$Names, .y)*.x) %>%
  invoke(pmax, .) %>% # `invoke` is applied to apply function to the same location in each element under a list 
  cbind(data, value = LETTERS[.], value1 = .) 

In the above example for Aeroflex Inc. (previously known as ARX Inc.), one company may match multiple combinations of two identifiers - gvkey and borrowercompanyid/LoanConnector Company ID - and I recreate a new identifier gvkey_group1 to uniquely identify one firm’s life-cycle in the dataset. borrowercompanyid is the borrower identifier in the Legacy DealScan dataset and LoanConnector Company ID is for the new LPC DealScan.

In the current version of the link table, dt.Link_table, facility/tranche level information is also included. A cleaner version of the link table will only include borrower level observations.

One additional thing to notice is that although no missing values observed in variable borrowercompanyid (and also gvkey), LoanConnector Company ID contains missing value, around 66 observations.

S3. Connect and build the loan paths

Here are the steps to achieve this:

  1. load the dataset for the new link table - dt.Link_table and for all Deal-Tranches with borrower information - DealScan_borrower. The dataset dt.name_match is used to guide the selection of variables for specific analyses. > refer to section s3.4 in this document.

  2. Cross validate the changes in the loan amount using the item “Cash flows from financing activities” from the “CONSOLIDATED STATEMENTS OF CASH FLOWS”.

    One example from JACO ELECTRONICS INC (gvkey_group1 == 1894) and its SEC filing. Search Term Loan in the filing and you will much more renegotiations than ones recorded in the DealScan.

Also in 10-K/Q !!!

Alternatively, go to the 10-K/Q and find the information about all debts in the Note section. Normally this information is in the Exhibit.

E.g. Form 10-K from Nvidia listed its detailed debt profile in its Note 12 - Debt.

Let’s start!

Condense all remarks - Purpose_Remark,Deal_Remark,Tranche_Remark,Performance_Pricing_Remark - into varible All_Remark.

Show the code
## load the necessary tables
if (!exists("dt.Link_table")) {
  ## check whether the variable `dt.Link_table` is there. > if yes, then .... 
  load(file = "dt.Link_table_2024.RData")
} 

# # file <- "DealScan_borrower.Rdata"
# # from <- "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Data_Renegotiations_Jun2024"
# # to <- "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Loan_Path_Jun2024"
{ # Procedure to check and copy necessary files: 
  copy_file(from = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Data_Renegotiations_Jun2024", to = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Loan_Path_Jun2024", file = "DealScan_borrower.Rdata")
  
  copy_file(from = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Data_Renegotiations_Jun2024", to = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Loan_Path_Jun2024", file = "DealScan_name_match.RData") 
}
[1] TRUE
Show the code
## load the dealscan data for only borrowers information > DealScan_borrower 
load(file = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Data_Renegotiations_Jun2024/DealScan_borrower.RData") 
### load the dealscan variable names > dt.name_match 
load(file = "~/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Projects_2024/Dealscan_LinkTable/DealScan_Data_Renegotiations_Jun2024/DealScan_name_match.RData") 

name_covenants_17 <- dt.name_match %>% # financial covenants 
  filter(grepl(pattern = "^17:", x = group_header)) %>% 
  .$orig_name

name_covenants_19 <- dt.name_match %>% # general covenants 
  filter(grepl(pattern = "^19:", x = group_header)) %>% 
  .$orig_name

name_renegotiation <- c( # names for the 
    grep(pattern = "LPC|_O_A", x = names(DealScan_borrower), value = T), 
    grep(pattern = "Amended", x = names(DealScan_borrower), value = T), 
    grep(pattern = "Refinan", x = names(DealScan_borrower), value = T)
  )

## link the two data sets 
dt.DealScan_borrower <- DealScan_borrower %>% 
  select(Borrower_Id, Borrower_Name, 
         all_of(name_renegotiation), # all flags for different kind of renegotiations. 
         grep(x = names(.), pattern = "Matur", ignore.case = T, value = T), 
         "Deal_Amount", "Tranche_Amount", 
         "Deal_Active_Date", "Deal_Input_Date", "Tranche_Active_Date", 
         "Tranche_Type", "Deal_Purpose", 
         grep(pattern = "Remark", x = names(.), value = T)
         ) %>% 
  as_tibble() %>% 
  mutate(across(contains("Date"), as.Date)) %>% 
  unite("All_Remark", contains("Remark"), sep = "<br>", na.rm = T, remove = F) %>% 
  mutate(
    All_Remark_am = str_replace_all(string = All_Remark, pattern = "(?i)\\bAmend(ment)? Fee\\b", replacement = ""), 
    Event_amend = grepl(x = All_Remark_am, pattern = "amend", ignore.case = T), # whether the term 'amend' is detected 
    Event_restate = grepl(x = All_Remark, pattern = "restate", ignore.case = T), # whether the term 'restate' is detected 
    Event_refin = grepl(x = All_Remark, pattern = "refin", ignore.case = T) # whether the term 'refinancing' is detected 
  ) %>% 
  mutate(Event_renegotiation = (Event_amend | Event_amend | Event_refin) ) # whether there exists an events identified by contents in `Deal_Remark` 
### The last several lines collect information about whether an amend or restate or refinance event happen. 
### The `Event_renegotiation` variable detect whether a renegotiation event happens. 
### Also, I check whether a loan is a refinancing of a previous loan. 
# names(dt.DealScan_borrower) 
# names(dt.Link_table)

Here a summary for the variables Event_amend, Event_restate,Event_refin and Event_renegotiation under each group classified by amendment/refinancing flags in DealScan.

Show the code
# to identify different types of contracts which are defined as "Origination". 
dt.DealScan_borrower %>% 
  filter(grepl(pattern = "Orig", x = Tranche_O_A)) %>% # only keep tranches classified as "Origination"
  group_by(Deal_Amended, Tranche_Amended, Deal_Refinancing, Tranche_Refinancing) %>% 
  summarise(n = n(), 
            n_Event_amend = sum(Event_amend), 
            n_Event_restate = sum(Event_restate), 
            n_Event_refin = sum(Event_refin), 
            n_Event = sum(Event_renegotiation)  
            ) %>% # count the number of each types 
  ungroup() %>% 
  mutate(percentage_n = (n / sum(n))) %>% 
  gt() %>% 
  tab_header(title = "Table 2a: WRDS DealScan Deal-Tranche Level Loan Events", 
             subtitle = "Subgroup: `Tranche_O_A` = Origination"
             ) %>% 
  tab_options(table.font.size = 11, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(11)),
    locations = cells_body()
  ) %>% 
  fmt_percent(columns = percentage_n, decimals = 2) 
Table 2a: WRDS DealScan Deal-Tranche Level Loan Events
Subgroup: `Tranche_O_A` = Origination
Deal_Amended Tranche_Amended Deal_Refinancing Tranche_Refinancing n n_Event_amend n_Event_restate n_Event_refin n_Event percentage_n
No No No 240423 5615 2583 8154 13475 62.95%
No No No No 2573 34 7 66 97 0.67%
No No No Yes 2079 26 4 893 914 0.54%
No No Yes 68556 8492 6052 28273 35966 17.95%
No No Yes No 72 4 2 39 42 0.02%
No No Yes Yes 27318 718 261 16948 17401 7.15%
Yes No No 568 8 0 20 28 0.15%
Yes No No No 11 0 0 0 0 0.00%
Yes No No Yes 3 0 0 0 0 0.00%
Yes No Yes 450 63 33 245 287 0.12%
Yes No Yes Yes 82 8 6 46 54 0.02%
Yes Yes No 19375 686 318 688 1295 5.07%
Yes Yes No No 544 7 3 16 23 0.14%
Yes Yes No Yes 233 11 2 66 77 0.06%
Yes Yes Yes 15319 2564 1928 8072 10275 4.01%
Yes Yes Yes No 28 3 2 8 11 0.01%
Yes Yes Yes Yes 4304 388 245 2451 2739 1.13%
Show the code
# to identify different types of contracts which are defined as "Amendment 1". 
dt.DealScan_borrower %>% 
  filter(grepl(pattern = "Amendment", x = Tranche_O_A)) %>% # only keep tranches classified as "Origination"
  group_by(Deal_Amended, Tranche_Amended, Deal_Refinancing, Tranche_Refinancing) %>% 
  summarise(n = n(), 
            n_Event_amend = sum(Event_amend), 
            n_Event_restate = sum(Event_restate), 
            n_Event_refin = sum(Event_refin), 
            n_Event = sum(Event_renegotiation)  
            ) %>% # count the number of each types 
  ungroup() %>% 
  mutate(percentage_n = (n / sum(n))) %>% 
  gt() %>% 
  tab_header(title = "Table 2b: WRDS DealScan Deal-Tranche Level Loan Events", 
             subtitle = "Subgroup: `Tranche_O_A` = Amendment(s)"
             ) %>% 
  tab_options(table.font.size = 11, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(11)),
    locations = cells_body()
  ) %>% 
  fmt_percent(columns = percentage_n, decimals = 2)
Table 2b: WRDS DealScan Deal-Tranche Level Loan Events
Subgroup: `Tranche_O_A` = Amendment(s)
Deal_Amended Tranche_Amended Deal_Refinancing Tranche_Refinancing n n_Event_amend n_Event_restate n_Event_refin n_Event percentage_n
Yes No 4173 841 1 13 847 3.47%
Yes No No 24 12 0 1 12 0.02%
Yes No Yes 47 39 0 3 39 0.04%
Yes Yes 2507 845 38 268 915 2.09%
Yes Yes No 3 3 0 1 3 0.00%
Yes Yes Yes 826 580 12 174 620 0.69%
Yes Yes No 34180 3670 154 346 3957 28.44%
Yes Yes No No 911 260 2 9 266 0.76%
Yes Yes No Yes 2247 800 20 49 818 1.87%
Yes Yes Yes 52319 8250 1527 8961 15792 43.53%
Yes Yes Yes No 652 220 20 99 276 0.54%
Yes Yes Yes Yes 22294 8757 1058 2763 10215 18.55%

Now we merge the DealScan deal-tranche information with the link table and try to extract the loan path. To facilitate data analysis, I create a new variable obs_id, which is the id for each observation in the dataset, and will be used as the sole identifier in subsequent data mergers.

Also, I create a new firm-deal level identifier firm_deal_id to facilitate the data cleaning process. Note that this is more of a “firm-deal-time id”.

Show the code
# merge the deal/tranche information with the link table. 
### direct merge creates the issue of multiple-to-multiple matching. > solve this issue by matching at the deal, not tranche, level. 
dt.DealScan_borrower_Linked <- dt.DealScan_borrower %>% 
  left_join(distinct(select(dt.Link_table, -c("facilityid", contains("ticker")))), # collect deal-level 
            by = join_by(Borrower_Id == `LoanConnector Company ID`,
                         Tranche_Active_Date == facilitystartdate) )
# dt.DealScan_borrower_Linked %>% filter(Borrower_Id == 17400)
# dt.DealScan_borrower_Linked %>% filter(gvkey_group1 == 9721) 
# names(dt.DealScan_borrower_Linked) 

# firm-deal-tranche-time level data 
cat("I use the self-created `gvkey_group1` variable as the unique company identifer. Observations are at the firm-deal-tranche-time level.") 

I use the self-created gvkey_group1 variable as the unique company identifer. Observations are at the firm-deal-tranche-time level.

Show the code
# dt.DealScan_borrower_unifirm <- 
dt.DealScan_uniborrower <- 
  dt.DealScan_borrower_Linked %>% 
  group_by(gvkey_group1) %>% # for each unique firm over the life-cycle 
  arrange(Tranche_Active_Date) %>%  # %>% # arrange by its tranche active date 
  rowid_to_column(var = "obs_id") %>% # a new id for each observation in the data 
  mutate(
    # a. use LPC flags to identify renegotiations *** 
    # create the amendment flag based on `Tranche_O_A` and `Deal_Amended` or `Deal_Refinancing` information: 
    flag_renegotiation = ifelse(
      # the selection criterion: Deal_Refinancing == "Yes" might need further examinations.
      grepl(pattern = "Amendment", x = Tranche_O_A, fixed = T) |
        (!grepl(pattern = "Amendment", x = Tranche_O_A, fixed = T) & 
           (Deal_Amended == "Yes" | Deal_Refinancing == "Yes") ), 
      yes = TRUE, no = FALSE 
    ), 
    # b. separate the origination/amendment from `Tranche_O_A`
    Orig_Yes = ifelse(grepl(pattern = "Orig", x = Tranche_O_A), 
                           "`Tranche_O_A` = Origination", 
                           "`Tranche_O_A` = Amendment")
  ) 

# plot the distribution of firms with the number of observations in the sample 
dt.DealScan_uniborrower %>% # group firms by the `gvkey_group1` variable 
  summarise(n = n()) %>% # number of observations for each firm
  ungroup() %>% 
  ggplot(., ) + geom_bar(aes(n)) +  # + scale_y_continuous(transform = "log10")
  labs(title = "Distribution of Loan Observations for Firms", 
       subtitle = "restricted to max 100 observations") + 
  scale_x_continuous(limits = c(0, 100)) 

Show the code
# ======================================================================== (July 3, 2024) 
## Look into the file > "dealscan_loan_path_test.R" 
# analyse at the firm-deal-time level (not tranche level) > always for the same firm
dt.DealScan_uniborrower_deal <- 
  dt.DealScan_uniborrower %>% # grouped by `gvkey_group1` -> 
  # as if the deal is just amended/refinaced, `LPC_Deal_ID` won't be updated. -> 
  # in other words, all loans from the same firm/borrower a, recorded under the same deal i, at date `Tranche_Active_Date` 
  # will be considered under the same unique `package`. 
  group_by(Tranche_Active_Date, LPC_Deal_ID, .add = T) %>% # group at the firm-deal-time level 
  summarise(
    # 1. Aggregated Tranche Level Info: 
    Tranche_Info = list(
      distinct(
        data.frame(
          # a. store `LPC_Tranche_ID` information 
          Deal_Tranche_IDs = LPC_Tranche_ID, 
          # b. store `Tranche_Type` information 
          Tranche_Types = Tranche_Type, 
          # c. store `All_Remark` information 
          All_Remarks = All_Remark 
        ) 
      ) ## only keeps the unique loan information to remove potential repetition from previous "join" actions. 
    ), ## filter out information > rowwise(): This function is used to perform operations row by row. 
    # 2. Collect renegotiation flags: 
    flag_renegotiation = any(flag_renegotiation), ## check whether contain TRUE! 
    Event_renegotiation = any(Event_renegotiation), 
    Event_all = list(
      data.frame(
        Event_amend = any(Event_amend), 
        Event_restate = any(Event_restate), 
        Event_refin = any(Event_refin) 
      ) ## record the detail of all `Events` extracted from `All_Remark`!!!  
    ), 
    Orig_Yes = unique(Orig_Yes)
  ) %>% 
  # sort by the tranche observation time within each firm 
  arrange(gvkey_group1, Tranche_Active_Date) %>% 
  # this dataset creates a unique profile/package for firm-deal-time observation. 
  rowid_to_column(var = "deal_time_id") %>% # create a new id for each observation in the data - at the `firm-deal-tim level` 
  ungroup() %>% 
# now for each deal-time observation, 
# dt.DealScan_uniborrower_deal %>% head(50) %>% tail(10) %>% unnest_wider(col = Tranche_Info)
# dt.check <- dt.DealScan_uniborrower_deal %>% 
  unnest_wider(col = Tranche_Info) %>% # split the three columns in the `Tranche_Info`. 
  rowwise() %>% ## perform function on each row 
  mutate(
    # linked information from the `All_Remark`: 
    linkedinfo_Remark = list((
      str_sentence_extract(str = (All_Remarks), 
                           pattern_sentence = "(?<=(\\.|\\?|!))\\s{1,}(?=[A-Z])|<br>", 
                           pattern_text = "amend|restat|refinan" ) 
    ))
  )
`summarise()` has grouped output by 'gvkey_group1', 'Tranche_Active_Date'. You
can override using the `.groups` argument.
Show the code
# ======================================================================== (July 3, 2024) 
# Firm a, Deal i, Tranche j, at Time t. 
dt.DealScan_uniborrower_aijt <- dt.DealScan_uniborrower_deal %>% # firm a, deal i, tranche j, at time t.  
  # extract all unique date information 
  mutate(linked_record_time = str_date_extract(str = linkedinfo_Remark,
                                               date_format = "\\d+/\\d+/\\d+") ) %>% 
  # break into different rows 
  separate_rows(linked_record_time, sep = ";") %>% 
  distinct() %>% 
  mutate(related_time = as.Date(x = linked_record_time, format = "%m/%d/%y"), 
         time_low = related_time - 15, 
         time_high = related_time + 15 # the upper and lower limit for the deal time to match
         ) %>% 
  filter(!is.na(related_time) & !is.na(gvkey_group1)) %>% 
  # filter out those linked to its own (e.g. gvkey_group1 == 2933 -> deal_time_id = 24170)
  filter(Tranche_Active_Date != related_time) %>% 
  select(Linked_deal_time_id = deal_time_id, # linked to a specific deal-time id 
         gvkey_group1, # join identifier 
         Linked_Tranche_Active_Date = Tranche_Active_Date, 
         Linked_LPC_Deal_ID = LPC_Deal_ID, 
         related_time, time_low, time_high) # join identifier 

# link Deal i, Tranche j, at Time t (_ijt) to the deal before and after: 
## the join criterion: for the same firm, if its active date is within 15 days. 
by_deal_time <- join_by(gvkey_group1,
                        between(Tranche_Active_Date, time_low, time_high))
## make the manual matching: 
dt.manual_renegotiation <- dt.DealScan_uniborrower_deal %>% 
  # remove firms without `gvkey` 
  filter(!is.na(gvkey_group1)) %>% 
  inner_join(dt.DealScan_uniborrower_aijt, by = by_deal_time) %>% 
  mutate( # create a new identifier to manually track loan path from `Deal_Remark`: 
    ## record the original, unique firm-deal id if the `Linked_Tranche_Active_Date` comes after the `Linked_Tranche_Active_Date`. 
    ## and record the linked firm-deal id if otherwise. 
    id_before = ifelse(Tranche_Active_Date < Linked_Tranche_Active_Date,
                       yes = deal_time_id, no = Linked_deal_time_id), # the previous id 
    id_after = ifelse(Tranche_Active_Date < Linked_Tranche_Active_Date,
                      yes = Linked_deal_time_id, no = deal_time_id), # the id for the deal that linked to `id_before`
    id_loc_linked = ifelse(Tranche_Active_Date < Linked_Tranche_Active_Date,
                      yes = "after", no = "before") # the indicator for which one is the linked ID: if the linked deal time is later, it will be placed in "after" and vice versa. 
  ) %>% 
  group_by(id_before, id_after) %>% 
  mutate(n = n()) %>% 
  ungroup() 

### construct a more refined list of matching between firms: 
dt.manual_idonly <- dt.manual_renegotiation %>% 
  select(id_before, id_after, id_loc_linked, n) %>% 
  distinct() %>% 
  filter(id_before != id_after) %>% 
  group_by(id_before, id_after) %>% 
  summarise(
    loc_linked = sum(id_loc_linked %in% c("before", "after") ), # store all linked info 'after'/'before' 
    id_type = ifelse(loc_linked == 2, yes = "twoway", no = "oneway") 
    # id_type = 
  ) %>% 
  ungroup() 
`summarise()` has grouped output by 'id_before'. You can override using the
`.groups` argument.
Show the code
  # filter(id_before == 2260) 
  ## "twoway": deals are basically related, although activated at different time. (e.g. deal_time_id = 6 & 7)
  ## "oneway": deals are linked by only one direction, thus a renegotiation. 

# ======================================================================== (July 4, 2024) 
# merge the manually identified renegotiated contracts back to the full dataset. 
dt.DealScan_uniborrower_deal_updated <- 
  dt.DealScan_uniborrower_deal %>% 
  left_join(
    filter(dt.manual_idonly, id_type == "oneway") %>% 
      select(id_before, id_after) %>% 
      ## createe a matched sample of id(s) to one firm-deal-time observation in the main data === (July 7, 2024)
      group_by(id_after) %>% 
      summarise(id_before = list(id_before)) %>% 
      ungroup() %>% 
      ## add an identifier 
      mutate( flag_manual_renegotiation = TRUE ) , 
    by = join_by(deal_time_id == id_after) 
  ) %>% 
  ## create a more accurate flag 
  mutate(flag_manual_renegotiation = replace_na(flag_manual_renegotiation, replace = FALSE)) %>% 
  mutate(
    flag_Renegotiated = (Event_renegotiation | flag_renegotiation | flag_manual_renegotiation)
  ) 

## check the renegotiations 
# dt.DealScan_uniborrower_deal_updated %>% select(deal_time_id, gvkey_group1, Tranche_Active_Date, flag_renegotiation, Event_renegotiation, flag_manual_renegotiation, flag_Renegotiated)

# ======================================================================== (June 19, 2024)
# ========================================================================
# gvkey_group1 == 7089; 1894; 1392
Warning 1: Identify Loan Renegotiations!

In newly generated data dt.DealScan_uniborrower_deal, the criterion to generate the new variable flag_renegotiation might need more detailed examination. This variable is created to detect whether an amendment happens, even when the Tranch_O_A variable gives "Origination".

Here is a summary table showing the mapping between the amendment/refinancing flags and the flag_renegotiated I created for the firm-deal-time level dataset - dt.DealScan_uniborrower_deal_updated.

Show the code
# to identify different types of contracts which are defined as "Origination".
## for all observations - both public and private. 
dt.flg_Rng <- dt.DealScan_uniborrower_deal_updated %>% 
  # filter(!is.na(gvkey_group1)) %>% 
  group_by(Orig_Yes, 
           flag_renegotiation, Event_renegotiation, flag_manual_renegotiation, 
           flag_Renegotiated) %>% 
  summarise(n = n() # ,
            # RngYes = unique(flag_Renegotiated) # for the renegotiation identifier. 
            ) %>% # count the number of each types 
  ungroup() %>% 
  mutate(percentage_n = (n / sum(n))) 

dt.flg_Rng %>% 
  gt(groupname_col = "Orig_Yes") %>% 
  tab_header(title = "Table 3b: WRDS DealScan Deal-Time Level Loan Events", 
             subtitle = html("A summary statistics to show the results of my identifier: `flag_renegotiated`.<br>
                           All firm-deal-time observations <u>are from the full sample</u>, including both public and private firms.")
             ) %>% 
  tab_options(table.font.size = 11, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(11)),
    locations = cells_body()
  ) %>% 
  fmt_percent(columns = percentage_n, decimals = 2)  
Table 3b: WRDS DealScan Deal-Time Level Loan Events
A summary statistics to show the results of my identifier: `flag_renegotiated`.
All firm-deal-time observations are from the full sample, including both public and private firms.
flag_renegotiation Event_renegotiation flag_manual_renegotiation flag_Renegotiated n percentage_n
`Tranche_O_A` = Amendment
TRUE FALSE FALSE TRUE 43586 13.85%
TRUE FALSE TRUE TRUE 51 0.02%
TRUE TRUE FALSE TRUE 14278 4.54%
TRUE TRUE TRUE TRUE 1118 0.36%
`Tranche_O_A` = Origination
FALSE FALSE FALSE FALSE 163159 51.83%
FALSE FALSE TRUE TRUE 192 0.06%
FALSE TRUE FALSE TRUE 8137 2.59%
FALSE TRUE TRUE TRUE 1028 0.33%
TRUE FALSE FALSE TRUE 42740 13.58%
TRUE FALSE TRUE TRUE 78 0.02%
TRUE TRUE FALSE TRUE 30483 9.68%
TRUE TRUE TRUE TRUE 9919 3.15%

One additional thing to note is to check refinancing deals in the data. One way to distinguish between refinancing b/c previous loans mature and refinancing b/c other reasons, which is classified as renegotiation in my analysis.

S4. Clean the loan renegotiation data

Then, I will create a more detailed flag_Renegotiated variable in the dataset to more thoroughly cover all renegotiations at the firm-deal level.

A recap about the definition of different renegotiation flags:

  • flag_renegotiation: detected renegotiations based on the amendment/refinancing/_O_A flags in DealScan.
  • Event_renegotiation: word detection in the variable Deal_Remark.
  • flag_manual_renegotiation: manually matched renegotiated deals based on Deal_Remark.
  • flag_Renegotiated: a more refined flag for renegotiations.
Warning 2: Still! A lot of renegotiations are missing.

For company COLORADO INTERSTATE GAS COMPANY, L.L.C., it is previously known as “COLORADO INTERSTATE CORP” (ex 1976/01/19) and “COLORADO INTERSTATE GAS CO” (ex 1992/07/03) and was subsequently potentially (?) merged with KINDER MORGAN, INC, in maybe 2014. The firm is a subsidiary of El Paso Corporation (NYSE: EP).

Even with these, there are still a lot of missing amendment/restatements in the DealScan. E.g. this 8-K. However, this issue may come from the missing family-subsidiary connections and I need to rebuild this.

Here is a summary on the flag_Renegotiated:

Show the code
# to identify different types of contracts which are defined as "Origination".
## for all observations - both public and private. 
dt.flg_Rng_summary <- dt.DealScan_uniborrower_deal_updated %>% 
  # filter(!is.na(gvkey_group1)) %>% 
  group_by(Orig_Yes) %>% 
  summarise(n = n(), 
            n_RngYes = sum(flag_Renegotiated == TRUE), 
            n_RngNo = sum(flag_Renegotiated == FALSE), 
            ) %>% # count the number of each types 
  ungroup() %>% 
  mutate(percentage_n = (n / sum(n))) 

dt.flg_Rng_summary %>% 
  gt(groupname_col = "Orig_Yes") %>% 
  tab_header(title = "Table 3: WRDS DealScan Deal-Time Level Loan Events", 
             subtitle = html("A summary statistics to show the results of my identifier: `flag_renegotiated`.<br> All firm-deal-time observations <u>are from the full sample</u>, including both public and private firms.") 
             ) %>% 
  tab_options(table.font.size = 11, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(11)),
    locations = cells_body()
  ) %>% 
  fmt_percent(columns = percentage_n, decimals = 2)  
## for public firms only - with a valid GVKEY & gvkey_group1 identifier. 
dt.flg_Rng_summary_publiconly <- dt.DealScan_uniborrower_deal_updated %>% 
  filter(!is.na(gvkey_group1)) %>%
  group_by(Orig_Yes) %>% 
  summarise(n = n(), 
            n_RngYes = sum(flag_Renegotiated == TRUE), 
            n_RngNo = sum(flag_Renegotiated == FALSE), 
            ) %>% # count the number of each types 
  ungroup() %>% 
  mutate(percentage_n = (n / sum(n))) 

dt.flg_Rng_summary_publiconly %>% 
  gt(groupname_col = "Orig_Yes") %>% 
  tab_header(title = "Table 3b: WRDS DealScan Deal-Time Level Loan Events", 
             subtitle = html("A summary statistics to show the results of my identifier: `flag_renegotiated`.<br>
                           Firm-deal-time observations <u>are for public firms only</u>, i.e. with a valid GVKEY.")
             ) %>% 
  tab_options(table.font.size = 11, heading.align = 'left' ) %>% 
  tab_style( # update the font size for table cells. 
    style = cell_text(size = px(11)),
    locations = cells_body()
  ) %>% 
  fmt_percent(columns = percentage_n, decimals = 2)  
Table 3: WRDS DealScan Deal-Time Level Loan Events
A summary statistics to show the results of my identifier: `flag_renegotiated`.
All firm-deal-time observations are from the full sample, including both public and private firms.
n n_RngYes n_RngNo percentage_n
`Tranche_O_A` = Amendment
59033 59033 0 18.75%
`Tranche_O_A` = Origination
255736 92577 163159 81.25%
Table 3b: WRDS DealScan Deal-Time Level Loan Events
A summary statistics to show the results of my identifier: `flag_renegotiated`.
Firm-deal-time observations are for public firms only, i.e. with a valid GVKEY.
n n_RngYes n_RngNo percentage_n
`Tranche_O_A` = Amendment
14552 14552 0 11.30%
`Tranche_O_A` = Origination
114237 51565 62672 88.70%

From Table 3, we can see that all firm-loan-time observations with flagged as Tranche_O_A = Amendment are correctly classified as the renegotiation. For observations classified into Tranche_O_A = Origination, which consists of 81% of total observations, around 37% of those is actually a renegotiation. If using the new flag_Renegotiated variable created in my dataset, the total percentage of observations classified as renegotiations will go up to 19% from 48.17%. This is a huge increase in the actual frequency of debt renegotiation. Please note that Table 3 includes the firm-loan-time observations for both public (with gvkey_group1) and private firms (w/o GVKEY).

The results for only public firms is in Table 3b and are quantitatively similar. The total percentage of observations classified as renegotiations will go up from 11% to 51.34%, after using my identifier flag_renegotiated. Please note that using obervations from public firms in DealScan data has two potential issues: (1) the sample size is halved and (2) not all US public firms’ observations are included. For the first point, firms not in US do not have GVKEY. For the second point, going directly to the 8-K, 10-K and 10-Q SEC filings will give us a better picture about the renegotiations. However, these are not urgent at the current stage.

Then I try to construct the path for each firm-deal-tranche/loan observation for only US public firms and store in the new dataset - dt.DealScan_uniborrow_public.

Show the code
# ==================================================================================== (July 6, 2024) 
## refine the updated firm-deal-time level data: 
## prepare to create a better match with the `dt.DealScan_uniborrower` dataset: 
dt.DealScan_uniborrower_deal_updated2 <- 
  select(dt.DealScan_uniborrower_deal_updated,
         -c(All_Remarks, Event_all, linkedinfo_Remark, Orig_Yes) ) %>% 
  # create a yearmonth index for the firm-deal-time level DealScan data. 
  mutate(Tranche_Active_YM = as.yearmon(Tranche_Active_Date)) %>% 
  unnest_longer(c(Deal_Tranche_IDs, Tranche_Types) ) %>% 
  rename(LPC_Tranche_ID = Deal_Tranche_IDs, Tranche_Type = Tranche_Types)

# the new firm-deal-tranche-time level data for only US public firms:
dt.DealScan_uniborrow_public <- dt.DealScan_uniborrower %>% 
  ungroup() %>% 
  # keep only US public firms with GVKEY: 
  filter(!is.na(gvkey_group1)) %>% 
  left_join(dt.DealScan_uniborrower_deal_updated2,
            by = join_by(gvkey_group1, Tranche_Active_Date, 
                         LPC_Deal_ID, LPC_Tranche_ID, Tranche_Type, 
                         Event_renegotiation, flag_renegotiation) ) %>% 
  select(obs_id, deal_time_id, gvkey_group1, gvkey, everything() )
  
cat("Here are the new variable names for the cleaned dataset:")

Here are the new variable names for the cleaned dataset:

Show the code
dt.DealScan_uniborrow_public %>% names() %>% sort() 

[1] “All_Remark” “All_Remark_am”
[3] “Borrower_Id” “Borrower_Name”
[5] “borrowercompanyid” “Company Name”
[7] “confidence_score” “cs_company”
[9] “Deal_Active_Date” “Deal_Amended”
[11] “Deal_Amount” “Deal_Input_Date”
[13] “Deal_Purpose” “Deal_Refinancing”
[15] “Deal_Remark” “deal_time_id”
[17] “ds_company” “Event_amend”
[19] “Event_refin” “Event_renegotiation”
[21] “Event_restate” “flag_manual_renegotiation” [23] “flag_Renegotiated” “flag_renegotiation”
[25] “gvkey” “gvkey_group1”
[27] “id_before” “LPC_Deal_ID”
[29] “LPC_Tranche_ID” “obs_id”
[31] “Orig_Yes” “Performance_Pricing_Remark” [33] “Purpose_Remark” “score_company_match”
[35] “Tenor_Maturity” “Tranche_Active_Date”
[37] “Tranche_Active_YM” “Tranche_Amended”
[39] “Tranche_Amount” “Tranche_Maturity_Date”
[41] “Tranche_O_A” “Tranche_Refinancing”
[43] “Tranche_Remark” “Tranche_Type”
[45] “vintage_match”

Show the code
save(dt.DealScan_uniborrow_public, file = "DealScan_uniborrow_public_cleaned.RData") 

## separate the link table into a different file. 
dt.DealScan_link_table_public <- select(dt.DealScan_uniborrow_public, 
                                        obs_id, # identifier for the firm-deal-tranche/loan-time observations 
                                        deal_time_id, # identifier for the firm-deal-time observations
                                        gvkey_group1, # unique identifier for each firm 
                                        gvkey, # unique identifer in Compustat 
                                        Tranche_Active_Date, Tranche_Active_YM # observation time 
                                        )

save(dt.DealScan_link_table_public, file = "DealScan_link_table.RData") 

From all procedure above, the raw DealScan data, DealScan_borrower.RData, is cleaned and saved into DealScan_uniborrow_public_cleaned.RData, which contains only observations from public firms. The link table is also updated and now saved in file DealScan_link_table.RData. The new link table will be used to link the DealScan dataset to the Compustat dataset.

Refine the deal path identification

Reference

  • Related Peter Demerjian
    • Measuring the Probability of Financial Covenant Violation in Private Debt Contracts (with E. Owens). Journal of Accounting and Economics 61: 433-447 (2016). > CODE
  • Corporate Refinancing, Covenants, and the Agency Cost of Debt - Green (2018)
  • Covenant removal in corporate bonds - NHH paper (2023)

To-do list:

  • Exclude text with amendment fee in Event_renegotiation. > Line 237.
  • July 7, 2024
    • Use the maturity time to further distinguish the deals.
    • Connect to Compustat.

===

Trashbin:

Show the code
### 


# ------------------------------------------------------------------

### from section S2 - June 13, 2024 
## create a dataset for the unique id for each unique observations. 
dt.Link_table_id <- Link_table %>% 
  select(gvkey, `LoanConnector Company ID`, borrowercompanyid, facilitystartdate) %>% 
  group_by(gvkey, `LoanConnector Company ID`, borrowercompanyid) %>% 
  summarise(facilitystartdate_min = min(facilitystartdate)) %>% 
  ungroup() %>% 
  group_by(gvkey) %>% 
  arrange(facilitystartdate_min) %>% 
  mutate(gvkey_group = cur_group_id()) %>% # generate the group_id based on `gvkey` 
  ungroup() 

## only contain self-created prelimiary gvkey_group(s):
comp_id_gvkey <- dt.Link_table_id %>%
  select(gvkey, `LoanConnector Company ID`, gvkey_group) %>%
  group_by(`LoanConnector Company ID`) %>%
  summarise(company_id_to_gvkey = paste0(paste0(unique(gvkey), collapse = ";"), ";"), # for `gvkey`
            company_id_to_gvkey_group = paste0(paste0(unique(gvkey_group), collapse = ";"), ";"), # for `gvkey_group`
            n_gvkey = length(unique(gvkey)) # number of gvkey(s) within each `company_ID` group.
            ) %>%
  ungroup()

comp_id_gvkey

dt.gvkey_group1 <- dt.Link_table_id %>%
  left_join(comp_id_gvkey, by = "LoanConnector Company ID") %>%
  arrange(gvkey) %>%
  # head(30) %>%
  group_by(gvkey) %>% # for each given `gvkey`, find the associated `gvkey`(s) based on `company_ID`.
  summarise( # re-construct the list of linked `gvkey`(s)
    connected_gvkey = paste(unique(unlist(strsplit(company_id_to_gvkey, split = ";"))), collapse = ";")
  ) %>%
  ungroup() %>%
  select(connected_gvkey) %>%
  distinct() %>%
  mutate(gvkey_group1 = 1:nrow(.))


dt.Link_table_gvkey <- str_split(string = dt.gvkey_group1$connected_gvkey, pattern = ";", simplify = F) %>%
  `names<-`(value = dt.gvkey_group1$gvkey_group1) %>%
  unlist(.) %>%
  data.frame(gvkey = ., gvkey_group1 = names(.)) %>%
  mutate(gvkey = as.numeric(gvkey))

### merge back to the link table
dt.Link_table_id2 <- dt.Link_table_id %>%
  select(-facilitystartdate_min, -gvkey_group) %>%
  left_join(dt.Link_table_gvkey, by = "gvkey")

dt.Link_table_id2

## second round checking

### the arguement here is that for all `company ID` previously identified from the same gvkey, I now map it back based on the `company ID` and double check whether the new `gvkey_group` is the same as the original ones. 
### i.e. I am checking whether the same `comapny ID` maps to multiple `gvkey`(s).