LOAD PACKAGES

library( foreign )
library( memisc )
library( knitr )
library( dplyr )
library( xtable )
library( stargazer )
library( broom )
library( pander )

BUILD HAZARD DATASET

# Read the Stata file into a data frame, then render its first rows as a table.
dat <- read.dta( file = "./Data/NCCS-Digitized-Ruledate-1998.dta" )

pander( head( dat ) )
Table continues below
ein fisyr Accrual age FS_Totrev_adj UNAgrand HHI
10211502 1999 1 1 77197 0 0.3314
10211502 2000 1 2 302158 0 0.6505
10400759 1998 0 0 128926 57.66 0.6177
10400759 1999 0 1 194122 233.4 0.5867
10400759 2000 0 2 72074 268 0.4975
10400759 2001 0 3 244222 253.1 0.9673
Table continues below
GovtMoneyRat FixedCostRat EqRat_w SurplusRat_ndrop_w892 ProfFundFeeYes
0.3396 0 463788 0.2534 0
0.06093 0.3853 688635 0.3798 0
0.001637 0.03859 4108 0.3798 0
0.2732 0.2023 7.711 0.3798 0
0 0.2463 7.729 -0.02354 0
0 0.4326 12.2 0.3798 0
Table continues below
Subsector2 FY1998 FY1999 FY2000 FY2001 FY2002 FY2003
Edu(Exclu Higher) 0 1 0 0 0 0
Edu(Exclu Higher) 0 0 1 0 0 0
Edu(Exclu Higher) 1 0 0 0 0 0
Edu(Exclu Higher) 0 1 0 0 0 0
Edu(Exclu Higher) 0 0 1 0 0 0
Edu(Exclu Higher) 0 0 0 1 0 0
JustNowProfessionalized waffle
NA 0
1 0
NA 0
NA 0
NA 0
1 0

Summary Stats

# Per-column summary of the full data set, rendered as a table.
pander( summary( dat ) )
Table continues below
ein fisyr Accrual age
Min. : 10211502 Min. :1998 Min. :0.0000 Min. :0.000
1st Qu.:311586601 1st Qu.:1999 1st Qu.:0.0000 1st Qu.:1.000
Median :522067136 Median :2000 Median :0.0000 Median :2.000
Mean :507363852 Mean :2001 Mean :0.4084 Mean :2.513
3rd Qu.:742838818 3rd Qu.:2002 3rd Qu.:1.0000 3rd Qu.:4.000
Max. :996081402 Max. :2003 Max. :1.0000 Max. :5.000
NA NA NA NA
Table continues below
FS_Totrev_adj UNAgrand HHI GovtMoneyRat
Min. : -4359 Min. :-352553.2 Min. :0.0000 Min. :0.0000
1st Qu.: 34215 1st Qu.: 0.0 1st Qu.:0.5403 1st Qu.:0.0000
Median : 112923 Median : 0.0 Median :0.8701 Median :0.0000
Mean : 1943535 Mean : 679.2 Mean :0.7361 Mean :0.1032
3rd Qu.: 366257 3rd Qu.: 44.8 3rd Qu.:0.9993 3rd Qu.:0.0000
Max. :1274332605 Max. :1276196.1 Max. :1.0000 Max. :1.0000
NA NA NA NA
Table continues below
FixedCostRat EqRat_w SurplusRat_ndrop_w892 ProfFundFeeYes
Min. :0.00000 Min. : 0.0 Min. :-6.76835 Min. :0.00000
1st Qu.:0.00000 1st Qu.: 1.9 1st Qu.:-0.49853 1st Qu.:0.00000
Median :0.00000 Median : 42.6 Median :-0.01344 Median :0.00000
Mean :0.06252 Mean : 74747.7 Mean :-1.08009 Mean :0.04067
3rd Qu.:0.06221 3rd Qu.: 24912.0 3rd Qu.: 0.14060 3rd Qu.:0.00000
Max. :1.39599 Max. :2605173.0 Max. : 0.37984 Max. :1.00000
NA NA NA’s :16 NA
Table continues below
Subsector2 FY1998 FY1999
Arts : 4189 Min. :0.0000 Min. :0.0000
Health : 4023 1st Qu.:0.0000 1st Qu.:0.0000
Human Svcs :16338 Median :0.0000 Median :0.0000
Public : 6901 Mean :0.1312 Mean :0.1891
Edu(Exclu Higher) : 7910 3rd Qu.:0.0000 3rd Qu.:0.0000
Higher Edu or Hospitals: 765 Max. :1.0000 Max. :1.0000
Other : 6243 NA NA
Table continues below
FY2000 FY2001 FY2002 FY2003
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.1845 Mean :0.1799 Mean :0.1611 Mean :0.1542
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
NA NA NA NA
JustNowProfessionalized waffle
Min. :0.00 Min. :0
1st Qu.:0.00 1st Qu.:0
Median :0.00 Median :0
Mean :0.19 Mean :0
3rd Qu.:0.00 3rd Qu.:0
Max. :1.00 Max. :0
NA’s :32225 NA

Original DV

# Counts of the original DV (JustNowProfessionalized) by fiscal year.
# table() leaves out NA values by default, so only rows coded 0 or 1 are counted.
table( dat$fisyr, dat$JustNowProfessionalized )
##       
##           0    1
##   1998    0    0
##   1999 1646  623
##   2000 2396  712
##   2001 2523  546
##   2002 2544  438
##   2003 2359  357
# Same cross-tab as row proportions (margin=1: each fiscal year sums to 1).
prop.table( table( dat$fisyr, dat$JustNowProfessionalized ), margin=1 )
##       
##                0         1
##   1998                    
##   1999 0.7254297 0.2745703
##   2000 0.7709138 0.2290862
##   2001 0.8220919 0.1779081
##   2002 0.8531187 0.1468813
##   2003 0.8685567 0.1314433
# Per-organization total of the original DV (NAs ignored), tabulated:
# how many organizations have 0 events and how many have 1.
table( tapply( dat$JustNowProfessionalized, dat$ein, sum, na.rm=T ) )
## 
##     0     1 
## 10764  2676

Create New DV

# Build the new DV `prof`: one value per row of dat, aligned by row position.
#
# Rules, applied to each organization (ein) with its rows in fiscal-year order:
#   * first-year revenue at or above the threshold -> every row is NA
#   * otherwise: 0 while revenue is below the threshold, 1 in the first year
#     revenue is at or above it, and NA for every later year
#
# `fr` keeps each organization's first-year revenue, in unique(dat$ein) order.

prof.threshold <- 100000

eins <- unique( dat$ein )

# Preallocate rather than growing with c(). Writing by row index keeps prof
# aligned with dat even if an organization's rows are not contiguous.
prof <- rep( NA_real_, nrow(dat) )

fr <- rep( NA_real_, length(eins) )

for( k in seq_along(eins) )
{
  org.rows <- which( dat$ein == eins[k] )
  
  # put the organization's rows in chronological order (stable for ties)
  org.rows <- org.rows[ order( dat$fisyr[ org.rows ] ) ]
  
  org.rev <- dat$FS_Totrev_adj[ org.rows ]
  
  first.rev <- org.rev[1]
  
  fr[k] <- first.rev
  
  # Single test with a fall-through: a first-year revenue exactly equal to the
  # threshold (or missing) leaves the rows as NA instead of being skipped.
  if( is.na(first.rev) || first.rev >= prof.threshold ) { next }
  
  spell <- ifelse( org.rev < prof.threshold, 0, 1 )
  
  # drop obs after professionalizes: keep the first 1, blank everything later
  first.prof <- which( spell == 1 )[1]
  
  if( ! is.na(first.prof) && first.prof < length(spell) )
  {
    spell[ (first.prof + 1):length(spell) ] <- NA
  }
  
  prof[ org.rows ] <- spell
}



# Attach the new DV as the first column of dat.
# cbind() on a data frame recycles a shorter vector whose length divides the
# row count, so check the lengths first to avoid a silently misaligned DV.
stopifnot( length(prof) == nrow(dat) )

dat <- cbind( prof, dat )

# dd is a working copy used only for the diagnostics below:
# keep rows with a defined prof value and a non-missing surplus ratio.
dd <- dat[ ! is.na(dat$prof ) , ]

dd <- dd[ ! is.na(dd$SurplusRat_ndrop_w892) , ]

# exclude hospital or higher ed
# %in% never returns NA, so a row with a missing Subsector2 is kept intact
# instead of being turned into an all-NA row by a logical NA index.

dd <- dd[ ! ( dd$Subsector2 %in% "Higher Edu or Hospitals" ) , ]

nrow( dd )
## [1] 22026
# First year they appear in the data
# (number of organizations by the earliest fisyr among their rows in dd)

table( tapply( dd$fisyr, dd$ein, min ) )
## 
## 1998 1999 2000 2001 2002 2003 
## 2986 2237 1027  760  424  310
# Length of spells
# (inner table: rows per ein in dd; outer table: organizations per row count)

table( table( dd$ein ))
## 
##    1    2    3    4    5    6 
## 1535 2384 1450 1010  857  508

NEW DV

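Create new DV according to the rules:

- Only organizations whose first-year revenue (FS_Totrev_adj) is below 100,000 enter the sample; the rest are treated as born large and excluded.
- prof = 0 in every year revenue stays below 100,000.
- prof = 1 in the first year revenue reaches 100,000 or more.
- Observation periods after that first professionalized year are dropped.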

Here is the sample breakdown:

Length of spells for grassroots orgs:

| 1    | 2    | 3    | 4    | 5   | 6   |
|------|------|------|------|-----|-----|
| 1541 | 2404 | 1460 | 1016 | 866 | 509 |

Which means there are 22,177 rows of data (observation periods).

There are 14 values in the SurplusRat_ndrop_w892 variable that are missing, so that drops to 22,163.

There are 49 hospitals and universities, accounting for 137 total spells.

After dropping we are down to 22,026.

Defining Other Study Vars

# Fiscal-year indicator columns D1..D6: 1 when fisyr equals 1998..2003, else 0.
year.flags <- lapply( 1998:2003, function( yr ) ifelse( dat$fisyr == yr, 1, 0 ) )

names( year.flags ) <- paste0( "D", 1:6 )


# Age shifted up by one, with its powers built by repeated multiplication.
AGE <- dat$age + 1

Age2 <- AGE * AGE

Age3 <- Age2 * AGE

Age4 <- Age3 * AGE

Age5 <- Age4 * AGE



dat <- cbind( dat, as.data.frame( year.flags ), AGE, Age2, Age3, Age4, Age5 )

# Clear the temporaries and the free-standing prof vector from the workspace;
# the copies stored as columns of dat are what is used from here on.
rm( year.flags, AGE, Age2, Age3, Age4, Age5, prof )

Drop Born Large Orgs

# number of nonprofits in sample (distinct ein values, before any rows are dropped)

length( unique( dat$ein ))
## [1] 13440
# original number of obs

nrow( dat )
## [1] 46369
# Save the full data set, including the rows with NA prof, to a Stata file
# before any rows are removed.
write.dta( dat, "./Data/Aug_Profess_Build_Data_Ruledate1998_vNoWafflesParedwRev_FROMJESSE.dta" )

# remove the born large orgs
# Dropping NA prof also removes the observation periods that follow an
# organization's first professionalized year, since those were set to NA too.

dat <- dat[ ! is.na(dat$prof) , ]

nrow( dat )
## [1] 22177

Sanity Check

# number of grassroots nonprofits in the sample
# (distinct ein values left after the NA-prof rows were removed)

length( unique( dat$ein ))
## [1] 7796
# number that professionalize
# (prof is 0/1 with no NAs left, so the sum counts the rows coded 1)

sum(dat$prof)
## [1] 2474
# total proportion that professionalize
# (rows coded 1 divided by the number of distinct organizations)

sum(dat$prof) / length( unique( dat$ein ))
## [1] 0.3173422
# check to make sure none professionalize twice
# (per-organization sum of prof; every total should be 0 or 1)

table( tapply( dat$prof, dat$ein, sum ) )
## 
##    0    1 
## 5322 2474
# when do orgs professionalize
# (counts of prof by fiscal year)

table( dat$fisyr, dat$prof )
##       
##           0    1
##   1998 3008    0
##   1999 3898  623
##   2000 3519  685
##   2001 3430  504
##   2002 3016  352
##   2003 2832  310
# same table as row proportions (margin=1: each fiscal year sums to 1)
prop.table( table( dat$fisyr, dat$prof ), margin=1 )
##       
##                 0          1
##   1998 1.00000000 0.00000000
##   1999 0.86219863 0.13780137
##   2000 0.83705994 0.16294006
##   2001 0.87188612 0.12811388
##   2002 0.89548694 0.10451306
##   2003 0.90133673 0.09866327

DROP ODD CASES

# drop 14 missing Surplus Ratio cases

dat <- dat[ ! is.na(dat$SurplusRat_ndrop_w892) , ]

# exclude hospital or higher ed
# %in% never returns NA, so a row with a missing Subsector2 is kept intact
# instead of being turned into an all-NA row by a logical NA index.

dat <- dat[ ! ( dat$Subsector2 %in% "Higher Edu or Hospitals" ) , ]

nrow( dat )
## [1] 22026

CONVERT EQUITY RATIO TO THOUSANDS

# Rescale EqRat_w by 1/1000 into a new column so regression coefficients
# are on a readable scale; the original column is left unchanged.

summary( dat[["EqRat_w"]] )
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       3    4173   29615   28173 2605173
dat[["EqRat_w_K"]] <- dat[["EqRat_w"]] / 1000

WRITE DATASET

# Persist the final spell data twice: a CSV in the working directory and an
# RDS file under ./Data.
write.csv( x = dat, file = "CompleteHazardSpells.csv" )

saveRDS( object = dat, file = "./Data/CompleteHazardSpells.rds" )