library( foreign )
library( memisc )
library( knitr )
library( dplyr )
library( xtable )
library( stargazer )
library( broom )
library( pander )
# Read the Stata file into `dat` and show its first rows as a pander table.
dat <- read.dta( "./Data/NCCS-Digitized-Ruledate-1998.dta" )
head( dat ) %>% pander
ein | fisyr | Accrual | age | FS_Totrev_adj | UNAgrand | HHI |
---|---|---|---|---|---|---|
10211502 | 1999 | 1 | 1 | 77197 | 0 | 0.3314 |
10211502 | 2000 | 1 | 2 | 302158 | 0 | 0.6505 |
10400759 | 1998 | 0 | 0 | 128926 | 57.66 | 0.6177 |
10400759 | 1999 | 0 | 1 | 194122 | 233.4 | 0.5867 |
10400759 | 2000 | 0 | 2 | 72074 | 268 | 0.4975 |
10400759 | 2001 | 0 | 3 | 244222 | 253.1 | 0.9673 |
GovtMoneyRat | FixedCostRat | EqRat_w | SurplusRat_ndrop_w892 | ProfFundFeeYes |
---|---|---|---|---|
0.3396 | 0 | 463788 | 0.2534 | 0 |
0.06093 | 0.3853 | 688635 | 0.3798 | 0 |
0.001637 | 0.03859 | 4108 | 0.3798 | 0 |
0.2732 | 0.2023 | 7.711 | 0.3798 | 0 |
0 | 0.2463 | 7.729 | -0.02354 | 0 |
0 | 0.4326 | 12.2 | 0.3798 | 0 |
Subsector2 | FY1998 | FY1999 | FY2000 | FY2001 | FY2002 | FY2003 |
---|---|---|---|---|---|---|
Edu(Exclu Higher) | 0 | 1 | 0 | 0 | 0 | 0 |
Edu(Exclu Higher) | 0 | 0 | 1 | 0 | 0 | 0 |
Edu(Exclu Higher) | 1 | 0 | 0 | 0 | 0 | 0 |
Edu(Exclu Higher) | 0 | 1 | 0 | 0 | 0 | 0 |
Edu(Exclu Higher) | 0 | 0 | 1 | 0 | 0 | 0 |
Edu(Exclu Higher) | 0 | 0 | 0 | 1 | 0 | 0 |
JustNowProfessionalized | waffle |
---|---|
NA | 0 |
1 | 0 |
NA | 0 |
NA | 0 |
NA | 0 |
1 | 0 |
# Column-by-column summary of `dat`, rendered as a pander table.
pander( summary( dat ) )
ein | fisyr | Accrual | age |
---|---|---|---|
Min. : 10211502 | Min. :1998 | Min. :0.0000 | Min. :0.000 |
1st Qu.:311586601 | 1st Qu.:1999 | 1st Qu.:0.0000 | 1st Qu.:1.000 |
Median :522067136 | Median :2000 | Median :0.0000 | Median :2.000 |
Mean :507363852 | Mean :2001 | Mean :0.4084 | Mean :2.513 |
3rd Qu.:742838818 | 3rd Qu.:2002 | 3rd Qu.:1.0000 | 3rd Qu.:4.000 |
Max. :996081402 | Max. :2003 | Max. :1.0000 | Max. :5.000 |
NA | NA | NA | NA |
FS_Totrev_adj | UNAgrand | HHI | GovtMoneyRat |
---|---|---|---|
Min. : -4359 | Min. :-352553.2 | Min. :0.0000 | Min. :0.0000 |
1st Qu.: 34215 | 1st Qu.: 0.0 | 1st Qu.:0.5403 | 1st Qu.:0.0000 |
Median : 112923 | Median : 0.0 | Median :0.8701 | Median :0.0000 |
Mean : 1943535 | Mean : 679.2 | Mean :0.7361 | Mean :0.1032 |
3rd Qu.: 366257 | 3rd Qu.: 44.8 | 3rd Qu.:0.9993 | 3rd Qu.:0.0000 |
Max. :1274332605 | Max. :1276196.1 | Max. :1.0000 | Max. :1.0000 |
NA | NA | NA | NA |
FixedCostRat | EqRat_w | SurplusRat_ndrop_w892 | ProfFundFeeYes |
---|---|---|---|
Min. :0.00000 | Min. : 0.0 | Min. :-6.76835 | Min. :0.00000 |
1st Qu.:0.00000 | 1st Qu.: 1.9 | 1st Qu.:-0.49853 | 1st Qu.:0.00000 |
Median :0.00000 | Median : 42.6 | Median :-0.01344 | Median :0.00000 |
Mean :0.06252 | Mean : 74747.7 | Mean :-1.08009 | Mean :0.04067 |
3rd Qu.:0.06221 | 3rd Qu.: 24912.0 | 3rd Qu.: 0.14060 | 3rd Qu.:0.00000 |
Max. :1.39599 | Max. :2605173.0 | Max. : 0.37984 | Max. :1.00000 |
NA | NA | NA’s :16 | NA |
Subsector2 | FY1998 | FY1999 |
---|---|---|
Arts : 4189 | Min. :0.0000 | Min. :0.0000 |
Health : 4023 | 1st Qu.:0.0000 | 1st Qu.:0.0000 |
Human Svcs :16338 | Median :0.0000 | Median :0.0000 |
Public : 6901 | Mean :0.1312 | Mean :0.1891 |
Edu(Exclu Higher) : 7910 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 |
Higher Edu or Hospitals: 765 | Max. :1.0000 | Max. :1.0000 |
Other : 6243 | NA | NA |
FY2000 | FY2001 | FY2002 | FY2003 |
---|---|---|---|
Min. :0.0000 | Min. :0.0000 | Min. :0.0000 | Min. :0.0000 |
1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 |
Median :0.0000 | Median :0.0000 | Median :0.0000 | Median :0.0000 |
Mean :0.1845 | Mean :0.1799 | Mean :0.1611 | Mean :0.1542 |
3rd Qu.:0.0000 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 |
Max. :1.0000 | Max. :1.0000 | Max. :1.0000 | Max. :1.0000 |
NA | NA | NA | NA |
JustNowProfessionalized | waffle |
---|---|
Min. :0.00 | Min. :0 |
1st Qu.:0.00 | 1st Qu.:0 |
Median :0.00 | Median :0 |
Mean :0.19 | Mean :0 |
3rd Qu.:0.00 | 3rd Qu.:0 |
Max. :1.00 | Max. :0 |
NA’s :32225 | NA |
# Row counts by fiscal year (rows) and JustNowProfessionalized value (columns).
table( dat[[ "fisyr" ]], dat[[ "JustNowProfessionalized" ]] )
##
## 0 1
## 1998 0 0
## 1999 1646 623
## 2000 2396 712
## 2001 2523 546
## 2002 2544 438
## 2003 2359 357
# Same cross-tab as row proportions: each fiscal year sums to 1 (margin = 1).
prop.table(
  table( dat[[ "fisyr" ]], dat[[ "JustNowProfessionalized" ]] ),
  margin = 1
)
##
## 0 1
## 1998
## 1999 0.7254297 0.2745703
## 2000 0.7709138 0.2290862
## 2001 0.8220919 0.1779081
## 2002 0.8531187 0.1468813
## 2003 0.8685567 0.1314433
# Sum JustNowProfessionalized within each organization (ein), ignoring NAs,
# then tabulate how many organizations have each total.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
table( tapply( dat$JustNowProfessionalized, dat$ein, sum, na.rm = TRUE ) )
##
## 0 1
## 10764 2676
# Build `prof`, the professionalization indicator, with one value per row of
# `dat`, and attach it as the first column of `dat`.
#
# Each organization (ein) is classified by its revenue (FS_Totrev_adj) in its
# earliest fiscal year:
#   - at or above the threshold: every row of the organization stays NA
#   - below the threshold: rows are 0 while revenue is below the threshold,
#     1 in the first period at or above it, and NA in every later period
#
# `fr` records each organization's first-year revenue, in the order of
# unique( dat$ein ).
rev.threshold <- 100000

org.ids <- unique( dat$ein )
prof <- rep( NA_real_, nrow(dat) )
fr <- rep( NA_real_, length(org.ids) )

for( k in seq_along(org.ids) )
{
  # Row positions of this organization in fiscal-year order. Results are
  # written back by position, so `prof` lines up with `dat` regardless of how
  # the rows of `dat` are sorted.
  org.rows <- which( dat$ein == org.ids[ k ] )
  org.rows <- org.rows[ order( dat$fisyr[ org.rows ] ) ]
  org.rev <- dat$FS_Totrev_adj[ org.rows ]

  first.rev <- org.rev[ 1 ]
  fr[ k ] <- first.rev

  # A first-year revenue exactly equal to the threshold is treated as
  # at-or-above it, matching the 0/1 coding of `spell` below; such
  # organizations keep NA in every row.
  if( first.rev < rev.threshold )
  {
    spell <- ifelse( org.rev < rev.threshold, 0, 1 )

    # drop obs after professionalizes
    num.prof.periods <- cumsum( spell )
    is.prof <- num.prof.periods > 0
    is.prof[ is.prof == FALSE ] <- NA
    # NA is declared incomparable, so only TRUE values after the first TRUE
    # are flagged as duplicates, i.e. the periods after professionalization
    post.prof.periods <- duplicated( is.prof, incomparables=NA )
    spell[ post.prof.periods ] <- NA

    prof[ org.rows ] <- spell
  }
}

dat <- cbind( prof, dat )
# Build `dd` from `dat`: keep rows whose `prof` is not NA, then rows with a
# non-missing SurplusRat_ndrop_w892, then drop one subsector; print the row count.
dd <- dat[ ! is.na( dat$prof ) , ]
dd <- dd[ ! is.na( dd$SurplusRat_ndrop_w892 ) , ]
# exclude hospital or higher ed
# %in% never returns NA, so a missing Subsector2 cannot produce an all-NA row
dd <- dd[ ! ( dd$Subsector2 %in% "Higher Edu or Hospitals" ) , ]
nrow( dd )
## [1] 22026
# First year they appear in the data:
# minimum fisyr per organization (ein), tabulated across organizations.
table( tapply( X = dd$fisyr, INDEX = dd$ein, FUN = min ) )
##
## 1998 1999 2000 2001 2002 2003
## 2986 2237 1027 760 424 310
# Length of spells:
# the inner table counts rows per ein, the outer one tabulates those counts.
table( table( dd[[ "ein" ]] ) )
##
## 1 2 3 4 5 6
## 1535 2384 1450 1010 857 508
Create new DV according to the rules:
Here is the sample breakdown:
Length of spells for grassroots orgs:
1 | 2 | 3 | 4 | 5 | 6 |
---|---|---|---|---|---|
1541 | 2404 | 1460 | 1016 | 866 | 509 |
Which means there are 22,177 rows of data (observation periods).
There are 14 values in the SurplusRat_ndrop_w892 variable that are missing, so that drops to 22,163.
There are 49 hospitals and universities, accounting for 137 observation periods in total.
After dropping them, we are down to 22,026 observation periods.
# Fiscal-year indicator variables: D1 flags 1998 through D6 flagging 2003.
D1 <- ifelse( dat$fisyr == 1998, 1, 0 )
D2 <- ifelse( dat$fisyr == 1999, 1, 0 )
D3 <- ifelse( dat$fisyr == 2000, 1, 0 )
D4 <- ifelse( dat$fisyr == 2001, 1, 0 )
D5 <- ifelse( dat$fisyr == 2002, 1, 0 )
D6 <- ifelse( dat$fisyr == 2003, 1, 0 )

# AGE is `age` shifted up by one; Age2 to Age5 are its second to fifth powers,
# built by repeated multiplication.
AGE <- dat$age + 1
Age2 <- AGE * AGE
Age3 <- Age2 * AGE
Age4 <- Age3 * AGE
Age5 <- Age4 * AGE

# Append the new columns to `dat`, then remove the free-standing copies
# (and the `prof` vector) from the workspace.
dat <- cbind( dat, D1, D2, D3, D4, D5, D6, AGE, Age2, Age3, Age4, Age5 )
rm( D1, D2, D3, D4, D5, D6, AGE, Age2, Age3, Age4, Age5, prof )
# number of nonprofits in sample (distinct ein values)
length( unique( dat[[ "ein" ]] ) )
## [1] 13440
# original number of obs (rows of dat)
nrow( dat )
## [1] 46369
# save the data frame, including the columns added above, as a Stata file
write.dta(
  dataframe = dat,
  file = "./Data/Aug_Profess_Build_Data_Ruledate1998_vNoWafflesParedwRev_FROMJESSE.dta"
)
# remove the born large orgs (rows where `prof` is NA) and print the row count
dat <- dat[ ! is.na( dat$prof ) , ]
nrow( dat )
## [1] 22177
# number of grassroots nonprofits in the sample (distinct ein values)
length( unique( dat[[ "ein" ]] ) )
## [1] 7796
# number that professionalize (sum of the 0/1 `prof` column)
sum( dat[[ "prof" ]] )
## [1] 2474
# total proportion that professionalize
sum( dat[[ "prof" ]] ) / length( unique( dat[[ "ein" ]] ) )
## [1] 0.3173422
# check to make sure none professionalize twice:
# per-organization sum of `prof`, tabulated across organizations
table( tapply( X = dat$prof, INDEX = dat$ein, FUN = sum ) )
##
## 0 1
## 5322 2474
# when do orgs professionalize: counts by fiscal year (rows) and `prof` (columns)
table( dat[[ "fisyr" ]], dat[[ "prof" ]] )
##
## 0 1
## 1998 3008 0
## 1999 3898 623
## 2000 3519 685
## 2001 3430 504
## 2002 3016 352
## 2003 2832 310
# the same table as row proportions (each fiscal year sums to 1)
prop.table(
  table( dat[[ "fisyr" ]], dat[[ "prof" ]] ),
  margin = 1
)
##
## 0 1
## 1998 1.00000000 0.00000000
## 1999 0.86219863 0.13780137
## 2000 0.83705994 0.16294006
## 2001 0.87188612 0.12811388
## 2002 0.89548694 0.10451306
## 2003 0.90133673 0.09866327
# drop 14 missing Surplus Ratio cases
dat <- dat[ ! is.na( dat$SurplusRat_ndrop_w892 ) , ]
# exclude hospital or higher ed
# %in% never returns NA, so a missing Subsector2 cannot produce an all-NA row
dat <- dat[ ! ( dat$Subsector2 %in% "Higher Edu or Hospitals" ) , ]
nrow( dat )
## [1] 22026
# convert EqRat_w to thousands to have meaningful reg coefficients
summary( dat$EqRat_w )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 3 4173 29615 28173 2605173
dat$EqRat_w_K <- dat$EqRat_w / 1000

# Save the final data frame twice: as CSV in the working directory and as RDS
# under ./Data/.
# NOTE(review): the two outputs go to different directories - confirm this is intended.
write.csv( dat, "CompleteHazardSpells.csv" )
saveRDS( dat, "./Data/CompleteHazardSpells.rds" )