library( foreign )
library( memisc )
library( knitr )
library( dplyr )
library( xtable )
library( stargazer )
library( broom )
library( pander )
dat <- read.dta( "./Data/NCCS-Digitized-Ruledate-1998.dta" )
head( dat ) %>% pander

| ein | fisyr | Accrual | age | FS_Totrev_adj | UNAgrand | HHI |
|---|---|---|---|---|---|---|
| 10211502 | 1999 | 1 | 1 | 77197 | 0 | 0.3314 |
| 10211502 | 2000 | 1 | 2 | 302158 | 0 | 0.6505 |
| 10400759 | 1998 | 0 | 0 | 128926 | 57.66 | 0.6177 |
| 10400759 | 1999 | 0 | 1 | 194122 | 233.4 | 0.5867 |
| 10400759 | 2000 | 0 | 2 | 72074 | 268 | 0.4975 |
| 10400759 | 2001 | 0 | 3 | 244222 | 253.1 | 0.9673 |
| GovtMoneyRat | FixedCostRat | EqRat_w | SurplusRat_ndrop_w892 | ProfFundFeeYes |
|---|---|---|---|---|
| 0.3396 | 0 | 463788 | 0.2534 | 0 |
| 0.06093 | 0.3853 | 688635 | 0.3798 | 0 |
| 0.001637 | 0.03859 | 4108 | 0.3798 | 0 |
| 0.2732 | 0.2023 | 7.711 | 0.3798 | 0 |
| 0 | 0.2463 | 7.729 | -0.02354 | 0 |
| 0 | 0.4326 | 12.2 | 0.3798 | 0 |
| Subsector2 | FY1998 | FY1999 | FY2000 | FY2001 | FY2002 | FY2003 |
|---|---|---|---|---|---|---|
| Edu(Exclu Higher) | 0 | 1 | 0 | 0 | 0 | 0 |
| Edu(Exclu Higher) | 0 | 0 | 1 | 0 | 0 | 0 |
| Edu(Exclu Higher) | 1 | 0 | 0 | 0 | 0 | 0 |
| Edu(Exclu Higher) | 0 | 1 | 0 | 0 | 0 | 0 |
| Edu(Exclu Higher) | 0 | 0 | 1 | 0 | 0 | 0 |
| Edu(Exclu Higher) | 0 | 0 | 0 | 1 | 0 | 0 |
| JustNowProfessionalized | waffle |
|---|---|
| NA | 0 |
| 1 | 0 |
| NA | 0 |
| NA | 0 |
| NA | 0 |
| 1 | 0 |
summary(dat) %>% pander

| ein | fisyr | Accrual | age |
|---|---|---|---|
| Min. : 10211502 | Min. :1998 | Min. :0.0000 | Min. :0.000 |
| 1st Qu.:311586601 | 1st Qu.:1999 | 1st Qu.:0.0000 | 1st Qu.:1.000 |
| Median :522067136 | Median :2000 | Median :0.0000 | Median :2.000 |
| Mean :507363852 | Mean :2001 | Mean :0.4084 | Mean :2.513 |
| 3rd Qu.:742838818 | 3rd Qu.:2002 | 3rd Qu.:1.0000 | 3rd Qu.:4.000 |
| Max. :996081402 | Max. :2003 | Max. :1.0000 | Max. :5.000 |
| NA | NA | NA | NA |
| FS_Totrev_adj | UNAgrand | HHI | GovtMoneyRat |
|---|---|---|---|
| Min. : -4359 | Min. :-352553.2 | Min. :0.0000 | Min. :0.0000 |
| 1st Qu.: 34215 | 1st Qu.: 0.0 | 1st Qu.:0.5403 | 1st Qu.:0.0000 |
| Median : 112923 | Median : 0.0 | Median :0.8701 | Median :0.0000 |
| Mean : 1943535 | Mean : 679.2 | Mean :0.7361 | Mean :0.1032 |
| 3rd Qu.: 366257 | 3rd Qu.: 44.8 | 3rd Qu.:0.9993 | 3rd Qu.:0.0000 |
| Max. :1274332605 | Max. :1276196.1 | Max. :1.0000 | Max. :1.0000 |
| NA | NA | NA | NA |
| FixedCostRat | EqRat_w | SurplusRat_ndrop_w892 | ProfFundFeeYes |
|---|---|---|---|
| Min. :0.00000 | Min. : 0.0 | Min. :-6.76835 | Min. :0.00000 |
| 1st Qu.:0.00000 | 1st Qu.: 1.9 | 1st Qu.:-0.49853 | 1st Qu.:0.00000 |
| Median :0.00000 | Median : 42.6 | Median :-0.01344 | Median :0.00000 |
| Mean :0.06252 | Mean : 74747.7 | Mean :-1.08009 | Mean :0.04067 |
| 3rd Qu.:0.06221 | 3rd Qu.: 24912.0 | 3rd Qu.: 0.14060 | 3rd Qu.:0.00000 |
| Max. :1.39599 | Max. :2605173.0 | Max. : 0.37984 | Max. :1.00000 |
| NA | NA | NA’s :16 | NA |
| Subsector2 | FY1998 | FY1999 |
|---|---|---|
| Arts : 4189 | Min. :0.0000 | Min. :0.0000 |
| Health : 4023 | 1st Qu.:0.0000 | 1st Qu.:0.0000 |
| Human Svcs :16338 | Median :0.0000 | Median :0.0000 |
| Public : 6901 | Mean :0.1312 | Mean :0.1891 |
| Edu(Exclu Higher) : 7910 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 |
| Higher Edu or Hospitals: 765 | Max. :1.0000 | Max. :1.0000 |
| Other : 6243 | NA | NA |
| FY2000 | FY2001 | FY2002 | FY2003 |
|---|---|---|---|
| Min. :0.0000 | Min. :0.0000 | Min. :0.0000 | Min. :0.0000 |
| 1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 |
| Median :0.0000 | Median :0.0000 | Median :0.0000 | Median :0.0000 |
| Mean :0.1845 | Mean :0.1799 | Mean :0.1611 | Mean :0.1542 |
| 3rd Qu.:0.0000 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 |
| Max. :1.0000 | Max. :1.0000 | Max. :1.0000 | Max. :1.0000 |
| NA | NA | NA | NA |
| JustNowProfessionalized | waffle |
|---|---|
| Min. :0.00 | Min. :0 |
| 1st Qu.:0.00 | 1st Qu.:0 |
| Median :0.00 | Median :0 |
| Mean :0.19 | Mean :0 |
| 3rd Qu.:0.00 | 3rd Qu.:0 |
| Max. :1.00 | Max. :0 |
| NA’s :32225 | NA |
# Counts of the JustNowProfessionalized flag by fiscal year.
# table() leaves NA values out by default, so only non-missing flags are counted.
table( dat$fisyr, dat$JustNowProfessionalized )
##
## 0 1
## 1998 0 0
## 1999 1646 623
## 2000 2396 712
## 2001 2523 546
## 2002 2544 438
## 2003 2359 357

# Same cross-tab as row proportions (margin=1 normalizes within each fiscal year).
prop.table( table( dat$fisyr, dat$JustNowProfessionalized ), margin=1 )
##
## 0 1
## 1998
## 1999 0.7254297 0.2745703
## 2000 0.7709138 0.2290862
## 2001 0.8220919 0.1779081
## 2002 0.8531187 0.1468813
## 2003 0.8685567 0.1314433

# Sum the flag within each organization (ein), ignoring missing values, then
# tabulate those per-organization totals.
table( tapply( dat$JustNowProfessionalized, dat$ein, sum, na.rm=TRUE ) )
##
## 0 1
## 10764 2676
# Build `prof`, the professionalization indicator, with one value per row of
# dat (stored at that row's own position):
#   NA - the org's earliest-year revenue is already at or above the cutoff
#        ("born large"), or the period comes after the org first reached it
#   0  - revenue is still below the cutoff
#   1  - the first period with revenue at or above the cutoff
# `fr` keeps each organization's earliest-year revenue, in unique(dat$ein) order.
rev.cutoff <- 100000

org.ids <- unique( dat$ein )
prof    <- rep( NA, nrow(dat) )         # preallocated; filled by row index below
fr      <- rep( NA, length(org.ids) )

for( k in seq_along(org.ids) )
{
  # Rows of this org, put in fiscal-year order so the running count below
  # does not depend on how dat happens to be sorted.
  org.rows <- which( dat$ein == org.ids[k] )
  org.rows <- org.rows[ order( dat$fisyr[org.rows] ) ]
  org.rev  <- dat$FS_Totrev_adj[ org.rows ]

  first.rev <- org.rev[1]
  fr[k] <- first.rev

  # Born large: leave prof as NA for every period of this org.
  # Using >= (instead of a separate > test and < test) means revenue exactly
  # equal to the cutoff is still assigned, so prof always has nrow(dat) values.
  if( first.rev >= rev.cutoff ) { next }

  spell <- ifelse( org.rev < rev.cutoff, 0, 1 )

  # cumsum(spell) - spell counts the at-or-above-cutoff periods strictly
  # before each row. Once that is positive the org has already
  # professionalized, so the later observations are dropped from the spell.
  spell[ which( cumsum(spell) - spell > 0 ) ] <- NA

  # Write back by row position so prof lines up with dat even when an org's
  # rows are not contiguous.
  prof[ org.rows ] <- spell
}
# Attach the spell indicator as the first column. cbind() pairs values by
# position, so prof must hold exactly one value per row of dat.
dat <- cbind( prof, dat )

# Preview of the analysis sample in a scratch copy (dat itself is not filtered here):
# keep rows with a non-missing prof, then rows with a non-missing surplus ratio.
dd <- dat[ ! is.na( dat$prof ), ]
dd <- dd[ ! is.na( dd$SurplusRat_ndrop_w892 ), ]
# exclude hospital or higher ed
dd <- dd[ dd$Subsector2 != "Higher Edu or Hospitals", ]
nrow( dd )
## [1] 22026

# First year they appear in the data
table( tapply( dd$fisyr, dd$ein, min ) )
##
## 1998 1999 2000 2001 2002 2003
## 2986 2237 1027 760 424 310

# Length of spells (number of rows per ein, then the distribution of those counts)
table( table( dd$ein ) )
##
## 1 2 3 4 5 6
## 1535 2384 1450 1010 857 508
Create new DV according to the rules:
Here is the sample breakdown:
Length of spells for grassroots orgs:

| 1 | 2 | 3 | 4 | 5 | 6 |
|---|---|---|---|---|---|
| 1541 | 2404 | 1460 | 1016 | 866 | 509 |
Which means there are 22,177 rows of data (observation periods).
There are 14 values in the SurplusRat_ndrop_w892 variable that are missing, so that drops to 22,163.
There are 49 hospitals and universities, accounting for 137 total spells.
After dropping we are down to 22,026.
# Fiscal-year indicators: D1..D6 flag fisyr 1998..2003 in that order
# (1 when the row's fisyr equals the year, 0 otherwise).
year.flag <- function( yr ) { ifelse( dat$fisyr == yr, 1, 0 ) }
D1 <- year.flag( 1998 )
D2 <- year.flag( 1999 )
D3 <- year.flag( 2000 )
D4 <- year.flag( 2001 )
D5 <- year.flag( 2002 )
D6 <- year.flag( 2003 )

# Age shifted up by one, then its powers through the fifth, each obtained by
# multiplying the previous power by AGE.
AGE  <- dat$age + 1
Age2 <- AGE * AGE
Age3 <- Age2 * AGE
Age4 <- Age3 * AGE
Age5 <- Age4 * AGE

# Append the new columns, then clear the temporaries from the workspace.
dat <- cbind( dat, D1, D2, D3, D4, D5, D6, AGE, Age2, Age3, Age4, Age5 )
rm( D1, D2, D3, D4, D5, D6, AGE, Age2, Age3, Age4, Age5, prof, year.flag )

# number of nonprofits in sample
length( unique( dat$ein ))
## [1] 13440
# Sample bookkeeping: record the size of the full data, save it, then narrow
# dat step by step to the rows used for the hazard spells. The "## ..." text
# after each call is the output echoed when the script was run.
# original number of obs
nrow( dat )## [1] 46369
# Saved before any rows are dropped, so this file still holds every row of dat
# (including rows whose prof is NA) together with the columns added above.
write.dta( dat, "./Data/Aug_Profess_Build_Data_Ruledate1998_vNoWafflesParedwRev_FROMJESSE.dta" )
# remove the born large orgs
# (this drops every row where prof is NA, which also covers the periods after
# an org has already professionalized)
dat <- dat[ ! is.na(dat$prof) , ]
nrow( dat )## [1] 22177
# number of grassroots nonprofits in the sample
length( unique( dat$ein ))## [1] 7796
# number that professionalize
# (prof is 0/1 on the remaining rows, so the sum counts the rows coded 1)
sum(dat$prof)## [1] 2474
# total proportion that professionalize
sum(dat$prof) / length( unique( dat$ein ))## [1] 0.3173422
# check to make sure none professionalize twice
# (per-org sum of prof; every org should total 0 or 1)
table( tapply( dat$prof, dat$ein, sum ) )##
## 0 1
## 5322 2474
# when do orgs professionalize
table( dat$fisyr, dat$prof )##
## 0 1
## 1998 3008 0
## 1999 3898 623
## 2000 3519 685
## 2001 3430 504
## 2002 3016 352
## 2003 2832 310
# same table as row proportions: share of each fiscal year's rows with prof = 1
prop.table( table( dat$fisyr, dat$prof ), margin=1 )##
## 0 1
## 1998 1.00000000 0.00000000
## 1999 0.86219863 0.13780137
## 2000 0.83705994 0.16294006
## 2001 0.87188612 0.12811388
## 2002 0.89548694 0.10451306
## 2003 0.90133673 0.09866327
# drop 14 missing Surplus Ratio cases
dat <- dat[ ! is.na(dat$SurplusRat_ndrop_w892) , ]
# exclude hospital or higher ed
dat <- dat[ ! dat$Subsector2 == "Higher Edu or Hospitals" , ]
nrow( dat )## [1] 22026
# convert EqRat_w to thousands to have meaningful reg coefficients
summary( dat$EqRat_w )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 3 4173 29615 28173 2605173
dat$EqRat_w_K <- dat$EqRat_w / 1000

# Save the final spell data in two formats.
# NOTE(review): the CSV goes to the working directory while the RDS goes to
# ./Data/ -- confirm that the two different locations are intended.
write.csv( dat, "CompleteHazardSpells.csv" )
saveRDS( dat, "./Data/CompleteHazardSpells.rds" )