output: word_document
library( foreign )
library( memisc )
library( knitr )
library( dplyr )
library( xtable )
library( stargazer )
library( broom )
library( pander )
# Load the hazard-spell data that every later chunk reads from
# (path is relative to the working directory).
dat <- readRDS( "./Data/CompleteHazardSpells.rds" )
# lapply( dat, class )
#
# head( dat, 25 ) %>% pander
# Keep only the identifier, timing, revenue and professionalization
# columns so the preview table stays readable.
dat.sub <- dat[ , c( "ein", "fisyr", "age", "FS_Totrev_adj",
                     "prof", "JustNowProfessionalized" ) ]

# Show the first 100 rows as a formatted table.
head( dat.sub, 100 ) %>% pander
ein | fisyr | age | FS_Totrev_adj | prof | JustNowProfessionalized | |
---|---|---|---|---|---|---|
1 | 10211502 | 1999 | 1 | 77197 | 0 | NA |
2 | 10211502 | 2000 | 2 | 302158 | 1 | 1 |
10 | 10435507 | 2001 | 3 | 22254 | 0 | NA |
11 | 10435507 | 2002 | 4 | 38846 | 0 | 0 |
18 | 10482891 | 1999 | 1 | 48137 | 0 | NA |
19 | 10482891 | 2000 | 2 | 46809 | 0 | 0 |
20 | 10482891 | 2001 | 3 | 46344 | 0 | 0 |
21 | 10482891 | 2003 | 5 | 39729 | 0 | NA |
27 | 10494938 | 1998 | 0 | 0 | 0 | NA |
28 | 10494938 | 1999 | 1 | 0 | 0 | 0 |
29 | 10494938 | 2000 | 2 | 134188 | 1 | 1 |
30 | 10500759 | 2000 | 2 | 3964 | 0 | NA |
31 | 10500759 | 2001 | 3 | 2414 | 0 | 0 |
32 | 10500759 | 2002 | 4 | 2342 | 0 | 0 |
33 | 10500759 | 2003 | 5 | 5286 | 0 | 0 |
35 | 10503145 | 1998 | 0 | 30471 | 0 | NA |
36 | 10503145 | 2000 | 2 | 77109 | 0 | NA |
37 | 10503145 | 2001 | 3 | 230863 | 1 | 1 |
44 | 10506600 | 2000 | 2 | 0 | 0 | NA |
45 | 10506600 | 2001 | 3 | 38077 | 0 | 0 |
46 | 10506600 | 2002 | 4 | 31202 | 0 | 0 |
47 | 10506600 | 2003 | 5 | 29743 | 0 | 0 |
60 | 10510481 | 1999 | 1 | 0 | 0 | NA |
61 | 10510481 | 2000 | 2 | 0 | 0 | 0 |
62 | 10510481 | 2001 | 3 | 85853 | 0 | 0 |
63 | 10510481 | 2002 | 4 | 75063 | 0 | 0 |
64 | 10510481 | 2003 | 5 | 48988 | 0 | 0 |
65 | 10510909 | 1998 | 0 | 59352 | 0 | NA |
66 | 10510909 | 1999 | 1 | 51470 | 0 | 0 |
67 | 10510909 | 2000 | 2 | 56985 | 0 | 0 |
68 | 10510909 | 2001 | 3 | 55693 | 0 | 0 |
69 | 10510909 | 2002 | 4 | 62800 | 0 | 0 |
70 | 10510909 | 2003 | 5 | 62556 | 0 | 0 |
71 | 10511056 | 1998 | 0 | 21196 | 0 | NA |
72 | 10511056 | 1999 | 1 | 24823 | 0 | 0 |
73 | 10511056 | 2000 | 2 | 21049 | 0 | 0 |
74 | 10511056 | 2001 | 3 | 31252 | 0 | 0 |
75 | 10511056 | 2002 | 4 | 25123 | 0 | 0 |
82 | 10512664 | 2000 | 2 | 20980 | 0 | NA |
83 | 10512664 | 2002 | 4 | 20555 | 0 | NA |
84 | 10512986 | 2000 | 2 | 0 | 0 | NA |
85 | 10513088 | 1998 | 0 | 23433 | 0 | NA |
86 | 10513088 | 1999 | 1 | 21718 | 0 | 0 |
87 | 10513088 | 2000 | 2 | 17017 | 0 | 0 |
88 | 10513181 | 2000 | 2 | 8972 | 0 | NA |
89 | 10513181 | 2002 | 4 | 21996 | 0 | NA |
90 | 10513181 | 2003 | 5 | 69734 | 0 | 0 |
91 | 10513268 | 2000 | 2 | 33267 | 0 | NA |
96 | 10513496 | 1998 | 0 | 36494 | 0 | NA |
97 | 10513496 | 1999 | 1 | 44541 | 0 | 0 |
98 | 10513496 | 2000 | 2 | 46138 | 0 | 0 |
99 | 10513496 | 2001 | 3 | 45439 | 0 | 0 |
100 | 10513496 | 2002 | 4 | 50838 | 0 | 0 |
101 | 10513496 | 2003 | 5 | 56575 | 0 | 0 |
102 | 10513575 | 1999 | 1 | 1418 | 0 | NA |
103 | 10513575 | 2000 | 2 | 4418 | 0 | 0 |
104 | 10513575 | 2002 | 4 | 1220 | 0 | NA |
105 | 10513575 | 2003 | 5 | 1984 | 0 | 0 |
106 | 10513589 | 1998 | 0 | 52034 | 0 | NA |
107 | 10513589 | 1999 | 1 | 166141 | 1 | 1 |
111 | 10514230 | 1999 | 1 | 0 | 0 | NA |
112 | 10514230 | 2000 | 2 | 313509 | 1 | 1 |
113 | 10514747 | 1998 | 0 | 0 | 0 | NA |
114 | 10514747 | 2000 | 2 | 0 | 0 | NA |
119 | 10515050 | 1999 | 1 | 47215 | 0 | NA |
120 | 10515050 | 2000 | 2 | 43059 | 0 | 0 |
121 | 10515050 | 2001 | 3 | 95275 | 0 | 0 |
122 | 10515050 | 2002 | 4 | 58865 | 0 | 0 |
123 | 10515050 | 2003 | 5 | 34193 | 0 | 0 |
124 | 10515357 | 2000 | 2 | 3226 | 0 | NA |
125 | 10515357 | 2002 | 4 | 58926 | 0 | NA |
126 | 10515357 | 2003 | 5 | 44397 | 0 | 0 |
127 | 10515363 | 2000 | 2 | 62831 | 0 | NA |
128 | 10515363 | 2001 | 3 | 225484 | 1 | 1 |
146 | 10516071 | 1998 | 0 | 89105 | 0 | NA |
147 | 10516071 | 1999 | 1 | 224349 | 1 | 1 |
152 | 10516434 | 2000 | 2 | 11405 | 0 | NA |
153 | 10516434 | 2001 | 3 | 6983 | 0 | 0 |
154 | 10516434 | 2003 | 5 | 8940 | 0 | NA |
155 | 10516517 | 1999 | 1 | 31566 | 0 | NA |
156 | 10516517 | 2000 | 2 | 183391 | 1 | 1 |
157 | 10516579 | 1999 | 1 | 0 | 0 | NA |
158 | 10516579 | 2001 | 3 | 0 | 0 | NA |
159 | 10516646 | 1998 | 0 | 67462 | 0 | NA |
160 | 10516646 | 1999 | 1 | 63669 | 0 | 0 |
161 | 10516646 | 2000 | 2 | 194600 | 1 | 1 |
162 | 10517116 | 2001 | 3 | 1350 | 0 | NA |
163 | 10517116 | 2002 | 4 | 0 | 0 | 0 |
174 | 10517798 | 1999 | 1 | 38686 | 0 | NA |
175 | 10517798 | 2000 | 2 | 137693 | 1 | 1 |
182 | 10518008 | 2000 | 2 | 33966 | 0 | NA |
188 | 10518193 | 1999 | 1 | 0 | 0 | NA |
189 | 10518193 | 2000 | 2 | 132529 | 1 | 1 |
193 | 10518534 | 1999 | 1 | 0 | 0 | NA |
194 | 10518534 | 2000 | 2 | 209403 | 1 | 1 |
195 | 10518763 | 2001 | 3 | 6281 | 0 | NA |
196 | 10518763 | 2002 | 4 | 35310 | 0 | 0 |
197 | 10518763 | 2003 | 5 | 34798 | 0 | 0 |
198 | 10519140 | 1999 | 1 | 12807 | 0 | NA |
199 | 10519382 | 1998 | 0 | 55193 | 0 | NA |
NOTE - do we want to experiment with imputing observations backwards for sensitivity analysis?
NOTE - if we don’t impute spell lengths backwards, should we set age to first year in dataset?
# count of spells by length:
# the inner table() counts rows per organization (ein); the outer table()
# counts how many organizations have each number of rows
table( table( dat$ein ) )
##
## 1 2 3 4 5 6
## 1535 2384 1450 1010 857 508
# share of organizations that professionalize:
# total of the prof indicator divided by the number of distinct EINs
sum( dat$prof ) / sum( !duplicated( dat$ein ) )
## [1] 0.3172779
Number that never professionalize - these are right-censored by the end of the study.
# rows at age 5 whose prof indicator is still 0
sum( dat$age == 5 & dat$prof == 0 )
## [1] 2815
# 2815 that never professionalize (prof == 0 at age 5, counted above)
# 7744 orgs in total
2815 / 7744
## [1] 0.3635072
Those that drop out before professionalization:
# Sum of the prof indicator within each organization (ein).
do.prof <- tapply( dat$prof, dat$ein, sum )

# EINs whose prof indicator is 0 in every observed row.
these.do.not <- names(do.prof)[ do.prof == 0 ]

# All rows belonging to those never-professionalized organizations.
dat.not <- dat[ dat$ein %in% these.do.not , ]

nrow( dat.not )
## [1] 15384
# highest age reached by each never-professionalized organization,
# tabulated by that age
table( tapply( dat.not$age, dat.not$ein, max ) )
##
## 0 1 2 3 4 5
## 362 495 437 493 685 2815
# Split of the 7744 organizations into three outcomes. The numerators are
# hard-coded from the tables in this report:
#   2472 = 362 + 495 + 437 + 493 + 685 (never professionalized, last age 0-4)
#   2815 = never professionalized, last age 5
#   2457 = 616 + 682 + 499 + 352 + 308 (rows with prof == 1, by fiscal year)
2472 / 7744 # do not report (fail)
## [1] 0.3192149
2815 / 7744 # right-censored (alive in 2003 but small)
## [1] 0.3635072
2457 / 7744 # professionalize
## [1] 0.3172779
# for each age a: never-professionalized organizations whose highest age is a,
# divided by the total number of rows in dat observed at age a
table( tapply( dat.not$age, dat.not$ein, max ) ) /
table( dat$age )
##
## 0 1 2 3 4 5
## 0.1212324 0.1102695 0.1046456 0.1260870 0.2049671 0.9013769
# NOTE(review): AGE (upper case) is a different column from age - its values
# here run 1-6 while age runs 0-5; confirm which one the analysis should use
table( dat$AGE )
##
## 1 2 3 4 5 6
## 2986 4489 4176 3910 3342 3123
# number of rows observed in each fiscal year
table( dat$fisyr )
##
## 1998 1999 2000 2001 2002 2003
## 2986 4489 4176 3910 3342 3123
# cross-tabulate age (rows) against fiscal year (columns) and render it
kable( table( dat$age, dat$fisyr ) )
1998 | 1999 | 2000 | 2001 | 2002 | 2003 | |
---|---|---|---|---|---|---|
0 | 2986 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 4489 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 4176 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 3910 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 3342 | 0 |
5 | 0 | 0 | 0 | 0 | 0 | 3123 |
# cross-tabulate fiscal year (rows) against the prof indicator (columns)
kable( table( dat$fisyr, dat$prof ) )
0 | 1 | |
---|---|---|
1998 | 2986 | 0 |
1999 | 3873 | 616 |
2000 | 3494 | 682 |
2001 | 3411 | 499 |
2002 | 2990 | 352 |
2003 | 2815 | 308 |