output: word_document

SETUP

library( foreign )
library( memisc )
library( knitr )
library( dplyr )
library( xtable )
library( stargazer )
library( broom )
library( pander )

LOAD DATA

# Read the hazard-spell data set from its serialized RDS file.
dat <- readRDS( file = "./Data/CompleteHazardSpells.rds" )

# lapply( dat, class )
#                   
# head( dat, 25 ) %>% pander

SPELLS

Inspect Spells

# Keep only the identifier, timing, revenue, and professionalization columns
# so that individual spells can be inspected row by row.
dat.sub <- dat[ , c( "ein", "fisyr", "age",
                     "FS_Totrev_adj", "prof",
                     "JustNowProfessionalized" ) ]

# Print the first 100 rows of the subset as a pander table.
pander( head( dat.sub, n = 100 ) )
  ein fisyr age FS_Totrev_adj prof JustNowProfessionalized
1 10211502 1999 1 77197 0 NA
2 10211502 2000 2 302158 1 1
10 10435507 2001 3 22254 0 NA
11 10435507 2002 4 38846 0 0
18 10482891 1999 1 48137 0 NA
19 10482891 2000 2 46809 0 0
20 10482891 2001 3 46344 0 0
21 10482891 2003 5 39729 0 NA
27 10494938 1998 0 0 0 NA
28 10494938 1999 1 0 0 0
29 10494938 2000 2 134188 1 1
30 10500759 2000 2 3964 0 NA
31 10500759 2001 3 2414 0 0
32 10500759 2002 4 2342 0 0
33 10500759 2003 5 5286 0 0
35 10503145 1998 0 30471 0 NA
36 10503145 2000 2 77109 0 NA
37 10503145 2001 3 230863 1 1
44 10506600 2000 2 0 0 NA
45 10506600 2001 3 38077 0 0
46 10506600 2002 4 31202 0 0
47 10506600 2003 5 29743 0 0
60 10510481 1999 1 0 0 NA
61 10510481 2000 2 0 0 0
62 10510481 2001 3 85853 0 0
63 10510481 2002 4 75063 0 0
64 10510481 2003 5 48988 0 0
65 10510909 1998 0 59352 0 NA
66 10510909 1999 1 51470 0 0
67 10510909 2000 2 56985 0 0
68 10510909 2001 3 55693 0 0
69 10510909 2002 4 62800 0 0
70 10510909 2003 5 62556 0 0
71 10511056 1998 0 21196 0 NA
72 10511056 1999 1 24823 0 0
73 10511056 2000 2 21049 0 0
74 10511056 2001 3 31252 0 0
75 10511056 2002 4 25123 0 0
82 10512664 2000 2 20980 0 NA
83 10512664 2002 4 20555 0 NA
84 10512986 2000 2 0 0 NA
85 10513088 1998 0 23433 0 NA
86 10513088 1999 1 21718 0 0
87 10513088 2000 2 17017 0 0
88 10513181 2000 2 8972 0 NA
89 10513181 2002 4 21996 0 NA
90 10513181 2003 5 69734 0 0
91 10513268 2000 2 33267 0 NA
96 10513496 1998 0 36494 0 NA
97 10513496 1999 1 44541 0 0
98 10513496 2000 2 46138 0 0
99 10513496 2001 3 45439 0 0
100 10513496 2002 4 50838 0 0
101 10513496 2003 5 56575 0 0
102 10513575 1999 1 1418 0 NA
103 10513575 2000 2 4418 0 0
104 10513575 2002 4 1220 0 NA
105 10513575 2003 5 1984 0 0
106 10513589 1998 0 52034 0 NA
107 10513589 1999 1 166141 1 1
111 10514230 1999 1 0 0 NA
112 10514230 2000 2 313509 1 1
113 10514747 1998 0 0 0 NA
114 10514747 2000 2 0 0 NA
119 10515050 1999 1 47215 0 NA
120 10515050 2000 2 43059 0 0
121 10515050 2001 3 95275 0 0
122 10515050 2002 4 58865 0 0
123 10515050 2003 5 34193 0 0
124 10515357 2000 2 3226 0 NA
125 10515357 2002 4 58926 0 NA
126 10515357 2003 5 44397 0 0
127 10515363 2000 2 62831 0 NA
128 10515363 2001 3 225484 1 1
146 10516071 1998 0 89105 0 NA
147 10516071 1999 1 224349 1 1
152 10516434 2000 2 11405 0 NA
153 10516434 2001 3 6983 0 0
154 10516434 2003 5 8940 0 NA
155 10516517 1999 1 31566 0 NA
156 10516517 2000 2 183391 1 1
157 10516579 1999 1 0 0 NA
158 10516579 2001 3 0 0 NA
159 10516646 1998 0 67462 0 NA
160 10516646 1999 1 63669 0 0
161 10516646 2000 2 194600 1 1
162 10517116 2001 3 1350 0 NA
163 10517116 2002 4 0 0 0
174 10517798 1999 1 38686 0 NA
175 10517798 2000 2 137693 1 1
182 10518008 2000 2 33966 0 NA
188 10518193 1999 1 0 0 NA
189 10518193 2000 2 132529 1 1
193 10518534 1999 1 0 0 NA
194 10518534 2000 2 209403 1 1
195 10518763 2001 3 6281 0 NA
196 10518763 2002 4 35310 0 0
197 10518763 2003 5 34798 0 0
198 10519140 1999 1 12807 0 NA
199 10519382 1998 0 55193 0 NA

Typical Spell Lengths

NOTE - do we want to experiment with imputing observations backwards for sensitivity analysis?

NOTE - if we don’t impute spell lengths backwards, should we set age to first year in dataset?

# Tabulate organizations (EINs) by how many rows each contributes to dat:
# the inner table counts rows per EIN, the outer table counts EINs per row count.
# The calls stay nested so the printed table carries no dimension label.
table( table( dat[[ "ein" ]] ) )
## 
##    1    2    3    4    5    6 
## 1535 2384 1450 1010  857  508
# Proportion that professionalize: the sum of the prof indicator over all rows
# divided by the number of distinct EINs.
sum( dat[[ "prof" ]] ) / length( unique( dat[[ "ein" ]] ) )
## [1] 0.3172779

Number of organizations that never professionalize: these are right-censored because the study period ends.

# Count the rows observed at age 5 whose prof indicator is still 0.
n.censored <- sum( dat$prof == 0 & dat$age == 5 )
n.censored
## [1] 2815
# 2815 that never professionalize
# 7744 orgs

# Take the denominator from the data instead of retyping printed counts, so
# the share stays correct if the input file changes.
n.orgs <- length( unique( dat$ein ) )
n.censored / n.orgs
## [1] 0.3635072

Those that drop out before professionalization:

# Per-EIN total of the prof indicator; a total of 0 means the organization
# has no row with prof == 1.
do.prof <- tapply( X = dat$prof, INDEX = dat$ein, FUN = sum )

# EINs whose prof total is 0.
these.do.not <- names( do.prof )[ do.prof == 0 ]

# All rows belonging to those EINs.
dat.not <- dat[ is.element( dat$ein, these.do.not ), ]

# Number of rows in that subset.
dim( dat.not )[ 1 ]
## [1] 15384
# Distribution of the highest age reached by each EIN in dat.not.
table( tapply( X = dat.not$age, INDEX = dat.not$ein, FUN = max ) )
## 
##    0    1    2    3    4    5 
##  362  495  437  493  685 2815
# Split all organizations into three mutually exclusive outcomes and report
# each as a share of the total. The counts are derived from the data rather
# than retyped from printed output, so they cannot go stale.
n.orgs   <- length( unique( dat$ein ) )
# Highest age reached by each EIN that has no row with prof == 1.
last.age <- tapply( dat.not$age, dat.not$ein, max )

sum( last.age < 5 ) / n.orgs # do not report (fail)
## [1] 0.3192149
sum( last.age == 5 ) / n.orgs # right-censored (alive in 2003 but small)
## [1] 0.3635072
sum( do.prof > 0 ) / n.orgs # professionalize
## [1] 0.3172779
# For each age: the number of EINs in dat.not whose highest age is that age,
# divided by the number of rows in dat at that age.
table( tapply( X = dat.not$age, INDEX = dat.not$ein, FUN = max ) ) /
  table( dat[[ "age" ]] )
## 
##         0         1         2         3         4         5 
## 0.1212324 0.1102695 0.1046456 0.1260870 0.2049671 0.9013769

Observation Accounting

# Row counts by the upper-case AGE column.
# NOTE(review): AGE is referenced only here; the lower-case age column is used
# everywhere else. Presumably AGE is age shifted to start at 1 — confirm.
table( dat$AGE )
## 
##    1    2    3    4    5    6 
## 2986 4489 4176 3910 3342 3123
# Row counts by fiscal year.
table( dat[[ "fisyr" ]] )
## 
## 1998 1999 2000 2001 2002 2003 
## 2986 4489 4176 3910 3342 3123
# Cross-tabulate age (rows) against fiscal year (columns) and render with kable.
kable( table( dat$age, dat$fisyr ) )
1998 1999 2000 2001 2002 2003
0 2986 0 0 0 0 0
1 0 4489 0 0 0 0
2 0 0 4176 0 0 0
3 0 0 0 3910 0 0
4 0 0 0 0 3342 0
5 0 0 0 0 0 3123
# Cross-tabulate fiscal year (rows) against the prof indicator (columns) and
# render with kable.
kable( table( dat$fisyr, dat$prof ) )
0 1
1998 2986 0
1999 3873 616
2000 3494 682
2001 3411 499
2002 2990 352
2003 2815 308