Data API Example



LOAD LIBRARIES

library( RCurl )
library( jsonlite )
library( pander )
library( dplyr )

REQUEST A KEY

http://api.census.gov/data/key_signup.html

# Placeholder value: replace it with the key obtained from the sign-up page above.
APIkey <- "your.census.api.key.goes.here"



ANATOMY OF AN API

# The pieces that make up a Census API request.
fieldnm <- "DP03_0119PE"   # variable to request: poverty
state <- "36"              # NY
county <- "067"            # Onondaga County
APIkey <- "your.key"       # placeholder; get a key at http://api.census.gov/data/key_signup.html



FETCH CENSUS DATA

We will retrieve data for the following three variables from Onondaga County in NY.

  • poverty rate: DP03_0119PE
  • total pop: DP05_0028E
  • pop black: DP05_0033E
fieldnm <- "DP03_0119PE"   # poverty
state <- "36"              # NY
county <- "067"            # Onondaga County
APIkey <- "your.api.key.here"   # placeholder; substitute your own key

# Assemble the request URL from its parts. The key comes from APIkey rather
# than being typed in a second time, so editing the variable above is enough.
paste0( "http://api.census.gov/data/2013/acs5/profile/?get=", fieldnm,
        "&for=tract:*&in=state:", state,
        "+county:", county,
        "&key=", APIkey )

[1] "http://api.census.gov/data/2013/acs5/profile/?get=DP03_0119PE&for=tract:*&in=state:36+county:067&key=your.api.key.here"



FETCH THE DATA

The data is returned in a format that looks like this:

Census API return



Poverty Rate

fieldnm <- "DP03_0119PE"   # poverty
state <- "36"              # NY
county <- "067"            # Onondaga County

# DP-prefixed variables are published in the ACS data profile tables, so the
# request has to go to the acs5/profile endpoint rather than plain acs5.
# APIkey is the key assigned earlier in this document.
resURL <- paste0( "https://api.census.gov/data/2013/acs/acs5/profile?get=", fieldnm,
                  "&for=tract:*&in=state:", state,
                  "+county:", county,
                  "&key=", APIkey )


  
# Download the raw response body as a single character string.
# Certificate verification is left at its default (enabled): the request
# carries the API key, so the server's identity should be checked. If the
# check fails because no CA bundle is found, supply one through the `cainfo`
# option instead of switching verification off.
poverty.json <- getURL( resURL )

head( poverty.json )
## [1] "[[\"NAME\",\"B01001_001E\",\"state\"],\n[\"Alabama\",\"4830620\",\"01\"],\n[\"Alaska\",\"733375\",\"02\"],\n[\"Arizona\",\"6641928\",\"04\"],\n[\"Arkansas\",\"2958208\",\"05\"],\n[\"California\",\"38421464\",\"06\"],\n[\"Colorado\",\"5278906\",\"08\"],\n[\"Connecticut\",\"3593222\",\"09\"],\n[\"Delaware\",\"926454\",\"10\"],\n[\"District of Columbia\",\"647484\",\"11\"],\n[\"Florida\",\"19645772\",\"12\"],\n[\"Georgia\",\"10006693\",\"13\"],\n[\"Hawaii\",\"1406299\",\"15\"],\n[\"Idaho\",\"1616547\",\"16\"],\n[\"Illinois\",\"12873761\",\"17\"],\n[\"Indiana\",\"6568645\",\"18\"],\n[\"Iowa\",\"3093526\",\"19\"],\n[\"Kansas\",\"2892987\",\"20\"],\n[\"Kentucky\",\"4397353\",\"21\"],\n[\"Louisiana\",\"4625253\",\"22\"],\n[\"Maine\",\"1329100\",\"23\"],\n[\"Maryland\",\"5930538\",\"24\"],\n[\"Massachusetts\",\"6705586\",\"25\"],\n[\"Michigan\",\"9900571\",\"26\"],\n[\"Minnesota\",\"5419171\",\"27\"],\n[\"Mississippi\",\"2988081\",\"28\"],\n[\"Missouri\",\"6045448\",\"29\"],\n[\"Montana\",\"1014699\",\"30\"],\n[\"Nebraska\",\"1869365\",\"31\"],\n[\"Nevada\",\"2798636\",\"32\"],\n[\"New Hampshire\",\"1324201\",\"33\"],\n[\"New Jersey\",\"8904413\",\"34\"],\n[\"New Mexico\",\"2084117\",\"35\"],\n[\"New York\",\"19673174\",\"36\"],\n[\"North Carolina\",\"9845333\",\"37\"],\n[\"North Dakota\",\"721640\",\"38\"],\n[\"Ohio\",\"11575977\",\"39\"],\n[\"Oklahoma\",\"3849733\",\"40\"],\n[\"Oregon\",\"3939233\",\"41\"],\n[\"Pennsylvania\",\"12779559\",\"42\"],\n[\"Rhode Island\",\"1053661\",\"44\"],\n[\"South Carolina\",\"4777576\",\"45\"],\n[\"South Dakota\",\"843190\",\"46\"],\n[\"Tennessee\",\"6499615\",\"47\"],\n[\"Texas\",\"26538614\",\"48\"],\n[\"Utah\",\"2903379\",\"49\"],\n[\"Vermont\",\"626604\",\"50\"],\n[\"Virginia\",\"8256630\",\"51\"],\n[\"Washington\",\"6985464\",\"53\"],\n[\"West Virginia\",\"1851420\",\"54\"],\n[\"Wisconsin\",\"5742117\",\"55\"],\n[\"Wyoming\",\"579679\",\"56\"],\n[\"Puerto Rico\",\"3583073\",\"72\"]]"



That’s not very helpful! Let’s convert the JSON output into a data frame.

# Turn the raw JSON text into a data frame with json.to.data(), which is
# defined in the "JSON TO DATA FRAME FUNCTION" section below.
poverty <- json.to.data( poverty.json )

# Render the first rows as a formatted table.
pander( head( poverty ) )
| NAME       | B01001_001E | state |
|:-----------|:------------|:------|
| Alabama    | 4830620     | 01    |
| Alaska     | 733375      | 02    |
| Arizona    | 6641928     | 04    |
| Arkansas   | 2958208     | 05    |
| California | 38421464    | 06    |
| Colorado   | 5278906     | 08    |







JSON TO DATA FRAME FUNCTION

# Convert the JSON text returned by the Census API into a data frame.
#
# The response is a JSON array of equal-length arrays; fromJSON() turns it
# into a matrix whose first row holds the column names and whose remaining
# rows hold the records.
#
# Args:
#   x: JSON text, e.g. the string returned by getURL().
#
# Returns:
#   A data frame with one row per record, named after the header row.
#   Values are not converted: a response made of quoted strings yields
#   character columns, so apply as.numeric() to the columns that need it.
#   For the tract-level poverty request the names would look like
#   "DP03_0119PE" "state" "county" "tract".
#
# Stops with an error when the JSON does not parse to a matrix.
json.to.data <- function( x )
{
    a.matrix <- fromJSON( x )  # converts json table to a matrix

    if( !is.matrix( a.matrix ) )
    {
        stop( "expected a JSON table (an array of equal-length rows)", call. = FALSE )
    }

    c.names <- a.matrix[ 1 , ]  # column names are the first row

    # drop = FALSE keeps the matrix shape when only one data row remains;
    # without it the single row collapses to a vector and becomes a column.
    a.matrix <- a.matrix[ -1 , , drop = FALSE ]

    my.dat <- data.frame( a.matrix, stringsAsFactors = FALSE )

    names( my.dat ) <- c.names

    return( my.dat )
}