Skip to content

Instantly share code, notes, and snippets.

@technickle
Last active August 29, 2015 14:05
Show Gist options
  • Select an option

  • Save technickle/2fc673c3a3222a1d93ae to your computer and use it in GitHub Desktop.

Select an option

Save technickle/2fc673c3a3222a1d93ae to your computer and use it in GitHub Desktop.

Revisions

  1. technickle revised this gist Dec 2, 2014. 1 changed file with 49 additions and 35 deletions.
    84 changes: 49 additions & 35 deletions state-ope-security-data.r
    Original file line number Diff line number Diff line change
    @@ -10,7 +10,7 @@
    # geocoding should be in a CSV with at least UNITID_P, x, y, and county;
    # place this file in the same folder with the .sav files
    # 5) modify this script to accommodate new year values (beyond the config)
    # 6) run it
    # 6) run this script: rscript state-ope-security-data.r
    #
    # this script produces almost 200 columns of data;
    # 14 general columns + (5 years x 4 location types x 9 crime types)
    @@ -21,13 +21,14 @@
    # FUTURE: fix SPSS import so it drops text field whitespace and remove workaround
    # FUTURE: hyphenate the zipcode column (source data has 9-digit numbers for ZIP+4s)
    # FUTURE: have the script grab the data directly from the website
    # FUTURE: make the year selections and merges configuration-driven
    # FUTURE: make the entire script driven by configuration parameters
    # ##############################################################################
    # Configuration

    # set this to the folder where you placed your OPE .sav files & geocoded .csv
    # you must have write access to this folder; the output file gets put here too
    setwd("~/CampusCrime/")
    setwd("~/Campus-Crime/")

    # set this to the 2-letter abbreviation for the state you want to compile
    statefilter = "NY"
    @@ -76,10 +77,20 @@ publicpropertytertiary_state = subset(publicpropertytertiary, State == statefilt
    #### build new data
    # create a new dataframe with the common fields from all records
    # we assume that the most recent year is the master list of institutions we want. Any institutions/campuses in previous years that are not in current year are ignored
    crimedata = subset(oncampusprimary_state, select=c(UNITID_P, INSTNM, BRANCH, Address, City, State, ZIP, sector_cd, Sector_desc, men_total, women_total, Total))

    crimedata = subset(oncampusprimary_state, select=c("UNITID_P", "INSTNM", "BRANCH", "Address", "City"))
    # rename column headers in this new dataframe
    colnames(crimedata) <- c("UNITID_P", "institution_name", "branch", "address", "city", "state", "zip", "sector_cd", "sector_desc", "men_total", "women_total", "total")
    colnames(crimedata)[1:5] = c("UNITID_P", "institution_name", "branch", "address", "city")
    print("initial campus columns...")

    # merge county column from campuslocations file
    crimedata = merge(x = crimedata, subset(campuslocations, select=c("UNITID_P","County")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[6] = "county"
    print("merged county column...")

    # merge rest of basic campus data
    crimedata = merge(x = crimedata, subset(oncampusprimary_state, select=c("UNITID_P", "State", "ZIP", "sector_cd", "Sector_desc", "men_total", "women_total", "Total")), by="UNITID_P",all.x=TRUE)
    colnames(crimedata)[7:13] = c("state", "zip", "sector_cd", "sector_desc", "men_total", "women_total", "total")
    print("additional campus columns...")

    # trim specific text columns (SPSS import includes whitespace)
    trim = function (x) gsub("^\\s+|\\s+$", "", x)
    @@ -89,65 +100,68 @@ crimedata$address = trim(crimedata$address)
    crimedata$city = trim(crimedata$city)
    crimedata$sector_desc = trim(crimedata$sector_desc)

    # strip down zip codes to 5 digits (removes trailing spaces and +4 codes)
    crimedata$zip = substr(crimedata$zip,1,5)

    # begin merging (appending) crime data columns from each year (36 columns per year!)
    # current year

    #merge the data for the first year and location type
    # merge the data for the first year by each of the location types
    crimedata = merge(x = crimedata, subset(oncampusprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "NEG_M12", "MURD12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2012
    #rename the column headers for the newly merged year/location - column order MUST match the above subset
    #there is probably a more clever way to do this using substitution/regex
    colnames(crimedata)[13:21] = c("2012_oncampus_aggravated_assault","2012_oncampus_arson","2012_oncampus_burglary", "2012_oncampus_forciblesexualoffense", "2012_oncampus_negligenthomicide", "2012_oncampus_murder", "2012_oncampus_nonforciblesexualoffense", "2012_oncampus_robbery", "2012_oncampus_vehicletheft")
    #rinse and repeat for each year and location type
    # rename the column headers for the newly merged year/location - column order MUST match the above subset
    # there is probably a more clever way to do this using substitution/regex
    colnames(crimedata)[14:22] = c("2012_oncampus_aggravated_assault","2012_oncampus_arson","2012_oncampus_burglary", "2012_oncampus_forciblesexualoffense", "2012_oncampus_negligenthomicide", "2012_oncampus_murder", "2012_oncampus_nonforciblesexualoffense", "2012_oncampus_robbery", "2012_oncampus_vehicletheft")
    # rinse and repeat for each year and location type
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2012
    colnames(crimedata)[22:30] = c("2012_offcampus_aggravated_assault","2012_offcampus_arson","2012_offcampus_burglary", "2012_offcampus_forciblesexualoffense", "2012_offcampus_negligenthomicide", "2012_offcampus_murder", "2012_offcampus_nonforciblesexualoffense", "2012_offcampus_robbery", "2012_offcampus_vehicletheft")
    colnames(crimedata)[23:31] = c("2012_offcampus_aggravated_assault","2012_offcampus_arson","2012_offcampus_burglary", "2012_offcampus_forciblesexualoffense", "2012_offcampus_negligenthomicide", "2012_offcampus_murder", "2012_offcampus_nonforciblesexualoffense", "2012_offcampus_robbery", "2012_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2012
    colnames(crimedata)[31:39] = c("2012_residencehall_aggravated_assault","2012_residencehall_arson","2012_residencehall_burglary", "2012_residencehall_forciblesexualoffense", "2012_residencehall_negligenthomicide", "2012_residencehall_murder", "2012_residencehall_nonforciblesexualoffense", "2012_residencehall_robbery", "2012_residencehall_vehicletheft")
    colnames(crimedata)[32:40] = c("2012_residencehall_aggravated_assault","2012_residencehall_arson","2012_residencehall_burglary", "2012_residencehall_forciblesexualoffense", "2012_residencehall_negligenthomicide", "2012_residencehall_murder", "2012_residencehall_nonforciblesexualoffense", "2012_residencehall_robbery", "2012_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #public-property 2012
    colnames(crimedata)[40:48] = c("2012_publicproperty_aggravated_assault","2012_publicproperty_arson","2012_publicproperty_burglary", "2012_publicproperty_forciblesexualoffense", "2012_publicproperty_negligenthomicide", "2012_publicproperty_murder", "2012_publicproperty_nonforciblesexualoffense", "2012_publicproperty_robbery", "2012_publicproperty_vehicletheft")
    colnames(crimedata)[41:49] = c("2012_publicproperty_aggravated_assault","2012_publicproperty_arson","2012_publicproperty_burglary", "2012_publicproperty_forciblesexualoffense", "2012_publicproperty_negligenthomicide", "2012_publicproperty_murder", "2012_publicproperty_nonforciblesexualoffense", "2012_publicproperty_robbery", "2012_publicproperty_vehicletheft")

    # previous year
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "NEG_M11", "MURD11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2011
    colnames(crimedata)[49:57] = c("2011_oncampus_aggravated_assault","2011_oncampus_arson","2011_oncampus_burglary", "2011_oncampus_forciblesexualoffense", "2011_oncampus_negligenthomicide", "2011_oncampus_murder", "2011_oncampus_nonforciblesexualoffense", "2011_oncampus_robbery", "2011_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(oncampusprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "NEG_M11", "MURD11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2011
    colnames(crimedata)[50:58] = c("2011_oncampus_aggravated_assault","2011_oncampus_arson","2011_oncampus_burglary", "2011_oncampus_forciblesexualoffense", "2011_oncampus_negligenthomicide", "2011_oncampus_murder", "2011_oncampus_nonforciblesexualoffense", "2011_oncampus_robbery", "2011_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2011
    colnames(crimedata)[58:66] = c("2011_offcampus_aggravated_assault","2011_offcampus_arson","2011_offcampus_burglary", "2011_offcampus_forciblesexualoffense", "2011_offcampus_negligenthomicide", "2011_offcampus_murder", "2011_offcampus_nonforciblesexualoffense", "2011_offcampus_robbery", "2011_offcampus_vehicletheft")
    colnames(crimedata)[59:67] = c("2011_offcampus_aggravated_assault","2011_offcampus_arson","2011_offcampus_burglary", "2011_offcampus_forciblesexualoffense", "2011_offcampus_negligenthomicide", "2011_offcampus_murder", "2011_offcampus_nonforciblesexualoffense", "2011_offcampus_robbery", "2011_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2011
    colnames(crimedata)[67:75] = c("2011_residencehall_aggravated_assault","2011_residencehall_arson","2011_residencehall_burglary", "2011_residencehall_forciblesexualoffense", "2011_residencehall_negligenthomicide", "2011_residencehall_murder", "2011_residencehall_nonforciblesexualoffense", "2011_residencehall_robbery", "2011_residencehall_vehicletheft")
    colnames(crimedata)[68:76] = c("2011_residencehall_aggravated_assault","2011_residencehall_arson","2011_residencehall_burglary", "2011_residencehall_forciblesexualoffense", "2011_residencehall_negligenthomicide", "2011_residencehall_murder", "2011_residencehall_nonforciblesexualoffense", "2011_residencehall_robbery", "2011_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #public-property 2011
    colnames(crimedata)[76:84] = c("2011_publicproperty_aggravated_assault","2011_publicproperty_arson","2011_publicproperty_burglary", "2011_publicproperty_forciblesexualoffense", "2011_publicproperty_negligenthomicide", "2011_publicproperty_murder", "2011_publicproperty_nonforciblesexualoffense", "2011_publicproperty_robbery", "2011_publicproperty_vehicletheft")
    colnames(crimedata)[77:85] = c("2011_publicproperty_aggravated_assault","2011_publicproperty_arson","2011_publicproperty_burglary", "2011_publicproperty_forciblesexualoffense", "2011_publicproperty_negligenthomicide", "2011_publicproperty_murder", "2011_publicproperty_nonforciblesexualoffense", "2011_publicproperty_robbery", "2011_publicproperty_vehicletheft")

    # 2nd previous year
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "NEG_M10", "MURD10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2010
    colnames(crimedata)[85:93] = c("2010_oncampus_aggravated_assault","2010_oncampus_arson","2010_oncampus_burglary", "2010_oncampus_forciblesexualoffense", "2010_oncampus_negligenthomicide", "2010_oncampus_murder", "2010_oncampus_nonforciblesexualoffense", "2010_oncampus_robbery", "2010_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(oncampusprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "NEG_M10", "MURD10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2010
    colnames(crimedata)[86:94] = c("2010_oncampus_aggravated_assault","2010_oncampus_arson","2010_oncampus_burglary", "2010_oncampus_forciblesexualoffense", "2010_oncampus_negligenthomicide", "2010_oncampus_murder", "2010_oncampus_nonforciblesexualoffense", "2010_oncampus_robbery", "2010_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2010
    colnames(crimedata)[94:102] = c("2010_offcampus_aggravated_assault","2010_offcampus_arson","2010_offcampus_burglary", "2010_offcampus_forciblesexualoffense", "2010_offcampus_negligenthomicide", "2010_offcampus_murder", "2010_offcampus_nonforciblesexualoffense", "2010_offcampus_robbery", "2010_offcampus_vehicletheft")
    colnames(crimedata)[95:103] = c("2010_offcampus_aggravated_assault","2010_offcampus_arson","2010_offcampus_burglary", "2010_offcampus_forciblesexualoffense", "2010_offcampus_negligenthomicide", "2010_offcampus_murder", "2010_offcampus_nonforciblesexualoffense", "2010_offcampus_robbery", "2010_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2010
    colnames(crimedata)[103:111] = c("2010_residencehall_aggravated_assault","2010_residencehall_arson","2010_residencehall_burglary", "2010_residencehall_forciblesexualoffense", "2010_residencehall_negligenthomicide", "2010_residencehall_murder", "2010_residencehall_nonforciblesexualoffense", "2010_residencehall_robbery", "2010_residencehall_vehicletheft")
    colnames(crimedata)[104:112] = c("2010_residencehall_aggravated_assault","2010_residencehall_arson","2010_residencehall_burglary", "2010_residencehall_forciblesexualoffense", "2010_residencehall_negligenthomicide", "2010_residencehall_murder", "2010_residencehall_nonforciblesexualoffense", "2010_residencehall_robbery", "2010_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #public-property 2010
    colnames(crimedata)[112:120] = c("2010_publicproperty_aggravated_assault","2010_publicproperty_arson","2010_publicproperty_burglary", "2010_publicproperty_forciblesexualoffense", "2010_publicproperty_negligenthomicide", "2010_publicproperty_murder", "2010_publicproperty_nonforciblesexualoffense", "2010_publicproperty_robbery", "2010_publicproperty_vehicletheft")
    colnames(crimedata)[113:121] = c("2010_publicproperty_aggravated_assault","2010_publicproperty_arson","2010_publicproperty_burglary", "2010_publicproperty_forciblesexualoffense", "2010_publicproperty_negligenthomicide", "2010_publicproperty_murder", "2010_publicproperty_nonforciblesexualoffense", "2010_publicproperty_robbery", "2010_publicproperty_vehicletheft")

    # 3rd previous year
    crimedata = merge(x = crimedata, subset(publicpropertysecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "NEG_M9", "MURD9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2009
    colnames(crimedata)[121:129] = c("2009_oncampus_aggravated_assault","2009_oncampus_arson","2009_oncampus_burglary", "2009_oncampus_forciblesexualoffense", "2009_oncampus_negligenthomicide", "2009_oncampus_murder", "2009_oncampus_nonforciblesexualoffense", "2009_oncampus_robbery", "2009_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(oncampussecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "NEG_M9", "MURD9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2009
    colnames(crimedata)[122:130] = c("2009_oncampus_aggravated_assault","2009_oncampus_arson","2009_oncampus_burglary", "2009_oncampus_forciblesexualoffense", "2009_oncampus_negligenthomicide", "2009_oncampus_murder", "2009_oncampus_nonforciblesexualoffense", "2009_oncampus_robbery", "2009_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampussecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2009
    colnames(crimedata)[130:138] = c("2009_offcampus_aggravated_assault","2009_offcampus_arson","2009_offcampus_burglary", "2009_offcampus_forciblesexualoffense", "2009_offcampus_negligenthomicide", "2009_offcampus_murder", "2009_offcampus_nonforciblesexualoffense", "2009_offcampus_robbery", "2009_offcampus_vehicletheft")
    colnames(crimedata)[131:139] = c("2009_offcampus_aggravated_assault","2009_offcampus_arson","2009_offcampus_burglary", "2009_offcampus_forciblesexualoffense", "2009_offcampus_negligenthomicide", "2009_offcampus_murder", "2009_offcampus_nonforciblesexualoffense", "2009_offcampus_robbery", "2009_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallsecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2009
    colnames(crimedata)[139:147] = c("2009_residencehall_aggravated_assault","2009_residencehall_arson","2009_residencehall_burglary", "2009_residencehall_forciblesexualoffense", "2009_residencehall_negligenthomicide", "2009_residencehall_murder", "2009_residencehall_nonforciblesexualoffense", "2009_residencehall_robbery", "2009_residencehall_vehicletheft")
    colnames(crimedata)[140:148] = c("2009_residencehall_aggravated_assault","2009_residencehall_arson","2009_residencehall_burglary", "2009_residencehall_forciblesexualoffense", "2009_residencehall_negligenthomicide", "2009_residencehall_murder", "2009_residencehall_nonforciblesexualoffense", "2009_residencehall_robbery", "2009_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertysecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #public-property 2009
    colnames(crimedata)[148:156] = c("2009_publicproperty_aggravated_assault","2009_publicproperty_arson","2009_publicproperty_burglary", "2009_publicproperty_forciblesexualoffense", "2009_publicproperty_negligenthomicide", "2009_publicproperty_murder", "2009_publicproperty_nonforciblesexualoffense", "2009_publicproperty_robbery", "2009_publicproperty_vehicletheft")
    colnames(crimedata)[149:157] = c("2009_publicproperty_aggravated_assault","2009_publicproperty_arson","2009_publicproperty_burglary", "2009_publicproperty_forciblesexualoffense", "2009_publicproperty_negligenthomicide", "2009_publicproperty_murder", "2009_publicproperty_nonforciblesexualoffense", "2009_publicproperty_robbery", "2009_publicproperty_vehicletheft")

    # 4th previous year
    crimedata = merge(x = crimedata, subset(publicpropertytertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "NEG_M8", "MURD8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2008
    colnames(crimedata)[157:165] = c("2008_oncampus_aggravated_assault","2008_oncampus_arson","2008_oncampus_burglary", "2008_oncampus_forciblesexualoffense", "2008_oncampus_negligenthomicide", "2008_oncampus_murder", "2008_oncampus_nonforciblesexualoffense", "2008_oncampus_robbery", "2008_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(oncampustertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "NEG_M8", "MURD8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2008
    colnames(crimedata)[158:166] = c("2008_oncampus_aggravated_assault","2008_oncampus_arson","2008_oncampus_burglary", "2008_oncampus_forciblesexualoffense", "2008_oncampus_negligenthomicide", "2008_oncampus_murder", "2008_oncampus_nonforciblesexualoffense", "2008_oncampus_robbery", "2008_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampustertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2008
    colnames(crimedata)[166:174] = c("2008_offcampus_aggravated_assault","2008_offcampus_arson","2008_offcampus_burglary", "2008_offcampus_forciblesexualoffense", "2008_offcampus_negligenthomicide", "2008_offcampus_murder", "2008_offcampus_nonforciblesexualoffense", "2008_offcampus_robbery", "2008_offcampus_vehicletheft")
    colnames(crimedata)[167:175] = c("2008_offcampus_aggravated_assault","2008_offcampus_arson","2008_offcampus_burglary", "2008_offcampus_forciblesexualoffense", "2008_offcampus_negligenthomicide", "2008_offcampus_murder", "2008_offcampus_nonforciblesexualoffense", "2008_offcampus_robbery", "2008_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehalltertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2008
    colnames(crimedata)[175:183] = c("2008_residencehall_aggravated_assault","2008_residencehall_arson","2008_residencehall_burglary", "2008_residencehall_forciblesexualoffense", "2008_residencehall_negligenthomicide", "2008_residencehall_murder", "2008_residencehall_nonforciblesexualoffense", "2008_residencehall_robbery", "2008_residencehall_vehicletheft")
    colnames(crimedata)[176:184] = c("2008_residencehall_aggravated_assault","2008_residencehall_arson","2008_residencehall_burglary", "2008_residencehall_forciblesexualoffense", "2008_residencehall_negligenthomicide", "2008_residencehall_murder", "2008_residencehall_nonforciblesexualoffense", "2008_residencehall_robbery", "2008_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertytertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #public-property 2008
    colnames(crimedata)[184:192] = c("2008_publicproperty_aggravated_assault","2008_publicproperty_arson","2008_publicproperty_burglary", "2008_publicproperty_forciblesexualoffense", "2008_publicproperty_negligenthomicide", "2008_publicproperty_murder", "2008_publicproperty_nonforciblesexualoffense", "2008_publicproperty_robbery", "2008_publicproperty_vehicletheft")
    colnames(crimedata)[185:193] = c("2008_publicproperty_aggravated_assault","2008_publicproperty_arson","2008_publicproperty_burglary", "2008_publicproperty_forciblesexualoffense", "2008_publicproperty_negligenthomicide", "2008_publicproperty_murder", "2008_publicproperty_nonforciblesexualoffense", "2008_publicproperty_robbery", "2008_publicproperty_vehicletheft")

    # locations
    crimedata = merge(x = crimedata, subset(campuslocations,select=c("UNITID_P","x","y","county")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[193:195] = c("x","y","county")
    crimedata = merge(x = crimedata, subset(campuslocations,select=c("UNITID_P","x","y")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[194:195] = c("longitude","latitude")

    #### save new data to disk
    # don't include row headers, and write blanks instead of "NA" for missing data values
  2. technickle revised this gist Aug 22, 2014. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions state-ope-security-data.r
    Original file line number Diff line number Diff line change
    @@ -7,7 +7,7 @@
    # Residencehallcrime*.sav, and Publicpropertycrime*.sav
    # out to a folder of your choosing. It should have 12 files when done
    # 4) use the one of the most recent year's .sav files to geocode addresses;
    # geocoding should be in a CSV with at least UNITID_P, x, and y fields;
    # geocoding should be in a CSV with at least UNITID_P, x, y, and county;
    # place this file in the same folder with the .sav files
    # 5) modify this script to accommodate new year values (beyond the config)
    # 6) run it
    @@ -146,8 +146,8 @@ crimedata = merge(x = crimedata, subset(publicpropertytertiary_state,select=c("U
    colnames(crimedata)[184:192] = c("2008_publicproperty_aggravated_assault","2008_publicproperty_arson","2008_publicproperty_burglary", "2008_publicproperty_forciblesexualoffense", "2008_publicproperty_negligenthomicide", "2008_publicproperty_murder", "2008_publicproperty_nonforciblesexualoffense", "2008_publicproperty_robbery", "2008_publicproperty_vehicletheft")

    # locations
    crimedata = merge(x = crimedata, subset(campuslocations,select=c("UNITID_P","x","y")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[193:194] = c("x","y")
    crimedata = merge(x = crimedata, subset(campuslocations,select=c("UNITID_P","x","y","county")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[193:195] = c("x","y","county")

    #### save new data to disk
    # don't include row headers, and write blanks instead of "NA" for missing data values
  3. technickle revised this gist Aug 19, 2014. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions state-ope-security-data.r
    Original file line number Diff line number Diff line change
    @@ -115,8 +115,6 @@ colnames(crimedata)[67:75] = c("2011_residencehall_aggravated_assault","2011_res
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #public-property 2011
    colnames(crimedata)[76:84] = c("2011_publicproperty_aggravated_assault","2011_publicproperty_arson","2011_publicproperty_burglary", "2011_publicproperty_forciblesexualoffense", "2011_publicproperty_negligenthomicide", "2011_publicproperty_murder", "2011_publicproperty_nonforciblesexualoffense", "2011_publicproperty_robbery", "2011_publicproperty_vehicletheft")

    print(nrow(crimedata))

    # 2nd previous year
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "NEG_M10", "MURD10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2010
    colnames(crimedata)[85:93] = c("2010_oncampus_aggravated_assault","2010_oncampus_arson","2010_oncampus_burglary", "2010_oncampus_forciblesexualoffense", "2010_oncampus_negligenthomicide", "2010_oncampus_murder", "2010_oncampus_nonforciblesexualoffense", "2010_oncampus_robbery", "2010_oncampus_vehicletheft")
  4. technickle revised this gist Aug 19, 2014. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions state-ope-security-data.r
    Original file line number Diff line number Diff line change
    @@ -37,6 +37,8 @@ crimedatafilename = "crimedata.csv"

    # ##############################################################################

    require(foreign)

    #### load most last 5 years of data from files
    # most recent 3 years are in the latest file
    oncampusprimary = read.spss("oncampuscrime101112.sav", to.data.frame=TRUE)
  5. technickle created this gist Aug 19, 2014.
    154 changes: 154 additions & 0 deletions state-ope-security-data.r
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,154 @@
    # This R script grabs 5 years of OPE security data for a specified state
    #
    # How to use:
    # 1) Go to http://www.ope.ed.gov/security/GetDownloadFile.aspx
    # 2) download SPSS version of 3 most recent sets (past 5 years of data)
    # 3) from each zip file, copy oncampuscrime*.sav, noncampuscrime*.sav,
    # Residencehallcrime*.sav, and Publicpropertycrime*.sav
    # out to a folder of your choosing. It should have 12 files when done
    # 4) use the one of the most recent year's .sav files to geocode addresses;
    # geocoding should be in a CSV with at least UNITID_P, x, and y fields;
    # place this file in the same folder with the .sav files
    # 5) modify this script to accommodate new year values (beyond the config)
    # 6) run it
    #
    # this script produces almost 200 columns of data;
    # 14 general columns + (5 years x 4 location types x 9 crime types)
    #
    # TODO: Sort output data by institution name and branch
    #
    # FUTURE: fix merges so they don't require "x = crimedata" declaration
    # FUTURE: fix SPSS import so it drops text field whitespace and remove workaround
    # FUTURE: hyphenate the zipcode column (source data has 9-digit numbers for ZIP+4s)
    # FUTURE: have the script grab the data directly from the website
    # FUTURE: make the entire script driven by configuration parameters
    # ##############################################################################
    # Configuration

    # set this to the folder where you placed your OPE .sav files & geocoded .csv
    # you must have write access to this folder; the output file gets put here too
    setwd("~/CampusCrime/")

    # set this to the 2-letter abbreviation for the state you want to compile
    statefilter = "NY"

    # set this to the name of the file you want to output
    crimedatafilename = "crimedata.csv"

    # ##############################################################################

    #### load most last 5 years of data from files
    # most recent 3 years are in the latest file
    oncampusprimary = read.spss("oncampuscrime101112.sav", to.data.frame=TRUE)
    offcampusprimary = read.spss("noncampuscrime101112.sav", to.data.frame=TRUE)
    residencehallprimary = read.spss("Residencehallcrime101112.sav", to.data.frame=TRUE)
    publicpropertyprimary = read.spss("Publicpropertycrime101112.sav", to.data.frame=TRUE)

    # 2 oldest years are in the second-latest and third-latest set of files, respectively
    oncampussecondary = read.spss("oncampuscrime091011.sav", to.data.frame=TRUE)
    offcampussecondary = read.spss("noncampuscrime091011.sav", to.data.frame=TRUE)
    residencehallsecondary = read.spss("Residencehallcrime091011.sav", to.data.frame=TRUE)
    publicpropertysecondary = read.spss("Publicpropertycrime091011.sav", to.data.frame=TRUE)
    oncampustertiary = read.spss("oncampuscrime080910.sav", to.data.frame=TRUE)
    offcampustertiary = read.spss("noncampuscrime080910.sav", to.data.frame=TRUE)
    residencehalltertiary = read.spss("Residencehallcrime080910.sav", to.data.frame=TRUE)
    publicpropertytertiary = read.spss("Publicpropertycrime080910.sav", to.data.frame=TRUE)

    #### load campus locations
    campuslocations = read.csv("campuslocations.csv")

    #### filter all years for just NYS data
    oncampusprimary_state = subset(oncampusprimary, State == statefilter)
    offcampusprimary_state = subset(offcampusprimary, State == statefilter)
    residencehallprimary_state = subset(residencehallprimary, State == statefilter)
    publicpropertyprimary_state = subset(publicpropertyprimary, State == statefilter)
    oncampussecondary_state = subset(oncampussecondary, State == statefilter)
    offcampussecondary_state = subset(offcampussecondary, State == statefilter)
    residencehallsecondary_state = subset(residencehallsecondary, State == statefilter)
    publicpropertysecondary_state = subset(publicpropertysecondary, State == statefilter)
    oncampustertiary_state = subset(oncampustertiary, State == statefilter)
    offcampustertiary_state = subset(offcampustertiary, State == statefilter)
    residencehalltertiary_state = subset(residencehalltertiary, State == statefilter)
    publicpropertytertiary_state = subset(publicpropertytertiary, State == statefilter)

    #### build new data
    # create a new dataframe with the common fields from all records
    # we assume that the most recent year is the master list of institutions we want. Any institutions/campuses in previous years that are not in current year are ignored
    crimedata = subset(oncampusprimary_state, select=c(UNITID_P, INSTNM, BRANCH, Address, City, State, ZIP, sector_cd, Sector_desc, men_total, women_total, Total))

    # rename column headers in this new dataframe
    colnames(crimedata) <- c("UNITID_P", "institution_name", "branch", "address", "city", "state", "zip", "sector_cd", "sector_desc", "men_total", "women_total", "total")

    # trim specific text columns (SPSS import includes whitespace)
    trim = function (x) gsub("^\\s+|\\s+$", "", x)
    crimedata$institution_name = trim(crimedata$institution_name)
    crimedata$branch = trim(crimedata$branch)
    crimedata$address = trim(crimedata$address)
    crimedata$city = trim(crimedata$city)
    crimedata$sector_desc = trim(crimedata$sector_desc)

    # begin merging (appending) crime data columns from each year (36 columns per year!)
    # current year

    #merge the data for the first year and location type
    crimedata = merge(x = crimedata, subset(oncampusprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "NEG_M12", "MURD12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2012
    #rename the column headers for the newly merged year/location - column order MUST match the above subset
    #there is probably a more clever way to do this using substitution/regex
    colnames(crimedata)[13:21] = c("2012_oncampus_aggravated_assault","2012_oncampus_arson","2012_oncampus_burglary", "2012_oncampus_forciblesexualoffense", "2012_oncampus_negligenthomicide", "2012_oncampus_murder", "2012_oncampus_nonforciblesexualoffense", "2012_oncampus_robbery", "2012_oncampus_vehicletheft")
    #rinse and repeat for each year and location type
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2012
    colnames(crimedata)[22:30] = c("2012_offcampus_aggravated_assault","2012_offcampus_arson","2012_offcampus_burglary", "2012_offcampus_forciblesexualoffense", "2012_offcampus_negligenthomicide", "2012_offcampus_murder", "2012_offcampus_nonforciblesexualoffense", "2012_offcampus_robbery", "2012_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2012
    colnames(crimedata)[31:39] = c("2012_residencehall_aggravated_assault","2012_residencehall_arson","2012_residencehall_burglary", "2012_residencehall_forciblesexualoffense", "2012_residencehall_negligenthomicide", "2012_residencehall_murder", "2012_residencehall_nonforciblesexualoffense", "2012_residencehall_robbery", "2012_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A12", "ARSON12", "BURGLA12", "FORCIB12", "MURD12", "NEG_M12", "NONFOR12", "ROBBE12", "VEHIC12")), by=c("UNITID_P"), all.x=TRUE) #public-property 2012
    colnames(crimedata)[40:48] = c("2012_publicproperty_aggravated_assault","2012_publicproperty_arson","2012_publicproperty_burglary", "2012_publicproperty_forciblesexualoffense", "2012_publicproperty_negligenthomicide", "2012_publicproperty_murder", "2012_publicproperty_nonforciblesexualoffense", "2012_publicproperty_robbery", "2012_publicproperty_vehicletheft")

    # previous year
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "NEG_M11", "MURD11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2011
    colnames(crimedata)[49:57] = c("2011_oncampus_aggravated_assault","2011_oncampus_arson","2011_oncampus_burglary", "2011_oncampus_forciblesexualoffense", "2011_oncampus_negligenthomicide", "2011_oncampus_murder", "2011_oncampus_nonforciblesexualoffense", "2011_oncampus_robbery", "2011_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2011
    colnames(crimedata)[58:66] = c("2011_offcampus_aggravated_assault","2011_offcampus_arson","2011_offcampus_burglary", "2011_offcampus_forciblesexualoffense", "2011_offcampus_negligenthomicide", "2011_offcampus_murder", "2011_offcampus_nonforciblesexualoffense", "2011_offcampus_robbery", "2011_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2011
    colnames(crimedata)[67:75] = c("2011_residencehall_aggravated_assault","2011_residencehall_arson","2011_residencehall_burglary", "2011_residencehall_forciblesexualoffense", "2011_residencehall_negligenthomicide", "2011_residencehall_murder", "2011_residencehall_nonforciblesexualoffense", "2011_residencehall_robbery", "2011_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A11", "ARSON11", "BURGLA11", "FORCIB11", "MURD11", "NEG_M11", "NONFOR11", "ROBBE11", "VEHIC11")), by=c("UNITID_P"), all.x=TRUE) #public-property 2011
    colnames(crimedata)[76:84] = c("2011_publicproperty_aggravated_assault","2011_publicproperty_arson","2011_publicproperty_burglary", "2011_publicproperty_forciblesexualoffense", "2011_publicproperty_negligenthomicide", "2011_publicproperty_murder", "2011_publicproperty_nonforciblesexualoffense", "2011_publicproperty_robbery", "2011_publicproperty_vehicletheft")

    print(nrow(crimedata))

    # 2nd previous year
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "NEG_M10", "MURD10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2010
    colnames(crimedata)[85:93] = c("2010_oncampus_aggravated_assault","2010_oncampus_arson","2010_oncampus_burglary", "2010_oncampus_forciblesexualoffense", "2010_oncampus_negligenthomicide", "2010_oncampus_murder", "2010_oncampus_nonforciblesexualoffense", "2010_oncampus_robbery", "2010_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampusprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2010
    colnames(crimedata)[94:102] = c("2010_offcampus_aggravated_assault","2010_offcampus_arson","2010_offcampus_burglary", "2010_offcampus_forciblesexualoffense", "2010_offcampus_negligenthomicide", "2010_offcampus_murder", "2010_offcampus_nonforciblesexualoffense", "2010_offcampus_robbery", "2010_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2010
    colnames(crimedata)[103:111] = c("2010_residencehall_aggravated_assault","2010_residencehall_arson","2010_residencehall_burglary", "2010_residencehall_forciblesexualoffense", "2010_residencehall_negligenthomicide", "2010_residencehall_murder", "2010_residencehall_nonforciblesexualoffense", "2010_residencehall_robbery", "2010_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertyprimary_state,select=c("UNITID_P", "AGG_A10", "ARSON10", "BURGLA10", "FORCIB10", "MURD10", "NEG_M10", "NONFOR10", "ROBBE10", "VEHIC10")), by=c("UNITID_P"), all.x=TRUE) #public-property 2010
    colnames(crimedata)[112:120] = c("2010_publicproperty_aggravated_assault","2010_publicproperty_arson","2010_publicproperty_burglary", "2010_publicproperty_forciblesexualoffense", "2010_publicproperty_negligenthomicide", "2010_publicproperty_murder", "2010_publicproperty_nonforciblesexualoffense", "2010_publicproperty_robbery", "2010_publicproperty_vehicletheft")

    # 3rd previous year
    crimedata = merge(x = crimedata, subset(publicpropertysecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "NEG_M9", "MURD9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2009
    colnames(crimedata)[121:129] = c("2009_oncampus_aggravated_assault","2009_oncampus_arson","2009_oncampus_burglary", "2009_oncampus_forciblesexualoffense", "2009_oncampus_negligenthomicide", "2009_oncampus_murder", "2009_oncampus_nonforciblesexualoffense", "2009_oncampus_robbery", "2009_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampussecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2009
    colnames(crimedata)[130:138] = c("2009_offcampus_aggravated_assault","2009_offcampus_arson","2009_offcampus_burglary", "2009_offcampus_forciblesexualoffense", "2009_offcampus_negligenthomicide", "2009_offcampus_murder", "2009_offcampus_nonforciblesexualoffense", "2009_offcampus_robbery", "2009_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehallsecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2009
    colnames(crimedata)[139:147] = c("2009_residencehall_aggravated_assault","2009_residencehall_arson","2009_residencehall_burglary", "2009_residencehall_forciblesexualoffense", "2009_residencehall_negligenthomicide", "2009_residencehall_murder", "2009_residencehall_nonforciblesexualoffense", "2009_residencehall_robbery", "2009_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertysecondary_state,select=c("UNITID_P", "AGG_A9", "ARSON9", "BURGLA9", "FORCIB9", "MURD9", "NEG_M9", "NONFOR9", "ROBBE9", "VEHIC9")), by=c("UNITID_P"), all.x=TRUE) #public-property 2009
    colnames(crimedata)[148:156] = c("2009_publicproperty_aggravated_assault","2009_publicproperty_arson","2009_publicproperty_burglary", "2009_publicproperty_forciblesexualoffense", "2009_publicproperty_negligenthomicide", "2009_publicproperty_murder", "2009_publicproperty_nonforciblesexualoffense", "2009_publicproperty_robbery", "2009_publicproperty_vehicletheft")

    # 4th previous year
    crimedata = merge(x = crimedata, subset(publicpropertytertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "NEG_M8", "MURD8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #on-campus 2008
    colnames(crimedata)[157:165] = c("2008_oncampus_aggravated_assault","2008_oncampus_arson","2008_oncampus_burglary", "2008_oncampus_forciblesexualoffense", "2008_oncampus_negligenthomicide", "2008_oncampus_murder", "2008_oncampus_nonforciblesexualoffense", "2008_oncampus_robbery", "2008_oncampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(offcampustertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #off-campus 2008
    colnames(crimedata)[166:174] = c("2008_offcampus_aggravated_assault","2008_offcampus_arson","2008_offcampus_burglary", "2008_offcampus_forciblesexualoffense", "2008_offcampus_negligenthomicide", "2008_offcampus_murder", "2008_offcampus_nonforciblesexualoffense", "2008_offcampus_robbery", "2008_offcampus_vehicletheft")
    crimedata = merge(x = crimedata, subset(residencehalltertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #residence-hall 2008
    colnames(crimedata)[175:183] = c("2008_residencehall_aggravated_assault","2008_residencehall_arson","2008_residencehall_burglary", "2008_residencehall_forciblesexualoffense", "2008_residencehall_negligenthomicide", "2008_residencehall_murder", "2008_residencehall_nonforciblesexualoffense", "2008_residencehall_robbery", "2008_residencehall_vehicletheft")
    crimedata = merge(x = crimedata, subset(publicpropertytertiary_state,select=c("UNITID_P", "AGG_A8", "ARSON8", "BURGLA8", "FORCIB8", "MURD8", "NEG_M8", "NONFOR8", "ROBBE8", "VEHIC8")), by=c("UNITID_P"), all.x=TRUE) #public-property 2008
    colnames(crimedata)[184:192] = c("2008_publicproperty_aggravated_assault","2008_publicproperty_arson","2008_publicproperty_burglary", "2008_publicproperty_forciblesexualoffense", "2008_publicproperty_negligenthomicide", "2008_publicproperty_murder", "2008_publicproperty_nonforciblesexualoffense", "2008_publicproperty_robbery", "2008_publicproperty_vehicletheft")

    # locations
    crimedata = merge(x = crimedata, subset(campuslocations,select=c("UNITID_P","x","y")), by="UNITID_P", all.x=TRUE)
    colnames(crimedata)[193:194] = c("x","y")

    #### save new data to disk
    # don't include row headers, and write blanks instead of "NA" for missing data values
    write.csv(crimedata, crimedatafilename, row.names=FALSE, na="")