Skip to content

Instantly share code, notes, and snippets.

@chenjiayuan
Last active September 2, 2015 04:45
Show Gist options
  • Select an option

  • Save chenjiayuan/dfda2b69884509fc126d to your computer and use it in GitHub Desktop.

Select an option

Save chenjiayuan/dfda2b69884509fc126d to your computer and use it in GitHub Desktop.
small fix...
# Keep the first column aside (as a one-column data frame) before it is
# removed from the feature table.
PassengerId <- testData[1]

# Drop column 1 and columns 8 through 11.
testData <- testData[-c(1, 8:11)]

# Recode Sex: "female" becomes "1", then a value starting with "male" becomes
# "0". gsub() returns character data, so the column holds strings, not numbers.
testData$Sex <- gsub("^male", 0, gsub("female", 1, testData$Sex))
# Row numbers whose Name contains each honorific. Patterns are matched
# literally (fixed = TRUE), so the trailing "." is a real period. Every lookup
# runs before any Name is overwritten.
test_master_vector <- grep("Master.", testData$Name, fixed = TRUE)
test_miss_vector <- grep("Miss.", testData$Name, fixed = TRUE)
test_mrs_vector <- grep("Mrs.", testData$Name, fixed = TRUE)
test_mr_vector <- grep("Mr.", testData$Name, fixed = TRUE)
test_dr_vector <- grep("Dr.", testData$Name, fixed = TRUE)

# Replace the full name (column 2) with the bare title. The assignments are
# applied in this order, so a row matched by several patterns keeps the title
# assigned last.
testData[test_master_vector, 2] <- "Master"
testData[test_miss_vector, 2] <- "Miss"
testData[test_mrs_vector, 2] <- "Mrs"
testData[test_mr_vector, 2] <- "Mr"
testData[test_dr_vector, 2] <- "Dr"
# Mean known age for each recognised title, rounded to two decimals. Rows with
# a missing Age are ignored; a title with no known ages yields NaN.
test_master_age <- round(
  mean(testData$Age[testData$Name == "Master"], na.rm = TRUE),
  digits = 2
)
test_miss_age <- round(
  mean(testData$Age[testData$Name == "Miss"], na.rm = TRUE),
  digits = 2
)
test_mrs_age <- round(
  mean(testData$Age[testData$Name == "Mrs"], na.rm = TRUE),
  digits = 2
)
test_mr_age <- round(
  mean(testData$Age[testData$Name == "Mr"], na.rm = TRUE),
  digits = 2
)
test_dr_age <- round(
  mean(testData$Age[testData$Name == "Dr"], na.rm = TRUE),
  digits = 2
)

# Lookup table: title -> age used to fill a missing Age.
title_mean_age <- c(
  Master = test_master_age,
  Miss = test_miss_age,
  Mrs = test_mrs_age,
  Mr = test_mr_age,
  Dr = test_dr_age
)

# Fill every missing Age whose title is recognised in one vectorised step.
# Unlike a `1:nrow(testData)` loop this is safe for a zero-row table, and an
# NA title is treated as unrecognised instead of aborting the script.
missing_age_rows <- which(is.na(testData$Age))
title_slot <- match(testData$Name[missing_age_rows], names(title_mean_age))
has_known_title <- !is.na(title_slot)
testData$Age[missing_age_rows[has_known_title]] <-
  unname(title_mean_age[title_slot[has_known_title]])

# Report the rows that still have no Age because their title is not in the
# lookup table, so they can be handled by hand.
for (uncaught_row in missing_age_rows[!has_known_title]) {
  print(paste0("Uncaught title at: ", uncaught_row))
  print(paste0("The title unrecognized was: ", testData$Name[uncaught_row]))
}

# Manual replacement: the title of row 89 could not be determined
# programmatically (the report above is what flags it), so it receives the
# "Miss" mean age. The guard keeps this fix from overwriting an Age that is
# already known and from appending rows to a table shorter than 89 rows.
manual_fix_row <- 89
if (nrow(testData) >= manual_fix_row && is.na(testData$Age[manual_fix_row])) {
  testData$Age[manual_fix_row] <- test_miss_age
}
# Child flag: 1 when the value in column 4 (the age column filled in above) is
# at most 12, otherwise 2. The whole column is computed at once and written by
# name, so a zero-row table works, the result does not depend on "Child"
# landing in position 7, and a row whose age is still missing gets NA instead
# of aborting the script with "missing value where TRUE/FALSE needed".
testData$Child <- ifelse(testData[[4]] <= 12, 1, 2)
# Family size: column 5 + column 6 + 1, computed for all rows at once and
# written by name (no reliance on "Family" being column 8, and no bogus row
# appended when the table is empty).
# NOTE(review): columns 5 and 6 are presumably the sibling/spouse and
# parent/child counts, with the +1 counting the passenger - confirm against
# the code that loads testData.
testData$Family <- testData[[5]] + testData[[6]] + 1
# Mother flag: 1 when the title in column 2 is "Mrs" and column 6 is greater
# than zero, otherwise 2. Evaluated elementwise over the whole table (which is
# what `&` is meant for) and written by name, so it works on a zero-row table
# and yields NA rather than an error when the condition cannot be decided.
# NOTE(review): column 6 is presumably the parent/child count - confirm
# against the code that loads testData.
testData$Mother <- ifelse(testData[[2]] == "Mrs" & testData[[6]] > 0, 1, 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment