Last active
January 31, 2019 15:49
-
-
Save augier/e4f7f4f9898bd98de8c4f3d0d57fc276 to your computer and use it in GitHub Desktop.
Converts each world CSV into a rules file for its sport and rule type (Teams or Competitions).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "encoding/csv" | |
| "errors" | |
| "flag" | |
| "fmt" | |
| "io/ioutil" | |
| "os" | |
| "regexp" | |
| "strings" | |
| ) | |
| func main() { | |
| inputDir := flag.String("inputdir", "", "Directory that contains the files") | |
| outputDir := flag.String("outputdir", "", "Directory to write files to") | |
| flag.Parse() | |
| csvFiles, err := getCSVFiles(*inputDir) | |
| if err != nil { | |
| fmt.Println(err) | |
| os.Exit(1) | |
| } | |
| for _, file := range csvFiles { | |
| fmt.Printf("parsing file %s\n", file) | |
| b, err := ioutil.ReadFile(fmt.Sprintf("%s/%s", *inputDir, file)) | |
| if err != nil { | |
| fmt.Println(err) | |
| os.Exit(1) | |
| } | |
| sport, ruleType, err := inferSportAndRuleType(file) | |
| if err != nil { | |
| fmt.Println("hi " + err.Error()) | |
| continue | |
| } | |
| if ruleType != "Teams" && ruleType != "Competitions" { | |
| continue | |
| } | |
| r := csv.NewReader(strings.NewReader(string(b))) | |
| records, err := r.ReadAll() | |
| if err != nil { | |
| fmt.Println(err) | |
| os.Exit(1) | |
| } | |
| fmt.Printf("read in %d rows\n", len(records)) | |
| header := records[:1][0] | |
| aliasPositions := getAliasHeaderPositions(header) | |
| rest := records[1:] | |
| rules := constructRules(rest, aliasPositions, sport, ruleType) | |
| filePath := fmt.Sprintf("%s/%s - %s.csv", *outputDir, sport, ruleType) | |
| err = writeFile(filePath, rules) | |
| if err != nil { | |
| if err != nil { | |
| fmt.Println(err) | |
| os.Exit(1) | |
| } | |
| } | |
| } | |
| } | |
// getCSVFiles lists the entries of dir and returns the names (not full paths)
// of the non-directory entries whose name ends in ".csv".
//
// The suffix is matched exactly, so names that merely contain ".csv"
// (for example "data.csv.bak") are not returned. An error from reading the
// directory is returned unchanged. Progress is reported on standard output.
func getCSVFiles(dir string) ([]string, error) {
	fmt.Printf("looking for CSVs in %s\n", dir)

	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	var csvFiles []string
	for _, file := range files {
		// A directory that happens to be named "*.csv" cannot be parsed.
		if file.IsDir() {
			continue
		}
		if strings.HasSuffix(file.Name(), ".csv") {
			csvFiles = append(csvFiles, file.Name())
		}
	}

	fmt.Printf("found %d csv files\n", len(csvFiles))
	return csvFiles, nil
}
// fileNamePattern matches "Fresh8 Data - <sport> <rule type>.csv". Capture
// group 1 is the sport and group 2 is one of the known rule types. It is
// compiled once here rather than on every call.
var fileNamePattern = regexp.MustCompile(`^Fresh8 Data - (.*) (Competitions|Teams|Players|Motor Racing|Venues|Tracks)\.csv$`)

// inferSportAndRuleType extracts the sport and the rule type from a file name
// of the form "Fresh8 Data - <sport> <rule type>.csv".
//
// It returns (sport, ruleType, nil) on a match, and two empty strings plus a
// "malformed file name" error when fileName does not follow the convention.
// The ".csv" extension must match literally.
func inferSportAndRuleType(fileName string) (string, string, error) {
	// On a match the result is [whole match, sport, rule type]; it is nil otherwise.
	m := fileNamePattern.FindStringSubmatch(fileName)
	if len(m) != 3 {
		return "", "", errors.New("malformed file name: " + fileName)
	}
	return m[1], m[2], nil
}
// getAliasHeaderPositions scans the header row and returns, in ascending
// order, the index of every column whose title contains "alias"
// (case-insensitively). The result is nil when no column qualifies.
func getAliasHeaderPositions(header []string) []int {
	var aliasColumns []int
	for idx := 0; idx < len(header); idx++ {
		title := strings.ToLower(header[idx])
		if !strings.Contains(title, "alias") {
			continue
		}
		aliasColumns = append(aliasColumns, idx)
	}
	return aliasColumns
}
| func constructRules(records [][]string, aliasPositions []int, sport string, ruleType string) [][]string { | |
| var rules [][]string | |
| rules = append(rules, writeHeader(ruleType)) | |
| for _, row := range records { | |
| r := convertRowIntoRules(row, aliasPositions, sport) | |
| rules = append(rules, r...) | |
| } | |
| return rules | |
| } | |
// writeHeader returns the three-column header row of a rules file:
// "raw<ruleType>", "sport" and ruleType itself. Despite its name it performs
// no I/O; it only builds the row.
func writeHeader(ruleType string) []string {
	rawColumn := "raw" + ruleType
	header := make([]string, 0, 3)
	header = append(header, rawColumn, "sport", ruleType)
	return header
}
// convertRowIntoRules expands one input record into rule rows of the form
// {raw name, sport, canonical name}, where the canonical name is the record's
// first column.
//
// The first rule maps the canonical name to itself. Every column listed in
// aliasPositions then contributes one rule per alias; a cell may hold several
// comma-separated aliases. Surrounding whitespace is trimmed from each alias,
// and blank aliases (empty cells, "a,,b", trailing commas) are dropped.
// Positions beyond the end of the row are ignored, and an empty row yields no
// rules at all.
func convertRowIntoRules(row []string, aliasPositions []int, sport string) [][]string {
	if len(row) == 0 {
		return nil
	}
	canonical := row[0]

	// The canonical name always maps to itself.
	rules := [][]string{{canonical, sport, canonical}}

	for _, position := range aliasPositions {
		// Tolerate rows that are shorter than the header.
		if position < 0 || position >= len(row) {
			continue
		}
		for _, alias := range strings.Split(row[position], ",") {
			alias = strings.TrimSpace(alias)
			if alias == "" {
				continue
			}
			rules = append(rules, []string{alias, sport, canonical})
		}
	}
	return rules
}
// writeFile creates (or truncates) filePath and writes rules to it as CSV,
// one record per inner slice. The path being written is reported on standard
// output.
//
// It returns the first error encountered while creating, writing, flushing or
// closing the file. The close error is checked because a failed close can mean
// the data never reached the file.
func writeFile(filePath string, rules [][]string) (err error) {
	fmt.Printf("writing file %s\n", filePath)

	file, err := os.Create(filePath)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier write error; otherwise surface the close error.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	// WriteAll flushes the writer and returns any flush error, so no
	// separate Flush call is needed.
	return csv.NewWriter(file).WriteAll(rules)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment