Skip to content

Instantly share code, notes, and snippets.

@augier
Last active January 31, 2019 15:49
Show Gist options
  • Select an option

  • Save augier/e4f7f4f9898bd98de8c4f3d0d57fc276 to your computer and use it in GitHub Desktop.

Select an option

Save augier/e4f7f4f9898bd98de8c4f3d0d57fc276 to your computer and use it in GitHub Desktop.
Converts world CSV into type of rules file
package main
import (
"encoding/csv"
"errors"
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"strings"
)
// main converts the CSV files found in -inputdir into rules files written to
// -outputdir.
//
// For every CSV file it infers the sport and rule type from the file name,
// skips files whose name cannot be parsed or whose rule type is not "Teams" or
// "Competitions", and writes "<sport> - <ruleType>.csv" to the output
// directory. Any read, parse or write failure is printed and terminates the
// process with exit status 1.
func main() {
	inputDir := flag.String("inputdir", "", "Directory that contains the files")
	outputDir := flag.String("outputdir", "", "Directory to write files to")
	flag.Parse()

	csvFiles, err := getCSVFiles(*inputDir)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	for _, file := range csvFiles {
		fmt.Printf("parsing file %s\n", file)

		b, err := ioutil.ReadFile(fmt.Sprintf("%s/%s", *inputDir, file))
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		sport, ruleType, err := inferSportAndRuleType(file)
		if err != nil {
			// NOTE(review): the "hi " prefix looks like leftover debug output;
			// it is kept unchanged so the program's output stays the same.
			fmt.Println("hi " + err.Error())
			continue
		}

		// Only team and competition files are converted into rules.
		if ruleType != "Teams" && ruleType != "Competitions" {
			continue
		}

		r := csv.NewReader(strings.NewReader(string(b)))
		records, err := r.ReadAll()
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
		fmt.Printf("read in %d rows\n", len(records))

		// An empty file has no header row; indexing records[0] would panic.
		if len(records) == 0 {
			fmt.Printf("skipping file %s: no rows\n", file)
			continue
		}

		header := records[0]
		aliasPositions := getAliasHeaderPositions(header)
		rest := records[1:]
		rules := constructRules(rest, aliasPositions, sport, ruleType)

		filePath := fmt.Sprintf("%s/%s - %s.csv", *outputDir, sport, ruleType)
		if err := writeFile(filePath, rules); err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
	}
}
// getCSVFiles returns the names (not full paths) of the regular entries in dir
// whose name ends in ".csv".
//
// Directories are ignored, and so are names that merely contain ".csv"
// somewhere in the middle (for example "data.csv.bak"). It returns the error
// from reading the directory unchanged.
func getCSVFiles(dir string) ([]string, error) {
	fmt.Printf("looking for CSVs in %s\n", dir)

	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	var csvFiles []string
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		// Match on the extension only; a substring match would also pick up
		// backups such as "x.csv.bak".
		if strings.HasSuffix(file.Name(), ".csv") {
			csvFiles = append(csvFiles, file.Name())
		}
	}

	fmt.Printf("found %d csv files\n", len(csvFiles))
	return csvFiles, nil
}
// fileNamePattern matches "Fresh8 Data - <sport> <rule type>.csv" and captures
// the sport and the rule type. It is compiled once rather than on every call.
var fileNamePattern = regexp.MustCompile(`^Fresh8 Data - (.*) (Competitions|Teams|Players|Motor Racing|Venues|Tracks)\.csv$`)

// inferSportAndRuleType extracts the sport and the rule type from a file name
// of the form "Fresh8 Data - <sport> <rule type>.csv".
//
// The rule type must be one of Competitions, Teams, Players, Motor Racing,
// Venues or Tracks. Any other file name yields a "malformed file name" error
// and empty strings.
func inferSportAndRuleType(fileName string) (string, string, error) {
	s := fileNamePattern.FindStringSubmatch(fileName)
	// A match always has the full match plus the two capture groups.
	if len(s) != 3 {
		return "", "", errors.New(fmt.Sprintf("malformed file name: %s", fileName))
	}
	return s[1], s[2], nil
}
// getAliasHeaderPositions reports the indexes of every header column whose
// title contains "alias", compared case-insensitively. The result is nil when
// no column matches.
func getAliasHeaderPositions(header []string) []int {
	var aliasColumns []int
	for idx := 0; idx < len(header); idx++ {
		lowered := strings.ToLower(header[idx])
		if !strings.Contains(lowered, "alias") {
			continue
		}
		aliasColumns = append(aliasColumns, idx)
	}
	return aliasColumns
}
// constructRules builds the full rules table for one input file: the header
// row produced by writeHeader for ruleType, followed by the rules that
// convertRowIntoRules derives from each record, in record order.
func constructRules(records [][]string, aliasPositions []int, sport string, ruleType string) [][]string {
	table := [][]string{writeHeader(ruleType)}
	for i := range records {
		table = append(table, convertRowIntoRules(records[i], aliasPositions, sport)...)
	}
	return table
}
// writeHeader returns the three-column header row of a rules file for the
// given rule type: "raw<ruleType>", "sport" and "<ruleType>".
func writeHeader(ruleType string) []string {
	header := make([]string, 0, 3)
	header = append(header, "raw"+ruleType, "sport", ruleType)
	return header
}
// convertRowIntoRules turns one input record into rules of the form
// {raw name, sport, canonical name}, where the canonical name is row[0].
//
// The first rule maps the canonical name to itself. Every column listed in
// aliasPositions may hold several comma-separated aliases; each alias is
// trimmed of surrounding whitespace and produces one further rule. Empty
// aliases (blank cells, "a,,b", trailing commas) are skipped, as are positions
// that fall outside the row. An empty row yields nil.
func convertRowIntoRules(row []string, aliasPositions []int, sport string) [][]string {
	if len(row) == 0 {
		return nil
	}

	name := row[0]
	// First add the entity itself as a rule.
	rules := [][]string{{name, sport, name}}

	for _, position := range aliasPositions {
		// Guard against rows shorter than the header.
		if position < 0 || position >= len(row) {
			continue
		}
		// A cell may hold multiple aliases separated by commas.
		for _, alias := range strings.Split(row[position], ",") {
			// "A, B" must yield "B", not " B".
			alias = strings.TrimSpace(alias)
			if alias == "" {
				continue
			}
			rules = append(rules, []string{alias, sport, name})
		}
	}
	return rules
}
// writeFile creates (or truncates) filePath and writes rules to it as CSV.
//
// It returns the first error encountered while creating, writing, flushing or
// closing the file. The close error is reported because a failed close on a
// freshly written file can mean the data never reached disk.
func writeFile(filePath string, rules [][]string) (err error) {
	fmt.Printf("writing file %s\n", filePath)

	file, err := os.Create(filePath)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the earlier write error if there is one.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// WriteAll flushes the writer itself and returns any flush error.
	return csv.NewWriter(file).WriteAll(rules)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment