package main import ( "database/sql" "errors" "fmt" _ "github.com/go-sql-driver/mysql" "log" "time" ) const ( DATABASE_USER = "cleaner" DATABASE_PASS = "nil" DATABASE_CONNECTION = "" //e.g. "tcp(localhost:55555) DATABASE_NAME = "netflow" ) func cleanData(conf Config) (err error) { db, err := sql.Open("mysql", DATABASE_USER+":"+DATABASE_PASS+"@/"+DATABASE_NAME) if err != nil { log.Println("Failed to connect to db") return } defer db.Close() //Remove the processed mark on entries older than 6 hours err = reprocess(db, time.Now().Add(-6*time.Hour)) if err != nil { return } var interval time.Duration switch conf.Interval { case "5min": interval = time.Minute * 5 case "10min": interval = time.Minute * 10 case "hour": interval = time.Hour case "day": interval = time.Hour * 24 default: err = errors.New(fmt.Sprintf("Invalid interval: %s", conf.Interval)) return } //Fetch data that should be cleaned rDat, err := fetchRawData(db, time.Now().Add(-2*interval)) if err != nil { log.Println("Faild to fetch raw data") return } cDat, err := clean(rDat, conf) if err != nil { log.Println("Failed to clean data") return } //Add noise for differential privacy for i := range cDat { cDat[i].occurances = diffpriv(cDat[i].occurances, 1, conf.Epsilon) } //Begin transaction tx, err := db.Begin() if err != nil { log.Println("Failed to initialize transaction") return } //save cleaned data for _, cd := range cDat { err = insertCleanData(tx, cd.ipbSrc, cd.ipbDst, cd.volume, cd.time, cd.port, cd.occurances) if err != nil { tx.Rollback() log.Println("Failed to save cleaned data") return } } //remove old data err = purgeRawData(tx, rDat) if err != nil { tx.Rollback() log.Println("Failed to remove old data") return } tx.Commit() return } func getTimespan(t time.Time, conf Config) (span time.Time, err error) { loc, err := time.LoadLocation(TIMEZONE) if err != nil { return } switch { case conf.Interval == "5min": //Round the date into 5 minutes y, m, d := t.Date() h := t.Hour() min := t.Minute() min = (min / 5) * 5 span = time.Date(y, m, d, h, min, 0, 0, loc) case conf.Interval == "10min": //Round the date into 10 minutes y, m, d := t.Date() h := t.Hour() min := t.Minute() min = (min / 10) * 10 span = time.Date(y, m, d, h, min, 0, 0, loc) case conf.Interval == "hour": //Round the date into hour y, m, d := t.Date() h := t.Hour() span = time.Date(y, m, d, h, 0, 0, 0, loc) case conf.Interval == "day": //Round the date into day y, m, d := t.Date() span = time.Date(y, m, d, 0, 0, 0, 0, loc) default: err = errors.New(fmt.Sprintf("Bad interval in config %s", conf.Interval)) return } return } func clean(rDat []RawData, conf Config) (cDat []CleanData, err error) { // collect all ips so we can query for their ip blocks ips := make(map[string]*asnipPair) for _, rd := range rDat { ips[rd.ipSrc] = nil ips[rd.ipDst] = nil } var iplist []string for ip := range ips { iplist = append(iplist, ip) } pairs, err := findASAndIPBlock(iplist...) if err != nil { return } for ix, p := range pairs { ips[p.ipAdr] = &pairs[ix] } var vol string for _, rd := range rDat { vol, err = rd.getVolSize(conf) if err != nil { return } var tim time.Time tim, err = getTimespan(rd.time, conf) if err != nil { return } cDat = append(cDat, CleanData{ ipbSrc: ips[rd.ipSrc].ipBlock, ipbDst: ips[rd.ipDst].ipBlock, time: tim, port: rd.port, volume: vol, occurances: 1, }) } cDat = removeDups(cDat) return } func removeDups(cDat []CleanData) []CleanData { ret := make([]CleanData, 0) var found bool for ci := range cDat { found = false //Check if an equal struct already is appended for ri := range ret { if ret[ri].equals(&cDat[ci]) { //If found, increase it occurances instead of //appending a new struct ret[ri].occurances += cDat[ci].occurances found = true break } } if !found { //if no equal struct is found //append it ret = append(ret, cDat[ci]) } } return ret }