package main import ( "database/sql" "errors" "fmt" _ "github.com/go-sql-driver/mysql" "log" "time" ) func cleanData(conf Config, db_user, db_pass, db_conn, db_name string) (err error) { db, err := sql.Open("mysql", db_user+":"+db_pass+"@"+db_conn+"/"+db_name) if err != nil { log.Println("Failed to connect to db") return } defer db.Close() //Remove the processed mark on entries older than 6 hours err = reprocess(db, time.Now().Add(-1*time.Hour)) if err != nil { return } interval, err := conf.getInterval() if err != nil { return err } //Fetch data that should be cleaned rDat, err := fetchRawData(db, time.Now().Add(-2*interval), conf.Limit) if err != nil { log.Println("Faild to fetch raw data") return } cDat, err := clean(rDat, conf) if err != nil { log.Println("Failed to clean data") return } //Begin transaction tx, err := db.Begin() if err != nil { log.Println("Failed to initialize transaction") return } //save cleaned data for ix := range cDat { err = insertCleanData(tx, &cDat[ix]) if err != nil { tx.Rollback() log.Println("Failed to save cleaned data") return } } //remove old data err = purgeRawData(tx, rDat) if err != nil { tx.Rollback() log.Println("Failed to remove old data") return } tx.Commit() return } func getTimespan(t time.Time, conf Config) (span time.Time, err error) { loc, err := time.LoadLocation(TIMEZONE) if err != nil { return } switch { case conf.Interval == "5min": //Round the date into 5 minutes y, m, d := t.Date() h := t.Hour() min := t.Minute() min = (min / 5) * 5 span = time.Date(y, m, d, h, min, 0, 0, loc) case conf.Interval == "10min": //Round the date into 10 minutes y, m, d := t.Date() h := t.Hour() min := t.Minute() min = (min / 10) * 10 span = time.Date(y, m, d, h, min, 0, 0, loc) case conf.Interval == "hour": //Round the date into hour y, m, d := t.Date() h := t.Hour() span = time.Date(y, m, d, h, 0, 0, 0, loc) case conf.Interval == "day": //Round the date into day y, m, d := t.Date() span = time.Date(y, m, d, 0, 0, 0, 0, loc) default: err = errors.New(fmt.Sprintf("Bad interval in config %s", conf.Interval)) return } return } func clean(rDat []rawData, conf Config) (cDat []cleanedData, err error) { // collect all ips so we can query for their ip blocks ips := make(map[string]struct{}) for _, rd := range rDat { ips[rd.ipSrc] = struct{}{} ips[rd.ipDst] = struct{}{} } var iplist []string for ip := range ips { iplist = append(iplist, ip) } pairs, err := findIPBlock(iplist...) if err != nil { return } for _, rd := range rDat { var tim time.Time tim, err = getTimespan(rd.time, conf) if err != nil { return } cDat = append(cDat, cleanedData{ ipbSrc: pairs[rd.ipSrc], ipbDst: pairs[rd.ipDst], asSrc: rd.asSrc, asDst: rd.asDst, portSrc: rd.portSrc, portDst: rd.portDst, occurences: 1, volume: rd.pktLenDist, time: tim, }) } cDat = removeDups(cDat) return } func removeDups(cDat []cleanedData) []cleanedData { ret := make([]cleanedData, 0) var found bool for ci := range cDat { found = false //Check if an equal struct already is appended for ri := range ret { if ret[ri].equals(&cDat[ci]) { //If found, increase it occurences instead of //appending a new struct ret[ri].occurences += cDat[ci].occurences found = true break } } if !found { //if no equal struct is found //append it ret = append(ret, cDat[ci]) } } return ret }