Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Crunching data with go: Tips, tricks, use-cases

1,030 views

Published on

Talk for the first meetup of the Munich Golang User Group. It describes use cases from real Go development and covers fetching data from an SQL database, connecting to Google services such as Google Analytics and Google BigQuery, and other aspects of building a geolocation application.

Published in: Technology, News & Politics
  • Be the first to comment

  • Be the first to like this

Crunching data with go: Tips, tricks, use-cases

  1. 1. Crunching data with go: Tips, tricks, use-cases — Sergii Khomenko, Data Scientist, STYLIGHT — sergii.khomenko@stylight.com — @lc0d3r — MUNICH GOPHERS - APR 24 2014, MUNICH
  2. 2. Agenda: Relational databases • Google Analytics and BigQuery • Geolocation • Useful things from the Go world — WHAT IT’S ABOUT
  3. 3. Relational databases
  4. 4. • github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int! ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  5. 5. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! ! config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! !! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! !!
  6. 6. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string) + " 23:59:59"}! ! geoParams := paramStruct{}! !! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! !! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  7. 7. ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  8. 8. Geolocation — WHERE MY IPS ARE FROM
  9. 9. ! task := make(chan Clickout)! ! result := make(chan IpResult)! ! done = make(chan interface{})! !! go processChannel(task, result)! ! go aggregateResults(result, &results)! !! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  10. 10. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  11. 11. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results := *rs! ! found, notFound := 0, 0! !! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %sn", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! !! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! !! close(done)! }! !
  12. 12. package main! !import (! ! "fmt"! ! "runtime"! )! !func main() {! !! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !}!
  13. 13. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! !! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)! ! fmt.Printf("Time zone: %vn", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude, record.Location.Longitude)! !! db.Close()
  14. 14. Google Analytics and BigQuery
  15. 15. var config = &oauth.Config{! ! ClientId: “client-id-here.apps.googleusercontent.com",! ! ClientSecret: “client-secret-here“,! ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  16. 16. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! ! if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! !! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  17. 17. ! data, err := dataGaGetCall.Do()! ! if err != nil {! ! ! log.Fatal("Failed fetch data from GA")! ! }! !! return data.Rows
  18. 18. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions": "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! !! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %vn", row, rows[row])! ! }! }
  19. 19. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ! ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  20. 20. ! service, err := bigquery.New(client)! ! if err != nil {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List(“testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  21. 21. Useful and interesting Gophers
  22. 22. Interesting Gophers • Golang machine learning lib 
 https://github.com/xlvector/hector • Logistic Regression • Factorized Machine • CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree • Neural Network
  23. 23. Interesting Gophers • library for numeric operations
 https://github.com/gonum - fairly new, but they are working to bring some useful packages • matrix - Scientific math package for the Go language. • graph - Discrete math structures and functions
  24. 24. Reference list • Why are ‘Cool Kids’ at Github Moving to GO Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/ • How suitable Go will be for scientific computing? - https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
  25. 25. Thank you! MUNICH GOPHERS - APR 24 2014, MUNICH
  26. 26. MUNICH GOPHERS - APR 24 2014, MUNICH — Sergii Khomenko, Data Scientist, STYLIGHT GmbH — sergii.khomenko@stylight.com — @lc0d3r — STYLIGHT.COM
  27. 27. DAHO.AM — Developer Conference 06-06-14 — SAVE THE DATE

×