Crunching data with go: Tips, tricks, use-cases

945 views

Published on

Talk for the first meetup of the Munich Golang User Group. It describes use cases from real Go development: fetching data from an SQL database, connecting to Google services such as Google Analytics and Google BigQuery, and other aspects of building a geolocation application.

Published in: Technology, News & Politics
0 Comments
0 Likes
Statistics
Notes
  • Be the first to comment

  • Be the first to like this

No Downloads
Views
Total views
945
On SlideShare
0
From Embeds
0
Number of Embeds
19
Actions
Shares
0
Downloads
17
Comments
0
Likes
0
Embeds 0
No embeds

No notes for slide

Crunching data with go: Tips, tricks, use-cases

  1. 1. Crunching data with go: Tips, tricks, use-cases. Sergii Khomenko, Data Scientist, STYLIGHT. sergii.khomenko@stylight.com, @lc0d3r. MUNICH GOPHERS - APR 24 2014, MUNICH
  2. 2. Agenda: Relational databases; Google Analytics and BigQuery; Geolocation; Useful things from the Go world. WHAT IT’S ABOUT
  3. 3. Relational databases
  4. 4. • github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int! ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  5. 5. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! ! config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! !! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! !!
  6. 6. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string) + " 23:59:59"}! ! geoParams := paramStruct{}! !! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! !! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  7. 7. ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  8. 8. Geolocation: WHERE MY IPS ARE FROM
  9. 9. ! task := make(chan Clickout)! ! result := make(chan IpResult)! ! done = make(chan interface{})! !! go processChannel(task, result)! ! go aggregateResults(result, &results)! !! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  10. 10. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  11. 11. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results := *rs! ! found, notFound := 0, 0! !! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %sn", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! !! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! !! close(done)! }! !
  12. 12. package main! !import (! ! "fmt"! ! "runtime"! )! !func main() {! !! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !}!
  13. 13. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! !! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)! ! fmt.Printf("Time zone: %vn", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude, record.Location.Longitude)! !! db.Close()
  14. 14. Google Analytics and BigQuery
  15. 15. var config = &oauth.Config{! ! ClientId: “client-id-here.apps.googleusercontent.com",! ! ClientSecret: “client-secret-here“,! ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  16. 16. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! ! if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! !! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  17. 17. ! data, err := dataGaGetCall.Do()! ! if err != nil {! ! ! log.Fatal("Failed fetch data from GA")! ! }! !! return data.Rows
  18. 18. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions": "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! !! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %vn", row, rows[row])! ! }! }
  19. 19. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ! ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  20. 20. ! service, err := bigquery.New(client)! ! if err != nil {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List(“testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  21. 21. Useful and interesting Gophers
  22. 22. Interesting Gophers • Golang machine learning lib 
 https://github.com/xlvector/hector • Logistic Regression • Factorized Machine • CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree • Neural Network
  23. 23. Interesting Gophers • Library for numeric operations
 https://github.com/gonum - fairly new, but they are working to bring some useful packages • matrix - Scientific math package for the Go language. • graph - Discrete math structures and functions
  24. 24. Reference list • Why are ‘Cool Kids’ at Github Moving to GO Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/ • How suitable Go will be for scientific computing? - https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
  25. 25. Thank you! MUNICH GOPHERS - APR 24 2014, MUNICH
  26. 26. MUNICH GOPHERS - APR 24 2014, MUNICH. Sergii Khomenko, Data Scientist, STYLIGHT GmbH. sergii.khomenko@stylight.com, @lc0d3r. STYLIGHT.COM
  27. 27. DAHO.AM — Developer Conference 06-06-14. SAVE THE DATE

×