7. All caps column names with spaces
> names(data)
[1] "DESTINATION" "COMMERCIAL FLAG" "CITY" "REGION"
[5] "COUNTRY" "FLIGH DIRECTION" "FLIGHT TYPE" "DATE"
[9] "DATE YEAR" "DATE HALF YEAR" "DATE YEAR QUARTER" "DATE YEAR MONTH"
[13] "NBR OF PASSENGERS" "CARGO WEIGHT" "NBR OF FLIGHTS" "SEAT CAPACITY"
8. > unique( data[["COMMERCIAL FLAG"]] )
[1] "Commercial"
> summary( as.factor( data[["REGION"]] ) )
  China VH   China ZB   China ZS Germany ED Germany ET   India VA   India VI
        68        127          3       2160          9          3          1
  Japan RJ  Russia UI  Russia UL  Russia UN  Russia UR  Russia US  Russia UU
        63          1        167          7         62         59        340
 Russia UW   Spain GC   Spain LE     USA KA     USA KB     USA KC     USA KD
         6        205        870          4          7          5         19
    USA KE     USA KI     USA KJ     USA KM     USA KP     USA KT     USA KW
         3          1        114          2          4          2          5
      NA's
     14520
9. > data %>% select( starts_with( "DATE") )
# A tibble: 18,837 x 5
         DATE DATE YEAR DATE HALF YEAR DATE YEAR QUARTER DATE YEAR MONTH
       <time>     <dbl>          <chr>             <chr>           <chr>
 1 2010-11-01      2010         2010H2            2010Q4          201011
 2 2010-12-01      2010         2010H2            2010Q4          201012
 3 2011-01-01      2011         2011H1            2011Q1          201101
 4 2007-05-01      2007         2007H1            2007Q2          200705
 5 2011-07-01      2011         2011H2            2011Q3          201107
 6 2010-07-01      2010         2010H2            2010Q3          201007
 7 2010-04-01      2010         2010H1            2010Q2          201004
 8 2012-06-01      2012         2012H1            2012Q2          201206
 9 2009-02-01      2009         2009H1            2009Q1          200902
10 2009-10-01      2009         2009H2            2009Q4          200910
# ... with 18,827 more rows
10. > data %>%
+ filter( `NBR OF PASSENGERS` == 0 & `CARGO WEIGHT` == 0 ) %>%
+ group_by( CITY, `DATE YEAR MONTH`, `FLIGH DIRECTION` ) %>%
+ summarise( flights = sum( `NBR OF FLIGHTS` ) ) %>%
+ arrange( desc(flights) )
Source: local data frame [2,417 x 4]
Groups: CITY, DATE YEAR MONTH [2,023]
CITY DATE YEAR MONTH FLIGH DIRECTION flights
<chr> <chr> <chr> <dbl>
1 Timisoara 200911 Incoming 26
2 Arad 200701 Incoming 23
3 Arad 200801 Outgoing 22
4 Cluj Napoca 201110 Incoming 22
5 Arad 200801 Incoming 21
# ...
Empty flights
11. > data %>%
+ filter( `SEAT CAPACITY` < `NBR OF PASSENGERS` ) %>%
+ group_by( CITY ) %>%
+ summarise(
+ flights = sum( `NBR OF FLIGHTS` ),
+ passengers = sum( `NBR OF PASSENGERS`),
+ capacity = sum( `SEAT CAPACITY`)
+ ) %>%
+ arrange( desc(flights) )
# A tibble: 8 x 4
CITY flights passengers capacity
<chr> <dbl> <dbl> <dbl>
1 Hurghada 32 5229 4665
2 Tenerife Sur 4 760 756
3 Djerba 3 530 510
Overbooked flights (more passengers than seat capacity)