More Related Content Similar to I like Trains - MRMCD15 (20) I like Trains - MRMCD15
4. curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&tpl=time2json&performLocating=512&look_nv=type|servertime&'
[
{
"date":"05.09.15"
,"time":"02:55"
,"hours" : "2"
,"minutes" : "55"
,"seconds" : "12"
,"year" : "2015"
,"month" : "9"
,"day" : "5"
,"weekday" : "5"
,"HHMMSS" : "025512"
,"DDMMYYYY" : "05092015"
}
]
6. curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&performLocating=2&tpl=stop2json&look_maxno=150&look_nv=get_stopweight|yes|nur_hauptmast|yes|minId|8000000|maxId|8099999&look_stopclass=8&look_maxx=8120171&look_maxy=51000923&look_minx=6822411&look_miny=50392017&'
{
"prods":"8",
"stops": [{
"x" : "6832952",
"y" : "50807094"
,"name" : "Erftstadt"
,"urlname" : "Erftstadt"
,"prodclass" : "8"
,"extId":"8003671"
,"puic":"80"
,"planId":"1440019499"
,"stopweight":"5240"
},{...}]
,"error" : "0"
,"numberofstops": "191"
}
8. [
[
[14547207,52335925],
[...]
[12566416,52400756]
],
[
[0,"Frankfurt(Oder)"],
[71,"Frankfurt(Oder)-Rosengarten"],
[120,"Pillgram"],
[137,"Jacobsdorf(Mark)"],
[175,"Briesen(Mark)"],
[...]
[1510,"Potsdam Park Sanssouci"],
[1580,"Werder(Havel)"],
[1662,"Groß Kreutz"],
[1691,"Götz"],
[1768,"Brandenburg Hbf"]
]
]
curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&look_trainid=84/84935/18/19/80&tpl=chain2json3&performLocating=16&format_xy_n'
9. {
"look":{
"singletrain":[
{
"trainid":"84/84935/18/19/80",
"x":"12566137",
"y":"52400558",
"name":"RE 18150",
"pstopname":"Brandenburg Hbf",
"pstopno":"8010060",
"parr":"2:35",
"fstopname":"Frankfurt(Oder)",
"fstopno":"8010113",
"fdep":"0:29",
"lstopname":"Brandenburg Hbf",
"lstopno":"8010060",
"larr":"2:35",
"pass":"19",
"edgeid":"20",
"passproc":"0"
}
]
}
}
curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&tpl=singletrain2json&performLocating=8&look_nv=get_rtmsgstatus|yes|get_rtfreitextmn|yes|get_rtstoptimes|yes|get_fstop|yes|get_pstop|yes|get_nstop|yes|get_lstop|yes|zugposmode|3|&look_trainid=84/84935/18/19/80'
10. [71905, -65585, 7135, "30", "97", null, null], [72012, -65603, 7393,
"30", "110", null, null], [73325, -65900, 10620, "30", "111", null, null],
[75923, -66556, 17074, "29", "110", null, null], [77208, -66906,
20301, "30", "110", null, null], [78512, -67221, 23528, "29", "111", null,
null], [78566, -67239, 23657, "30", "109", null, null], [79168, -67374,
25141, "30", "109", null, null], [79833, -67500, 26755, "31", "111", null,
null], [80525, -67581, 28368, "31", "109", null, null], [81217, -67608,
29982, "0", "106", null, null], [81496, -67608, 30627, "0", "111", null,
null], [81874, -67599, 31499, "0", "109", null, null], [82602, -67527,
33209, "1", "109", null, null], [83267, -67437, 34790, "1", "111", null,
null], [83968, -67347, 36435, "", "0", null, null]], "Koblenz Hbf",
"8000206", "Mainz Hbf", "8000240", "04.09.15", "0", null, "1:36",
"0:40", "31", "31", "0", null, null], ["RB 11840", 1366314646,
1180032903, "84/94656/18/19/80", "6", 8, "", "Neuss Hbf", [[242,
-827, -1115, "9", "40", "1", "1"], [89, -386, 3368, "8", "39", "1", "2"], [-18,
54, 7850, "8", "39", null, null], [-198, 944, 16815, "8", "40", "1", "3"],
[-324, 1843, 25780, "8", "40", null, null], [-378, 2293, 30262, "8",
"40", "1", "4"], [-405, 2742, 34744, "7", "40", "1", "6"], [-396, 3192,
39227, "", "0", "1", "7"]], "Holzheim(b Neuss)", "8002979", "Neuss
Hbf", "8000274", "05.09.15", "-1", null, "1:51", "1:44", null, null, "4",
null, null], ["RE 10892", 631912656, 1369918931,
"84/93747/18/19/80", "20", 8, "", "Bielefeld Hbf", [[351, -18, -316, "15",
"273", "3", "0"], [0, 0, 0, "", "0", null, null], [0, 0, 60000, "", "0", null,
null]], "Herford", "8000162", "Brake(b Bielefeld)", "8001118",
"05.09.15", "-1", null, "1:55", "1:50", null, null, "4", null, null], ["RE
10246", 1120099420, 908435482, "84/92655/18/19/80", "18", 8, "",
"Essen Hbf", [], "Münster(Westf)Hbf", "8000263",
"Münster-Albachten", "8000462", "05.09.15", "0", null, "2:16", "2:10", null,
curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&performLocating=1&performFixedLocating=9'
11. curl -i -s -k -X 'GET' 'http://www.apps-bahn.de/bin/livemap/query-livemap.exe/dny?L=vs_livefahrplan&performLocating=1&performFixedLocating=9'
[...],[
"IC 60457",
1155930774,
1591832938,
"84/183259/18/19/80",
"0",
2,
"0",
"Praha hl.n.",
[
[136330, 59796, -32737, "7", "98", null, null],
[136438, 75959, 32664, "7", "98", null, null],
[136447, 77748, 39903, "", "0", null, null]
],
"Bielefeld Hbf", "8000036",
"Berlin Hbf (tief)", "8098160",
"04.09.15", "0", null, "4:22", "0:43", "0",
"18", "0", null, null
],[...]
12. name
x
y
id
direction
productclass
delay
lstopname
pstopname
pstopno
nstopname
nstopno
dateRef
ageofreport
lastreporting
nstoparrival
pstopdeparture
zpathflags
additionaltype
hideMoments
[...],[
"IC 60457",
1155930774,
1591832938,
"84/183259/18/19/80",
"0",
2,
"0",
"Praha hl.n.",
[
[136330, 59796, -32737, "7", "98", null, null],
[136438, 75959, 32664, "7", "98", null, null],
[136447, 77748, 39903, "", "0", null, null]
],
"Bielefeld Hbf", "8000036",
"Berlin Hbf (tief)", "8098160",
"04.09.15", "0", null, "4:22", "0:43", "0",
"18", "0", null, null
],[...]
13. name
x
y
id
direction
productclass
delay
lstopname
pstopname
pstopno
nstopname
nstopno
dateRef
ageofreport
lastreporting
nstoparrival
pstopdeparture
zpathflags
additionaltype
hideMoments
[...],[
"IC 60457",
1155930774,
1591832938,
"84/183259/18/19/80",
"0",
2,
"0",
"Praha hl.n.",
[
[136330, 59796, -32737, "7", "98", null, null],
[136438, 75959, 32664, "7", "98", null, null],
[136447, 77748, 39903, "", "0", null, null]
],
"Bielefeld Hbf", "8000036",
"Berlin Hbf (tief)", "8098160",
"04.09.15", "0", null, "4:22", "0:43", "0",
"18", "0", null, null
],[...]
19. How much we talkin’?
~160 million
datasets per year
...stored as JSON… yikes!
21. What to do?
no budget, high expectations
ElasticSearch
→ performs well with
large datasets
→ easy clustering
22. How it works
Collect / Normalize
request all that data
fix formats
convert location
Store
save everything to a
file
Import
import to ES
import everything
again because you
forgot something
23. Current stack
3 ES servers
~3.4 GB res/srv
~40 GB disk/srv
~2 CPUs/srv
1 nginx + kibana
26. Mappings!
less data (in memory)
mapping = {
'train': {
'_all': {'enabled': False},
'properties': {
'cid': {
'type': 'string',
'index': 'not_analyzed',
'norms': {'enabled': False}},
'timestamp': {
'type': 'date',
'norms': {'enabled': False}},
'location': {
'type': 'geo_point',
'fielddata': {
'lat_lon': True,
'format': 'compressed',
'precision': '3m'
},
'norms': {'enabled': False}
},
'name': {
'type': 'string',
'analyzer': 'keyword',
'norms': {'enabled': False},
'fields': {
'raw' : {
'type' : 'string', 'index' : 'not_analyzed'}
}
}
[…]
27. What we get
with (almost) no budget, still high
expectations
Fast search
sub-second searches
for <2 weeks
<30 seconds for
whole year