One of the limiting factors of most timeseries databases is that, in order to get good read performance, they limit your ability to update data. That's fine if your data is an event stream, but if it's coming from pre-aggregated sources it might update past data — for example, data about online ad performance updated after click fraud is discovered. In this talk I'll show you how AdStage stores timeseries data in Postgres to allow fast reads and updates, using clever schema design and functions for speed.
23. TimescaleDB
-- Wide "one column per metric" schema: one row per (entity, day).
-- The "..." is slide elision standing in for metric_3 .. metric_99;
-- this snippet is illustrative, not executable as-is.
create table entity_date_metric (
entity integer not null,
ts date not null,
metric_1 numeric,
metric_2 numeric,
...
metric_100 numeric
);
-- TimescaleDB: convert the plain table into a hypertable partitioned
-- into chunks by the 'ts' time column (default chunk interval).
select create_hypertable('entity_date_metric', 'ts');
24. TimescaleDB
-- Benchmark read path: roll daily rows up to (entity, week) for ~1000
-- entities over one month. "metric_*" is slide shorthand for repeating
-- sum(...) across all 100 metric columns; "*list of ...*" is a placeholder.
select
entity, date_trunc('week', ts) ts_trunc,
sum(metric_*) ...
from entity_date_metric
where entity in (*list of 1000 random entities*)
-- NOTE(review): between is closed on both ends; acceptable for DATE
-- buckets, but a half-open range (>= / <) avoids boundary double-counting.
and ts between '2001-01-15' and '2001-02-15'
group by entity, ts_trunc;
●
27. TOASTy
Arrays
●
○
●
-- Array-layout variant: each row stores a metric as an array of daily
-- values (kept out-of-line by TOAST). The inner subquery explodes an
-- array slice back into one row per day, then the outer query rolls the
-- exploded rows up to (entity, week).
-- s, e, start, end, metric_* and "*list of ...*" are slide placeholders.
select
entity, date_trunc('week', day) ts_trunc,
sum(metric_*) ...
from (
select
entity,
-- Parallel unnests: Postgres zips multiple unnest() calls in the same
-- select list row-by-row, so each metric value is paired with its day.
-- Relies on the generated series and the slice having equal length.
unnest(array(select generate_series(s, e, '1 day'))) as day,
unnest(metric_*[start:end]) as metric_*,
...
from entity_date_metric
where entity in (*list of 1000 random entities*)
) as unnested_metrics
group by entity, ts_trunc;
29. TOASTy
Arrays
-- Sum all elements of a numeric array, counting NULL elements as 0.
-- Contract (unchanged from the loop version):
--   * NULL input  -> NULL (STRICT short-circuits before the body runs)
--   * empty array -> NULL (sum() over zero rows yields NULL, matching the
--                    original explicit array_length check)
--   * NULL items  -> treated as 0 via coalesce
-- IMMUTABLE + PARALLEL SAFE so the planner may inline and parallelize it.
-- Rewritten set-based in LANGUAGE sql (the quoted 'plpgsql' spelling is
-- deprecated, and the element-by-element loop was needless row-at-a-time
-- work). NOTE(review): unnest flattens all dimensions; intended use here
-- is 1-D daily-value arrays.
create or replace function metric_array_sum(
    input numeric[]
) returns numeric as $fun$
    select sum(coalesce(elem, 0))
    from unnest(input) as t(elem)
$fun$ language sql immutable strict parallel safe;
●
●
○
○
○