設定postgresql時區並產生48小時資料

#查詢時區名稱

SELECT * FROM pg_timezone_names;

#設定資料庫時區

ALTER DATABASE “dbname” SET timezone TO ‘Asia/Taipei’;
#產生最後48hours

#將資料與真實資料進行join即可防止排程或爬蟲掉資料的狀況

#注意剪掉的interval要是47 hour才會是48小時否則為47

SELECT CAST(t.day as varchar)
FROM generate_series(
(SELECT date_trunc(‘hour’ , ( SELECT now()::timestamp – interval ’47’ hour)::timestamp))
, now()::timestamp
, interval ‘1 hour’) AS t(day)

#使用cross join 配對出每組設備編號或依情境調整產生配對的大量資料集

#若資料有遇到可能重複收集的狀況可以在cte內以group by搭配流水號進行篩選重複的資料

#最終使用left join即可將資料補滿

with d as(
–產生72 or 48 小時資料
–cross join將每個時間配對到每個站點
SELECT CAST(t.day as varchar) , a.stationid
FROM generate_series(
(SELECT date_trunc(‘hour’ , ( SELECT now()::timestamp – interval ’47’ hour)::timestamp))
, now()::timestamp
, interval ‘1 hour’) AS t(day)
cross join autostation a
) , filter_duplicate as (

–防止爬蟲或排程重複收集資料
SELECT stationid, obstime, elev, wdir, wdsd, temp, humd, pres, h_24r,
h_fx, h_xd, h_fxt, d_tx, d_txt, d_tn, d_tnt , max( id ) as id
FROM public.weatherlog
group by stationid, obstime, elev, wdir, wdsd, temp, humd, pres, h_24r,
h_fx, h_xd, h_fxt, d_tx, d_txt, d_tn, d_tnt
)

–以left join執行資料補齊
select d.* , w.*
from d
left join filter_duplicate w
on d.day = w.obstime
and d.stationid = w.stationid


發佈留言