import pandas as pd
import numpy as np
# Build two small sample frames that are observed at different times.

# df1: ten hourly "temp" readings starting at midnight on 2012-01-01.
# An offset object is used for the frequency instead of the 'H' string alias,
# which recent pandas releases deprecate.
rng1 = pd.date_range(
    '1/1/2012',
    periods=10,
    freq=pd.offsets.Hour(),
)
s1 = pd.Series(np.arange(10), index=rng1)
df1 = pd.DataFrame({'temp': s1})

# df2: five irregularly spaced "voltage" readings.
# NOTE: these row labels are plain strings, not timestamps. They must be parsed
# (e.g. with pd.to_datetime) before being aligned against a DatetimeIndex.
s2 = pd.Series(
    np.arange(5, 10),
    index=[
        '1/1/2012 01:20:00',
        '1/1/2012 01:40:00',
        '1/1/2012 02:00:00',
        '1/1/2012 05:30:00',
        '1/1/2012 06:00:00',
    ],
)
df2 = pd.DataFrame({'voltage': s2})

# print() with a single parenthesised argument runs on Python 2 and Python 3.
print(df1)
print(df2)
--output:--
temp
2012-01-01 00:00:00 0
2012-01-01 01:00:00 1
2012-01-01 02:00:00 2
2012-01-01 03:00:00 3
2012-01-01 04:00:00 4
2012-01-01 05:00:00 5
2012-01-01 06:00:00 6
2012-01-01 07:00:00 7
2012-01-01 08:00:00 8
2012-01-01 09:00:00 9
voltage
1/1/2012 01:20:00 5
1/1/2012 01:40:00 6
1/1/2012 02:00:00 7
1/1/2012 05:30:00 8
1/1/2012 06:00:00 9
# df2's row labels are strings while df1 is indexed by timestamps. Parse them
# on a copy first so the outer join aligns rows by actual time (e.g. the
# 02:00 and 06:00 readings land on the same row) instead of treating the
# string labels as unrelated keys.
df2_by_time = df2.copy()
df2_by_time.index = pd.to_datetime(df2_by_time.index)

# Outer join keeps the union of both sets of timestamps; readings missing
# from either frame show up as NaN.
combined = df1.join(df2_by_time, how='outer')
print(combined)
--output:--
temp voltage
2012-01-01 00:00:00 0 NaN
2012-01-01 01:00:00 1 NaN
2012-01-01 01:20:00 NaN 5
2012-01-01 01:40:00 NaN 6
2012-01-01 02:00:00 2 7
2012-01-01 03:00:00 3 NaN
2012-01-01 04:00:00 4 NaN
2012-01-01 05:00:00 5 NaN
2012-01-01 05:30:00 NaN 8
2012-01-01 06:00:00 6 9
2012-01-01 07:00:00 7 NaN
2012-01-01 08:00:00 8 NaN
2012-01-01 09:00:00 9 NaN
# Fill the gaps in every column by interpolating linearly with respect to the
# actual time distance between rows (method='time'), not the row position.
# This requires `combined` to be indexed by timestamps. Values before a
# column's first observation are left as NaN.
combined = combined.interpolate(method='time')
print(combined)
--output:--
temp voltage
2012-01-01 00:00:00 0.000000 NaN
2012-01-01 01:00:00 1.000000 NaN
2012-01-01 01:20:00 1.333333 5.000000
2012-01-01 01:40:00 1.666667 6.000000
2012-01-01 02:00:00 2.000000 7.000000
2012-01-01 03:00:00 3.000000 7.285714
2012-01-01 04:00:00 4.000000 7.571429
2012-01-01 05:00:00 5.000000 7.857143
2012-01-01 05:30:00 5.500000 8.000000
2012-01-01 06:00:00 6.000000 9.000000
2012-01-01 07:00:00 7.000000 9.000000
2012-01-01 08:00:00 8.000000 9.000000
2012-01-01 09:00:00 9.000000 9.000000
# Back-fill the NaNs that remain at the start of a column with the first
# available observation. bfill() replaces the deprecated
# fillna(method='backfill') spelling; the result is printed, not stored.
print(combined.bfill())
--output:--
temp voltage
2012-01-01 00:00:00 0.000000 5.000000
2012-01-01 01:00:00 1.000000 5.000000
2012-01-01 01:20:00 1.333333 5.000000
2012-01-01 01:40:00 1.666667 6.000000
2012-01-01 02:00:00 2.000000 7.000000
2012-01-01 03:00:00 3.000000 7.285714
2012-01-01 04:00:00 4.000000 7.571429
2012-01-01 05:00:00 5.000000 7.857143
2012-01-01 05:30:00 5.500000 8.000000
2012-01-01 06:00:00 6.000000 9.000000
2012-01-01 07:00:00 7.000000 9.000000
2012-01-01 08:00:00 8.000000 9.000000
2012-01-01 09:00:00 9.000000 9.000000
기본 아이디어는 모든 DataFrame에 사용할 공통 datetime 인덱스를 결정하는 것입니다. 관측된 모든 시간의 합집합을 취하면 될 것 같으므로 이 작업은 쉬울 것입니다. pd.concat([df1, df2, df3], axis=1).fillna()와 같은 식입니다. 그러나 샘플 데이터와 기대하는 결과를 게시하지 않으면 누구에게서도 완전한 답변을 얻지 못할 것입니다. – TomAugspurger