'''
An unlocked version of the timeseries API intended for testing alternate inputs.
Mirrors the production timeseries API in the crucial respects, but won't be as fast.
ONLY works after the first three variables in MockApi.__init__ are populated.
'''
from typing import Iterator, Optional, Sequence, Tuple

import pandas as pd
class MockApi:
    '''
    Local mock of the timeseries API that serves csv data one group at a time.

    Usage: iterate over `iter_test()`, and call `predict()` once for every
    tuple of dataframes it yields. When the iteration is exhausted, all stored
    predictions are written to `submission.csv` in the working directory.
    '''

    def __init__(
        self,
        input_paths: Optional[Sequence[str]] = None,
        group_id_column: Optional[str] = None,
        export_group_id_column: Optional[bool] = None,
    ):
        '''
        Configure the mock API.

        All three settings are required. Either pass them as arguments or edit
        the defaults in the signature above; leaving any of them as None raises.

        Args:
            input_paths: a list of two or more paths to the csv files to be served.
            group_id_column: the column that identifies which groups of rows the API should serve.
                A call to iter_test serves all rows of all dataframes with the current group ID value.
            export_group_id_column: if true, the dataframes iter_test serves will include
                the group_id_column values.

        Raises:
            ValueError: if a setting is missing, if input_paths is a single string,
                or if input_paths holds fewer than two paths.
        '''
        if input_paths is None or group_id_column is None or export_group_id_column is None:
            raise ValueError(
                'MockApi is not configured: provide input_paths, group_id_column and '
                'export_group_id_column (as arguments or by editing the defaults in MockApi.__init__).'
            )
        # A bare string has a length too, so it would slip through the size check below.
        if isinstance(input_paths, str):
            raise ValueError('input_paths must be a sequence of csv paths, not a single string.')
        # iter_test is only designed to support at least two dataframes, such as test and sample_submission
        if len(input_paths) < 2:
            raise ValueError('input_paths must contain at least two csv paths.')

        self.input_paths: Sequence[str] = input_paths
        self.group_id_column: str = group_id_column
        self.export_group_id_column: bool = export_group_id_column

        # State machine: 'initialized' -> ('prediction_needed' -> 'prediction_received')* -> 'finished'
        self._status = 'initialized'
        self.predictions = []

    def iter_test(self) -> Iterator[Optional[Tuple[pd.DataFrame, ...]]]:
        '''
        Loads all of the dataframes specified in self.input_paths,
        then yields all rows in those dataframes that equal the current self.group_id_column value.

        Groups are served in the order their IDs first appear in the first csv file.
        Each yielded item is a tuple with one dataframe per input path, in the same
        order as self.input_paths. If the caller asks for the next item before
        calling `predict()`, a reminder is printed and None is yielded instead.
        Once every group has been served, the collected predictions are written
        to `submission.csv`.

        Raises:
            Exception: if iteration has already been started on this instance.
        '''
        if self._status != 'initialized':
            raise Exception('WARNING: the real API can only iterate over `iter_test()` once.')

        dataframes = [pd.read_csv(pth, low_memory=False) for pth in self.input_paths]
        group_order = dataframes[0][self.group_id_column].drop_duplicates().tolist()
        dataframes = [df.set_index(self.group_id_column) for df in dataframes]

        for group_id in group_order:
            self._status = 'prediction_needed'
            current_data = []
            for df in dataframes:
                # Selecting with a one-element list always returns a DataFrame, even when
                # only one row matches, so single-row groups keep their column dtypes
                # and the index name instead of being rebuilt from a row Series.
                cur_df = df.loc[[group_id]]
                cur_df = cur_df.reset_index(drop=not self.export_group_id_column)
                current_data.append(cur_df)
            yield tuple(current_data)

            # Block progress until predict() has accepted predictions for this group.
            while self._status != 'prediction_received':
                print('You must call `predict()` successfully before you can continue with `iter_test()`', flush=True)
                yield None

        with open('submission.csv', 'w') as f_open:
            pd.concat(self.predictions).to_csv(f_open, index=False)
        self._status = 'finished'

    def predict(self, user_predictions: pd.DataFrame):
        '''
        Accepts and stores the user's predictions and unlocks iter_test once that is done

        Args:
            user_predictions: the predictions for the group most recently served by iter_test.

        Raises:
            Exception: if the full test set has already been predicted, if no group is
                currently awaiting predictions, or if user_predictions is not a DataFrame.
        '''
        if self._status == 'finished':
            raise Exception('You have already made predictions for the full test set.')
        if self._status != 'prediction_needed':
            raise Exception('You must get the next test sample from `iter_test()` first.')
        if not isinstance(user_predictions, pd.DataFrame):
            raise Exception('You must provide a DataFrame.')

        self.predictions.append(user_predictions)
        self._status = 'prediction_received'
def make_env():
    '''
    Create and return a new MockApi instance, constructed with no arguments.
    '''
    env = MockApi()
    return env
没有合适的资源?快使用搜索试试~ 我知道了~
预测产消者的能源行为 最新!
共19个文件
csv:15个
py:2个
so:1个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 77 浏览量
2024-10-11
21:41:07
上传
评论
收藏 232.59MB ZIP 举报
温馨提示
所有数据集都遵循相同的时间约定。时间以 EET/EEST 给出。大多数变量是 1 小时期间的总和或平均值。日期时间列(无论其名称如何)始终给出 1 小时期间的开始时间。但是,对于天气数据集,某些变量(例如温度或云量)是针对特定时间给出的,该时间始终是 1 小时期间的结束时间。 文件 train.csv county - 该县的 ID 代码。 is_business - 布尔值,表示产消者是否是企业。 product_type - ID 代码与合同类型的以下映射:{0: "Combined", 1: "Fixed", 2: "General service", 3: "Spot"}。 target - 相应分段在该小时内的消耗量或生产量。分段由 county、is_business 和 product_type 定义。 is_consumption - 布尔值,表示此行的目标是消耗量还是生产量。 datetime - 爱沙尼亚时间,EET(UTC+2)/ EEST(UTC+3)。它描述了目标所对应的 1 小时周期的开始时间。 data_block_id - 所有共享相同 data_block_id 的行将在同一预测时间可用……
资源推荐
资源详情
资源评论
收起资源包目录
predict-energy-behavior-of-prosumers.zip (19个子文件)
enefit
__init__.py 59B
competition.cpython-310-x86_64-linux-gnu.so 190KB
forecast_weather.csv 744.93MB
historical_weather.csv 172.17MB
weather_station_to_county_mapping.csv 3KB
client.csv 1.3MB
example_test_files
forecast_weather.csv 4.48MB
sample_submission.csv 171KB
historical_weather.csv 1.09MB
client.csv 9KB
electricity_prices.csv 5KB
revealed_targets.csv 617KB
gas_prices.csv 229B
test.csv 600KB
electricity_prices.csv 751KB
gas_prices.csv 23KB
county_id_to_name_map.json 301B
train.csv 94.39MB
public_timeseries_testing_util.py 4KB
共 19 条
- 1
资源评论
乌南竹
- 粉丝: 1835
- 资源: 311
下载权益
C知道特权
VIP文章
课程特权
开通VIP
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功