GenStockKLine_pythontick转K线代码_tick转为bar_源码

共1个文件

py：1个

版权申诉

5星 · 超过95%的资源 119 浏览量 2021-10-04 01:50:59 上传评论 1 收藏 3KB ZIP 举报

在金融数据分析领域，Tick数据和K线数据是两种常见的市场数据类型。Tick数据是指市场上的每一笔交易数据，包括价格、成交量、时间等信息；K线数据则是将多笔Tick数据汇总后的结果，通常以分钟、小时或日为周期进行统计。本项目“GenStockKLine_pythontick转K线代码_tick转为bar_源码”专注于将Tick数据转换为1分钟的K线（Bar）数据，这对于股票、期货、数字货币等市场的技术分析至关重要。

在Python中处理金融市场数据时，常常会用到pandas库，因为它具有强大的数据处理能力。在这个项目中，`GenStockKLine.py`文件很可能是实现这一转换功能的核心代码。以下是一些可能包含在该源码中的关键步骤和概念：

1. **数据读取与预处理**：首先需要从数据库、CSV文件或其他数据源读取Tick数据。这通常涉及使用pandas的`read_csv()`或`read_sql()`函数，根据数据存储的格式进行操作。预处理可能包括去除重复数据、处理缺失值等。
2. **时间序列处理**：Tick数据的时间戳通常精确到毫秒，转换成K线数据时需要按照分钟间隔进行归整。可以使用pandas的`resample()`函数，将数据按照每分钟进行重采样。
3. **开盘价、收盘价、最高价、最低价计算**：1分钟K线的四个核心要素——开盘价、收盘价、最高价和最低价，需要从Tick数据中计算得出。开盘价通常是这一分钟内的第一笔交易价格，收盘价是最后一笔交易价格，最高价和最低价则需要遍历这一分钟内的所有Tick数据找出。
4. **成交量计算**：成交量是1分钟K线的另一个重要指标，可以通过累加这一分钟内的所有Tick成交量得到。
5. **生成K线数据框**：使用pandas创建一个新的DataFrame，包含每分钟的K线数据，列名通常为'Open'、'Close'、'High'、'Low'和'Volume'。
6. **数据保存**：将生成的1分钟K线数据保存为CSV文件或其他适合进一步分析的格式，以便后续使用。

在实际的`GenStockKLine.py`源码中，还可能涉及异常处理、性能优化（如使用多线程并行处理大量Tick数据）、数据清洗以及特定业务逻辑的实现。理解并掌握这个过程，对于进行金融数据分析和算法交易的开发者来说是非常有价值的。通过这个项目，你可以深入学习Python在金融数据处理方面的应用，提升数据处理和编程技能。

资源详情

资源评论

资源推荐

收起资源包目录

GenStockKLine.zip （1个子文件）

GenStockKLine.py 9KB

#!/usr/bin/python3
"""Aggregate per-symbol tick CSV files into K-line (bar) CSV files."""
from pathlib import Path
from datetime import date, datetime, timedelta
import os
import sys
import pandas as pd
import json
import shutil


class MyDict(dict):
    """Dictionary whose items can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Use the mapping itself as the instance namespace so that
        # ``d.key`` and ``d["key"]`` refer to the same storage.
        self.__dict__ = self


class GenKLine:
    """Build K-line bars from tick files for every day in a date range.

    Settings are read from ``config.json`` in the working directory. The
    keys used by the visible code are ``data_path``, ``save_path``,
    ``exchanges``, ``cycles``, ``tick_columns`` and, optionally,
    ``trade_time``.
    """

    def __init__(self, start_date, end_date):
        """Store the date range (``"%Y%m%d"`` strings) and load the config."""
        self.logList = list()
        self.start_date = start_date
        self.end_date = end_date
        self.conf = None
        self.load_conf()
        self.of = None  # log file handle, opened by init_log()

    def load_conf(self):
        """Load ``config.json`` into ``self.conf`` as a :class:`MyDict`."""
        with open("config.json") as f:
            conf = json.load(f)
        self.conf = MyDict(conf)

    def filter_data(self, df, prod):
        """Keep only the rows of *df* that fall inside *prod*'s sessions.

        Sessions come from ``self.conf["trade_time"][prod]``, a list of
        ``{"start": ..., "end": ...}`` entries compared against the
        ``Time`` column. A row belongs to a session when
        ``start < Time <= end``. When ``start`` is not smaller than
        ``end`` the two conditions are OR-ed instead, i.e. rows after
        ``start`` or up to ``end`` are kept (presumably a session that
        crosses midnight -- confirm against the config).

        Returns *df* itself when no sessions are configured for *prod*,
        and an empty frame when the configured session list is empty.
        """
        if "trade_time" not in self.conf or prod not in self.conf["trade_time"]:
            return df
        parts = []
        for it in self.conf["trade_time"][prod]:
            after_start = df["Time"] > it["start"]
            up_to_end = df["Time"] <= it["end"]
            if it["start"] < it["end"]:
                parts.append(df[after_start & up_to_end])
            else:
                parts.append(df[after_start | up_to_end])
        if not parts:
            # pd.concat([]) raises ValueError; no sessions means no rows.
            return df.iloc[0:0]
        return pd.concat(parts, axis=0)

    @staticmethod
    def convert_to_double_datetime(dt):
        """Return *dt* as fractional days elapsed since 1899-12-30.

        1899-12-30 is the epoch used by OLE Automation / Excel serial dates.
        """
        dlt = dt - datetime(1899, 12, 30)
        return dlt.total_seconds() / 86400.0

    def gen_kline_from_tick(self, tick_df, symbol, k, exg):
        """Resample ticks into bars of cycle *k*.

        Args:
            tick_df: frame with ``Datetime``, ``LastPrice``, ``Volume``,
                ``Turnover`` and ``OpenInterest`` columns. Not modified.
            symbol: value written to the ``instrument`` column.
            k: ``'day'`` for daily bars, otherwise the bar length in seconds.
            exg: exchange code; for every exchange except CME, HKEX and SGX
                ``Volume`` and ``Turnover`` are differenced before summing.

        Returns:
            Frame with columns ``instrument, datetime, open, high, low,
            close, volume, amount, openinterest``, indexed by the right
            edge of each bar. Bars without ticks are dropped.
        """
        if k == 'day':
            freq = "D"
        else:
            # Lower-case "s": the upper-case "S" alias is deprecated in
            # recent pandas releases.
            freq = "{}s".format(k)

        # Work on an explicit copy so the caller's frame is never touched
        # and the assignments below do not hit a chained-assignment view.
        tick1 = tick_df[['Datetime', 'LastPrice', 'Volume', 'Turnover',
                         'OpenInterest']].copy()
        if exg not in ['CME', 'HKEX', 'SGX']:
            # NOTE(review): presumably these feeds carry running totals and
            # the three excluded exchanges carry per-tick values -- confirm.
            # The first difference is NaN and is skipped by the sums below.
            tick1["Volume"] = tick1["Volume"].diff()
            tick1["Turnover"] = tick1["Turnover"].diff()
        tick1.set_index('Datetime', inplace=True)

        tick0 = tick1['LastPrice'].resample(freq, label='right').ohlc()
        tick0['volume'] = (tick1['Volume'].resample(freq, label='right')
                           .sum().fillna(0).astype("int64"))
        tick0['amount'] = tick1['Turnover'].resample(freq, label='right').sum()
        tick0['openinterest'] = (tick1['OpenInterest']
                                 .resample(freq, label='right')
                                 .last().fillna(0).astype("int64"))
        tick0['instrument'] = symbol
        tick0['Datetime'] = tick0.index
        # Iterate the index directly instead of a row-wise apply(): same
        # values, cheaper, and well-defined when there are no bars.
        tick0['datetime'] = [GenKLine.convert_to_double_datetime(ts)
                             for ts in tick0.index]
        tick0['Time'] = [str(ts.time()) for ts in tick0.index]

        tick0 = self.filter_data(tick0, 'all')
        # Empty bars have NaN prices; they are dropped, not forward-filled.
        tick0 = tick0.dropna().sort_index()
        kline = tick0[["instrument", "datetime", "open", "high", "low",
                       "close", "volume", "amount", "openinterest"]]
        return kline

    def log_str(self, con):
        """Append *con* to the open log file as one line."""
        # A trailing newline keeps consecutive entries from running together.
        self.of.write("{}\n".format(con))

    def init_log(self, log_file=None):
        """Open the log file, defaulting to ``log/<start>-<end>.log``."""
        if log_file is None:
            log_file = "log/{}-{}.log".format(self.start_date, self.end_date)
        Path("log").mkdir(exist_ok=True)
        self.of = open(log_file, 'w+', encoding='utf-8')

    def write_log(self, log_file=None):
        """Write every entry of ``self.logList`` to the open log file.

        *log_file* is accepted but unused.
        """
        self.of.write("\n".join(self.logList))

    def get_exch_prod(self, symbol, dt):
        """Derive exchange, product and tick-file location from a stock code.

        Codes starting with 0, 3, 123, 128 or 15 map to ``SZE``/``SZA``;
        everything else maps to ``SSE``/``SHA``.

        Returns:
            ``(exg, prod, md_path, md_file)`` where both paths are strings
            rooted at ``self.conf["data_path"]``.
        """
        if str(symbol).startswith(("0", "3", "123", "128", "15")):
            prod, exg = "SZA", "SZE"
        else:
            prod, exg = "SHA", "SSE"
        md_path = "{}/{}/{}/{}".format(self.conf["data_path"], exg, prod, dt)
        md_file = "{}/tick/{}.csv".format(md_path, symbol)
        return exg, prod, md_path, md_file

    def gen_kline(self, symbol, exg, prod, md_path, md_file, dt):
        """Read one tick file and write one bar CSV per configured cycle.

        Args:
            symbol: instrument code, also used as the output file stem.
            exg, prod, dt: exchange, product and date; used for logging, and
                *exg* additionally selects the volume handling.
            md_path: output directory (``str`` or ``Path``); bars go to
                ``<md_path>/<cycle>/<symbol>.csv`` and a copy of the tick
                file to ``<md_path>/tick/<symbol>.csv``.
            md_file: path of the source tick CSV.
        """
        self.log_str("{} {} {} {} {}".format(datetime.now(), exg, prod, symbol, dt))
        if not os.path.exists(md_file):
            self.log_str("{} 文件不存在".format(md_file))
            return

        # Accept both Path objects and the plain strings that
        # get_exch_prod() returns.
        md_path = Path(md_path)

        md = pd.read_csv(
            md_file,
            dtype={'InstrumentID': object},
            skipinitialspace=True,
            skiprows=1,
            header=None,
            usecols=range(0, len(self.conf.tick_columns)))
        # set_axis(..., inplace=True) no longer exists in pandas 2.x.
        md.columns = list(self.conf.tick_columns)
        md["Datetime"] = [
            datetime.strptime("{} {}.{:03d}".format(day, tm, ms),
                              '%Y%m%d %H:%M:%S.%f')
            for day, tm, ms in zip(md['TradingDay'], md['Time'],
                                   md['Milliseconds'])
        ]
        md1 = md[md.LastPrice > 0.0001]

        for k in self.conf["cycles"]:
            if len(md1) <= 0:
                continue
            md0 = self.gen_kline_from_tick(md1, symbol, k, exg)
            # str(k): a cycle may be configured as an int number of seconds.
            cycle_path = md_path / str(k)
            cycle_path.mkdir(parents=True, exist_ok=True)
            out_file = cycle_path / "{}.csv".format(symbol)
            md0.to_csv(out_file, index=False)
            print('{}{} is updated'.format(symbol, k))

        # Copy the tick file into the output tree as well.
        # NOTE(review): the original indentation was lost; this step is
        # assumed to run once after the cycle loop -- confirm.
        out_tick_folder = md_path / 'tick'
        out_tick_folder.mkdir(parents=True, exist_ok=True)
        out_tick_file = "{}/{}.csv".format(out_tick_folder, symbol)
        if not os.path.exists(out_tick_file):
            shutil.copy(md_file, out_tick_file)

    # ------------------------------------------------------------------
    # NOTE: the source preview is truncated in the middle of
    # GenKLine.start(). The visible fragment is reproduced below for
    # reference only; the remainder of the method (including the date
    # increment of the while loop) is not visible and is not reconstructed.
    #
    # def start(self):
    #     data_path = Path(self.conf["data_path"])
    #     save_folder = Path(self.conf["save_path"])
    #     start_dt = datetime.strptime(self.start_date, "%Y%m%d")
    #     end_dt = datetime.strptime(self.end_date, "%Y%m%d")
    #     self.init_log()
    #     trade_date = start_dt
    #     while trade_date <= end_dt:
    #         date_str = trade_date.strftime("%Y%m%d")
    #         print("开始处理日期：{}".format(date_str))
    #         for exg in self.conf['exchanges'].keys():
    #             # if exg not in ['CME', 'HKEX', 'SGX']:
    #             #     continue
    #             exch_path = data_path / exg
    #             if not exch_path.exists() or not exch_path.is_dir():
    #                 print(str(exch_path), "not exists")
    #                 continue
    #             for prod_path in exch_path.iterdir():
    #                 if not prod_path.is_dir():
    #                     print(str(prod_path), "not dir")
    #                     continue
    #                 if len(self.conf['exchanges'][exg]) > 0 and prod_path.name not in self.conf['exchanges'][exg]:
    #                     print(prod_path.name, "is skipped")
    #                     continue
    #                 day_path = prod_path / date_str
    #                 save_path = save_folder / exg / prod_path.name / date_str
    #                 if not save_path.exists():
    #                     Path.mkdir(save_path, parents=True)
    #                 if day_path.exists() and day_path.is_dir():
    #                     tick_path = day_path / "tick"
    #                     for tick_file in tick_path.iterdir():
    #                         if tick_file  <-- preview ends here
    # ------------------------------------------------------------------