| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- import json
- import scipy.stats as stats
- import numpy as np
- import pandas as pd
- import queue
- import threading
- from collections import deque
- from logger_config import logger
- # 假设我们有一个数据流,订单簿和成交数据
- order_book_snapshots = deque(maxlen=1000) # 存储过去10000个订单簿快照
- # 数据积累的阈值
- DATA_THRESHOLD = 20
- messages = queue.Queue() # 创建一个线程安全队列
- stop_event = threading.Event()
- def on_message_depth(_ws, message):
- global order_book_snapshots
- json_message = json.loads(message)
- bids = json_message['data']['b'][:10] # Top 10 bids
- asks = json_message['data']['a'][:10] # Top 10 asks
- timestamp = pd.to_datetime(json_message['data']['E'], unit='ms')
- order_book_snapshots.append({
- 'bids': bids,
- 'asks': asks,
- 'timestamp': timestamp
- })
- if len(order_book_snapshots) >= DATA_THRESHOLD:
- process_depth_data(order_book_snapshots)
- def process_depth_data(order_book_snapshots):
- # 提取订单大小数据
- order_sizes = []
- for snapshot in order_book_snapshots:
- for bid in snapshot['bids']:
- order_sizes.append(float(bid[1]))
- for ask in snapshot['asks']:
- order_sizes.append(float(ask[1]))
- if len(order_sizes) < 2: # 确保有足够的数据进行计算
- logger.warning("Not enough data to calculate kappa.")
- return
- # 使用 MLE 估计幂律指数 β
- Q_min = np.min(order_sizes)
- n = len(order_sizes)
- try:
- beta_hat = 1 + n / np.sum(np.log(order_sizes / Q_min))
- except ZeroDivisionError:
- logger.error("ZeroDivisionError encountered in MLE calculation.")
- return
- # 使用最小二乘法拟合幂律分布
- log_order_sizes = np.log(order_sizes)
- hist, bin_edges = np.histogram(log_order_sizes, bins=50, density=True)
- bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
- # 过滤掉频率为零的bin
- non_zero_indices = hist > 0
- hist = hist[non_zero_indices]
- bin_centers = bin_centers[non_zero_indices]
- if len(hist) == 0 or len(bin_centers) == 0:
- logger.warning("No non-zero bins available for linear regression.")
- return
- try:
- slope, intercept, r_value, p_value, std_err = stats.linregress(bin_centers, np.log(hist))
- beta_lsm = -slope
- except ValueError as e:
- logger.error(f"ValueError encountered in linear regression: {e}")
- return
- # 计算kappa值
- gamma = 0.1 # 库存风险厌恶参数
- sigma = 0.02 # 市场波动率
- T_minus_t = 1 # 交易会话的剩余时间
- # 结合不同方法估算的β值,计算kappa
- kappa_mle = (2 / (gamma * sigma ** 2 * T_minus_t)) * np.log(1 + (gamma / beta_hat))
- kappa_lsm = (2 / (gamma * sigma ** 2 * T_minus_t)) * np.log(1 + (gamma / beta_lsm))
- # 输出结果
- logger.info(f"Estimated beta_hat (MLE): {beta_hat}, kappa (MLE): {kappa_mle}")
- logger.info(f"Estimated beta_lsm (LSM): {beta_lsm}, kappa (LSM): {kappa_lsm}")
|