import json import scipy.stats as stats import numpy as np import pandas as pd import queue import threading from collections import deque from logger_config import logger # 假设我们有一个数据流,订单簿和成交数据 order_book_snapshots = deque(maxlen=1000) # 存储过去10000个订单簿快照 # 数据积累的阈值 DATA_THRESHOLD = 20 messages = queue.Queue() # 创建一个线程安全队列 stop_event = threading.Event() def on_message_depth(_ws, message): global order_book_snapshots json_message = json.loads(message) bids = json_message['data']['b'][:10] # Top 10 bids asks = json_message['data']['a'][:10] # Top 10 asks timestamp = pd.to_datetime(json_message['data']['E'], unit='ms') order_book_snapshots.append({ 'bids': bids, 'asks': asks, 'timestamp': timestamp }) if len(order_book_snapshots) >= DATA_THRESHOLD: process_depth_data(order_book_snapshots) def process_depth_data(order_book_snapshots): # 提取订单大小数据 order_sizes = [] for snapshot in order_book_snapshots: for bid in snapshot['bids']: order_sizes.append(float(bid[1])) for ask in snapshot['asks']: order_sizes.append(float(ask[1])) if len(order_sizes) < 2: # 确保有足够的数据进行计算 logger.warning("Not enough data to calculate kappa.") return # 使用 MLE 估计幂律指数 β Q_min = np.min(order_sizes) n = len(order_sizes) try: beta_hat = 1 + n / np.sum(np.log(order_sizes / Q_min)) except ZeroDivisionError: logger.error("ZeroDivisionError encountered in MLE calculation.") return # 使用最小二乘法拟合幂律分布 log_order_sizes = np.log(order_sizes) hist, bin_edges = np.histogram(log_order_sizes, bins=50, density=True) bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 # 过滤掉频率为零的bin non_zero_indices = hist > 0 hist = hist[non_zero_indices] bin_centers = bin_centers[non_zero_indices] if len(hist) == 0 or len(bin_centers) == 0: logger.warning("No non-zero bins available for linear regression.") return try: slope, intercept, r_value, p_value, std_err = stats.linregress(bin_centers, np.log(hist)) beta_lsm = -slope except ValueError as e: logger.error(f"ValueError encountered in linear regression: {e}") return # 计算kappa值 gamma = 0.1 # 库存风险厌恶参数 sigma = 0.02 # 市场波动率 T_minus_t = 1 # 交易会话的剩余时间 # 结合不同方法估算的β值,计算kappa kappa_mle = (2 / (gamma * sigma ** 2 * T_minus_t)) * np.log(1 + (gamma / beta_hat)) kappa_lsm = (2 / (gamma * sigma ** 2 * T_minus_t)) * np.log(1 + (gamma / beta_lsm)) # 输出结果 logger.info(f"Estimated beta_hat (MLE): {beta_hat}, kappa (MLE): {kappa_mle}") logger.info(f"Estimated beta_lsm (LSM): {beta_lsm}, kappa (LSM): {kappa_lsm}")