1 year ago · 3abc2a6d45
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 
				 .idea
			
 
				+.ipynb_checkpoints
			
--- a/kappa/data_processing.py
+++ b/kappa/data_processing.py
@@ -1,24 +1,41 @@
 
				 import json
			
 
				+from decimal import Decimal, getcontext
			
 
				+
			
 
				 import pandas as pd
			
 
				 import threading
			
 
				 from collections import deque
			
 
				 from scipy.integrate import trapz
			
 
				 import numpy as np
			
 
				+from scipy.optimize import minimize
			
 
				 from logger_config import logger
			
 
				 
			
 
				+# 设置全局精度
			
 
				+getcontext().prec = 28
			
 
				+
			
 
				 # 假设我们有一个数据流，订单簿和成交数据
			
 
				 order_book_snapshots = deque(maxlen=600)        # 存储过去600个订单簿快照
			
 
				+spread_delta_snapshots = deque(maxlen=6000)     # 存储过去600个价差数据(最小变动价格的倍数)
			
 
				 trade_snapshots = deque(maxlen=6000)            # 存储过去6000个成交数据
			
 
				 
			
 
				 stop_event = threading.Event()
			
 
				 
			
 
				 # 初始参数
			
 
				-k_initial = 0.5
			
 
				-A_initial = 1.0
			
 
				+k_initial = 10
			
 
				+A_initial = 100
			
 
				 
			
 
				 # 假设S0是初始的参考价格
			
 
				 S0 = -1
			
 
				 
			
 
				+def get_tick_size_from_prices(ask_price, bid_price):
			
 
				+    # 获取价格的小数位数
			
 
				+    ask_decimal_places = len(str(ask_price).split('.')[1])
			
 
				+    bid_decimal_places = len(str(bid_price).split('.')[1])
			
 
				+
			
 
				+    # 确定最小变动单元
			
 
				+    tick_size = 10 ** -max(ask_decimal_places, bid_decimal_places)
			
 
				+
			
 
				+    return tick_size
			
 
				+
			
 
				 
			
 
				 def on_message_trade(_ws, message):
			
 
				     global trade_snapshots
			
@@ -34,16 +51,26 @@ def on_message_trade(_ws, message):
 
				 
			
 
				 
			
 
				 def on_message_depth(_ws, message):
			
 
				-    global order_book_snapshots
			
 
				+    global order_book_snapshots, spread_delta_snapshots
			
 
				     json_message = json.loads(message)
			
 
				     bids = [[float(price), float(quantity)] for price, quantity in json_message['data']['b'][:10]]
			
 
				     asks = [[float(price), float(quantity)] for price, quantity in json_message['data']['a'][:10]]
			
 
				     timestamp = pd.to_datetime(json_message['data']['E'], unit='ms')
			
 
				-    order_book_snapshots.append({
			
 
				+    depth = {
			
 
				         'bids': bids,
			
 
				         'asks': asks,
			
 
				         'timestamp': timestamp
			
 
				-    })
			
 
				+    }
			
 
				+    order_book_snapshots.append(depth)
			
 
				+
			
 
				+    # 求价差
			
 
				+    ask_price = Decimal(str(asks[0][0]))
			
 
				+    bid_price = Decimal(str(bids[0][0]))
			
 
				+    tick_size = get_tick_size_from_prices(ask_price, bid_price)
			
 
				+    spread = float(ask_price - bid_price)
			
 
				+    spread_delta = int(spread / tick_size)
			
 
				+    spread_delta_snapshots.append(spread_delta)
			
 
				+
			
 
				     process_depth_data()
			
 
				 
			
 
				 
			
@@ -95,11 +122,31 @@ def estimate_lambda(waiting_times, T):
 
				     return lambda_hat
			
 
				 
			
 
				 
			
 
				+def objective_function(params, delta_max, log_lambda_hat_value, log_integral_phi_value):
			
 
				+    """
			
 
				+    目标函数 r(A, k)
			
 
				+    :param params: 包含 A 和 k 的数组
			
 
				+    :param delta_max: 最大的价格偏移
			
 
				+    :param log_lambda_hat_value: log(λ(δ)) 的值
			
 
				+    :param log_integral_phi_value: log(∫ φ(k, ξ) dξ) 的值
			
 
				+    :return: 目标函数值
			
 
				+    """
			
 
				+    A, k = params
			
 
				+    if A <= 0:
			
 
				+        return 0
			
 
				+
			
 
				+    residuals = []
			
 
				+    for delta in range(1, delta_max + 1):
			
 
				+        residual = (log_lambda_hat_value + k * delta - np.log(A) - log_integral_phi_value) ** 2
			
 
				+        residuals.append(residual)
			
 
				+    return np.sum(residuals)
			
 
				+
			
 
				+
			
 
				 def process_depth_data():
			
 
				-    global order_book_snapshots, trade_snapshots
			
 
				+    global order_book_snapshots, trade_snapshots, spread_delta_snapshots
			
 
				 
			
 
				     # 数据预热，至少10条深度数据以及100条成交数据才能用于计算
			
 
				-    if len(order_book_snapshots) < 10 and len(trade_snapshots) < 100:
			
 
				+    if len(order_book_snapshots) < 10 or len(trade_snapshots) < 100:
			
 
				         return
			
 
				 
			
 
				     global k_initial, A_initial, S0
			
@@ -126,8 +173,23 @@ def process_depth_data():
 
				     # 时间窗口的大小
			
 
				     T = pd.to_datetime(100, unit='ms') - pd.to_datetime(0, unit='ms')
			
 
				 
			
 
				+    # 计算 λ(δ) 的估计值
			
 
				     lambda_hat = estimate_lambda(waiting_times, T)
			
 
				-    # logger.info("λ(δ) 的值: " + str(lambda_hat) + "log(∫ φ(k, ξ) dξ) 的值: " + str(log_integral_phi_value))
			
 
				+
			
 
				+    # 计算 log(λ(δ))
			
 
				     log_lambda_hat_value = np.log(lambda_hat)
			
 
				-    logger.info("log(λ(δ)) 的值: " + str(log_lambda_hat_value) + "log(∫ φ(k, ξ) dξ) 的值: " + str(log_integral_phi_value))
			
 
				 
			
 
				+    # ========================== 校准 A 和 k =============================
			
 
				+    delta_max = np.max(spread_delta_snapshots)
			
 
				+
			
 
				+    # 优化目标函数以找到最优的 A 和 k
			
 
				+    result = minimize(objective_function, np.array([A_initial, k_initial]),
			
 
				+                      args=(delta_max, log_lambda_hat_value, log_integral_phi_value))
			
 
				+
			
 
				+    if result.success:
			
 
				+        A_optimal, k_optimal = result.x
			
 
				+        logger.info(f"Optimal A: {A_optimal}, Optimal k: {k_optimal}")
			
 
				+    else:
			
 
				+        logger.error("Optimization failed")
			
 
				+
			
 
				+    # logger.info("log(λ(δ)) 的值: " + str(log_lambda_hat_value) + " log(∫ φ(k, ξ) dξ) 的值: " + str(log_integral_phi_value))