|
|
@@ -1,85 +1,150 @@
|
|
|
import json
|
|
|
|
|
|
+import numpy as np
|
|
|
import pandas as pd
|
|
|
import time
|
|
|
import queue
|
|
|
import threading
|
|
|
from datetime import datetime
|
|
|
from logger_config import logger
|
|
|
+from collections import deque
|
|
|
+from sklearn.linear_model import LogisticRegression
|
|
|
|
|
|
-# Initialize the DataFrame
|
|
|
-df_trades = pd.DataFrame(columns=['price', 'qty', 'timestamp'])
|
|
|
-df_order_book = pd.DataFrame(columns=['bid_price', 'bid_qty', 'ask_price', 'ask_qty'])
|
|
|
+# 假设我们有一个数据流,订单簿和成交数据
|
|
|
+order_book_snapshots = deque(maxlen=10000) # 存储过去100ms的订单簿快照
|
|
|
+trade_data = deque(maxlen=10000) # 存储过去100ms的成交数据
|
|
|
|
|
|
-previous_order_book = None
|
|
|
-fill_probabilities = {}
|
|
|
-order_disappearances = {}
|
|
|
-order_executions = {}
|
|
|
-spoofing_probabilities = {}
|
|
|
-last_trade = {'price': None, 'qty': None, 'side': None}
|
|
|
+# 数据积累的阈值
|
|
|
+DATA_THRESHOLD = 100
|
|
|
+model = LogisticRegression() # 初始化模型
|
|
|
messages = queue.Queue() # 创建一个线程安全队列
|
|
|
|
|
|
stop_event = threading.Event()
|
|
|
|
|
|
def on_message_trade(_ws, message):
|
|
|
- global df_trades, order_executions, last_trade
|
|
|
+ global trade_data
|
|
|
json_message = json.loads(message)
|
|
|
trade = {
|
|
|
'price': float(json_message['data']['p']),
|
|
|
'qty': float(json_message['data']['q']),
|
|
|
'timestamp': pd.to_datetime(json_message['data']['T'], unit='ms'),
|
|
|
- 'side': 'sell' if json_message['data']['m'] else 'buy' # 'm' indicates是否买方是做市商
|
|
|
+ 'side': 'sell' if json_message['data']['m'] else 'buy'
|
|
|
}
|
|
|
- trade_df = pd.DataFrame([trade])
|
|
|
- if not trade_df.empty and not trade_df.isna().all().all():
|
|
|
- df_trades = pd.concat([df_trades, trade_df], ignore_index=True)
|
|
|
-
|
|
|
- # 记录每个价格的实际成交总量
|
|
|
- price = trade['price']
|
|
|
- last_trade = {'price': price, 'qty': trade['qty'], 'side': trade['side']}
|
|
|
- if price not in order_executions:
|
|
|
- order_executions[price] = 0
|
|
|
- order_executions[price] += trade['qty']
|
|
|
- show_message()
|
|
|
+ trade_data.append(trade)
|
|
|
+ predict_market_direction()
|
|
|
+
|
|
|
|
|
|
def on_message_depth(_ws, message):
|
|
|
- global df_order_book, order_disappearances, previous_order_book, spoofing_probabilities
|
|
|
+ global order_book_snapshots
|
|
|
json_message = json.loads(message)
|
|
|
bids = json_message['data']['b'][:10] # Top 10 bids
|
|
|
asks = json_message['data']['a'][:10] # Top 10 asks
|
|
|
- order_book = {
|
|
|
- 'bid_price': [float(bid[0]) for bid in bids],
|
|
|
- 'bid_qty': [float(bid[1]) for bid in bids],
|
|
|
- 'ask_price': [float(ask[0]) for ask in asks],
|
|
|
- 'ask_qty': [float(ask[1]) for ask in asks]
|
|
|
+ timestamp = pd.to_datetime(json_message['data']['E'], unit='ms')
|
|
|
+ order_book_snapshots.append({
|
|
|
+ 'bids': bids,
|
|
|
+ 'asks': asks,
|
|
|
+ 'timestamp': timestamp
|
|
|
+ })
|
|
|
+ predict_market_direction()
|
|
|
+
|
|
|
+
|
|
|
+def extract_features(order_book, trade):
|
|
|
+ # 计算买卖盘差距(spread)
|
|
|
+ best_bid = float(order_book['bids'][0][0])
|
|
|
+ best_ask = float(order_book['asks'][0][0])
|
|
|
+ spread = best_ask - best_bid
|
|
|
+
|
|
|
+ # 计算买卖盘深度
|
|
|
+ bid_depth = sum(float(bid[1]) for bid in order_book['bids'])
|
|
|
+ ask_depth = sum(float(ask[1]) for ask in order_book['asks'])
|
|
|
+
|
|
|
+ # 计算成交量和方向
|
|
|
+ trade_volume = trade['qty']
|
|
|
+ trade_side = 1 if trade['side'] == 'buy' else -1
|
|
|
+
|
|
|
+ features = {
|
|
|
+ 'spread': spread,
|
|
|
+ 'bid_depth': bid_depth,
|
|
|
+ 'ask_depth': ask_depth,
|
|
|
+ 'trade_volume': trade_volume,
|
|
|
+ 'trade_side': trade_side
|
|
|
}
|
|
|
- df_order_book = pd.DataFrame([order_book])
|
|
|
-
|
|
|
- show_message()
|
|
|
-
|
|
|
-
|
|
|
-# 计算成交概率
|
|
|
-def show_message():
|
|
|
- global df_order_book, last_trade
|
|
|
-
|
|
|
- if not df_order_book.empty and last_trade['price'] is not None:
|
|
|
- last_price = last_trade['price']
|
|
|
- asks = [[price, qty] for price, qty in
|
|
|
- zip(df_order_book['ask_price'].iloc[0], df_order_book['ask_qty'].iloc[0])]
|
|
|
- bids = [[price, qty] for price, qty in
|
|
|
- zip(df_order_book['bid_price'].iloc[0], df_order_book['bid_qty'].iloc[0])]
|
|
|
-
|
|
|
- asks_sorted = sorted(asks, key=lambda x: x[0])
|
|
|
- bids_sorted = sorted(bids, key=lambda x: x[0], reverse=True)
|
|
|
-
|
|
|
- last_qty = last_trade['qty']
|
|
|
- side = last_trade['side']
|
|
|
- data = {
|
|
|
- "asks": asks_sorted,
|
|
|
- "bids": bids_sorted,
|
|
|
- "last_price": last_price,
|
|
|
- "last_qty": last_qty,
|
|
|
- "side": side,
|
|
|
- "time": int(time.time() * 1000)
|
|
|
- }
|
|
|
- messages.put(data)
|
|
|
+
|
|
|
+ return features
|
|
|
+
|
|
|
+
|
|
|
+def prepare_training_data():
|
|
|
+ # 提取特征和标签
|
|
|
+ X = []
|
|
|
+ y = []
|
|
|
+
|
|
|
+ for i in range(len(order_book_snapshots) - 1):
|
|
|
+ current_order_book = order_book_snapshots[i]
|
|
|
+ current_trade = trade_data[i]
|
|
|
+ future_order_book = order_book_snapshots[i + 1]
|
|
|
+
|
|
|
+ # 提取当前的特征
|
|
|
+ features = extract_features(current_order_book, current_trade)
|
|
|
+ X.append(list(features.values()))
|
|
|
+
|
|
|
+ # 生成标签
|
|
|
+ current_price = float(current_order_book['bids'][0][0])
|
|
|
+ future_price = float(future_order_book['bids'][0][0])
|
|
|
+ label = generate_label(current_price, future_price)
|
|
|
+ y.append(label)
|
|
|
+
|
|
|
+ # 将特征和标签转换为NumPy数组
|
|
|
+ X_train = np.array(X)
|
|
|
+ y_train = np.array(y)
|
|
|
+
|
|
|
+ return X_train, y_train
|
|
|
+
|
|
|
+
|
|
|
+def generate_label(current_price, future_price):
|
|
|
+ return 1 if future_price > current_price else 0
|
|
|
+
|
|
|
+
|
|
|
+def check_and_train_model():
|
|
|
+ if len(order_book_snapshots) >= DATA_THRESHOLD and len(trade_data) >= DATA_THRESHOLD:
|
|
|
+ X_train, y_train = prepare_training_data()
|
|
|
+ model.fit(X_train, y_train)
|
|
|
+ logger.info("Model trained with", len(X_train), "samples")
|
|
|
+
|
|
|
+
|
|
|
+def predict_market_direction():
|
|
|
+ if len(order_book_snapshots) == 0 or len(trade_data) == 0:
|
|
|
+ logger.info("Not enough data to make a prediction")
|
|
|
+ return
|
|
|
+
|
|
|
+ features = extract_features(order_book_snapshots[-1], trade_data[-1])
|
|
|
+ if features is not None:
|
|
|
+ feature_vector = np.array([list(features.values())])
|
|
|
+ prediction = model.predict(feature_vector)
|
|
|
+ logger.info("Predicted Market Direction:", "Up" if prediction[0] == 1 else "Down")
|
|
|
+
|
|
|
+
|
|
|
+# # 计算成交概率
|
|
|
+# def show_message():
|
|
|
+# global df_order_book, last_trade
|
|
|
+#
|
|
|
+# if not df_order_book.empty and last_trade['price'] is not None:
|
|
|
+# last_price = last_trade['price']
|
|
|
+# asks = [[price, qty] for price, qty in
|
|
|
+# zip(df_order_book['ask_price'].iloc[0], df_order_book['ask_qty'].iloc[0])]
|
|
|
+# bids = [[price, qty] for price, qty in
|
|
|
+# zip(df_order_book['bid_price'].iloc[0], df_order_book['bid_qty'].iloc[0])]
|
|
|
+#
|
|
|
+# asks_sorted = sorted(asks, key=lambda x: x[0])
|
|
|
+# bids_sorted = sorted(bids, key=lambda x: x[0], reverse=True)
|
|
|
+#
|
|
|
+# last_qty = last_trade['qty']
|
|
|
+# side = last_trade['side']
|
|
|
+# data = {
|
|
|
+# "asks": asks_sorted,
|
|
|
+# "bids": bids_sorted,
|
|
|
+# "last_price": last_price,
|
|
|
+# "last_qty": last_qty,
|
|
|
+# "side": side,
|
|
|
+# "time": int(time.time() * 1000)
|
|
|
+# }
|
|
|
+# messages.put(data)
|