Selaa lähdekoodia

feat: 添加地址过滤脚本用于匹配特定模式的地址

新增地址过滤脚本,用于筛选以0x2f5e开头且以6f9f结尾的地址
支持处理账户数据结构并输出匹配结果
skyfffire 6 päivää sitten
vanhempi
commit
41a893fd22
1 muutettua tiedostoa jossa 242 lisäystä ja 0 poistoa
  1. 242 0
      src/lighter/filter_addresses.py

+ 242 - 0
src/lighter/filter_addresses.py

@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+"""
+地址过滤脚本
+专门用于过滤符合特定模式的地址:以 0x2f5e 开头且以 6f9f 结尾
+支持处理账户数据结构
+"""
+
+import argparse
+import json
+import os
+from typing import List, Dict, Any
+
+
+def normalize_address(address: str, to_uppercase: bool = False) -> str:
+    """Return *address* trimmed, ``0x``-prefixed and converted to one case.
+
+    Args:
+        address: Raw address string. A non-string value is not normalized at
+            all; it is simply returned as ``str(address)``.
+        to_uppercase: When true the whole result, prefix included, is
+            upper-cased (``0X...``); otherwise everything is lower-cased.
+
+    Returns:
+        The normalized address.
+    """
+    if not isinstance(address, str):
+        return str(address)
+
+    trimmed = address.strip()
+    # The prefix test ignores case, so an existing "0X" is not doubled.
+    has_prefix = trimmed[:2] in ('0x', '0X')
+    prefixed = trimmed if has_prefix else '0x' + trimmed
+    return prefixed.upper() if to_uppercase else prefixed.lower()
+
+
+def matches_pattern(address: str, start_pattern: str = '0x2f5e',
+                    end_pattern: str = '6f9f') -> bool:
+    """Check whether an address starts and ends with the given patterns.
+
+    Matching is case-insensitive: the address and both patterns are
+    upper-cased before comparison. The address goes through
+    ``normalize_address`` first, so surrounding whitespace is ignored and a
+    missing ``0x`` prefix is added. ``start_pattern`` is normalized the same
+    way, which means ``'2f5e'`` and ``'0x2f5e'`` are equivalent prefixes.
+
+    Args:
+        address: Address to test.
+        start_pattern: Required prefix (default ``0x2f5e``).
+        end_pattern: Required suffix (default ``6f9f``).
+
+    Returns:
+        ``True`` if the normalized address has both the prefix and the
+        suffix; ``False`` otherwise, including for non-string or blank input.
+    """
+    if not isinstance(address, str) or not address.strip():
+        return False
+
+    normalized = normalize_address(address, to_uppercase=True)
+    # Upper-casing also turns the "0x" prefix into "0X"; the patterns get the
+    # same treatment so both sides of the comparison agree.
+    prefix = normalize_address(start_pattern, to_uppercase=True)
+    suffix = end_pattern.strip().upper()
+
+    return normalized.startswith(prefix) and normalized.endswith(suffix)
+
+
+def filter_accounts_by_address_pattern(input_path: str) -> List[Dict[str, Any]]:
+    """Load account data from a JSON file and keep accounts with matching addresses.
+
+    The file must contain either one item or a list of items. An item is
+    processed only if it is a dict whose ``response`` value is a non-empty
+    dict holding an ``accounts`` list; each account is a dict whose
+    ``l1_address`` is tested with ``matches_pattern``. Anything that does not
+    have this shape is skipped silently.
+
+    Args:
+        input_path: Path of the JSON input file.
+
+    Returns:
+        One entry per item that has at least one matching account. Each
+        entry carries the item's ``index``, a reduced ``response`` (original
+        ``code``, the matching ``accounts`` and their count as ``total``),
+        the item's ``original_total`` and the ``matched_count``.
+
+    Raises:
+        FileNotFoundError: If ``input_path`` does not exist.
+        ValueError: If the file content is not valid JSON.
+    """
+    # Open directly instead of checking for existence first: that avoids the
+    # race between the check and the open, and keeps the original cause.
+    try:
+        with open(input_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"输入文件不存在: {input_path}") from e
+    except json.JSONDecodeError as e:
+        raise ValueError(f"JSON 解析错误: {e}") from e
+
+    # A single top-level object is treated as a one-element list.
+    items = data if isinstance(data, list) else [data]
+
+    filtered_results = []
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+
+        response = item.get('response')
+        if not response or not isinstance(response, dict):
+            continue
+
+        accounts = response.get('accounts', [])
+        if not isinstance(accounts, list):
+            continue
+
+        matching_accounts = [
+            account
+            for account in accounts
+            if isinstance(account, dict)
+            and matches_pattern(account.get('l1_address', ''))
+        ]
+        if not matching_accounts:
+            continue
+
+        # Rebuild the item so that it only contains the matching accounts,
+        # while remembering how many accounts the source item reported.
+        filtered_results.append({
+            'index': item.get('index'),
+            'response': {
+                'code': response.get('code'),
+                'total': len(matching_accounts),
+                'accounts': matching_accounts,
+            },
+            'original_total': response.get('total', 0),
+            'matched_count': len(matching_accounts),
+        })
+
+    return filtered_results
+
+
+def main(argv=None):
+    """Command-line entry point: filter an account file and save the matches.
+
+    Parses the command line, runs ``filter_accounts_by_address_pattern`` on
+    the input file, prints summary statistics and a preview of up to five
+    matching addresses, and writes the result as JSON to the output path.
+    With ``--addresses-only`` the output is the sorted list of unique
+    matching addresses instead of the full filtered items.
+
+    Args:
+        argv: Argument list to parse. ``None`` (the default) makes argparse
+            read ``sys.argv[1:]``.
+
+    Returns:
+        Process exit code: ``0`` on success (also when nothing matched, in
+        which case no output file is written), ``1`` if an exception occurred.
+    """
+    parser = argparse.ArgumentParser(
+        description='过滤符合特定模式的地址:以 0x2f5e 开头且以 6f9f 结尾',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例用法:
+  # 从账户数据文件中过滤地址
+  python filter_addresses.py -i accounts_0_100000.json -o filtered.json
+  
+  # 只显示匹配的地址列表
+  python filter_addresses.py -i accounts_0_100000.json --addresses-only
+        """
+    )
+
+    parser.add_argument(
+        '-i', '--input',
+        required=True,
+        help='输入 JSON 文件路径'
+    )
+
+    parser.add_argument(
+        '-o', '--output',
+        default='filtered_addresses.json',
+        help='输出文件路径 (默认: filtered_addresses.json)'
+    )
+
+    parser.add_argument(
+        '--addresses-only',
+        action='store_true',
+        help='只输出匹配的地址列表,不包含完整账户数据'
+    )
+
+    parser.add_argument(
+        '--pattern-info',
+        action='store_true',
+        help='显示匹配模式信息'
+    )
+
+    args = parser.parse_args(argv)
+
+    # Optionally describe the pattern before doing any work.
+    if args.pattern_info:
+        print("匹配模式:")
+        print("  开头: 0x2f5e (转换为大写匹配)")
+        print("  结尾: 6f9f (转换为大写匹配)")
+        print("  示例: 0x2f5e...6f9f")
+        print()
+
+    try:
+        print(f"正在处理文件: {args.input}")
+        filtered_results = filter_accounts_by_address_pattern(args.input)
+
+        if not filtered_results:
+            print("未找到符合模式的地址")
+            # Finding nothing is not an error; report success explicitly so
+            # every path returns an integer exit code.
+            return 0
+
+        print(f"找到 {len(filtered_results)} 个包含匹配地址的数据项")
+
+        # Flatten the matching accounts of all items to compute statistics.
+        matched_accounts = [
+            account
+            for result in filtered_results
+            for account in result['response']['accounts']
+        ]
+        total_matched_accounts = len(matched_accounts)
+        all_addresses = [
+            account.get('l1_address', '')
+            for account in matched_accounts
+            if account.get('l1_address', '')
+        ]
+
+        print(f"匹配的账户总数: {total_matched_accounts}")
+        print(f"匹配的地址总数: {len(all_addresses)}")
+
+        if args.addresses_only:
+            # Only the de-duplicated, sorted address strings are written.
+            unique_addresses = sorted(set(all_addresses))
+            output_data = unique_addresses
+            print(f"唯一地址数量: {len(unique_addresses)}")
+        else:
+            output_data = filtered_results
+
+        with open(args.output, 'w', encoding='utf-8') as f:
+            json.dump(output_data, f, ensure_ascii=False, indent=2)
+
+        print(f"结果已保存到: {args.output}")
+
+        # Show at most the first five matching addresses as a preview.
+        print("\n匹配地址预览:")
+        for position, addr in enumerate(all_addresses[:5], start=1):
+            print(f"  {position}. {addr}")
+
+        if len(all_addresses) > 5:
+            print(f"  ... 还有 {len(all_addresses) - 5} 个地址")
+
+    except Exception as e:
+        # Top-level boundary of the script: report the failure and signal it
+        # through the exit code instead of showing a traceback.
+        print(f"错误: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == '__main__':
+    # Raise SystemExit directly: the bare exit() helper is injected by the
+    # site module for interactive use and is missing under `python -S`.
+    raise SystemExit(main())