filter_accounts.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. from decimal import Decimal, InvalidOperation
  2. import argparse
  3. import json
  4. import os
  5. try:
  6. import ijson
  7. HAS_IJSON = True
  8. except ImportError:
  9. HAS_IJSON = False
  def parse_available(value):
      """Convert a raw balance value into a finite ``Decimal``.

      Args:
          value: The raw balance (string, int, float, ``Decimal`` or ``None``).
              It is converted through ``str(value)`` so floats keep their
              printed representation rather than their binary expansion.

      Returns:
          The parsed amount, or ``Decimal('0')`` when ``value`` is ``None``,
          cannot be parsed, or parses to NaN / Infinity.
      """
      if value is None:
          return Decimal('0')
      try:
          amount = Decimal(str(value))
      except (InvalidOperation, ValueError, TypeError):
          return Decimal('0')
      # "NaN" and "Infinity" parse without error, but a NaN poisons any sum it
      # joins and makes ordering comparisons raise InvalidOperation, so a
      # non-finite value is treated like any other unusable balance.
      return amount if amount.is_finite() else Decimal('0')
  def strip_positions(accounts):
      """Return a new list of accounts with the 'positions' key removed.

      Dict entries are shallow-copied without their 'positions' entry; entries
      that are not dicts are passed through unchanged. The input is not mutated.
      """
      stripped = []
      for account in accounts:
          if isinstance(account, dict):
              slim = {key: val for key, val in account.items() if key != 'positions'}
              stripped.append(slim)
          else:
              stripped.append(account)
      return stripped
  def _accounts_in_range(items, min_dec, max_dec):
      """Yield the stripped accounts of every item whose balance total is in range.

      An item contributes when the sum of its accounts' 'available_balance'
      values is strictly greater than ``min_dec`` and, if ``max_dec`` is not
      ``None``, less than or equal to ``max_dec``. Items, responses and account
      containers that do not have the expected dict / dict / list shape are
      skipped instead of raising.
      """
      for item in items:
          if not isinstance(item, dict):
              continue
          resp = item.get('response')
          if not isinstance(resp, dict):
              continue
          accounts = resp.get('accounts')
          if not isinstance(accounts, list):
              continue
          # strip_positions passes non-dict entries through untouched, so the
          # total must tolerate them as well; they simply add nothing.
          total = sum(
              (
                  parse_available(acc.get('available_balance'))
                  for acc in accounts
                  if isinstance(acc, dict)
              ),
              Decimal('0'),
          )
          if total > min_dec and (max_dec is None or total <= max_dec):
              yield from strip_positions(accounts)


  def filter_accounts(input_path, min_threshold, max_threshold=None):
      """Collect accounts from items whose total available balance is in range.

      The input file is expected to hold a JSON array of objects, each with a
      'response' object containing an 'accounts' list. For every item the
      'available_balance' of its accounts is summed; when the total lies in
      ``(min_threshold, max_threshold]`` all of that item's accounts are
      returned with their 'positions' key removed.

      Args:
          input_path: Path of the JSON file to read.
          min_threshold: Exclusive lower bound for an item's total balance.
          max_threshold: Inclusive upper bound, or ``None`` for no upper bound.

      Returns:
          A flat list of the accounts of all matching items, in file order.

      Raises:
          OSError: If ``input_path`` cannot be opened.
          decimal.InvalidOperation: If a threshold cannot be converted to
              ``Decimal``.
      """
      min_dec = Decimal(str(min_threshold))
      max_dec = Decimal(str(max_threshold)) if max_threshold is not None else None

      if HAS_IJSON:
          # ijson parses bytes; a binary handle avoids its text-to-bytes
          # compatibility wrapper. Items are streamed, so the list must be
          # built while the file is still open.
          with open(input_path, 'rb') as f:
              return list(_accounts_in_range(ijson.items(f, 'item'), min_dec, max_dec))

      with open(input_path, 'r', encoding='utf-8') as f:
          data = json.load(f)
      # A top-level value that is not an array has no items, which is also what
      # the streaming path yields for the 'item' prefix.
      items = data if isinstance(data, list) else []
      return list(_accounts_in_range(items, min_dec, max_dec))
  def main():
      """Parse command-line options, filter the input file and write the result.

      Reads ``--input``, keeps the accounts selected by ``filter_accounts`` for
      the ``--min-threshold`` / ``--max-threshold`` range, writes them to
      ``--output`` as indented JSON and prints a one-line summary.
      """

      def encode_decimal(obj):
          """``json.dump`` fallback that serialises ``Decimal`` values as floats.

          The streaming ijson parser returns non-integer JSON numbers as
          ``Decimal``, which ``json.dump`` cannot encode. Converting to float
          matches what the plain ``json`` fallback path writes. Any other
          unsupported type is still rejected.
          """
          if isinstance(obj, Decimal):
              return float(obj)
          raise TypeError(
              f'Object of type {type(obj).__name__} is not JSON serializable'
          )

      parser = argparse.ArgumentParser(
          description='Filter accounts by total available_balance range.'
      )
      parser.add_argument(
          '--input',
          default=os.path.join('src', 'lighter', 'accounts_0_100000.json'),
          help='Input JSON file path (default: src/lighter/accounts_0_100000.json)'
      )
      parser.add_argument(
          '--output',
          default=os.path.join('src', 'lighter', 'filtered_accounts.json'),
          help='Output JSON file path (default: src/lighter/filtered_accounts.json)'
      )
      parser.add_argument(
          '--min-threshold',
          type=float,
          default=50000,
          help='Minimum total available_balance to include (default: 50000)'
      )
      parser.add_argument(
          '--max-threshold',
          type=float,
          default=150000,
          help='Maximum total available_balance to include; exclude above this (default: 150000)'
      )
      args = parser.parse_args()

      filtered = filter_accounts(args.input, args.min_threshold, args.max_threshold)

      with open(args.output, 'w', encoding='utf-8') as out:
          json.dump(filtered, out, ensure_ascii=False, indent=2, default=encode_decimal)
      print(f'Filtered {len(filtered)} accounts written to: {args.output}')
  # Run the command-line interface only when this file is executed as a script,
  # not when it is imported as a module.
  if __name__ == '__main__':
      main()