🧠 RL Models Directory · Ghost Unit AI
Reinforcement Learning for Optimal Trend Detection & Strike Execution
📌 Clarification: The rl-models/ directory contains trained reinforcement learning models for AI-driven trend detection and strike execution. The trained-policy.onnx file is the exported, production-ready neural network policy that runs in the browser via ONNX Runtime Web or on the backend.
📁 RL Models Directory Structure
```
rl-models/
├── trained-policy.onnx              # 🎯 Exported ONNX model for trend prediction
├── trained-policy_metadata.json     # 📋 Model metadata (version, accuracy, etc.)
├── training/
│   ├── train_trend_detector.py      # 🚀 Main training script
│   ├── environment.py               # 🎮 Market simulation environment
│   ├── reward_function.py           # 🎁 Custom reward logic
│   └── config.yaml                  # ⚙️ Hyperparameters
├── models/
│   ├── policy_network.py            # 🧠 Neural network architecture
│   ├── dqn_agent.py                 # 🤖 Deep Q-Network agent
│   └── ppo_agent.py                 # 📈 Proximal Policy Optimization agent
├── inference/
│   ├── onnx_runtime.py              # 🔮 ONNX inference wrapper (Python)
│   ├── browser_inference.js         # 🌐 WebAssembly/ONNX for frontend
│   └── model_loader.py              # 📦 Dynamic model loading
├── export/
│   └── export_to_onnx.py            # 🔄 PyTorch → ONNX converter
├── trend-detector/                  # 📊 RL agent for detecting emerging trends
│   ├── policy.pth                   # 🔬 PyTorch trained weights
│   ├── config.yaml                  # ⚙️ Hyperparameters
│   └── feature_extractor.onnx       # 🔧 Feature engineering module
├── strike-optimizer/                # ⚡ RL for optimal strike timing
│   ├── dqn_policy.onnx              # 🎯 Deep Q-Network for strike decisions
│   ├── reward_function.py           # 🎁 Custom reward logic
│   └── environment.py               # 🎮 Market simulation env
├── anomaly-scout/                   # 🕵️ Unsupervised RL for anomaly detection
│   ├── isolation_forest.onnx        # 🌲 Hybrid model
│   └── threshold.pkl                # 📊 Dynamic thresholds
├── ensembles/                       # 🔄 Model blending
│   └── weighted_ensemble.onnx       # ⚖️ Combined predictions
├── checkpoints/                     # 💾 Training checkpoints
│   └── checkpoint_ep*.pt            # 🔄 Periodic model saves
└── ipfs_storage/                    # 📀 Immutable model storage
    └── model_versioning.py          # 🧬 IPFS model versioning
```
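The tree lists trained-policy_metadata.json but its contents are not shown in this post. As a rough sketch only, the export step could write something like the following; every field name here is an assumption, not the actual schema.

```python
# Illustrative sketch: the real trained-policy_metadata.json schema is not documented here.
import json

metadata = {
    "model": "trained-policy.onnx",
    "version": "1.0.0",                          # assumed semantic version
    "opset_version": 11,                         # matches the export script in section 3
    "input_shape": [1, 31],                      # 31 market features
    "outputs": ["action_probs", "state_value"],  # names used by the ONNX export
    "validation_accuracy": None,                 # filled in by the training pipeline
}

with open("rl-models/trained-policy_metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)
```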
🎯 What the Trained Policy Does
The trained-policy.onnx file is the exported, production-ready neural network policy that:
- Takes real-time market data as input (31 features)
- Outputs `{ action: "strike" | "wait" | "pause", confidence: 0.0-1.0 }`
- Runs in the browser via ONNX Runtime Web or on the backend
- Is immutable and versioned on IPFS for auditability
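The browser path is shown in section 4️⃣. For the backend path, the tree lists inference/onnx_runtime.py but its code is not included in this post; the wrapper below is a minimal sketch of that idea, assuming the input/output names used by the export script in section 3️⃣ (class and method names are illustrative).

```python
# Minimal backend inference sketch (assumed interface; the actual
# rl-models/inference/onnx_runtime.py may differ).
import numpy as np
import onnxruntime as ort

ACTION_MAP = {0: "WAIT", 1: "STRIKE", 2: "AGGRESSIVE_STRIKE", 3: "PAUSE"}

class PolicyInference:
    def __init__(self, model_path: str = "rl-models/trained-policy.onnx"):
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

    def predict(self, market_features: np.ndarray) -> dict:
        # market_features: 31-dimensional float vector of the current market state
        state = market_features.astype(np.float32).reshape(1, 31)
        action_probs, _state_value = self.session.run(None, {"market_state": state})
        action_index = int(np.argmax(action_probs[0]))
        return {
            "action": ACTION_MAP[action_index],
            "confidence": float(action_probs[0][action_index]),
        }
```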
1️⃣ Core RL Environment
📄 rl-models/training/environment.py
```python
import numpy as np
from gym import Env, spaces

class HealthTrendMarketEnv(Env):
    """
    Market simulation environment for the RL agent to learn optimal strike timing.

    State space (observation): 31-dimensional continuous space
    - price_momentum: 10 recent price changes
    - volume_profile: 10 recent volume indicators
    - social_sentiment: 5 sentiment scores
    - competitor_activity: 3 competitor strike counts
    - temporal_features: hour, day_of_week, seasonality

    Action space: 4 discrete actions
    - 0: WAIT (do nothing, continue observing)
    - 1: STRIKE (execute a health trend strike)
    - 2: AGGRESSIVE_STRIKE (double down, higher risk/reward)
    - 3: PAUSE (emergency cooldown, negative reward)
    """

    def __init__(self, config=None):
        super().__init__()
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(31,), dtype=np.float32)
        self.action_space = spaces.Discrete(4)
        self.balance = 10000
        self.strike_history = []

    def step(self, action):
        reward = self._calculate_reward(action)   # custom reward logic (see reward_function.py)
        observation = self._get_observation()     # next 31-feature market state
        done = self.balance <= 0                  # episode ends when the balance is exhausted
        return observation, reward, done, {}
```
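A quick smoke test of the environment, assuming reset() and the helper methods referenced above are implemented as in the full module (this rollout loop is illustrative, not part of the repository):

```python
# Illustrative random-policy rollout; assumes reset(), _get_observation() and
# _calculate_reward() are implemented in the full environment module.
env = HealthTrendMarketEnv()
state = env.reset()
total_reward = 0
for _ in range(100):
    action = env.action_space.sample()            # random WAIT/STRIKE/AGGRESSIVE_STRIKE/PAUSE
    state, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        break
print(f"Random-policy reward over one episode: {total_reward:.2f}")
```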
2️⃣ PPO Agent
📄 rl-models/models/ppo_agent.py
```python
import torch
import torch.nn as nn

class PolicyNetwork(nn.Module):
    def __init__(self, state_dim=31, action_dim=4):
        super().__init__()
        self.shared = nn.Sequential(
            nn.Linear(state_dim, 256), nn.ReLU(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, 64), nn.ReLU()
        )
        self.actor = nn.Sequential(
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, action_dim), nn.Softmax(dim=-1)
        )
        self.critic = nn.Sequential(
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, state):
        features = self.shared(state)
        return self.actor(features), self.critic(features)

class PPOAgent:
    def __init__(self):
        self.policy = PolicyNetwork()
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=3e-4)

    def get_action(self, state):
        with torch.no_grad():
            action_probs, _ = self.policy(torch.FloatTensor(state).unsqueeze(0))
            return torch.multinomial(action_probs, 1).item()
```
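The agent above only samples actions; the PPO update itself is not shown in the post. For reference, a single clipped-surrogate update step could look like the sketch below. The batch tensors (states, actions, old log-probs, returns, advantages) and the loss coefficients are assumptions; experience collection is omitted.

```python
# Illustrative PPO update step (not the repository's actual training code).
import torch
import torch.nn.functional as F

def ppo_update(agent, states, actions, old_log_probs, returns, advantages, clip_eps=0.2):
    """One clipped-surrogate PPO step over a collected batch."""
    action_probs, state_values = agent.policy(states)          # shapes: (B, 4), (B, 1)
    dist = torch.distributions.Categorical(action_probs)
    new_log_probs = dist.log_prob(actions)

    # Probability ratio between new and old policy, clipped as in PPO
    ratio = torch.exp(new_log_probs - old_log_probs)
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1 - clip_eps, 1 + clip_eps) * advantages
    policy_loss = -torch.min(surr1, surr2).mean()

    # Critic regression toward discounted returns, plus an entropy bonus
    value_loss = F.mse_loss(state_values.squeeze(-1), returns)
    entropy_bonus = dist.entropy().mean()
    loss = policy_loss + 0.5 * value_loss - 0.01 * entropy_bonus

    agent.optimizer.zero_grad()
    loss.backward()
    agent.optimizer.step()
    return loss.item()
```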
3️⃣ Training Pipeline
📄 rl-models/training/train_trend_detector.py
```python
import torch
import yaml
from tqdm import tqdm

# Import paths assume rl-models/ is on PYTHONPATH
from environment import HealthTrendMarketEnv   # rl-models/training/environment.py
from models.ppo_agent import PPOAgent          # rl-models/models/ppo_agent.py

class TrainingPipeline:
    def __init__(self, config_path='config.yaml'):
        with open(config_path, 'r') as f:
            self.config = yaml.safe_load(f)
        self.env = HealthTrendMarketEnv()
        self.agent = PPOAgent()

    def train(self, num_episodes=1000):
        for episode in tqdm(range(num_episodes)):
            state = self.env.reset()
            episode_reward = 0
            while True:
                action = self.agent.get_action(state)
                next_state, reward, done, _ = self.env.step(action)
                episode_reward += reward
                state = next_state          # advance to the next market state
                if done:
                    break
            if episode % 100 == 0:
                print(f"📊 Episode {episode}: Reward = {episode_reward:.2f}")
        return self.agent

    def export_to_onnx(self, output_path='../trained-policy.onnx'):
        self.agent.policy.eval()
        dummy_input = torch.randn(1, 31)
        torch.onnx.export(self.agent.policy, dummy_input, output_path,
                          input_names=['market_state'],
                          output_names=['action_probs', 'state_value'],
                          opset_version=11)
        print(f"✅ Model exported to ONNX: {output_path}")
```
4️⃣ ONNX Inference · Browser Integration
📄 rl-models/inference/browser_inference.js
```javascript
import * as ort from 'onnxruntime-web';

class GhostUnitInference {
  constructor() {
    this.session = null;
    this.actionMap = { 0: 'WAIT', 1: 'STRIKE', 2: 'AGGRESSIVE_STRIKE', 3: 'PAUSE' };
  }

  async loadModel(modelPath = '/models/trained-policy.onnx') {
    this.session = await ort.InferenceSession.create(modelPath, {
      executionProviders: ['wasm', 'cpu']
    });
    console.log('✅ Ghost Unit RL model loaded');
  }

  async predict(marketFeatures) {
    const inputTensor = new ort.Tensor('float32', marketFeatures, [1, 31]);
    const results = await this.session.run({ 'market_state': inputTensor });
    const actionProbs = results['action_probs'].data;
    const actionIndex = actionProbs.indexOf(Math.max(...actionProbs));
    return {
      action: this.actionMap[actionIndex],
      confidence: actionProbs[actionIndex],
      timestamp: Date.now()
    };
  }
}

class RealTimeRLMonitor {
  constructor(wsUrl, inferenceEngine) {
    this.ws = new WebSocket(wsUrl);
    this.inference = inferenceEngine;
    this.ws.onmessage = async (event) => {
      const decision = await this.inference.predict(JSON.parse(event.data));
      if (decision.action === 'STRIKE' && decision.confidence > 0.75) {
        await this.executeStrike(decision);
      }
    };
  }

  async executeStrike(decision) {
    const response = await fetch('/api/v1/strike/execute', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },  // send the decision as JSON
      body: JSON.stringify(decision)
    });
    return response.json();
  }
}

export { GhostUnitInference, RealTimeRLMonitor };
```
5️⃣ Reward Function
📄 rl-models/training/reward_function.py
```python
import numpy as np

class RewardFunction:
    def calculate(self, action: int, profit: float, market_state: np.ndarray) -> float:
        # Financial profit (50% weight)
        if profit > 100:
            profit_reward = 10.0
        elif profit > 0:
            profit_reward = profit / 10
        else:
            profit_reward = max(-20, profit / 5)

        # Timing accuracy (20% weight): reward acting into positive momentum
        momentum = market_state[0] - market_state[5] if len(market_state) > 5 else 0
        timing_reward = 2.0 if momentum > 0.05 else (-1.0 if momentum < -0.05 else 0.5)

        # Risk exposure (30% weight): penalize acting in high-volatility regimes
        volatility = float(np.std(market_state[:10])) if len(market_state) >= 10 else 0.0
        risk_reward = -2.0 if volatility > 0.1 else (1.0 if volatility < 0.05 else 0.0)

        # Weighted combination of the three components
        return 0.5 * profit_reward + 0.2 * timing_reward + 0.3 * risk_reward
```
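A quick sanity check of the weighting, using made-up numbers purely for illustration:

```python
# Synthetic inputs; values are made up to demonstrate the weighted sum.
state = np.array([0.12, 0.08, 0.05, 0.03, 0.01, 0.02] + [0.0] * 25)  # 31 features
rf = RewardFunction()
reward = rf.calculate(action=1, profit=150.0, market_state=state)
print(f"Reward for a profitable, well-timed strike: {reward:.2f}")
```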
6️⃣ Model Versioning on IPFS
📄 rl-models/ipfs_storage/model_versioning.py
```python
import hashlib
from datetime import datetime

import ipfshttpclient

class ImmutableModelRegistry:
    """Store every trained model version on IPFS."""

    def __init__(self, ipfs_gateway='/ip4/127.0.0.1/tcp/5001/http'):
        self.client = ipfshttpclient.connect(ipfs_gateway)

    def upload_model(self, model_path: str, metadata: dict) -> str:
        model_cid = self.client.add(model_path)['Hash']
        with open(model_path, 'rb') as f:
            sha256 = hashlib.sha256(f.read()).hexdigest()
        manifest = {
            'model_cid': model_cid,
            'version': metadata.get('version', '1.0.0'),
            'timestamp': datetime.utcnow().isoformat(),
            'sha256': sha256
        }
        # add_json returns the CID string of the stored JSON object
        manifest_cid = self.client.add_json(manifest)
        print(f"✅ Model CID: {model_cid}")
        print(f"📋 Manifest CID: {manifest_cid}")
        return model_cid

    def load_model_version(self, model_cid: str, output_path: str):
        self.client.get(model_cid, target=output_path)
        return output_path
```
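Typical usage against a local IPFS daemon; the metadata dict is whatever the training run produces, and the values below are placeholders.

```python
# Placeholder values; assumes a local IPFS daemon is listening on port 5001.
registry = ImmutableModelRegistry()
cid = registry.upload_model(
    'rl-models/trained-policy.onnx',
    metadata={'version': '1.0.0'},
)
# Later, fetch the exact same bytes by CID for an audit
registry.load_model_version(cid, output_path='./audits/')
```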
📊 Quick Reference: RL Model Actions
| Action Code | Action Name | Description | Risk Level |
|---|---|---|---|
| 0 | WAIT | Do nothing, continue observing | 🟢 Low |
| 1 | STRIKE | Execute a health trend strike | 🟡 Medium |
| 2 | AGGRESSIVE_STRIKE | Double down, higher risk/reward | 🔴 High |
| 3 | PAUSE | Emergency cooldown | ⚫ Critical |
🧠 Ghost Unit RL Engine · Immutable Model Registry · ONNX Production Ready