Commit ·
e40fd07
0
Parent(s):
first commit
Browse files- .gitignore +42 -0
- Dockerfile +21 -0
- README.md +96 -0
- main.py +140 -0
- openenv.yaml +87 -0
- pyproject.toml +28 -0
- smartgrid_mas/__init__.py +25 -0
- smartgrid_mas/engine/__init__.py +1 -0
- smartgrid_mas/engine/dynamics.py +41 -0
- smartgrid_mas/engine/ldu.py +67 -0
- smartgrid_mas/engine/market.py +102 -0
- smartgrid_mas/engine/policies.py +137 -0
- smartgrid_mas/engine/reward.py +50 -0
- smartgrid_mas/env.py +274 -0
- smartgrid_mas/models.py +98 -0
- smartgrid_mas/tasks.py +56 -0
- smartgrid_mas/train_baseline.py +85 -0
- training/minimal_train.py +5 -0
.gitignore
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
*.so
|
| 7 |
+
|
| 8 |
+
# Virtual environments
|
| 9 |
+
.venv/
|
| 10 |
+
venv/
|
| 11 |
+
env/
|
| 12 |
+
ENV/
|
| 13 |
+
|
| 14 |
+
# Packaging/build outputs
|
| 15 |
+
build/
|
| 16 |
+
dist/
|
| 17 |
+
*.egg-info/
|
| 18 |
+
*.egg
|
| 19 |
+
|
| 20 |
+
# Test and coverage artifacts
|
| 21 |
+
.pytest_cache/
|
| 22 |
+
.coverage
|
| 23 |
+
coverage.xml
|
| 24 |
+
htmlcov/
|
| 25 |
+
|
| 26 |
+
# Notebook checkpoints
|
| 27 |
+
.ipynb_checkpoints/
|
| 28 |
+
|
| 29 |
+
# OS/editor files
|
| 30 |
+
.DS_Store
|
| 31 |
+
Thumbs.db
|
| 32 |
+
.vscode/
|
| 33 |
+
.idea/
|
| 34 |
+
|
| 35 |
+
# Project artifacts
|
| 36 |
+
artifacts/
|
| 37 |
+
openenv_smartgrid_marketsim.egg-info/
|
| 38 |
+
|
| 39 |
+
# Local logs and temp files
|
| 40 |
+
*.log
|
| 41 |
+
*.tmp
|
| 42 |
+
*.swp
|
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 4 |
+
PYTHONUNBUFFERED=1
|
| 5 |
+
|
| 6 |
+
WORKDIR /app
|
| 7 |
+
|
| 8 |
+
COPY pyproject.toml /app/
|
| 9 |
+
COPY smartgrid_mas /app/smartgrid_mas
|
| 10 |
+
COPY training /app/training
|
| 11 |
+
COPY main.py /app/main.py
|
| 12 |
+
COPY openenv.yaml /app/openenv.yaml
|
| 13 |
+
|
| 14 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
| 15 |
+
pip install --no-cache-dir .
|
| 16 |
+
|
| 17 |
+
EXPOSE 7860
|
| 18 |
+
|
| 19 |
+
HEALTHCHECK --interval=30s --timeout=5s --retries=3 CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=3)"
|
| 20 |
+
|
| 21 |
+
CMD ["python", "main.py"]
|
README.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenEnv Smart Grid MarketSim
|
| 2 |
+
|
| 3 |
+
A new OpenEnv-compatible project for a hybrid hackathon narrative:
|
| 4 |
+
- Theme 1: Multi-agent interactions
|
| 5 |
+
- Theme 2: Long-horizon planning
|
| 6 |
+
- Theme 3.1: Professional world modeling
|
| 7 |
+
|
| 8 |
+
## Core idea
|
| 9 |
+
|
| 10 |
+
The simulator is intentionally multi-layered:
|
| 11 |
+
1. Agents submit strategic market bids.
|
| 12 |
+
2. A market-clearing engine computes tentative allocations and prices.
|
| 13 |
+
3. A Load Dispatching Unit (LDU) enforces physical feasibility.
|
| 14 |
+
4. Grid dynamics evolve with volatility and shock events.
|
| 15 |
+
5. Reward is computed from physically delivered outcomes.
|
| 16 |
+
|
| 17 |
+
This creates tension between strategy and reality, which is the main differentiator.
|
| 18 |
+
|
| 19 |
+
## What is implemented in this first slice
|
| 20 |
+
|
| 21 |
+
- Multi-agent bid object with supply/demand bids.
|
| 22 |
+
- Market clearing with matched quantities and clearing price.
|
| 23 |
+
- Stackelberg-style leader price signal that reshapes bid books before clearing.
|
| 24 |
+
- LDU feasibility corrections:
|
| 25 |
+
- power balance accounting
|
| 26 |
+
- EV storage constraints
|
| 27 |
+
- transmission/storage losses
|
| 28 |
+
- infeasibility correction logs
|
| 29 |
+
- Long-horizon episode flow with shock event support.
|
| 30 |
+
- Personality-aware strategy behavior (greedy, risk-averse, balanced, opportunistic).
|
| 31 |
+
- Per-agent private view metrics in step/event outputs for richer multi-actor analysis.
|
| 32 |
+
- Reward decomposition including infeasibility and blackout penalties.
|
| 33 |
+
- REST API:
|
| 34 |
+
- GET /health
|
| 35 |
+
- POST /reset
|
| 36 |
+
- POST /step
|
| 37 |
+
- GET /state
|
| 38 |
+
- GET /events
|
| 39 |
+
- GET /info
|
| 40 |
+
- POST /run-inference
|
| 41 |
+
- Baseline metric generation script with reward plot output.
|
| 42 |
+
|
| 43 |
+
## Quickstart
|
| 44 |
+
|
| 45 |
+
### Local run
|
| 46 |
+
|
| 47 |
+
```powershell
|
| 48 |
+
pip install -e .
|
| 49 |
+
python main.py
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Server starts on port 7860.
|
| 53 |
+
|
| 54 |
+
If you also want the OpenEnv framework package locally:
|
| 55 |
+
|
| 56 |
+
```powershell
|
| 57 |
+
pip install -e .[openenv]
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
### Baseline metrics and plot
|
| 61 |
+
|
| 62 |
+
```powershell
|
| 63 |
+
python -m smartgrid_mas.train_baseline --episodes 30 --outdir artifacts
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
Outputs:
|
| 67 |
+
- artifacts/baseline_metrics.csv
|
| 68 |
+
- artifacts/reward_comparison.png
|
| 69 |
+
|
| 70 |
+
Alternative (after editable install):
|
| 71 |
+
|
| 72 |
+
```powershell
|
| 73 |
+
train-baseline --episodes 30 --outdir artifacts
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Inference policy modes
|
| 77 |
+
|
| 78 |
+
The `/run-inference` endpoint supports:
|
| 79 |
+
- `random`
|
| 80 |
+
- `heuristic`
|
| 81 |
+
- `adaptive` (Stackelberg-aware)
|
| 82 |
+
|
| 83 |
+
You can also pass `personality` such as `balanced`, `risk_averse`, or `opportunistic`.
|
| 84 |
+
|
| 85 |
+
### Docker
|
| 86 |
+
|
| 87 |
+
```powershell
|
| 88 |
+
docker build -t openenv-smartgrid-marketsim .
|
| 89 |
+
docker run -p 7860:7860 openenv-smartgrid-marketsim
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
## Next implementation milestones
|
| 93 |
+
|
| 94 |
+
1. Add interactive 3D frontend scene synchronized to /events stream.
|
| 95 |
+
2. Add Unsloth or HF TRL Colab training notebook with real policy updates.
|
| 96 |
+
3. Add full judging artifact checklist in README (HF Space link, mini-blog/video, plots).
|
main.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
from fastapi import FastAPI, HTTPException, Query
|
| 5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
from smartgrid_mas.engine.policies import (
|
| 9 |
+
adaptive_stackelberg_action,
|
| 10 |
+
heuristic_joint_action,
|
| 11 |
+
random_joint_action,
|
| 12 |
+
)
|
| 13 |
+
from smartgrid_mas.env import SmartGridMarketEnv
|
| 14 |
+
from smartgrid_mas.models import JointAction, ResetRequest, StepRequest
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
app = FastAPI(
|
| 18 |
+
title="OpenEnv Smart Grid MarketSim",
|
| 19 |
+
description="Multi-agent market simulator with LDU physical feasibility layer.",
|
| 20 |
+
version="0.1.0",
|
| 21 |
+
)
|
| 22 |
+
app.add_middleware(
|
| 23 |
+
CORSMiddleware,
|
| 24 |
+
allow_origins=["*"],
|
| 25 |
+
allow_credentials=True,
|
| 26 |
+
allow_methods=["*"],
|
| 27 |
+
allow_headers=["*"],
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
env = SmartGridMarketEnv()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class InferenceRequest(BaseModel):
|
| 34 |
+
policy: str = "heuristic"
|
| 35 |
+
personality: str = "balanced"
|
| 36 |
+
task_id: str = "default"
|
| 37 |
+
seed: Optional[int] = 42
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@app.get("/")
|
| 41 |
+
def root():
|
| 42 |
+
return {
|
| 43 |
+
"name": "OpenEnv Smart Grid MarketSim",
|
| 44 |
+
"status": "ready",
|
| 45 |
+
"docs": "/docs",
|
| 46 |
+
"health": "/health",
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@app.get("/health")
|
| 51 |
+
def health():
|
| 52 |
+
return {"status": "ok", "service": "openenv-smartgrid-marketsim"}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@app.post("/reset")
|
| 56 |
+
def reset(request: ResetRequest):
|
| 57 |
+
try:
|
| 58 |
+
return env.reset(task_id=request.task_id, seed=request.seed)
|
| 59 |
+
except Exception as exc:
|
| 60 |
+
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@app.post("/step")
|
| 64 |
+
def step(request: StepRequest, session_id: Optional[str] = Query(default=None)):
|
| 65 |
+
try:
|
| 66 |
+
return env.step(action=request.action, session_id=session_id)
|
| 67 |
+
except Exception as exc:
|
| 68 |
+
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@app.get("/state")
|
| 72 |
+
def state(session_id: Optional[str] = Query(default=None)):
|
| 73 |
+
try:
|
| 74 |
+
return env.state(session_id=session_id)
|
| 75 |
+
except Exception as exc:
|
| 76 |
+
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@app.get("/events")
|
| 80 |
+
def events(session_id: Optional[str] = Query(default=None)):
|
| 81 |
+
try:
|
| 82 |
+
return env.events(session_id=session_id)
|
| 83 |
+
except Exception as exc:
|
| 84 |
+
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.get("/info")
|
| 88 |
+
def info():
|
| 89 |
+
return env.get_schema()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@app.post("/run-inference")
|
| 93 |
+
def run_inference(request: InferenceRequest):
|
| 94 |
+
reset_resp = env.reset(task_id=request.task_id, seed=request.seed)
|
| 95 |
+
sid = reset_resp.session_id
|
| 96 |
+
obs = reset_resp.observation
|
| 97 |
+
|
| 98 |
+
rng = __import__("random").Random(request.seed)
|
| 99 |
+
trajectory = []
|
| 100 |
+
while True:
|
| 101 |
+
if request.policy == "random":
|
| 102 |
+
action = random_joint_action(obs, rng)
|
| 103 |
+
elif request.policy == "adaptive":
|
| 104 |
+
action = adaptive_stackelberg_action(obs, personality=request.personality)
|
| 105 |
+
else:
|
| 106 |
+
action = heuristic_joint_action(obs, personality=request.personality)
|
| 107 |
+
|
| 108 |
+
result = env.step(action=action, session_id=sid)
|
| 109 |
+
trajectory.append(
|
| 110 |
+
{
|
| 111 |
+
"step": len(trajectory) + 1,
|
| 112 |
+
"action": action.model_dump(),
|
| 113 |
+
"reward": result.reward.model_dump(),
|
| 114 |
+
"info": result.info,
|
| 115 |
+
}
|
| 116 |
+
)
|
| 117 |
+
obs = result.observation
|
| 118 |
+
if result.done:
|
| 119 |
+
break
|
| 120 |
+
|
| 121 |
+
avg_reward = sum(t["reward"]["score"] for t in trajectory) / max(1, len(trajectory))
|
| 122 |
+
return {
|
| 123 |
+
"success": True,
|
| 124 |
+
"policy": request.policy,
|
| 125 |
+
"personality": request.personality,
|
| 126 |
+
"steps": len(trajectory),
|
| 127 |
+
"average_reward": round(avg_reward, 4),
|
| 128 |
+
"trajectory": trajectory,
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def main() -> None:
|
| 133 |
+
import uvicorn
|
| 134 |
+
|
| 135 |
+
port = int(os.getenv("PORT", "7860"))
|
| 136 |
+
uvicorn.run(app, host="0.0.0.0", port=port)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
if __name__ == "__main__":
|
| 140 |
+
main()
|
openenv.yaml
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: openenv-smartgrid-marketsim
|
| 2 |
+
version: "0.1.0"
|
| 3 |
+
description: >
|
| 4 |
+
Multi-agent smart-grid market simulation where strategic bids are cleared by a market
|
| 5 |
+
and then corrected by a load dispatching unit to enforce physical feasibility.
|
| 6 |
+
|
| 7 |
+
tags:
|
| 8 |
+
- openenv
|
| 9 |
+
- multi-agent
|
| 10 |
+
- market
|
| 11 |
+
- smart-grid
|
| 12 |
+
- long-horizon
|
| 13 |
+
|
| 14 |
+
tasks:
|
| 15 |
+
- id: default
|
| 16 |
+
display_name: Strategic Bidding With Physical Dispatch
|
| 17 |
+
difficulty: medium
|
| 18 |
+
max_steps: 24
|
| 19 |
+
description: >
|
| 20 |
+
Agents bid into a market, then LDU enforces grid feasibility. Reward is based on
|
| 21 |
+
physically delivered outcomes under volatility and shock events.
|
| 22 |
+
success_threshold: 0.62
|
| 23 |
+
|
| 24 |
+
action_space:
|
| 25 |
+
type: object
|
| 26 |
+
fields:
|
| 27 |
+
bids:
|
| 28 |
+
type: array
|
| 29 |
+
description: List of supply and demand bids from multiple agents
|
| 30 |
+
ev_charge_mwh:
|
| 31 |
+
type: float
|
| 32 |
+
min: 0.0
|
| 33 |
+
ev_discharge_mwh:
|
| 34 |
+
type: float
|
| 35 |
+
min: 0.0
|
| 36 |
+
|
| 37 |
+
observation_space:
|
| 38 |
+
type: object
|
| 39 |
+
fields:
|
| 40 |
+
demand_mwh:
|
| 41 |
+
type: float
|
| 42 |
+
renewable_availability_mwh:
|
| 43 |
+
type: float
|
| 44 |
+
peaker_capacity_mwh:
|
| 45 |
+
type: float
|
| 46 |
+
ev_storage_mwh:
|
| 47 |
+
type: float
|
| 48 |
+
last_clearing_price:
|
| 49 |
+
type: float
|
| 50 |
+
shock_active:
|
| 51 |
+
type: bool
|
| 52 |
+
|
| 53 |
+
reward:
|
| 54 |
+
type: object
|
| 55 |
+
fields:
|
| 56 |
+
score:
|
| 57 |
+
type: float
|
| 58 |
+
range: [0.0, 1.0]
|
| 59 |
+
demand_satisfaction_score:
|
| 60 |
+
type: float
|
| 61 |
+
cost_efficiency_score:
|
| 62 |
+
type: float
|
| 63 |
+
renewable_utilization_score:
|
| 64 |
+
type: float
|
| 65 |
+
stability_score:
|
| 66 |
+
type: float
|
| 67 |
+
infeasibility_penalty:
|
| 68 |
+
type: float
|
| 69 |
+
blackout_penalty:
|
| 70 |
+
type: float
|
| 71 |
+
|
| 72 |
+
environment:
|
| 73 |
+
api: REST
|
| 74 |
+
framework: FastAPI
|
| 75 |
+
base_url: http://localhost:7860
|
| 76 |
+
endpoints:
|
| 77 |
+
- GET /health
|
| 78 |
+
- POST /reset
|
| 79 |
+
- POST /step
|
| 80 |
+
- GET /state
|
| 81 |
+
- GET /events
|
| 82 |
+
- GET /info
|
| 83 |
+
- POST /run-inference
|
| 84 |
+
|
| 85 |
+
docker:
|
| 86 |
+
image: openenv-smartgrid-marketsim
|
| 87 |
+
port: 7860
|
pyproject.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "openenv-smartgrid-marketsim"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Multi-agent smart-grid market simulator with LDU feasibility layer"
|
| 5 |
+
requires-python = ">=3.10"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"fastapi>=0.115.5,<1",
|
| 8 |
+
"uvicorn>=0.32.1,<1",
|
| 9 |
+
"pydantic>=2.10.3,<3",
|
| 10 |
+
"pyyaml>=6.0.2,<7",
|
| 11 |
+
"matplotlib>=3.9.2,<4",
|
| 12 |
+
]
|
| 13 |
+
|
| 14 |
+
[project.optional-dependencies]
|
| 15 |
+
openenv = [
|
| 16 |
+
"openenv-core>=0.2.0",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
[project.scripts]
|
| 20 |
+
server = "main:main"
|
| 21 |
+
train-baseline = "smartgrid_mas.train_baseline:main"
|
| 22 |
+
|
| 23 |
+
[build-system]
|
| 24 |
+
requires = ["setuptools>=68"]
|
| 25 |
+
build-backend = "setuptools.build_meta"
|
| 26 |
+
|
| 27 |
+
[tool.setuptools.packages.find]
|
| 28 |
+
include = ["smartgrid_mas*"]
|
smartgrid_mas/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smartgrid_mas.env import SmartGridMarketEnv
|
| 2 |
+
from smartgrid_mas.models import (
|
| 3 |
+
AgentBid,
|
| 4 |
+
JointAction,
|
| 5 |
+
MarketObservation,
|
| 6 |
+
MarketReward,
|
| 7 |
+
ResetRequest,
|
| 8 |
+
ResetResponse,
|
| 9 |
+
StepRequest,
|
| 10 |
+
StepResponse,
|
| 11 |
+
StateResponse,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
"SmartGridMarketEnv",
|
| 16 |
+
"AgentBid",
|
| 17 |
+
"JointAction",
|
| 18 |
+
"MarketObservation",
|
| 19 |
+
"MarketReward",
|
| 20 |
+
"ResetRequest",
|
| 21 |
+
"ResetResponse",
|
| 22 |
+
"StepRequest",
|
| 23 |
+
"StepResponse",
|
| 24 |
+
"StateResponse",
|
| 25 |
+
]
|
smartgrid_mas/engine/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Core engine modules for market clearing, physical dispatch, reward, and dynamics."""
|
smartgrid_mas/engine/dynamics.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Dict, Tuple
|
| 3 |
+
|
| 4 |
+
from smartgrid_mas.tasks import TaskConfig
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def evolve_grid(
|
| 8 |
+
demand_mwh: float,
|
| 9 |
+
renewable_mwh: float,
|
| 10 |
+
base_price_usd_per_mwh: float,
|
| 11 |
+
step: int,
|
| 12 |
+
task: TaskConfig,
|
| 13 |
+
rng: random.Random,
|
| 14 |
+
) -> Tuple[float, float, float, Dict]:
|
| 15 |
+
shock_active = step == task.shock_step
|
| 16 |
+
|
| 17 |
+
demand_noise = rng.gauss(0.0, task.demand_volatility)
|
| 18 |
+
renewable_noise = rng.gauss(0.0, task.renewable_volatility)
|
| 19 |
+
|
| 20 |
+
next_demand = demand_mwh + task.demand_trend_mwh + demand_noise
|
| 21 |
+
next_renewable = renewable_mwh + task.renewable_trend_mwh + renewable_noise
|
| 22 |
+
|
| 23 |
+
if shock_active:
|
| 24 |
+
next_renewable = max(0.0, next_renewable - task.shock_renewable_drop)
|
| 25 |
+
|
| 26 |
+
next_demand = max(20.0, next_demand)
|
| 27 |
+
next_renewable = max(0.0, next_renewable)
|
| 28 |
+
|
| 29 |
+
implied_price = base_price_usd_per_mwh * (1.0 + max(0.0, (next_demand - next_renewable) / 300.0))
|
| 30 |
+
next_price = max(5.0, implied_price)
|
| 31 |
+
|
| 32 |
+
return (
|
| 33 |
+
round(next_demand, 3),
|
| 34 |
+
round(next_renewable, 3),
|
| 35 |
+
round(next_price, 3),
|
| 36 |
+
{
|
| 37 |
+
"shock_active": shock_active,
|
| 38 |
+
"demand_noise": round(demand_noise, 3),
|
| 39 |
+
"renewable_noise": round(renewable_noise, 3),
|
| 40 |
+
},
|
| 41 |
+
)
|
smartgrid_mas/engine/ldu.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Tuple
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def enforce_dispatch(
|
| 5 |
+
market_result: Dict,
|
| 6 |
+
demand_mwh: float,
|
| 7 |
+
renewable_available_mwh: float,
|
| 8 |
+
peaker_capacity_mwh: float,
|
| 9 |
+
ev_storage_mwh: float,
|
| 10 |
+
ev_storage_capacity_mwh: float,
|
| 11 |
+
ev_charge_mwh: float,
|
| 12 |
+
ev_discharge_mwh: float,
|
| 13 |
+
) -> Tuple[Dict, float]:
|
| 14 |
+
corrections = []
|
| 15 |
+
|
| 16 |
+
if ev_charge_mwh > 0 and ev_discharge_mwh > 0:
|
| 17 |
+
ev_discharge_mwh = 0.0
|
| 18 |
+
corrections.append("Simultaneous EV charge and discharge corrected by LDU")
|
| 19 |
+
|
| 20 |
+
max_charge = max(0.0, ev_storage_capacity_mwh - ev_storage_mwh)
|
| 21 |
+
max_discharge = max(0.0, ev_storage_mwh)
|
| 22 |
+
|
| 23 |
+
if ev_charge_mwh > max_charge:
|
| 24 |
+
corrections.append("EV charge exceeded storage headroom")
|
| 25 |
+
ev_charge_mwh = max_charge
|
| 26 |
+
|
| 27 |
+
if ev_discharge_mwh > max_discharge:
|
| 28 |
+
corrections.append("EV discharge exceeded storage state-of-charge")
|
| 29 |
+
ev_discharge_mwh = max_discharge
|
| 30 |
+
|
| 31 |
+
dispatch_from_market = market_result.get("cleared_mwh", 0.0)
|
| 32 |
+
|
| 33 |
+
renewable_dispatch = min(renewable_available_mwh, dispatch_from_market)
|
| 34 |
+
residual = max(0.0, dispatch_from_market - renewable_dispatch)
|
| 35 |
+
peaker_dispatch = min(peaker_capacity_mwh, residual)
|
| 36 |
+
|
| 37 |
+
if residual > peaker_capacity_mwh:
|
| 38 |
+
corrections.append("Market-cleared supply exceeded physical generation capacity")
|
| 39 |
+
|
| 40 |
+
gross_supply = renewable_dispatch + peaker_dispatch + ev_discharge_mwh
|
| 41 |
+
|
| 42 |
+
transmission_loss = 0.03 * gross_supply
|
| 43 |
+
storage_loss = 0.08 * ev_charge_mwh
|
| 44 |
+
|
| 45 |
+
delivered_supply = max(0.0, gross_supply - transmission_loss)
|
| 46 |
+
unmet_demand = max(0.0, demand_mwh - delivered_supply)
|
| 47 |
+
oversupply = max(0.0, delivered_supply - demand_mwh)
|
| 48 |
+
|
| 49 |
+
next_ev_storage = ev_storage_mwh + ev_charge_mwh - ev_discharge_mwh - storage_loss
|
| 50 |
+
next_ev_storage = max(0.0, min(ev_storage_capacity_mwh, next_ev_storage))
|
| 51 |
+
|
| 52 |
+
dispatch = {
|
| 53 |
+
"renewable_dispatch_mwh": round(renewable_dispatch, 3),
|
| 54 |
+
"peaker_dispatch_mwh": round(peaker_dispatch, 3),
|
| 55 |
+
"ev_charge_mwh": round(ev_charge_mwh, 3),
|
| 56 |
+
"ev_discharge_mwh": round(ev_discharge_mwh, 3),
|
| 57 |
+
"transmission_loss_mwh": round(transmission_loss, 3),
|
| 58 |
+
"storage_loss_mwh": round(storage_loss, 3),
|
| 59 |
+
"delivered_supply_mwh": round(delivered_supply, 3),
|
| 60 |
+
"unmet_demand_mwh": round(unmet_demand, 3),
|
| 61 |
+
"oversupply_mwh": round(oversupply, 3),
|
| 62 |
+
"next_ev_storage_mwh": round(next_ev_storage, 3),
|
| 63 |
+
"corrections": corrections,
|
| 64 |
+
"correction_count": len(corrections),
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
return dispatch, next_ev_storage
|
smartgrid_mas/engine/market.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List
|
| 2 |
+
|
| 3 |
+
from smartgrid_mas.models import AgentBid
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _apply_leader_signal(bids: List[AgentBid], leader_price_signal: float) -> Dict:
|
| 7 |
+
adjusted = []
|
| 8 |
+
adjusted_count = 0
|
| 9 |
+
|
| 10 |
+
for bid in bids:
|
| 11 |
+
price = float(bid.price_usd_per_mwh)
|
| 12 |
+
if bid.bid_type == "supply":
|
| 13 |
+
floor = 0.8 * leader_price_signal
|
| 14 |
+
if bid.role == "peaker_plant":
|
| 15 |
+
floor = 0.95 * leader_price_signal
|
| 16 |
+
new_price = max(price, floor)
|
| 17 |
+
else:
|
| 18 |
+
cap = 1.8 * leader_price_signal
|
| 19 |
+
floor = 0.9 * leader_price_signal
|
| 20 |
+
new_price = min(max(price, floor), cap)
|
| 21 |
+
|
| 22 |
+
if abs(new_price - price) > 1e-9:
|
| 23 |
+
adjusted_count += 1
|
| 24 |
+
|
| 25 |
+
adjusted.append(
|
| 26 |
+
{
|
| 27 |
+
"agent_id": bid.agent_id,
|
| 28 |
+
"role": bid.role,
|
| 29 |
+
"bid_type": bid.bid_type,
|
| 30 |
+
"quantity_mwh": float(bid.quantity_mwh),
|
| 31 |
+
"price_usd_per_mwh": float(new_price),
|
| 32 |
+
"raw_price_usd_per_mwh": float(price),
|
| 33 |
+
}
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
return {
|
| 37 |
+
"bids": adjusted,
|
| 38 |
+
"adjusted_count": adjusted_count,
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def clear_market(bids: List[AgentBid], leader_price_signal: float) -> Dict:
|
| 43 |
+
leader_adjusted = _apply_leader_signal(bids, leader_price_signal)
|
| 44 |
+
priced_bids = leader_adjusted["bids"]
|
| 45 |
+
|
| 46 |
+
supply = [
|
| 47 |
+
b for b in priced_bids if b["bid_type"] == "supply" and b["quantity_mwh"] > 0
|
| 48 |
+
]
|
| 49 |
+
demand = [
|
| 50 |
+
b for b in priced_bids if b["bid_type"] == "demand" and b["quantity_mwh"] > 0
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
supply_sorted = sorted(supply, key=lambda x: x["price_usd_per_mwh"])
|
| 54 |
+
demand_sorted = sorted(demand, key=lambda x: x["price_usd_per_mwh"], reverse=True)
|
| 55 |
+
|
| 56 |
+
i = 0
|
| 57 |
+
j = 0
|
| 58 |
+
cleared_qty = 0.0
|
| 59 |
+
matched = []
|
| 60 |
+
clearing_price = 0.0
|
| 61 |
+
|
| 62 |
+
while i < len(supply_sorted) and j < len(demand_sorted):
|
| 63 |
+
s = supply_sorted[i]
|
| 64 |
+
d = demand_sorted[j]
|
| 65 |
+
if s["price_usd_per_mwh"] > d["price_usd_per_mwh"]:
|
| 66 |
+
break
|
| 67 |
+
|
| 68 |
+
qty = min(s["quantity_mwh"], d["quantity_mwh"])
|
| 69 |
+
if qty <= 0:
|
| 70 |
+
break
|
| 71 |
+
|
| 72 |
+
cleared_qty += qty
|
| 73 |
+
clearing_price = (s["price_usd_per_mwh"] + d["price_usd_per_mwh"]) / 2.0
|
| 74 |
+
matched.append(
|
| 75 |
+
{
|
| 76 |
+
"supply_agent": s["agent_id"],
|
| 77 |
+
"demand_agent": d["agent_id"],
|
| 78 |
+
"quantity_mwh": round(qty, 3),
|
| 79 |
+
"price_usd_per_mwh": round(clearing_price, 3),
|
| 80 |
+
}
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
s["quantity_mwh"] -= qty
|
| 84 |
+
d["quantity_mwh"] -= qty
|
| 85 |
+
if s["quantity_mwh"] <= 1e-6:
|
| 86 |
+
i += 1
|
| 87 |
+
if d["quantity_mwh"] <= 1e-6:
|
| 88 |
+
j += 1
|
| 89 |
+
|
| 90 |
+
total_supply_offered = sum(float(b.quantity_mwh) for b in bids if b.bid_type == "supply")
|
| 91 |
+
total_demand_bid = sum(float(b.quantity_mwh) for b in bids if b.bid_type == "demand")
|
| 92 |
+
|
| 93 |
+
return {
|
| 94 |
+
"cleared_mwh": round(cleared_qty, 3),
|
| 95 |
+
"clearing_price": round(clearing_price, 3),
|
| 96 |
+
"total_supply_offered": round(total_supply_offered, 3),
|
| 97 |
+
"total_demand_bid": round(total_demand_bid, 3),
|
| 98 |
+
"leader_price_signal": round(leader_price_signal, 3),
|
| 99 |
+
"leader_adjusted_bids": leader_adjusted["adjusted_count"],
|
| 100 |
+
"post_signal_book": priced_bids,
|
| 101 |
+
"matches": matched,
|
| 102 |
+
}
|
smartgrid_mas/engine/policies.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
from smartgrid_mas.models import AgentBid, JointAction, MarketObservation
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def random_joint_action(obs: MarketObservation, rng: random.Random) -> JointAction:
|
| 7 |
+
bids = [
|
| 8 |
+
AgentBid(
|
| 9 |
+
agent_id="renewable_1",
|
| 10 |
+
role="renewable_prosumer",
|
| 11 |
+
bid_type="supply",
|
| 12 |
+
quantity_mwh=max(0.0, rng.uniform(10.0, obs.renewable_availability_mwh)),
|
| 13 |
+
price_usd_per_mwh=rng.uniform(10.0, 40.0),
|
| 14 |
+
),
|
| 15 |
+
AgentBid(
|
| 16 |
+
agent_id="peaker_1",
|
| 17 |
+
role="peaker_plant",
|
| 18 |
+
bid_type="supply",
|
| 19 |
+
quantity_mwh=rng.uniform(5.0, obs.peaker_capacity_mwh),
|
| 20 |
+
price_usd_per_mwh=rng.uniform(35.0, 80.0),
|
| 21 |
+
),
|
| 22 |
+
AgentBid(
|
| 23 |
+
agent_id="industrial_1",
|
| 24 |
+
role="industrial_load",
|
| 25 |
+
bid_type="demand",
|
| 26 |
+
quantity_mwh=rng.uniform(0.6 * obs.demand_mwh, 1.1 * obs.demand_mwh),
|
| 27 |
+
price_usd_per_mwh=rng.uniform(30.0, 95.0),
|
| 28 |
+
),
|
| 29 |
+
]
|
| 30 |
+
return JointAction(
|
| 31 |
+
bids=bids,
|
| 32 |
+
ev_charge_mwh=rng.uniform(0.0, 8.0),
|
| 33 |
+
ev_discharge_mwh=rng.uniform(0.0, 8.0),
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def heuristic_joint_action(obs: MarketObservation, personality: str = "balanced") -> JointAction:
|
| 38 |
+
demand = obs.demand_mwh
|
| 39 |
+
renewable_offer = min(obs.renewable_availability_mwh, demand * 0.55)
|
| 40 |
+
peaker_offer = min(obs.peaker_capacity_mwh, max(0.0, demand - renewable_offer))
|
| 41 |
+
|
| 42 |
+
if personality == "greedy":
|
| 43 |
+
industrial_price = 95.0
|
| 44 |
+
peaker_price = max(62.0, obs.leader_price_signal * 1.08)
|
| 45 |
+
charge = 1.0
|
| 46 |
+
discharge = 5.0 if obs.scarcity_index > 0.2 else 2.0
|
| 47 |
+
elif personality == "risk_averse":
|
| 48 |
+
industrial_price = 90.0
|
| 49 |
+
peaker_price = max(54.0, obs.leader_price_signal * 0.98)
|
| 50 |
+
charge = 5.0 if obs.renewable_availability_mwh > demand else 1.0
|
| 51 |
+
discharge = 2.0 if obs.scarcity_index > 0.35 else 0.0
|
| 52 |
+
else:
|
| 53 |
+
industrial_price = 85.0
|
| 54 |
+
peaker_price = max(58.0, obs.leader_price_signal * 1.02)
|
| 55 |
+
charge = 3.0 if obs.renewable_availability_mwh > demand else 0.0
|
| 56 |
+
discharge = 4.0 if obs.renewable_availability_mwh < 0.8 * demand else 0.0
|
| 57 |
+
|
| 58 |
+
bids = [
|
| 59 |
+
AgentBid(
|
| 60 |
+
agent_id="renewable_1",
|
| 61 |
+
role="renewable_prosumer",
|
| 62 |
+
bid_type="supply",
|
| 63 |
+
quantity_mwh=max(0.0, renewable_offer),
|
| 64 |
+
price_usd_per_mwh=20.0,
|
| 65 |
+
),
|
| 66 |
+
AgentBid(
|
| 67 |
+
agent_id="peaker_1",
|
| 68 |
+
role="peaker_plant",
|
| 69 |
+
bid_type="supply",
|
| 70 |
+
quantity_mwh=max(0.0, peaker_offer),
|
| 71 |
+
price_usd_per_mwh=peaker_price,
|
| 72 |
+
),
|
| 73 |
+
AgentBid(
|
| 74 |
+
agent_id="industrial_1",
|
| 75 |
+
role="industrial_load",
|
| 76 |
+
bid_type="demand",
|
| 77 |
+
quantity_mwh=demand,
|
| 78 |
+
price_usd_per_mwh=industrial_price,
|
| 79 |
+
),
|
| 80 |
+
]
|
| 81 |
+
|
| 82 |
+
return JointAction(bids=bids, ev_charge_mwh=charge, ev_discharge_mwh=discharge)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def adaptive_stackelberg_action(obs: MarketObservation, personality: str = "balanced") -> JointAction:
|
| 86 |
+
demand = obs.demand_mwh
|
| 87 |
+
scarcity = max(0.0, obs.scarcity_index)
|
| 88 |
+
leader = max(1.0, obs.leader_price_signal)
|
| 89 |
+
|
| 90 |
+
renewable_offer = min(obs.renewable_availability_mwh, demand * (0.52 + 0.18 * (1.0 - scarcity)))
|
| 91 |
+
peaker_offer = min(obs.peaker_capacity_mwh, max(0.0, demand - renewable_offer) * (1.0 + 0.25 * scarcity))
|
| 92 |
+
|
| 93 |
+
if personality == "opportunistic":
|
| 94 |
+
peaker_markup = 1.16
|
| 95 |
+
load_budget = 1.6
|
| 96 |
+
charge_bias = 0.5
|
| 97 |
+
elif personality == "risk_averse":
|
| 98 |
+
peaker_markup = 1.03
|
| 99 |
+
load_budget = 1.35
|
| 100 |
+
charge_bias = 1.25
|
| 101 |
+
else:
|
| 102 |
+
peaker_markup = 1.1
|
| 103 |
+
load_budget = 1.45
|
| 104 |
+
charge_bias = 1.0
|
| 105 |
+
|
| 106 |
+
bids = [
|
| 107 |
+
AgentBid(
|
| 108 |
+
agent_id="renewable_1",
|
| 109 |
+
role="renewable_prosumer",
|
| 110 |
+
bid_type="supply",
|
| 111 |
+
quantity_mwh=max(0.0, renewable_offer),
|
| 112 |
+
price_usd_per_mwh=max(15.0, leader * 0.82),
|
| 113 |
+
),
|
| 114 |
+
AgentBid(
|
| 115 |
+
agent_id="peaker_1",
|
| 116 |
+
role="peaker_plant",
|
| 117 |
+
bid_type="supply",
|
| 118 |
+
quantity_mwh=max(0.0, peaker_offer),
|
| 119 |
+
price_usd_per_mwh=max(42.0, leader * peaker_markup),
|
| 120 |
+
),
|
| 121 |
+
AgentBid(
|
| 122 |
+
agent_id="industrial_1",
|
| 123 |
+
role="industrial_load",
|
| 124 |
+
bid_type="demand",
|
| 125 |
+
quantity_mwh=demand,
|
| 126 |
+
price_usd_per_mwh=leader * load_budget,
|
| 127 |
+
),
|
| 128 |
+
]
|
| 129 |
+
|
| 130 |
+
if scarcity > 0.25:
|
| 131 |
+
discharge = min(obs.ev_storage_mwh, 3.0 + 8.0 * scarcity)
|
| 132 |
+
charge = 0.0
|
| 133 |
+
else:
|
| 134 |
+
charge = min(obs.ev_storage_capacity_mwh - obs.ev_storage_mwh, 2.0 * charge_bias)
|
| 135 |
+
discharge = 0.0
|
| 136 |
+
|
| 137 |
+
return JointAction(bids=bids, ev_charge_mwh=max(0.0, charge), ev_discharge_mwh=max(0.0, discharge))
|
smartgrid_mas/engine/reward.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smartgrid_mas.models import MarketReward
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def compute_reward(dispatch: dict, clearing_price: float, demand_mwh: float, prior_gap: float) -> MarketReward:
|
| 5 |
+
delivered = dispatch["delivered_supply_mwh"]
|
| 6 |
+
unmet = dispatch["unmet_demand_mwh"]
|
| 7 |
+
oversupply = dispatch["oversupply_mwh"]
|
| 8 |
+
correction_count = dispatch["correction_count"]
|
| 9 |
+
|
| 10 |
+
demand_satisfaction = min(1.0, delivered / max(demand_mwh, 1e-6))
|
| 11 |
+
|
| 12 |
+
unit_cost = clearing_price if clearing_price > 0 else 45.0
|
| 13 |
+
total_cost = delivered * unit_cost
|
| 14 |
+
cost_efficiency = max(0.0, 1.0 - total_cost / 12000.0)
|
| 15 |
+
|
| 16 |
+
renewable_utilization = min(1.0, dispatch["renewable_dispatch_mwh"] / max(delivered, 1e-6))
|
| 17 |
+
|
| 18 |
+
current_gap = delivered - demand_mwh
|
| 19 |
+
stability_delta = abs(current_gap - prior_gap)
|
| 20 |
+
stability_score = max(0.0, 1.0 - stability_delta / 80.0)
|
| 21 |
+
|
| 22 |
+
infeasibility_penalty = min(1.0, correction_count * 0.15 + dispatch["storage_loss_mwh"] * 0.01)
|
| 23 |
+
blackout_penalty = min(1.0, unmet / max(demand_mwh, 1e-6))
|
| 24 |
+
|
| 25 |
+
raw = (
|
| 26 |
+
0.34 * demand_satisfaction
|
| 27 |
+
+ 0.23 * cost_efficiency
|
| 28 |
+
+ 0.18 * renewable_utilization
|
| 29 |
+
+ 0.15 * stability_score
|
| 30 |
+
- 0.2 * infeasibility_penalty
|
| 31 |
+
- 0.2 * blackout_penalty
|
| 32 |
+
- 0.03 * min(1.0, oversupply / max(demand_mwh, 1e-6))
|
| 33 |
+
)
|
| 34 |
+
score = max(0.0, min(1.0, raw))
|
| 35 |
+
|
| 36 |
+
reason = (
|
| 37 |
+
f"delivered={delivered:.1f} demand={demand_mwh:.1f} unmet={unmet:.1f} "
|
| 38 |
+
f"price={unit_cost:.1f} corrections={correction_count}"
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
return MarketReward(
|
| 42 |
+
score=score,
|
| 43 |
+
reason=reason,
|
| 44 |
+
demand_satisfaction_score=demand_satisfaction,
|
| 45 |
+
cost_efficiency_score=cost_efficiency,
|
| 46 |
+
renewable_utilization_score=renewable_utilization,
|
| 47 |
+
stability_score=stability_score,
|
| 48 |
+
infeasibility_penalty=infeasibility_penalty,
|
| 49 |
+
blackout_penalty=blackout_penalty,
|
| 50 |
+
)
|
smartgrid_mas/env.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import uuid
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Dict, Optional
|
| 5 |
+
|
| 6 |
+
from smartgrid_mas.engine.dynamics import evolve_grid
|
| 7 |
+
from smartgrid_mas.engine.ldu import enforce_dispatch
|
| 8 |
+
from smartgrid_mas.engine.market import clear_market
|
| 9 |
+
from smartgrid_mas.engine.reward import compute_reward
|
| 10 |
+
from smartgrid_mas.models import (
|
| 11 |
+
JointAction,
|
| 12 |
+
MarketObservation,
|
| 13 |
+
MarketReward,
|
| 14 |
+
ResetResponse,
|
| 15 |
+
StateResponse,
|
| 16 |
+
StepResponse,
|
| 17 |
+
)
|
| 18 |
+
from smartgrid_mas.tasks import TaskConfig, get_task
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
SCHEMA_INFO = (
|
| 22 |
+
"Provide a JointAction with supply and demand bids from multiple agents plus EV charge/discharge "
|
| 23 |
+
"commands. Market clears bids first, then LDU enforces physical feasibility and logs corrections."
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
|
| 28 |
+
class Session:
|
| 29 |
+
task: TaskConfig
|
| 30 |
+
rng: random.Random
|
| 31 |
+
session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
|
| 32 |
+
step: int = 0
|
| 33 |
+
done: bool = False
|
| 34 |
+
demand_mwh: float = 0.0
|
| 35 |
+
renewable_mwh: float = 0.0
|
| 36 |
+
peaker_capacity_mwh: float = 0.0
|
| 37 |
+
ev_storage_mwh: float = 0.0
|
| 38 |
+
ev_storage_capacity_mwh: float = 0.0
|
| 39 |
+
base_price: float = 0.0
|
| 40 |
+
last_clearing_price: float = 0.0
|
| 41 |
+
prior_gap: float = 0.0
|
| 42 |
+
correction_count: int = 0
|
| 43 |
+
infeasible_actions: int = 0
|
| 44 |
+
total_demand_met: float = 0.0
|
| 45 |
+
total_cost: float = 0.0
|
| 46 |
+
reward_history: list = field(default_factory=list)
|
| 47 |
+
event_log: list = field(default_factory=list)
|
| 48 |
+
shock_seen: bool = False
|
| 49 |
+
personalities: Dict[str, str] = field(default_factory=dict)
|
| 50 |
+
|
| 51 |
+
def to_observation(self, hint: Optional[str] = None, error_message: Optional[str] = None) -> MarketObservation:
|
| 52 |
+
public_signal = (
|
| 53 |
+
"Shock regime active; renewable volatility is elevated"
|
| 54 |
+
if self.shock_seen
|
| 55 |
+
else "Normal regime; optimize demand satisfaction with low infeasibility"
|
| 56 |
+
)
|
| 57 |
+
return MarketObservation(
|
| 58 |
+
step=self.step,
|
| 59 |
+
steps_taken=self.step,
|
| 60 |
+
max_steps=self.task.max_steps,
|
| 61 |
+
demand_mwh=round(self.demand_mwh, 3),
|
| 62 |
+
renewable_availability_mwh=round(self.renewable_mwh, 3),
|
| 63 |
+
peaker_capacity_mwh=round(self.peaker_capacity_mwh, 3),
|
| 64 |
+
ev_storage_mwh=round(self.ev_storage_mwh, 3),
|
| 65 |
+
ev_storage_capacity_mwh=round(self.ev_storage_capacity_mwh, 3),
|
| 66 |
+
last_clearing_price=round(self.last_clearing_price, 3),
|
| 67 |
+
leader_price_signal=round(self.base_price, 3),
|
| 68 |
+
scarcity_index=round(max(0.0, (self.demand_mwh - self.renewable_mwh) / max(self.demand_mwh, 1e-6)), 4),
|
| 69 |
+
shock_active=self.shock_seen,
|
| 70 |
+
public_signal=public_signal,
|
| 71 |
+
schema_info=SCHEMA_INFO,
|
| 72 |
+
hint=hint,
|
| 73 |
+
error_message=error_message,
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class SmartGridMarketEnv:
|
| 78 |
+
def __init__(self):
|
| 79 |
+
self._sessions: Dict[str, Session] = {}
|
| 80 |
+
self._latest_session_id: Optional[str] = None
|
| 81 |
+
|
| 82 |
+
def reset(self, task_id: str = "default", seed: Optional[int] = None) -> ResetResponse:
|
| 83 |
+
task = get_task(task_id)
|
| 84 |
+
rng = random.Random(seed)
|
| 85 |
+
session = Session(
|
| 86 |
+
task=task,
|
| 87 |
+
rng=rng,
|
| 88 |
+
demand_mwh=task.initial_demand_mwh,
|
| 89 |
+
renewable_mwh=task.initial_renewable_mwh,
|
| 90 |
+
peaker_capacity_mwh=task.peaker_capacity_mwh,
|
| 91 |
+
ev_storage_mwh=task.ev_storage_mwh,
|
| 92 |
+
ev_storage_capacity_mwh=task.ev_storage_capacity_mwh,
|
| 93 |
+
base_price=task.base_price_usd_per_mwh,
|
| 94 |
+
last_clearing_price=task.base_price_usd_per_mwh,
|
| 95 |
+
personalities={
|
| 96 |
+
"renewable_1": rng.choice(["opportunistic", "balanced"]),
|
| 97 |
+
"peaker_1": rng.choice(["greedy", "balanced", "risk_averse"]),
|
| 98 |
+
"industrial_1": rng.choice(["risk_averse", "balanced"]),
|
| 99 |
+
"ev_1": rng.choice(["balanced", "risk_averse"]),
|
| 100 |
+
},
|
| 101 |
+
)
|
| 102 |
+
self._sessions[session.session_id] = session
|
| 103 |
+
self._latest_session_id = session.session_id
|
| 104 |
+
|
| 105 |
+
return ResetResponse(
|
| 106 |
+
session_id=session.session_id,
|
| 107 |
+
task_id=task.task_id,
|
| 108 |
+
task_description=task.description,
|
| 109 |
+
schema_info=SCHEMA_INFO,
|
| 110 |
+
steps_taken=0,
|
| 111 |
+
observation=session.to_observation(hint=task.hint),
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
def step(self, action: JointAction, session_id: Optional[str] = None) -> StepResponse:
|
| 115 |
+
session = self._get_session(session_id)
|
| 116 |
+
if session.done:
|
| 117 |
+
return StepResponse(
|
| 118 |
+
observation=session.to_observation(error_message="Episode finished. Call reset."),
|
| 119 |
+
reward=compute_reward(
|
| 120 |
+
dispatch={
|
| 121 |
+
"delivered_supply_mwh": 0.0,
|
| 122 |
+
"unmet_demand_mwh": 0.0,
|
| 123 |
+
"oversupply_mwh": 0.0,
|
| 124 |
+
"correction_count": 0,
|
| 125 |
+
"storage_loss_mwh": 0.0,
|
| 126 |
+
"renewable_dispatch_mwh": 0.0,
|
| 127 |
+
},
|
| 128 |
+
clearing_price=session.last_clearing_price,
|
| 129 |
+
demand_mwh=max(1.0, session.demand_mwh),
|
| 130 |
+
prior_gap=0.0,
|
| 131 |
+
),
|
| 132 |
+
done=True,
|
| 133 |
+
truncated=False,
|
| 134 |
+
info={"error": "episode_done"},
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
market = clear_market(action.bids, leader_price_signal=session.base_price)
|
| 138 |
+
dispatch, next_storage = enforce_dispatch(
|
| 139 |
+
market_result=market,
|
| 140 |
+
demand_mwh=session.demand_mwh,
|
| 141 |
+
renewable_available_mwh=session.renewable_mwh,
|
| 142 |
+
peaker_capacity_mwh=session.peaker_capacity_mwh,
|
| 143 |
+
ev_storage_mwh=session.ev_storage_mwh,
|
| 144 |
+
ev_storage_capacity_mwh=session.ev_storage_capacity_mwh,
|
| 145 |
+
ev_charge_mwh=action.ev_charge_mwh,
|
| 146 |
+
ev_discharge_mwh=action.ev_discharge_mwh,
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
reward = compute_reward(
|
| 150 |
+
dispatch=dispatch,
|
| 151 |
+
clearing_price=market["clearing_price"] or session.base_price,
|
| 152 |
+
demand_mwh=session.demand_mwh,
|
| 153 |
+
prior_gap=session.prior_gap,
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
session.step += 1
|
| 157 |
+
session.ev_storage_mwh = next_storage
|
| 158 |
+
session.last_clearing_price = market["clearing_price"] or session.base_price
|
| 159 |
+
session.prior_gap = dispatch["delivered_supply_mwh"] - session.demand_mwh
|
| 160 |
+
session.correction_count += dispatch["correction_count"]
|
| 161 |
+
if dispatch["correction_count"] > 0:
|
| 162 |
+
session.infeasible_actions += 1
|
| 163 |
+
session.total_demand_met += min(session.demand_mwh, dispatch["delivered_supply_mwh"])
|
| 164 |
+
session.total_cost += dispatch["delivered_supply_mwh"] * session.last_clearing_price
|
| 165 |
+
session.reward_history.append(reward.score)
|
| 166 |
+
|
| 167 |
+
private_views = self._build_private_agent_views(session, market, dispatch)
|
| 168 |
+
|
| 169 |
+
next_demand, next_renewable, next_price, dyn_info = evolve_grid(
|
| 170 |
+
demand_mwh=session.demand_mwh,
|
| 171 |
+
renewable_mwh=session.renewable_mwh,
|
| 172 |
+
base_price_usd_per_mwh=session.base_price,
|
| 173 |
+
step=session.step,
|
| 174 |
+
task=session.task,
|
| 175 |
+
rng=session.rng,
|
| 176 |
+
)
|
| 177 |
+
session.demand_mwh = next_demand
|
| 178 |
+
session.renewable_mwh = next_renewable
|
| 179 |
+
session.base_price = next_price
|
| 180 |
+
session.shock_seen = session.shock_seen or dyn_info["shock_active"]
|
| 181 |
+
|
| 182 |
+
event = {
|
| 183 |
+
"step": session.step,
|
| 184 |
+
"market": market,
|
| 185 |
+
"dispatch": dispatch,
|
| 186 |
+
"reward": reward.model_dump(),
|
| 187 |
+
"dynamics": dyn_info,
|
| 188 |
+
"agent_private_views": private_views,
|
| 189 |
+
}
|
| 190 |
+
session.event_log.append(event)
|
| 191 |
+
|
| 192 |
+
done = session.step >= session.task.max_steps
|
| 193 |
+
session.done = done
|
| 194 |
+
|
| 195 |
+
info = {
|
| 196 |
+
"market": market,
|
| 197 |
+
"dispatch": dispatch,
|
| 198 |
+
"dynamics": dyn_info,
|
| 199 |
+
"agent_private_views": private_views,
|
| 200 |
+
"summary": {
|
| 201 |
+
"avg_reward": round(sum(session.reward_history) / len(session.reward_history), 4),
|
| 202 |
+
"total_demand_met_mwh": round(session.total_demand_met, 3),
|
| 203 |
+
"total_cost_usd": round(session.total_cost, 3),
|
| 204 |
+
"infeasible_actions": session.infeasible_actions,
|
| 205 |
+
"ldu_corrections": session.correction_count,
|
| 206 |
+
"leader_adjusted_bids": market["leader_adjusted_bids"],
|
| 207 |
+
"personality_map": session.personalities,
|
| 208 |
+
},
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
return StepResponse(
|
| 212 |
+
observation=session.to_observation(),
|
| 213 |
+
reward=reward,
|
| 214 |
+
done=done,
|
| 215 |
+
truncated=False,
|
| 216 |
+
info=info,
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
def state(self, session_id: Optional[str] = None) -> StateResponse:
|
| 220 |
+
session = self._get_session(session_id)
|
| 221 |
+
return StateResponse(
|
| 222 |
+
current_task_id=session.task.task_id,
|
| 223 |
+
steps_taken=session.step,
|
| 224 |
+
episode_done=session.done,
|
| 225 |
+
observation=session.to_observation(),
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
def events(self, session_id: Optional[str] = None) -> Dict:
|
| 229 |
+
session = self._get_session(session_id)
|
| 230 |
+
return {"session_id": session.session_id, "events": session.event_log[-50:]}
|
| 231 |
+
|
| 232 |
+
def get_schema(self) -> Dict:
|
| 233 |
+
return {
|
| 234 |
+
"action_schema": JointAction.model_json_schema(),
|
| 235 |
+
"observation_schema": MarketObservation.model_json_schema(),
|
| 236 |
+
"reward_schema": MarketReward.model_json_schema(),
|
| 237 |
+
"tasks": ["default"],
|
| 238 |
+
"notes": "Hybrid Theme 1+2+3.1 baseline implementation with LDU as core physical layer",
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
def _get_session(self, session_id: Optional[str]) -> Session:
|
| 242 |
+
sid = session_id or self._latest_session_id
|
| 243 |
+
if sid is None or sid not in self._sessions:
|
| 244 |
+
raise KeyError("No active session. Call /reset first.")
|
| 245 |
+
return self._sessions[sid]
|
| 246 |
+
|
| 247 |
+
def _build_private_agent_views(self, session: Session, market: Dict, dispatch: Dict) -> Dict[str, Dict]:
|
| 248 |
+
scarcity = max(0.0, (session.demand_mwh - session.renewable_mwh) / max(session.demand_mwh, 1e-6))
|
| 249 |
+
spread = max(0.0, session.base_price - session.last_clearing_price)
|
| 250 |
+
return {
|
| 251 |
+
"renewable_1": {
|
| 252 |
+
"personality": session.personalities.get("renewable_1", "balanced"),
|
| 253 |
+
"curtailment_risk": round(max(0.0, session.renewable_mwh - market.get("cleared_mwh", 0.0)), 3),
|
| 254 |
+
"forecast_bias": round(session.rng.uniform(-3.0, 3.0), 3),
|
| 255 |
+
},
|
| 256 |
+
"peaker_1": {
|
| 257 |
+
"personality": session.personalities.get("peaker_1", "balanced"),
|
| 258 |
+
"scarcity_index": round(scarcity, 4),
|
| 259 |
+
"margin_signal": round(market.get("clearing_price", session.base_price) - 42.0, 3),
|
| 260 |
+
},
|
| 261 |
+
"industrial_1": {
|
| 262 |
+
"personality": session.personalities.get("industrial_1", "balanced"),
|
| 263 |
+
"budget_pressure": round(
|
| 264 |
+
market.get("clearing_price", session.base_price) / max(session.base_price, 1e-6),
|
| 265 |
+
4,
|
| 266 |
+
),
|
| 267 |
+
"unmet_demand_mwh": dispatch["unmet_demand_mwh"],
|
| 268 |
+
},
|
| 269 |
+
"ev_1": {
|
| 270 |
+
"personality": session.personalities.get("ev_1", "balanced"),
|
| 271 |
+
"soc_ratio": round(session.ev_storage_mwh / max(session.ev_storage_capacity_mwh, 1e-6), 4),
|
| 272 |
+
"arbitrage_spread": round(spread, 3),
|
| 273 |
+
},
|
| 274 |
+
}
|
smartgrid_mas/models.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Literal, Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
AgentRole = Literal[
|
| 7 |
+
"renewable_prosumer",
|
| 8 |
+
"ev_aggregator",
|
| 9 |
+
"peaker_plant",
|
| 10 |
+
"industrial_load",
|
| 11 |
+
]
|
| 12 |
+
BidType = Literal["supply", "demand"]
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AgentBid(BaseModel):
|
| 16 |
+
agent_id: str = Field(..., description="Unique agent id")
|
| 17 |
+
role: AgentRole = Field(..., description="Agent role")
|
| 18 |
+
bid_type: BidType = Field(..., description="Supply or demand bid")
|
| 19 |
+
quantity_mwh: float = Field(..., ge=0.0, description="Bid quantity in MWh")
|
| 20 |
+
price_usd_per_mwh: float = Field(..., ge=0.0, description="Bid price")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class JointAction(BaseModel):
|
| 24 |
+
bids: List[AgentBid] = Field(default_factory=list, description="Bids from all agents")
|
| 25 |
+
ev_charge_mwh: float = Field(0.0, ge=0.0, description="EV fleet charge command")
|
| 26 |
+
ev_discharge_mwh: float = Field(0.0, ge=0.0, description="EV fleet discharge command")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class MarketObservation(BaseModel):
|
| 30 |
+
step: int
|
| 31 |
+
steps_taken: int
|
| 32 |
+
max_steps: int
|
| 33 |
+
demand_mwh: float
|
| 34 |
+
renewable_availability_mwh: float
|
| 35 |
+
peaker_capacity_mwh: float
|
| 36 |
+
ev_storage_mwh: float
|
| 37 |
+
ev_storage_capacity_mwh: float
|
| 38 |
+
last_clearing_price: float
|
| 39 |
+
leader_price_signal: float
|
| 40 |
+
scarcity_index: float
|
| 41 |
+
shock_active: bool
|
| 42 |
+
public_signal: str
|
| 43 |
+
schema_info: str
|
| 44 |
+
hint: Optional[str] = None
|
| 45 |
+
error_message: Optional[str] = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class MarketReward(BaseModel):
|
| 49 |
+
score: float = Field(..., ge=0.0, le=1.0)
|
| 50 |
+
reason: str
|
| 51 |
+
demand_satisfaction_score: float
|
| 52 |
+
cost_efficiency_score: float
|
| 53 |
+
renewable_utilization_score: float
|
| 54 |
+
stability_score: float
|
| 55 |
+
infeasibility_penalty: float
|
| 56 |
+
blackout_penalty: float
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class ResetRequest(BaseModel):
|
| 60 |
+
task_id: str = "default"
|
| 61 |
+
seed: Optional[int] = None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class ResetResponse(BaseModel):
|
| 65 |
+
session_id: str
|
| 66 |
+
task_id: str
|
| 67 |
+
task_description: str
|
| 68 |
+
schema_info: str
|
| 69 |
+
steps_taken: int
|
| 70 |
+
observation: MarketObservation
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class StepRequest(BaseModel):
|
| 74 |
+
action: JointAction
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class StepResponse(BaseModel):
|
| 78 |
+
observation: MarketObservation
|
| 79 |
+
reward: MarketReward
|
| 80 |
+
done: bool
|
| 81 |
+
truncated: bool
|
| 82 |
+
info: Dict
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class StateResponse(BaseModel):
|
| 86 |
+
current_task_id: str
|
| 87 |
+
steps_taken: int
|
| 88 |
+
episode_done: bool
|
| 89 |
+
observation: Optional[MarketObservation] = None
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class EpisodeSummary(BaseModel):
|
| 93 |
+
average_reward: float
|
| 94 |
+
total_demand_met: float
|
| 95 |
+
total_cost: float
|
| 96 |
+
infeasible_actions: int
|
| 97 |
+
corrections: int
|
| 98 |
+
shock_response_score: float
|
smartgrid_mas/tasks.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@dataclass
|
| 6 |
+
class TaskConfig:
|
| 7 |
+
task_id: str
|
| 8 |
+
description: str
|
| 9 |
+
max_steps: int
|
| 10 |
+
initial_demand_mwh: float
|
| 11 |
+
initial_renewable_mwh: float
|
| 12 |
+
peaker_capacity_mwh: float
|
| 13 |
+
ev_storage_mwh: float
|
| 14 |
+
ev_storage_capacity_mwh: float
|
| 15 |
+
base_price_usd_per_mwh: float
|
| 16 |
+
demand_trend_mwh: float
|
| 17 |
+
renewable_trend_mwh: float
|
| 18 |
+
demand_volatility: float
|
| 19 |
+
renewable_volatility: float
|
| 20 |
+
shock_step: int
|
| 21 |
+
shock_renewable_drop: float
|
| 22 |
+
hint: str
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
TASKS: Dict[str, TaskConfig] = {
|
| 26 |
+
"default": TaskConfig(
|
| 27 |
+
task_id="default",
|
| 28 |
+
description=(
|
| 29 |
+
"Hybrid multi-agent smart-grid market simulation. Agents submit strategic bids, "
|
| 30 |
+
"market clears, and LDU enforces physical feasibility with correction logs."
|
| 31 |
+
),
|
| 32 |
+
max_steps=24,
|
| 33 |
+
initial_demand_mwh=120.0,
|
| 34 |
+
initial_renewable_mwh=70.0,
|
| 35 |
+
peaker_capacity_mwh=85.0,
|
| 36 |
+
ev_storage_mwh=25.0,
|
| 37 |
+
ev_storage_capacity_mwh=60.0,
|
| 38 |
+
base_price_usd_per_mwh=45.0,
|
| 39 |
+
demand_trend_mwh=1.2,
|
| 40 |
+
renewable_trend_mwh=-0.6,
|
| 41 |
+
demand_volatility=4.0,
|
| 42 |
+
renewable_volatility=6.0,
|
| 43 |
+
shock_step=16,
|
| 44 |
+
shock_renewable_drop=22.0,
|
| 45 |
+
hint=(
|
| 46 |
+
"Coordinate bids with expected dispatch feasibility. Market-optimal bids that violate "
|
| 47 |
+
"physical constraints are corrected by LDU and reduce reward."
|
| 48 |
+
),
|
| 49 |
+
)
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def get_task(task_id: str) -> TaskConfig:
|
| 54 |
+
if task_id not in TASKS:
|
| 55 |
+
raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASKS.keys())}")
|
| 56 |
+
return TASKS[task_id]
|
smartgrid_mas/train_baseline.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import csv
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
|
| 9 |
+
from smartgrid_mas.engine.policies import (
|
| 10 |
+
adaptive_stackelberg_action,
|
| 11 |
+
heuristic_joint_action,
|
| 12 |
+
random_joint_action,
|
| 13 |
+
)
|
| 14 |
+
from smartgrid_mas.env import SmartGridMarketEnv
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def run_episode(env: SmartGridMarketEnv, policy_name: str, seed: int) -> float:
|
| 18 |
+
reset = env.reset(task_id="default", seed=seed)
|
| 19 |
+
sid = reset.session_id
|
| 20 |
+
obs = reset.observation
|
| 21 |
+
rng = random.Random(seed)
|
| 22 |
+
|
| 23 |
+
rewards: List[float] = []
|
| 24 |
+
while True:
|
| 25 |
+
if policy_name == "random":
|
| 26 |
+
action = random_joint_action(obs, rng)
|
| 27 |
+
elif policy_name == "adaptive":
|
| 28 |
+
action = adaptive_stackelberg_action(obs, personality="balanced")
|
| 29 |
+
else:
|
| 30 |
+
action = heuristic_joint_action(obs, personality="balanced")
|
| 31 |
+
|
| 32 |
+
step = env.step(action=action, session_id=sid)
|
| 33 |
+
rewards.append(step.reward.score)
|
| 34 |
+
obs = step.observation
|
| 35 |
+
if step.done:
|
| 36 |
+
break
|
| 37 |
+
|
| 38 |
+
return sum(rewards) / max(1, len(rewards))
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def main() -> None:
|
| 42 |
+
parser = argparse.ArgumentParser(description="Minimal baseline training/eval runner")
|
| 43 |
+
parser.add_argument("--episodes", type=int, default=30)
|
| 44 |
+
parser.add_argument("--outdir", type=str, default="artifacts")
|
| 45 |
+
args = parser.parse_args()
|
| 46 |
+
|
| 47 |
+
os.makedirs(args.outdir, exist_ok=True)
|
| 48 |
+
|
| 49 |
+
env = SmartGridMarketEnv()
|
| 50 |
+
random_curve = []
|
| 51 |
+
heuristic_curve = []
|
| 52 |
+
adaptive_curve = []
|
| 53 |
+
|
| 54 |
+
for ep in range(args.episodes):
|
| 55 |
+
random_curve.append(run_episode(env, "random", seed=1000 + ep))
|
| 56 |
+
heuristic_curve.append(run_episode(env, "heuristic", seed=2000 + ep))
|
| 57 |
+
adaptive_curve.append(run_episode(env, "adaptive", seed=3000 + ep))
|
| 58 |
+
|
| 59 |
+
csv_path = os.path.join(args.outdir, "baseline_metrics.csv")
|
| 60 |
+
with open(csv_path, "w", newline="", encoding="utf-8") as f:
|
| 61 |
+
writer = csv.writer(f)
|
| 62 |
+
writer.writerow(["episode", "random_avg_reward", "heuristic_avg_reward", "adaptive_avg_reward"])
|
| 63 |
+
for i, (r, h, a) in enumerate(zip(random_curve, heuristic_curve, adaptive_curve), start=1):
|
| 64 |
+
writer.writerow([i, round(r, 6), round(h, 6), round(a, 6)])
|
| 65 |
+
|
| 66 |
+
plt.figure(figsize=(10, 5))
|
| 67 |
+
plt.plot(range(1, args.episodes + 1), random_curve, label="Random baseline")
|
| 68 |
+
plt.plot(range(1, args.episodes + 1), heuristic_curve, label="Heuristic improved")
|
| 69 |
+
plt.plot(range(1, args.episodes + 1), adaptive_curve, label="Adaptive Stackelberg")
|
| 70 |
+
plt.xlabel("Episode")
|
| 71 |
+
plt.ylabel("Average reward")
|
| 72 |
+
plt.title("Baseline vs Improved Policy Reward")
|
| 73 |
+
plt.legend()
|
| 74 |
+
plt.grid(alpha=0.25)
|
| 75 |
+
|
| 76 |
+
fig_path = os.path.join(args.outdir, "reward_comparison.png")
|
| 77 |
+
plt.tight_layout()
|
| 78 |
+
plt.savefig(fig_path, dpi=160)
|
| 79 |
+
|
| 80 |
+
print(f"Saved metrics CSV: {csv_path}")
|
| 81 |
+
print(f"Saved reward plot: {fig_path}")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
if __name__ == "__main__":
|
| 85 |
+
main()
|
training/minimal_train.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smartgrid_mas.train_baseline import main
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
if __name__ == "__main__":
|
| 5 |
+
main()
|