Not-OmKar commited on
Commit
e40fd07
·
0 Parent(s):

first commit

Browse files
.gitignore ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ *.so
7
+
8
+ # Virtual environments
9
+ .venv/
10
+ venv/
11
+ env/
12
+ ENV/
13
+
14
+ # Packaging/build outputs
15
+ build/
16
+ dist/
17
+ *.egg-info/
18
+ *.egg
19
+
20
+ # Test and coverage artifacts
21
+ .pytest_cache/
22
+ .coverage
23
+ coverage.xml
24
+ htmlcov/
25
+
26
+ # Notebook checkpoints
27
+ .ipynb_checkpoints/
28
+
29
+ # OS/editor files
30
+ .DS_Store
31
+ Thumbs.db
32
+ .vscode/
33
+ .idea/
34
+
35
+ # Project artifacts
36
+ artifacts/
37
+ openenv_smartgrid_marketsim.egg-info/
38
+
39
+ # Local logs and temp files
40
+ *.log
41
+ *.tmp
42
+ *.swp
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1
5
+
6
+ WORKDIR /app
7
+
8
+ COPY pyproject.toml /app/
9
+ COPY smartgrid_mas /app/smartgrid_mas
10
+ COPY training /app/training
11
+ COPY main.py /app/main.py
12
+ COPY openenv.yaml /app/openenv.yaml
13
+
14
+ RUN pip install --no-cache-dir --upgrade pip && \
15
+ pip install --no-cache-dir .
16
+
17
+ EXPOSE 7860
18
+
19
+ HEALTHCHECK --interval=30s --timeout=5s --retries=3 CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=3)"
20
+
21
+ CMD ["python", "main.py"]
README.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenEnv Smart Grid MarketSim
2
+
3
+ A new OpenEnv-compatible project for a hybrid hackathon narrative:
4
+ - Theme 1: Multi-agent interactions
5
+ - Theme 2: Long-horizon planning
6
+ - Theme 3.1: Professional world modeling
7
+
8
+ ## Core idea
9
+
10
+ The simulator is intentionally multi-layered:
11
+ 1. Agents submit strategic market bids.
12
+ 2. A market-clearing engine computes tentative allocations and prices.
13
+ 3. A Load Dispatching Unit (LDU) enforces physical feasibility.
14
+ 4. Grid dynamics evolve with volatility and shock events.
15
+ 5. Reward is computed from physically delivered outcomes.
16
+
17
+ This creates tension between strategy and reality, which is the main differentiator.
18
+
19
+ ## What is implemented in this first slice
20
+
21
+ - Multi-agent bid object with supply/demand bids.
22
+ - Market clearing with matched quantities and clearing price.
23
+ - Stackelberg-style leader price signal that reshapes bid books before clearing.
24
+ - LDU feasibility corrections:
25
+ - power balance accounting
26
+ - EV storage constraints
27
+ - transmission/storage losses
28
+ - infeasibility correction logs
29
+ - Long-horizon episode flow with shock event support.
30
+ - Personality-aware strategy behavior (greedy, risk-averse, balanced, opportunistic).
31
+ - Per-agent private view metrics in step/event outputs for richer multi-actor analysis.
32
+ - Reward decomposition including infeasibility and blackout penalties.
33
+ - REST API:
34
+ - GET /health
35
+ - POST /reset
36
+ - POST /step
37
+ - GET /state
38
+ - GET /events
39
+ - GET /info
40
+ - POST /run-inference
41
+ - Baseline metric generation script with reward plot output.
42
+
43
+ ## Quickstart
44
+
45
+ ### Local run
46
+
47
+ ```powershell
48
+ pip install -e .
49
+ python main.py
50
+ ```
51
+
52
+ Server starts on port 7860.
53
+
54
+ If you also want the OpenEnv framework package locally:
55
+
56
+ ```powershell
57
+ pip install -e .[openenv]
58
+ ```
59
+
60
+ ### Baseline metrics and plot
61
+
62
+ ```powershell
63
+ python -m smartgrid_mas.train_baseline --episodes 30 --outdir artifacts
64
+ ```
65
+
66
+ Outputs:
67
+ - artifacts/baseline_metrics.csv
68
+ - artifacts/reward_comparison.png
69
+
70
+ Alternative (after editable install):
71
+
72
+ ```powershell
73
+ train-baseline --episodes 30 --outdir artifacts
74
+ ```
75
+
76
+ ### Inference policy modes
77
+
78
+ The `/run-inference` endpoint supports:
79
+ - `random`
80
+ - `heuristic`
81
+ - `adaptive` (Stackelberg-aware)
82
+
83
+ You can also pass `personality` such as `balanced`, `risk_averse`, or `opportunistic`.
84
+
85
+ ### Docker
86
+
87
+ ```powershell
88
+ docker build -t openenv-smartgrid-marketsim .
89
+ docker run -p 7860:7860 openenv-smartgrid-marketsim
90
+ ```
91
+
92
+ ## Next implementation milestones
93
+
94
+ 1. Add interactive 3D frontend scene synchronized to /events stream.
95
+ 2. Add Unsloth or HF TRL Colab training notebook with real policy updates.
96
+ 3. Add full judging artifact checklist in README (HF Space link, mini-blog/video, plots).
main.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+
4
+ from fastapi import FastAPI, HTTPException, Query
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from pydantic import BaseModel
7
+
8
+ from smartgrid_mas.engine.policies import (
9
+ adaptive_stackelberg_action,
10
+ heuristic_joint_action,
11
+ random_joint_action,
12
+ )
13
+ from smartgrid_mas.env import SmartGridMarketEnv
14
+ from smartgrid_mas.models import JointAction, ResetRequest, StepRequest
15
+
16
+
17
+ app = FastAPI(
18
+ title="OpenEnv Smart Grid MarketSim",
19
+ description="Multi-agent market simulator with LDU physical feasibility layer.",
20
+ version="0.1.0",
21
+ )
22
+ app.add_middleware(
23
+ CORSMiddleware,
24
+ allow_origins=["*"],
25
+ allow_credentials=True,
26
+ allow_methods=["*"],
27
+ allow_headers=["*"],
28
+ )
29
+
30
+ env = SmartGridMarketEnv()
31
+
32
+
33
+ class InferenceRequest(BaseModel):
34
+ policy: str = "heuristic"
35
+ personality: str = "balanced"
36
+ task_id: str = "default"
37
+ seed: Optional[int] = 42
38
+
39
+
40
+ @app.get("/")
41
+ def root():
42
+ return {
43
+ "name": "OpenEnv Smart Grid MarketSim",
44
+ "status": "ready",
45
+ "docs": "/docs",
46
+ "health": "/health",
47
+ }
48
+
49
+
50
+ @app.get("/health")
51
+ def health():
52
+ return {"status": "ok", "service": "openenv-smartgrid-marketsim"}
53
+
54
+
55
+ @app.post("/reset")
56
+ def reset(request: ResetRequest):
57
+ try:
58
+ return env.reset(task_id=request.task_id, seed=request.seed)
59
+ except Exception as exc:
60
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
61
+
62
+
63
+ @app.post("/step")
64
+ def step(request: StepRequest, session_id: Optional[str] = Query(default=None)):
65
+ try:
66
+ return env.step(action=request.action, session_id=session_id)
67
+ except Exception as exc:
68
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
69
+
70
+
71
+ @app.get("/state")
72
+ def state(session_id: Optional[str] = Query(default=None)):
73
+ try:
74
+ return env.state(session_id=session_id)
75
+ except Exception as exc:
76
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
77
+
78
+
79
+ @app.get("/events")
80
+ def events(session_id: Optional[str] = Query(default=None)):
81
+ try:
82
+ return env.events(session_id=session_id)
83
+ except Exception as exc:
84
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
85
+
86
+
87
+ @app.get("/info")
88
+ def info():
89
+ return env.get_schema()
90
+
91
+
92
+ @app.post("/run-inference")
93
+ def run_inference(request: InferenceRequest):
94
+ reset_resp = env.reset(task_id=request.task_id, seed=request.seed)
95
+ sid = reset_resp.session_id
96
+ obs = reset_resp.observation
97
+
98
+ rng = __import__("random").Random(request.seed)
99
+ trajectory = []
100
+ while True:
101
+ if request.policy == "random":
102
+ action = random_joint_action(obs, rng)
103
+ elif request.policy == "adaptive":
104
+ action = adaptive_stackelberg_action(obs, personality=request.personality)
105
+ else:
106
+ action = heuristic_joint_action(obs, personality=request.personality)
107
+
108
+ result = env.step(action=action, session_id=sid)
109
+ trajectory.append(
110
+ {
111
+ "step": len(trajectory) + 1,
112
+ "action": action.model_dump(),
113
+ "reward": result.reward.model_dump(),
114
+ "info": result.info,
115
+ }
116
+ )
117
+ obs = result.observation
118
+ if result.done:
119
+ break
120
+
121
+ avg_reward = sum(t["reward"]["score"] for t in trajectory) / max(1, len(trajectory))
122
+ return {
123
+ "success": True,
124
+ "policy": request.policy,
125
+ "personality": request.personality,
126
+ "steps": len(trajectory),
127
+ "average_reward": round(avg_reward, 4),
128
+ "trajectory": trajectory,
129
+ }
130
+
131
+
132
+ def main() -> None:
133
+ import uvicorn
134
+
135
+ port = int(os.getenv("PORT", "7860"))
136
+ uvicorn.run(app, host="0.0.0.0", port=port)
137
+
138
+
139
+ if __name__ == "__main__":
140
+ main()
openenv.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: openenv-smartgrid-marketsim
2
+ version: "0.1.0"
3
+ description: >
4
+ Multi-agent smart-grid market simulation where strategic bids are cleared by a market
5
+ and then corrected by a load dispatching unit to enforce physical feasibility.
6
+
7
+ tags:
8
+ - openenv
9
+ - multi-agent
10
+ - market
11
+ - smart-grid
12
+ - long-horizon
13
+
14
+ tasks:
15
+ - id: default
16
+ display_name: Strategic Bidding With Physical Dispatch
17
+ difficulty: medium
18
+ max_steps: 24
19
+ description: >
20
+ Agents bid into a market, then LDU enforces grid feasibility. Reward is based on
21
+ physically delivered outcomes under volatility and shock events.
22
+ success_threshold: 0.62
23
+
24
+ action_space:
25
+ type: object
26
+ fields:
27
+ bids:
28
+ type: array
29
+ description: List of supply and demand bids from multiple agents
30
+ ev_charge_mwh:
31
+ type: float
32
+ min: 0.0
33
+ ev_discharge_mwh:
34
+ type: float
35
+ min: 0.0
36
+
37
+ observation_space:
38
+ type: object
39
+ fields:
40
+ demand_mwh:
41
+ type: float
42
+ renewable_availability_mwh:
43
+ type: float
44
+ peaker_capacity_mwh:
45
+ type: float
46
+ ev_storage_mwh:
47
+ type: float
48
+ last_clearing_price:
49
+ type: float
50
+ shock_active:
51
+ type: bool
52
+
53
+ reward:
54
+ type: object
55
+ fields:
56
+ score:
57
+ type: float
58
+ range: [0.0, 1.0]
59
+ demand_satisfaction_score:
60
+ type: float
61
+ cost_efficiency_score:
62
+ type: float
63
+ renewable_utilization_score:
64
+ type: float
65
+ stability_score:
66
+ type: float
67
+ infeasibility_penalty:
68
+ type: float
69
+ blackout_penalty:
70
+ type: float
71
+
72
+ environment:
73
+ api: REST
74
+ framework: FastAPI
75
+ base_url: http://localhost:7860
76
+ endpoints:
77
+ - GET /health
78
+ - POST /reset
79
+ - POST /step
80
+ - GET /state
81
+ - GET /events
82
+ - GET /info
83
+ - POST /run-inference
84
+
85
+ docker:
86
+ image: openenv-smartgrid-marketsim
87
+ port: 7860
pyproject.toml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "openenv-smartgrid-marketsim"
3
+ version = "0.1.0"
4
+ description = "Multi-agent smart-grid market simulator with LDU feasibility layer"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "fastapi>=0.115.5,<1",
8
+ "uvicorn>=0.32.1,<1",
9
+ "pydantic>=2.10.3,<3",
10
+ "pyyaml>=6.0.2,<7",
11
+ "matplotlib>=3.9.2,<4",
12
+ ]
13
+
14
+ [project.optional-dependencies]
15
+ openenv = [
16
+ "openenv-core>=0.2.0",
17
+ ]
18
+
19
+ [project.scripts]
20
+ server = "main:main"
21
+ train-baseline = "smartgrid_mas.train_baseline:main"
22
+
23
+ [build-system]
24
+ requires = ["setuptools>=68"]
25
+ build-backend = "setuptools.build_meta"
26
+
27
+ [tool.setuptools.packages.find]
28
+ include = ["smartgrid_mas*"]
smartgrid_mas/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smartgrid_mas.env import SmartGridMarketEnv
2
+ from smartgrid_mas.models import (
3
+ AgentBid,
4
+ JointAction,
5
+ MarketObservation,
6
+ MarketReward,
7
+ ResetRequest,
8
+ ResetResponse,
9
+ StepRequest,
10
+ StepResponse,
11
+ StateResponse,
12
+ )
13
+
14
+ __all__ = [
15
+ "SmartGridMarketEnv",
16
+ "AgentBid",
17
+ "JointAction",
18
+ "MarketObservation",
19
+ "MarketReward",
20
+ "ResetRequest",
21
+ "ResetResponse",
22
+ "StepRequest",
23
+ "StepResponse",
24
+ "StateResponse",
25
+ ]
smartgrid_mas/engine/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Core engine modules for market clearing, physical dispatch, reward, and dynamics."""
smartgrid_mas/engine/dynamics.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Dict, Tuple
3
+
4
+ from smartgrid_mas.tasks import TaskConfig
5
+
6
+
7
+ def evolve_grid(
8
+ demand_mwh: float,
9
+ renewable_mwh: float,
10
+ base_price_usd_per_mwh: float,
11
+ step: int,
12
+ task: TaskConfig,
13
+ rng: random.Random,
14
+ ) -> Tuple[float, float, float, Dict]:
15
+ shock_active = step == task.shock_step
16
+
17
+ demand_noise = rng.gauss(0.0, task.demand_volatility)
18
+ renewable_noise = rng.gauss(0.0, task.renewable_volatility)
19
+
20
+ next_demand = demand_mwh + task.demand_trend_mwh + demand_noise
21
+ next_renewable = renewable_mwh + task.renewable_trend_mwh + renewable_noise
22
+
23
+ if shock_active:
24
+ next_renewable = max(0.0, next_renewable - task.shock_renewable_drop)
25
+
26
+ next_demand = max(20.0, next_demand)
27
+ next_renewable = max(0.0, next_renewable)
28
+
29
+ implied_price = base_price_usd_per_mwh * (1.0 + max(0.0, (next_demand - next_renewable) / 300.0))
30
+ next_price = max(5.0, implied_price)
31
+
32
+ return (
33
+ round(next_demand, 3),
34
+ round(next_renewable, 3),
35
+ round(next_price, 3),
36
+ {
37
+ "shock_active": shock_active,
38
+ "demand_noise": round(demand_noise, 3),
39
+ "renewable_noise": round(renewable_noise, 3),
40
+ },
41
+ )
smartgrid_mas/engine/ldu.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Tuple
2
+
3
+
4
+ def enforce_dispatch(
5
+ market_result: Dict,
6
+ demand_mwh: float,
7
+ renewable_available_mwh: float,
8
+ peaker_capacity_mwh: float,
9
+ ev_storage_mwh: float,
10
+ ev_storage_capacity_mwh: float,
11
+ ev_charge_mwh: float,
12
+ ev_discharge_mwh: float,
13
+ ) -> Tuple[Dict, float]:
14
+ corrections = []
15
+
16
+ if ev_charge_mwh > 0 and ev_discharge_mwh > 0:
17
+ ev_discharge_mwh = 0.0
18
+ corrections.append("Simultaneous EV charge and discharge corrected by LDU")
19
+
20
+ max_charge = max(0.0, ev_storage_capacity_mwh - ev_storage_mwh)
21
+ max_discharge = max(0.0, ev_storage_mwh)
22
+
23
+ if ev_charge_mwh > max_charge:
24
+ corrections.append("EV charge exceeded storage headroom")
25
+ ev_charge_mwh = max_charge
26
+
27
+ if ev_discharge_mwh > max_discharge:
28
+ corrections.append("EV discharge exceeded storage state-of-charge")
29
+ ev_discharge_mwh = max_discharge
30
+
31
+ dispatch_from_market = market_result.get("cleared_mwh", 0.0)
32
+
33
+ renewable_dispatch = min(renewable_available_mwh, dispatch_from_market)
34
+ residual = max(0.0, dispatch_from_market - renewable_dispatch)
35
+ peaker_dispatch = min(peaker_capacity_mwh, residual)
36
+
37
+ if residual > peaker_capacity_mwh:
38
+ corrections.append("Market-cleared supply exceeded physical generation capacity")
39
+
40
+ gross_supply = renewable_dispatch + peaker_dispatch + ev_discharge_mwh
41
+
42
+ transmission_loss = 0.03 * gross_supply
43
+ storage_loss = 0.08 * ev_charge_mwh
44
+
45
+ delivered_supply = max(0.0, gross_supply - transmission_loss)
46
+ unmet_demand = max(0.0, demand_mwh - delivered_supply)
47
+ oversupply = max(0.0, delivered_supply - demand_mwh)
48
+
49
+ next_ev_storage = ev_storage_mwh + ev_charge_mwh - ev_discharge_mwh - storage_loss
50
+ next_ev_storage = max(0.0, min(ev_storage_capacity_mwh, next_ev_storage))
51
+
52
+ dispatch = {
53
+ "renewable_dispatch_mwh": round(renewable_dispatch, 3),
54
+ "peaker_dispatch_mwh": round(peaker_dispatch, 3),
55
+ "ev_charge_mwh": round(ev_charge_mwh, 3),
56
+ "ev_discharge_mwh": round(ev_discharge_mwh, 3),
57
+ "transmission_loss_mwh": round(transmission_loss, 3),
58
+ "storage_loss_mwh": round(storage_loss, 3),
59
+ "delivered_supply_mwh": round(delivered_supply, 3),
60
+ "unmet_demand_mwh": round(unmet_demand, 3),
61
+ "oversupply_mwh": round(oversupply, 3),
62
+ "next_ev_storage_mwh": round(next_ev_storage, 3),
63
+ "corrections": corrections,
64
+ "correction_count": len(corrections),
65
+ }
66
+
67
+ return dispatch, next_ev_storage
smartgrid_mas/engine/market.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+ from smartgrid_mas.models import AgentBid
4
+
5
+
6
+ def _apply_leader_signal(bids: List[AgentBid], leader_price_signal: float) -> Dict:
7
+ adjusted = []
8
+ adjusted_count = 0
9
+
10
+ for bid in bids:
11
+ price = float(bid.price_usd_per_mwh)
12
+ if bid.bid_type == "supply":
13
+ floor = 0.8 * leader_price_signal
14
+ if bid.role == "peaker_plant":
15
+ floor = 0.95 * leader_price_signal
16
+ new_price = max(price, floor)
17
+ else:
18
+ cap = 1.8 * leader_price_signal
19
+ floor = 0.9 * leader_price_signal
20
+ new_price = min(max(price, floor), cap)
21
+
22
+ if abs(new_price - price) > 1e-9:
23
+ adjusted_count += 1
24
+
25
+ adjusted.append(
26
+ {
27
+ "agent_id": bid.agent_id,
28
+ "role": bid.role,
29
+ "bid_type": bid.bid_type,
30
+ "quantity_mwh": float(bid.quantity_mwh),
31
+ "price_usd_per_mwh": float(new_price),
32
+ "raw_price_usd_per_mwh": float(price),
33
+ }
34
+ )
35
+
36
+ return {
37
+ "bids": adjusted,
38
+ "adjusted_count": adjusted_count,
39
+ }
40
+
41
+
42
+ def clear_market(bids: List[AgentBid], leader_price_signal: float) -> Dict:
43
+ leader_adjusted = _apply_leader_signal(bids, leader_price_signal)
44
+ priced_bids = leader_adjusted["bids"]
45
+
46
+ supply = [
47
+ b for b in priced_bids if b["bid_type"] == "supply" and b["quantity_mwh"] > 0
48
+ ]
49
+ demand = [
50
+ b for b in priced_bids if b["bid_type"] == "demand" and b["quantity_mwh"] > 0
51
+ ]
52
+
53
+ supply_sorted = sorted(supply, key=lambda x: x["price_usd_per_mwh"])
54
+ demand_sorted = sorted(demand, key=lambda x: x["price_usd_per_mwh"], reverse=True)
55
+
56
+ i = 0
57
+ j = 0
58
+ cleared_qty = 0.0
59
+ matched = []
60
+ clearing_price = 0.0
61
+
62
+ while i < len(supply_sorted) and j < len(demand_sorted):
63
+ s = supply_sorted[i]
64
+ d = demand_sorted[j]
65
+ if s["price_usd_per_mwh"] > d["price_usd_per_mwh"]:
66
+ break
67
+
68
+ qty = min(s["quantity_mwh"], d["quantity_mwh"])
69
+ if qty <= 0:
70
+ break
71
+
72
+ cleared_qty += qty
73
+ clearing_price = (s["price_usd_per_mwh"] + d["price_usd_per_mwh"]) / 2.0
74
+ matched.append(
75
+ {
76
+ "supply_agent": s["agent_id"],
77
+ "demand_agent": d["agent_id"],
78
+ "quantity_mwh": round(qty, 3),
79
+ "price_usd_per_mwh": round(clearing_price, 3),
80
+ }
81
+ )
82
+
83
+ s["quantity_mwh"] -= qty
84
+ d["quantity_mwh"] -= qty
85
+ if s["quantity_mwh"] <= 1e-6:
86
+ i += 1
87
+ if d["quantity_mwh"] <= 1e-6:
88
+ j += 1
89
+
90
+ total_supply_offered = sum(float(b.quantity_mwh) for b in bids if b.bid_type == "supply")
91
+ total_demand_bid = sum(float(b.quantity_mwh) for b in bids if b.bid_type == "demand")
92
+
93
+ return {
94
+ "cleared_mwh": round(cleared_qty, 3),
95
+ "clearing_price": round(clearing_price, 3),
96
+ "total_supply_offered": round(total_supply_offered, 3),
97
+ "total_demand_bid": round(total_demand_bid, 3),
98
+ "leader_price_signal": round(leader_price_signal, 3),
99
+ "leader_adjusted_bids": leader_adjusted["adjusted_count"],
100
+ "post_signal_book": priced_bids,
101
+ "matches": matched,
102
+ }
smartgrid_mas/engine/policies.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+ from smartgrid_mas.models import AgentBid, JointAction, MarketObservation
4
+
5
+
6
+ def random_joint_action(obs: MarketObservation, rng: random.Random) -> JointAction:
7
+ bids = [
8
+ AgentBid(
9
+ agent_id="renewable_1",
10
+ role="renewable_prosumer",
11
+ bid_type="supply",
12
+ quantity_mwh=max(0.0, rng.uniform(10.0, obs.renewable_availability_mwh)),
13
+ price_usd_per_mwh=rng.uniform(10.0, 40.0),
14
+ ),
15
+ AgentBid(
16
+ agent_id="peaker_1",
17
+ role="peaker_plant",
18
+ bid_type="supply",
19
+ quantity_mwh=rng.uniform(5.0, obs.peaker_capacity_mwh),
20
+ price_usd_per_mwh=rng.uniform(35.0, 80.0),
21
+ ),
22
+ AgentBid(
23
+ agent_id="industrial_1",
24
+ role="industrial_load",
25
+ bid_type="demand",
26
+ quantity_mwh=rng.uniform(0.6 * obs.demand_mwh, 1.1 * obs.demand_mwh),
27
+ price_usd_per_mwh=rng.uniform(30.0, 95.0),
28
+ ),
29
+ ]
30
+ return JointAction(
31
+ bids=bids,
32
+ ev_charge_mwh=rng.uniform(0.0, 8.0),
33
+ ev_discharge_mwh=rng.uniform(0.0, 8.0),
34
+ )
35
+
36
+
37
+ def heuristic_joint_action(obs: MarketObservation, personality: str = "balanced") -> JointAction:
38
+ demand = obs.demand_mwh
39
+ renewable_offer = min(obs.renewable_availability_mwh, demand * 0.55)
40
+ peaker_offer = min(obs.peaker_capacity_mwh, max(0.0, demand - renewable_offer))
41
+
42
+ if personality == "greedy":
43
+ industrial_price = 95.0
44
+ peaker_price = max(62.0, obs.leader_price_signal * 1.08)
45
+ charge = 1.0
46
+ discharge = 5.0 if obs.scarcity_index > 0.2 else 2.0
47
+ elif personality == "risk_averse":
48
+ industrial_price = 90.0
49
+ peaker_price = max(54.0, obs.leader_price_signal * 0.98)
50
+ charge = 5.0 if obs.renewable_availability_mwh > demand else 1.0
51
+ discharge = 2.0 if obs.scarcity_index > 0.35 else 0.0
52
+ else:
53
+ industrial_price = 85.0
54
+ peaker_price = max(58.0, obs.leader_price_signal * 1.02)
55
+ charge = 3.0 if obs.renewable_availability_mwh > demand else 0.0
56
+ discharge = 4.0 if obs.renewable_availability_mwh < 0.8 * demand else 0.0
57
+
58
+ bids = [
59
+ AgentBid(
60
+ agent_id="renewable_1",
61
+ role="renewable_prosumer",
62
+ bid_type="supply",
63
+ quantity_mwh=max(0.0, renewable_offer),
64
+ price_usd_per_mwh=20.0,
65
+ ),
66
+ AgentBid(
67
+ agent_id="peaker_1",
68
+ role="peaker_plant",
69
+ bid_type="supply",
70
+ quantity_mwh=max(0.0, peaker_offer),
71
+ price_usd_per_mwh=peaker_price,
72
+ ),
73
+ AgentBid(
74
+ agent_id="industrial_1",
75
+ role="industrial_load",
76
+ bid_type="demand",
77
+ quantity_mwh=demand,
78
+ price_usd_per_mwh=industrial_price,
79
+ ),
80
+ ]
81
+
82
+ return JointAction(bids=bids, ev_charge_mwh=charge, ev_discharge_mwh=discharge)
83
+
84
+
85
+ def adaptive_stackelberg_action(obs: MarketObservation, personality: str = "balanced") -> JointAction:
86
+ demand = obs.demand_mwh
87
+ scarcity = max(0.0, obs.scarcity_index)
88
+ leader = max(1.0, obs.leader_price_signal)
89
+
90
+ renewable_offer = min(obs.renewable_availability_mwh, demand * (0.52 + 0.18 * (1.0 - scarcity)))
91
+ peaker_offer = min(obs.peaker_capacity_mwh, max(0.0, demand - renewable_offer) * (1.0 + 0.25 * scarcity))
92
+
93
+ if personality == "opportunistic":
94
+ peaker_markup = 1.16
95
+ load_budget = 1.6
96
+ charge_bias = 0.5
97
+ elif personality == "risk_averse":
98
+ peaker_markup = 1.03
99
+ load_budget = 1.35
100
+ charge_bias = 1.25
101
+ else:
102
+ peaker_markup = 1.1
103
+ load_budget = 1.45
104
+ charge_bias = 1.0
105
+
106
+ bids = [
107
+ AgentBid(
108
+ agent_id="renewable_1",
109
+ role="renewable_prosumer",
110
+ bid_type="supply",
111
+ quantity_mwh=max(0.0, renewable_offer),
112
+ price_usd_per_mwh=max(15.0, leader * 0.82),
113
+ ),
114
+ AgentBid(
115
+ agent_id="peaker_1",
116
+ role="peaker_plant",
117
+ bid_type="supply",
118
+ quantity_mwh=max(0.0, peaker_offer),
119
+ price_usd_per_mwh=max(42.0, leader * peaker_markup),
120
+ ),
121
+ AgentBid(
122
+ agent_id="industrial_1",
123
+ role="industrial_load",
124
+ bid_type="demand",
125
+ quantity_mwh=demand,
126
+ price_usd_per_mwh=leader * load_budget,
127
+ ),
128
+ ]
129
+
130
+ if scarcity > 0.25:
131
+ discharge = min(obs.ev_storage_mwh, 3.0 + 8.0 * scarcity)
132
+ charge = 0.0
133
+ else:
134
+ charge = min(obs.ev_storage_capacity_mwh - obs.ev_storage_mwh, 2.0 * charge_bias)
135
+ discharge = 0.0
136
+
137
+ return JointAction(bids=bids, ev_charge_mwh=max(0.0, charge), ev_discharge_mwh=max(0.0, discharge))
smartgrid_mas/engine/reward.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smartgrid_mas.models import MarketReward
2
+
3
+
4
+ def compute_reward(dispatch: dict, clearing_price: float, demand_mwh: float, prior_gap: float) -> MarketReward:
5
+ delivered = dispatch["delivered_supply_mwh"]
6
+ unmet = dispatch["unmet_demand_mwh"]
7
+ oversupply = dispatch["oversupply_mwh"]
8
+ correction_count = dispatch["correction_count"]
9
+
10
+ demand_satisfaction = min(1.0, delivered / max(demand_mwh, 1e-6))
11
+
12
+ unit_cost = clearing_price if clearing_price > 0 else 45.0
13
+ total_cost = delivered * unit_cost
14
+ cost_efficiency = max(0.0, 1.0 - total_cost / 12000.0)
15
+
16
+ renewable_utilization = min(1.0, dispatch["renewable_dispatch_mwh"] / max(delivered, 1e-6))
17
+
18
+ current_gap = delivered - demand_mwh
19
+ stability_delta = abs(current_gap - prior_gap)
20
+ stability_score = max(0.0, 1.0 - stability_delta / 80.0)
21
+
22
+ infeasibility_penalty = min(1.0, correction_count * 0.15 + dispatch["storage_loss_mwh"] * 0.01)
23
+ blackout_penalty = min(1.0, unmet / max(demand_mwh, 1e-6))
24
+
25
+ raw = (
26
+ 0.34 * demand_satisfaction
27
+ + 0.23 * cost_efficiency
28
+ + 0.18 * renewable_utilization
29
+ + 0.15 * stability_score
30
+ - 0.2 * infeasibility_penalty
31
+ - 0.2 * blackout_penalty
32
+ - 0.03 * min(1.0, oversupply / max(demand_mwh, 1e-6))
33
+ )
34
+ score = max(0.0, min(1.0, raw))
35
+
36
+ reason = (
37
+ f"delivered={delivered:.1f} demand={demand_mwh:.1f} unmet={unmet:.1f} "
38
+ f"price={unit_cost:.1f} corrections={correction_count}"
39
+ )
40
+
41
+ return MarketReward(
42
+ score=score,
43
+ reason=reason,
44
+ demand_satisfaction_score=demand_satisfaction,
45
+ cost_efficiency_score=cost_efficiency,
46
+ renewable_utilization_score=renewable_utilization,
47
+ stability_score=stability_score,
48
+ infeasibility_penalty=infeasibility_penalty,
49
+ blackout_penalty=blackout_penalty,
50
+ )
smartgrid_mas/env.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import uuid
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, Optional
5
+
6
+ from smartgrid_mas.engine.dynamics import evolve_grid
7
+ from smartgrid_mas.engine.ldu import enforce_dispatch
8
+ from smartgrid_mas.engine.market import clear_market
9
+ from smartgrid_mas.engine.reward import compute_reward
10
+ from smartgrid_mas.models import (
11
+ JointAction,
12
+ MarketObservation,
13
+ MarketReward,
14
+ ResetResponse,
15
+ StateResponse,
16
+ StepResponse,
17
+ )
18
+ from smartgrid_mas.tasks import TaskConfig, get_task
19
+
20
+
21
+ SCHEMA_INFO = (
22
+ "Provide a JointAction with supply and demand bids from multiple agents plus EV charge/discharge "
23
+ "commands. Market clears bids first, then LDU enforces physical feasibility and logs corrections."
24
+ )
25
+
26
+
27
+ @dataclass
28
+ class Session:
29
+ task: TaskConfig
30
+ rng: random.Random
31
+ session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
32
+ step: int = 0
33
+ done: bool = False
34
+ demand_mwh: float = 0.0
35
+ renewable_mwh: float = 0.0
36
+ peaker_capacity_mwh: float = 0.0
37
+ ev_storage_mwh: float = 0.0
38
+ ev_storage_capacity_mwh: float = 0.0
39
+ base_price: float = 0.0
40
+ last_clearing_price: float = 0.0
41
+ prior_gap: float = 0.0
42
+ correction_count: int = 0
43
+ infeasible_actions: int = 0
44
+ total_demand_met: float = 0.0
45
+ total_cost: float = 0.0
46
+ reward_history: list = field(default_factory=list)
47
+ event_log: list = field(default_factory=list)
48
+ shock_seen: bool = False
49
+ personalities: Dict[str, str] = field(default_factory=dict)
50
+
51
+ def to_observation(self, hint: Optional[str] = None, error_message: Optional[str] = None) -> MarketObservation:
52
+ public_signal = (
53
+ "Shock regime active; renewable volatility is elevated"
54
+ if self.shock_seen
55
+ else "Normal regime; optimize demand satisfaction with low infeasibility"
56
+ )
57
+ return MarketObservation(
58
+ step=self.step,
59
+ steps_taken=self.step,
60
+ max_steps=self.task.max_steps,
61
+ demand_mwh=round(self.demand_mwh, 3),
62
+ renewable_availability_mwh=round(self.renewable_mwh, 3),
63
+ peaker_capacity_mwh=round(self.peaker_capacity_mwh, 3),
64
+ ev_storage_mwh=round(self.ev_storage_mwh, 3),
65
+ ev_storage_capacity_mwh=round(self.ev_storage_capacity_mwh, 3),
66
+ last_clearing_price=round(self.last_clearing_price, 3),
67
+ leader_price_signal=round(self.base_price, 3),
68
+ scarcity_index=round(max(0.0, (self.demand_mwh - self.renewable_mwh) / max(self.demand_mwh, 1e-6)), 4),
69
+ shock_active=self.shock_seen,
70
+ public_signal=public_signal,
71
+ schema_info=SCHEMA_INFO,
72
+ hint=hint,
73
+ error_message=error_message,
74
+ )
75
+
76
+
77
+ class SmartGridMarketEnv:
78
+ def __init__(self):
79
+ self._sessions: Dict[str, Session] = {}
80
+ self._latest_session_id: Optional[str] = None
81
+
82
+ def reset(self, task_id: str = "default", seed: Optional[int] = None) -> ResetResponse:
83
+ task = get_task(task_id)
84
+ rng = random.Random(seed)
85
+ session = Session(
86
+ task=task,
87
+ rng=rng,
88
+ demand_mwh=task.initial_demand_mwh,
89
+ renewable_mwh=task.initial_renewable_mwh,
90
+ peaker_capacity_mwh=task.peaker_capacity_mwh,
91
+ ev_storage_mwh=task.ev_storage_mwh,
92
+ ev_storage_capacity_mwh=task.ev_storage_capacity_mwh,
93
+ base_price=task.base_price_usd_per_mwh,
94
+ last_clearing_price=task.base_price_usd_per_mwh,
95
+ personalities={
96
+ "renewable_1": rng.choice(["opportunistic", "balanced"]),
97
+ "peaker_1": rng.choice(["greedy", "balanced", "risk_averse"]),
98
+ "industrial_1": rng.choice(["risk_averse", "balanced"]),
99
+ "ev_1": rng.choice(["balanced", "risk_averse"]),
100
+ },
101
+ )
102
+ self._sessions[session.session_id] = session
103
+ self._latest_session_id = session.session_id
104
+
105
+ return ResetResponse(
106
+ session_id=session.session_id,
107
+ task_id=task.task_id,
108
+ task_description=task.description,
109
+ schema_info=SCHEMA_INFO,
110
+ steps_taken=0,
111
+ observation=session.to_observation(hint=task.hint),
112
+ )
113
+
114
+ def step(self, action: JointAction, session_id: Optional[str] = None) -> StepResponse:
115
+ session = self._get_session(session_id)
116
+ if session.done:
117
+ return StepResponse(
118
+ observation=session.to_observation(error_message="Episode finished. Call reset."),
119
+ reward=compute_reward(
120
+ dispatch={
121
+ "delivered_supply_mwh": 0.0,
122
+ "unmet_demand_mwh": 0.0,
123
+ "oversupply_mwh": 0.0,
124
+ "correction_count": 0,
125
+ "storage_loss_mwh": 0.0,
126
+ "renewable_dispatch_mwh": 0.0,
127
+ },
128
+ clearing_price=session.last_clearing_price,
129
+ demand_mwh=max(1.0, session.demand_mwh),
130
+ prior_gap=0.0,
131
+ ),
132
+ done=True,
133
+ truncated=False,
134
+ info={"error": "episode_done"},
135
+ )
136
+
137
+ market = clear_market(action.bids, leader_price_signal=session.base_price)
138
+ dispatch, next_storage = enforce_dispatch(
139
+ market_result=market,
140
+ demand_mwh=session.demand_mwh,
141
+ renewable_available_mwh=session.renewable_mwh,
142
+ peaker_capacity_mwh=session.peaker_capacity_mwh,
143
+ ev_storage_mwh=session.ev_storage_mwh,
144
+ ev_storage_capacity_mwh=session.ev_storage_capacity_mwh,
145
+ ev_charge_mwh=action.ev_charge_mwh,
146
+ ev_discharge_mwh=action.ev_discharge_mwh,
147
+ )
148
+
149
+ reward = compute_reward(
150
+ dispatch=dispatch,
151
+ clearing_price=market["clearing_price"] or session.base_price,
152
+ demand_mwh=session.demand_mwh,
153
+ prior_gap=session.prior_gap,
154
+ )
155
+
156
+ session.step += 1
157
+ session.ev_storage_mwh = next_storage
158
+ session.last_clearing_price = market["clearing_price"] or session.base_price
159
+ session.prior_gap = dispatch["delivered_supply_mwh"] - session.demand_mwh
160
+ session.correction_count += dispatch["correction_count"]
161
+ if dispatch["correction_count"] > 0:
162
+ session.infeasible_actions += 1
163
+ session.total_demand_met += min(session.demand_mwh, dispatch["delivered_supply_mwh"])
164
+ session.total_cost += dispatch["delivered_supply_mwh"] * session.last_clearing_price
165
+ session.reward_history.append(reward.score)
166
+
167
+ private_views = self._build_private_agent_views(session, market, dispatch)
168
+
169
+ next_demand, next_renewable, next_price, dyn_info = evolve_grid(
170
+ demand_mwh=session.demand_mwh,
171
+ renewable_mwh=session.renewable_mwh,
172
+ base_price_usd_per_mwh=session.base_price,
173
+ step=session.step,
174
+ task=session.task,
175
+ rng=session.rng,
176
+ )
177
+ session.demand_mwh = next_demand
178
+ session.renewable_mwh = next_renewable
179
+ session.base_price = next_price
180
+ session.shock_seen = session.shock_seen or dyn_info["shock_active"]
181
+
182
+ event = {
183
+ "step": session.step,
184
+ "market": market,
185
+ "dispatch": dispatch,
186
+ "reward": reward.model_dump(),
187
+ "dynamics": dyn_info,
188
+ "agent_private_views": private_views,
189
+ }
190
+ session.event_log.append(event)
191
+
192
+ done = session.step >= session.task.max_steps
193
+ session.done = done
194
+
195
+ info = {
196
+ "market": market,
197
+ "dispatch": dispatch,
198
+ "dynamics": dyn_info,
199
+ "agent_private_views": private_views,
200
+ "summary": {
201
+ "avg_reward": round(sum(session.reward_history) / len(session.reward_history), 4),
202
+ "total_demand_met_mwh": round(session.total_demand_met, 3),
203
+ "total_cost_usd": round(session.total_cost, 3),
204
+ "infeasible_actions": session.infeasible_actions,
205
+ "ldu_corrections": session.correction_count,
206
+ "leader_adjusted_bids": market["leader_adjusted_bids"],
207
+ "personality_map": session.personalities,
208
+ },
209
+ }
210
+
211
+ return StepResponse(
212
+ observation=session.to_observation(),
213
+ reward=reward,
214
+ done=done,
215
+ truncated=False,
216
+ info=info,
217
+ )
218
+
219
+ def state(self, session_id: Optional[str] = None) -> StateResponse:
220
+ session = self._get_session(session_id)
221
+ return StateResponse(
222
+ current_task_id=session.task.task_id,
223
+ steps_taken=session.step,
224
+ episode_done=session.done,
225
+ observation=session.to_observation(),
226
+ )
227
+
228
+ def events(self, session_id: Optional[str] = None) -> Dict:
229
+ session = self._get_session(session_id)
230
+ return {"session_id": session.session_id, "events": session.event_log[-50:]}
231
+
232
+ def get_schema(self) -> Dict:
233
+ return {
234
+ "action_schema": JointAction.model_json_schema(),
235
+ "observation_schema": MarketObservation.model_json_schema(),
236
+ "reward_schema": MarketReward.model_json_schema(),
237
+ "tasks": ["default"],
238
+ "notes": "Hybrid Theme 1+2+3.1 baseline implementation with LDU as core physical layer",
239
+ }
240
+
241
+ def _get_session(self, session_id: Optional[str]) -> Session:
242
+ sid = session_id or self._latest_session_id
243
+ if sid is None or sid not in self._sessions:
244
+ raise KeyError("No active session. Call /reset first.")
245
+ return self._sessions[sid]
246
+
247
+ def _build_private_agent_views(self, session: Session, market: Dict, dispatch: Dict) -> Dict[str, Dict]:
248
+ scarcity = max(0.0, (session.demand_mwh - session.renewable_mwh) / max(session.demand_mwh, 1e-6))
249
+ spread = max(0.0, session.base_price - session.last_clearing_price)
250
+ return {
251
+ "renewable_1": {
252
+ "personality": session.personalities.get("renewable_1", "balanced"),
253
+ "curtailment_risk": round(max(0.0, session.renewable_mwh - market.get("cleared_mwh", 0.0)), 3),
254
+ "forecast_bias": round(session.rng.uniform(-3.0, 3.0), 3),
255
+ },
256
+ "peaker_1": {
257
+ "personality": session.personalities.get("peaker_1", "balanced"),
258
+ "scarcity_index": round(scarcity, 4),
259
+ "margin_signal": round(market.get("clearing_price", session.base_price) - 42.0, 3),
260
+ },
261
+ "industrial_1": {
262
+ "personality": session.personalities.get("industrial_1", "balanced"),
263
+ "budget_pressure": round(
264
+ market.get("clearing_price", session.base_price) / max(session.base_price, 1e-6),
265
+ 4,
266
+ ),
267
+ "unmet_demand_mwh": dispatch["unmet_demand_mwh"],
268
+ },
269
+ "ev_1": {
270
+ "personality": session.personalities.get("ev_1", "balanced"),
271
+ "soc_ratio": round(session.ev_storage_mwh / max(session.ev_storage_capacity_mwh, 1e-6), 4),
272
+ "arbitrage_spread": round(spread, 3),
273
+ },
274
+ }
smartgrid_mas/models.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Literal, Optional
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
+ AgentRole = Literal[
7
+ "renewable_prosumer",
8
+ "ev_aggregator",
9
+ "peaker_plant",
10
+ "industrial_load",
11
+ ]
12
+ BidType = Literal["supply", "demand"]
13
+
14
+
15
+ class AgentBid(BaseModel):
16
+ agent_id: str = Field(..., description="Unique agent id")
17
+ role: AgentRole = Field(..., description="Agent role")
18
+ bid_type: BidType = Field(..., description="Supply or demand bid")
19
+ quantity_mwh: float = Field(..., ge=0.0, description="Bid quantity in MWh")
20
+ price_usd_per_mwh: float = Field(..., ge=0.0, description="Bid price")
21
+
22
+
23
+ class JointAction(BaseModel):
24
+ bids: List[AgentBid] = Field(default_factory=list, description="Bids from all agents")
25
+ ev_charge_mwh: float = Field(0.0, ge=0.0, description="EV fleet charge command")
26
+ ev_discharge_mwh: float = Field(0.0, ge=0.0, description="EV fleet discharge command")
27
+
28
+
29
+ class MarketObservation(BaseModel):
30
+ step: int
31
+ steps_taken: int
32
+ max_steps: int
33
+ demand_mwh: float
34
+ renewable_availability_mwh: float
35
+ peaker_capacity_mwh: float
36
+ ev_storage_mwh: float
37
+ ev_storage_capacity_mwh: float
38
+ last_clearing_price: float
39
+ leader_price_signal: float
40
+ scarcity_index: float
41
+ shock_active: bool
42
+ public_signal: str
43
+ schema_info: str
44
+ hint: Optional[str] = None
45
+ error_message: Optional[str] = None
46
+
47
+
48
+ class MarketReward(BaseModel):
49
+ score: float = Field(..., ge=0.0, le=1.0)
50
+ reason: str
51
+ demand_satisfaction_score: float
52
+ cost_efficiency_score: float
53
+ renewable_utilization_score: float
54
+ stability_score: float
55
+ infeasibility_penalty: float
56
+ blackout_penalty: float
57
+
58
+
59
+ class ResetRequest(BaseModel):
60
+ task_id: str = "default"
61
+ seed: Optional[int] = None
62
+
63
+
64
+ class ResetResponse(BaseModel):
65
+ session_id: str
66
+ task_id: str
67
+ task_description: str
68
+ schema_info: str
69
+ steps_taken: int
70
+ observation: MarketObservation
71
+
72
+
73
+ class StepRequest(BaseModel):
74
+ action: JointAction
75
+
76
+
77
+ class StepResponse(BaseModel):
78
+ observation: MarketObservation
79
+ reward: MarketReward
80
+ done: bool
81
+ truncated: bool
82
+ info: Dict
83
+
84
+
85
+ class StateResponse(BaseModel):
86
+ current_task_id: str
87
+ steps_taken: int
88
+ episode_done: bool
89
+ observation: Optional[MarketObservation] = None
90
+
91
+
92
+ class EpisodeSummary(BaseModel):
93
+ average_reward: float
94
+ total_demand_met: float
95
+ total_cost: float
96
+ infeasible_actions: int
97
+ corrections: int
98
+ shock_response_score: float
smartgrid_mas/tasks.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Dict
3
+
4
+
5
+ @dataclass
6
+ class TaskConfig:
7
+ task_id: str
8
+ description: str
9
+ max_steps: int
10
+ initial_demand_mwh: float
11
+ initial_renewable_mwh: float
12
+ peaker_capacity_mwh: float
13
+ ev_storage_mwh: float
14
+ ev_storage_capacity_mwh: float
15
+ base_price_usd_per_mwh: float
16
+ demand_trend_mwh: float
17
+ renewable_trend_mwh: float
18
+ demand_volatility: float
19
+ renewable_volatility: float
20
+ shock_step: int
21
+ shock_renewable_drop: float
22
+ hint: str
23
+
24
+
25
+ TASKS: Dict[str, TaskConfig] = {
26
+ "default": TaskConfig(
27
+ task_id="default",
28
+ description=(
29
+ "Hybrid multi-agent smart-grid market simulation. Agents submit strategic bids, "
30
+ "market clears, and LDU enforces physical feasibility with correction logs."
31
+ ),
32
+ max_steps=24,
33
+ initial_demand_mwh=120.0,
34
+ initial_renewable_mwh=70.0,
35
+ peaker_capacity_mwh=85.0,
36
+ ev_storage_mwh=25.0,
37
+ ev_storage_capacity_mwh=60.0,
38
+ base_price_usd_per_mwh=45.0,
39
+ demand_trend_mwh=1.2,
40
+ renewable_trend_mwh=-0.6,
41
+ demand_volatility=4.0,
42
+ renewable_volatility=6.0,
43
+ shock_step=16,
44
+ shock_renewable_drop=22.0,
45
+ hint=(
46
+ "Coordinate bids with expected dispatch feasibility. Market-optimal bids that violate "
47
+ "physical constraints are corrected by LDU and reduce reward."
48
+ ),
49
+ )
50
+ }
51
+
52
+
53
+ def get_task(task_id: str) -> TaskConfig:
54
+ if task_id not in TASKS:
55
+ raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASKS.keys())}")
56
+ return TASKS[task_id]
smartgrid_mas/train_baseline.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import csv
3
+ import os
4
+ import random
5
+ from typing import List
6
+
7
+ import matplotlib.pyplot as plt
8
+
9
+ from smartgrid_mas.engine.policies import (
10
+ adaptive_stackelberg_action,
11
+ heuristic_joint_action,
12
+ random_joint_action,
13
+ )
14
+ from smartgrid_mas.env import SmartGridMarketEnv
15
+
16
+
17
+ def run_episode(env: SmartGridMarketEnv, policy_name: str, seed: int) -> float:
18
+ reset = env.reset(task_id="default", seed=seed)
19
+ sid = reset.session_id
20
+ obs = reset.observation
21
+ rng = random.Random(seed)
22
+
23
+ rewards: List[float] = []
24
+ while True:
25
+ if policy_name == "random":
26
+ action = random_joint_action(obs, rng)
27
+ elif policy_name == "adaptive":
28
+ action = adaptive_stackelberg_action(obs, personality="balanced")
29
+ else:
30
+ action = heuristic_joint_action(obs, personality="balanced")
31
+
32
+ step = env.step(action=action, session_id=sid)
33
+ rewards.append(step.reward.score)
34
+ obs = step.observation
35
+ if step.done:
36
+ break
37
+
38
+ return sum(rewards) / max(1, len(rewards))
39
+
40
+
41
+ def main() -> None:
42
+ parser = argparse.ArgumentParser(description="Minimal baseline training/eval runner")
43
+ parser.add_argument("--episodes", type=int, default=30)
44
+ parser.add_argument("--outdir", type=str, default="artifacts")
45
+ args = parser.parse_args()
46
+
47
+ os.makedirs(args.outdir, exist_ok=True)
48
+
49
+ env = SmartGridMarketEnv()
50
+ random_curve = []
51
+ heuristic_curve = []
52
+ adaptive_curve = []
53
+
54
+ for ep in range(args.episodes):
55
+ random_curve.append(run_episode(env, "random", seed=1000 + ep))
56
+ heuristic_curve.append(run_episode(env, "heuristic", seed=2000 + ep))
57
+ adaptive_curve.append(run_episode(env, "adaptive", seed=3000 + ep))
58
+
59
+ csv_path = os.path.join(args.outdir, "baseline_metrics.csv")
60
+ with open(csv_path, "w", newline="", encoding="utf-8") as f:
61
+ writer = csv.writer(f)
62
+ writer.writerow(["episode", "random_avg_reward", "heuristic_avg_reward", "adaptive_avg_reward"])
63
+ for i, (r, h, a) in enumerate(zip(random_curve, heuristic_curve, adaptive_curve), start=1):
64
+ writer.writerow([i, round(r, 6), round(h, 6), round(a, 6)])
65
+
66
+ plt.figure(figsize=(10, 5))
67
+ plt.plot(range(1, args.episodes + 1), random_curve, label="Random baseline")
68
+ plt.plot(range(1, args.episodes + 1), heuristic_curve, label="Heuristic improved")
69
+ plt.plot(range(1, args.episodes + 1), adaptive_curve, label="Adaptive Stackelberg")
70
+ plt.xlabel("Episode")
71
+ plt.ylabel("Average reward")
72
+ plt.title("Baseline vs Improved Policy Reward")
73
+ plt.legend()
74
+ plt.grid(alpha=0.25)
75
+
76
+ fig_path = os.path.join(args.outdir, "reward_comparison.png")
77
+ plt.tight_layout()
78
+ plt.savefig(fig_path, dpi=160)
79
+
80
+ print(f"Saved metrics CSV: {csv_path}")
81
+ print(f"Saved reward plot: {fig_path}")
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
training/minimal_train.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from smartgrid_mas.train_baseline import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ main()