File size: 4,519 Bytes
88ac8ef
 
 
 
0b0b6b0
 
88ac8ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""HTTP client for the BrowserGym environment."""

from typing import Any, Dict

from openenv_core.http_env_client import HTTPEnvClient, StepResult
from browsergym_env.models import (
    BrowserGymAction,
    BrowserGymObservation,
    BrowserGymState,
)


class BrowserGymEnv(HTTPEnvClient[BrowserGymAction, BrowserGymObservation]):
    """HTTP client that talks to a BrowserGym environment server.

    This class only supplies the three serialization hooks
    (``_step_payload``, ``_parse_result``, ``_parse_state``); everything
    else is inherited from ``HTTPEnvClient``.

    BrowserGym exposes several web navigation benchmarks through a single
    interface:
    - MiniWoB++: 100+ training tasks (no external infrastructure needed!)
    - WebArena: 812 evaluation tasks (requires backend setup)
    - VisualWebArena: Visual navigation tasks
    - WorkArena: Enterprise automation tasks

    TRAINING example (MiniWoB - works out of the box):
        ```python
        from envs.browsergym_env import BrowserGymEnv, BrowserGymAction

        # Start an environment bound to a MiniWoB training task
        env = BrowserGymEnv.from_docker_image(
            "browsergym-env:latest",
            environment={
                "BROWSERGYM_BENCHMARK": "miniwob",
                "BROWSERGYM_TASK_NAME": "click-test",
            }
        )

        # Reset to obtain the first observation
        result = env.reset()
        print(f"Task: {result.observation.goal}")
        print(f"Page: {result.observation.text[:200]}")

        # Send an action and inspect the outcome
        action = BrowserGymAction(action_str="click('Submit button')")
        result = env.step(action)
        print(f"Reward: {result.reward}")
        print(f"Done: {result.done}")

        env.close()
        ```

    EVALUATION example (WebArena - requires backend):
        ```python
        from envs.browsergym_env import BrowserGymEnv, BrowserGymAction

        # Start an environment bound to a WebArena evaluation task
        env = BrowserGymEnv.from_docker_image(
            "browsergym-env:latest",
            environment={
                "BROWSERGYM_BENCHMARK": "webarena",
                "BROWSERGYM_TASK_NAME": "0",  # Task 0
                # WebArena backend URLs
                "SHOPPING": "http://your-server:7770",
                "GITLAB": "http://your-server:8023",
                # ... other URLs
            }
        )

        result = env.reset()
        # ... interact with environment
        env.close()
        ```

    Benchmark identifiers:
        - miniwob: MiniWoB++ tasks (training, no setup required)
        - webarena: WebArena tasks (evaluation, requires backend)
        - visualwebarena: Visual WebArena tasks (evaluation, requires backend)
        - workarena: WorkArena tasks (evaluation, requires backend)
    """

    def _step_payload(self, action: BrowserGymAction) -> Dict[str, Any]:
        """Serialize an action into the JSON body sent to the server.

        Args:
            action: Action whose ``action_str`` and ``metadata`` attributes
                are copied into the payload unchanged.

        Returns:
            A dict with the keys ``"action_str"`` and ``"metadata"``.
        """
        body: Dict[str, Any] = {}
        body["action_str"] = action.action_str
        body["metadata"] = action.metadata
        return body

    def _parse_result(
        self, payload: Dict[str, Any]
    ) -> StepResult[BrowserGymObservation]:
        """Build a ``StepResult`` from a decoded server response.

        Args:
            payload: Response dict. Observation fields are read from its
                ``"observation"`` entry (an empty dict when absent);
                ``"reward"`` and ``"done"`` are read from the top level.

        Returns:
            A ``StepResult`` wrapping the reconstructed observation. Missing
            text fields default to ``""``, ``last_action_error`` and ``done``
            to ``False``, ``metadata`` to ``{}``, and ``screenshot`` and
            ``reward`` to ``None``.
        """
        # Top-level episode fields are mirrored onto both the observation
        # and the StepResult, so look them up a single time.
        step_reward = payload.get("reward")
        step_done = payload.get("done", False)
        raw_obs = payload.get("observation", {})

        parsed_obs = BrowserGymObservation(
            text=raw_obs.get("text", ""),
            url=raw_obs.get("url", ""),
            screenshot=raw_obs.get("screenshot"),
            goal=raw_obs.get("goal", ""),
            axtree_txt=raw_obs.get("axtree_txt", ""),
            pruned_html=raw_obs.get("pruned_html", ""),
            error=raw_obs.get("error", ""),
            last_action_error=raw_obs.get("last_action_error", False),
            done=step_done,
            reward=step_reward,
            metadata=raw_obs.get("metadata", {}),
        )

        return StepResult(
            observation=parsed_obs,
            reward=step_reward,
            done=step_done,
        )

    def _parse_state(self, payload: Dict[str, Any]) -> BrowserGymState:
        """Build a ``BrowserGymState`` from a decoded state response.

        Args:
            payload: Response dict holding the state fields at the top level.

        Returns:
            A ``BrowserGymState``. Missing string fields default to ``""``,
            ``step_count`` to ``0``, ``cum_reward`` to ``0.0``, and
            ``episode_id``, ``task_id`` and ``max_steps`` to ``None``.
        """
        lookup = payload.get
        return BrowserGymState(
            episode_id=lookup("episode_id"),
            step_count=lookup("step_count", 0),
            benchmark=lookup("benchmark", ""),
            task_name=lookup("task_name", ""),
            task_id=lookup("task_id"),
            goal=lookup("goal", ""),
            current_url=lookup("current_url", ""),
            max_steps=lookup("max_steps"),
            cum_reward=lookup("cum_reward", 0.0),
        )