Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| from pathlib import Path | |
| # Load data | |
def load_data(data_path=Path("tasks/reactions/01_gaza_ceasefire_resolution_latest.json")):
    """Load a simulation result file and return the parsed JSON.

    Args:
        data_path: Location of the results JSON. Defaults to the "latest"
            Gaza ceasefire run, which is what the app displays.

    Returns:
        The decoded JSON document (whatever `json.load` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default
    # encoding, which would garble non-ASCII text on some systems.
    with open(data_path, 'r', encoding="utf-8") as f:
        return json.load(f)
def load_system_prompt(country_slug):
    """Load the system prompt for a specific country.

    Args:
        country_slug: Directory name of the country under
            `agents/representatives/`.

    Returns:
        The contents of that country's `system-prompt.md`, or a fixed
        "not found" message when the file cannot be read.
    """
    prompt_path = Path(f"agents/representatives/{country_slug}/system-prompt.md")
    try:
        return prompt_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return "System prompt not found for this country."
def load_motion():
    """Load the ceasefire resolution text.

    Returns:
        The contents of `tasks/motions/01_gaza_ceasefire_resolution.md`, or a
        fixed "not found" message when the file cannot be read.
    """
    motion_path = Path("tasks/motions/01_gaza_ceasefire_resolution.md")
    try:
        return motion_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # Only I/O and decoding failures fall back to the placeholder; other
        # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        return "Motion text not found."
def create_vote_summary_chart(data):
    """Build a pie chart of the yes / no / abstain tallies.

    Args:
        data: Results mapping; reads `data['vote_summary']` (keys `yes`,
            `no`, `abstain`) and `data['total_votes']`.

    Returns:
        A plotly `go.Figure` containing a single pie trace.
    """
    tallies = data['vote_summary']
    counts = [tallies[key] for key in ('yes', 'no', 'abstain')]

    # Slice colours follow the label order: green, red, orange.
    pie = go.Pie(
        labels=['Yes', 'No', 'Abstain'],
        values=counts,
        marker=dict(colors=['#2ecc71', '#e74c3c', '#f39c12']),
        textinfo='label+value+percent',
        textfont_size=16,
    )

    chart = go.Figure(data=[pie])
    chart.update_layout(
        title=f"Voting Results (Total: {data['total_votes']} countries)",
        height=400,
        showlegend=True,
    )
    return chart
def get_country_response(country_name, data):
    """Get the full response for a specific country.

    Args:
        country_name: Display name to look up; compared case-insensitively.
        data: Results mapping; reads `data['votes']`, whose entries provide
            `country`, `vote`, `statement` and `country_slug`.

    Returns:
        A `(markdown_text, country_slug)` tuple. The slug is `""` when no
        country was given or none matched.
    """
    if not country_name:
        return "Select a country to see their full response", ""

    # First entry whose name matches, ignoring case.
    found = next(
        (entry for entry in data['votes']
         if entry['country'].lower() == country_name.lower()),
        None,
    )
    if found is None:
        return "Country not found", ""

    response = f"""
**Vote:** {found['vote'].upper()}
**Diplomatic Statement:**
{found['statement']}
"""
    return response, found['country_slug']
# Read everything the UI needs once, at import time.
data = load_data()
# Alphabetical display names for the country dropdowns.
country_names = sorted(entry['country'] for entry in data['votes'])
motion_text = load_motion()
# JSON schema for structured output.
# Illustrative only: the `"yes" | "no" | "abstain"` alternation is not valid
# JSON. In this file the string is only passed to gr.Code for display.
json_schema = """{
"vote": "yes" | "no" | "abstain",
"statement": "Brief explanation (2-4 sentences)"
}"""

# User prompt template.
# {RESOLUTION_TEXT} and {COUNTRY_NAME} are placeholders. The text also contains
# literal JSON braces, so str.format() cannot be applied to it as-is. In this
# file the template is only displayed via gr.Code; it is never filled in here.
user_prompt_template = """You are voting on the following UN General Assembly resolution:
{RESOLUTION_TEXT}
You must respond with a JSON object containing:
1. "vote": Your vote - must be exactly one of: "yes", "no", or "abstain"
2. "statement": A brief statement (2-4 sentences) explaining your country's position
IMPORTANT: Your statement must articulate {COUNTRY_NAME}'s UNIQUE perspective, national interests, and specific reasons for this vote. Reference your country's:
- Historical positions on this issue
- Regional concerns and alliances
- Domestic political considerations
- Specific clauses in the resolution that align with or contradict your interests
Avoid generic diplomatic language. Be specific to {COUNTRY_NAME}'s situation and worldview.
Your response must be valid JSON in this exact format:
{
"vote": "yes",
"statement": "Your explanation here."
}"""
# Create Gradio interface
#
# The whole UI is declared inside a single gr.Blocks context bound to `demo`:
# an intro, five tabs (architecture, prompt design, output schema, task
# execution, case study) and an "About" footer.
#
# NOTE(review): this listing carried no indentation, so the nesting below was
# reconstructed from the statement order. In particular the "Compare examples"
# note is assumed to sit in the left column and the "About" footer is assumed
# to sit outside the tabs - confirm against the deployed layout.
# Multi-line string contents are kept flush-left exactly as listed.

# Display name -> slug, built once. setdefault keeps the first entry for a
# duplicated name, matching the list.index() lookup this replaces.
_slug_by_country = {}
for _vote in data['votes']:
    _slug_by_country.setdefault(_vote['country'], _vote['country_slug'])

# Start on the United States when it is part of the run; otherwise fall back to
# the first available country so partial runs (e.g. `--sample 5`) still open
# with a selection that exists in the dropdown choices.
if "United States" in _slug_by_country:
    _default_country = "United States"
else:
    _default_country = country_names[0] if country_names else None


def _prompt_for_country(country):
    """Return the system prompt for a dropdown value.

    A cleared dropdown (None) or an unknown name yields the same "not found"
    message that `load_system_prompt` uses, instead of raising.
    """
    slug = _slug_by_country.get(country)
    if slug is None:
        return "System prompt not found for this country."
    return load_system_prompt(slug)


def _vote_share(count):
    """Return `count` as a percentage of all votes (0.0 when there are none)."""
    total = data['total_votes']
    return count / total * 100 if total else 0.0


with gr.Blocks(title="AI Agent UN Experiment", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# AI Agent United Nations: Multi-Agent Simulation System
## Modeling International Diplomacy with Structured AI Agents
An experimental framework for simulating UN voting behavior using large language models.
Each of 195 UN member states is represented by an AI agent with structured system prompts
that must produce constrained JSON outputs for resolutions.
""")

    with gr.Tab("System Architecture"):
        gr.Markdown("""
## System Design
This is a multi-agent AI system designed to simulate diplomatic decision-making in international forums.
### Core Components
**1. Agent System Prompts**
- Each country has a unique system prompt (195 total)
- Prompts are generic templates - identical structure for all countries
- Only country name and P5 status differ between prompts
- No country-specific policy positions are hardcoded
- AI must infer positions from training data about each country
**2. Structured Output Constraints**
- All agents must return valid JSON
- Strict schema enforcement
- Two required fields: `vote` and `statement`
- Vote must be one of: `yes`, `no`, `abstain`
- Statement must be 2-4 sentences
**3. Task Running Model**
- Python script iterates through all 195 country agents
- Each agent receives: system prompt + resolution text + output schema
- Agent processes and returns structured JSON response
- Results aggregated into single JSON file with metadata
**4. Model Configuration**
- Primary model: Claude 3.5 Sonnet (claude-3-5-sonnet-20241022)
- Temperature: 0.7 (balance between consistency and variation)
- Max tokens: 800 per response
- Provider: Anthropic API (cloud)
### What This Tests
- **LLM Knowledge**: How well models understand different countries' foreign policies
- **Structured Outputs**: Ability to consistently produce valid JSON under constraints
- **Multi-Agent Systems**: Coordinating 195 independent AI agents
- **Prompt Engineering**: Generic templates producing specific behaviors
- **Consistency**: Whether similar countries produce similar responses
""")

    with gr.Tab("System Prompt Design"):
        gr.Markdown("""
## Agent System Prompt Template
All country agents use the same prompt structure. The AI must infer country-specific positions
from its training data about each nation's history, alliances, and interests.
**Template Components:**
1. **Role and Identity** - Defines the country and UN membership status
2. **Core Responsibilities** - Instructions to represent national interests
3. **Behavioral Guidelines** - How to stay in character diplomatically
4. **Key Considerations** - What factors to analyze (security, economics, alliances)
5. **Instructions** - Process for evaluating and voting on resolutions
**View any country's system prompt below:**
""")
        with gr.Row():
            with gr.Column(scale=1):
                country_selector = gr.Dropdown(
                    choices=country_names,
                    label="Select Country",
                    value=_default_country
                )
                gr.Markdown("""
**Compare examples:**
- P5 members: United States, China, Russia, United Kingdom, France
- Regional powers: Brazil, India, South Africa, Nigeria
- Small states: Palau, Tuvalu, Monaco
- Key stakeholders: Israel, Palestine, Egypt, Iran
""")
            with gr.Column(scale=2):
                system_prompt_display = gr.Markdown(
                    value=_prompt_for_country(_default_country),
                    label="System Prompt"
                )
        # Swap the displayed prompt whenever another country is picked.
        country_selector.change(
            fn=_prompt_for_country,
            inputs=country_selector,
            outputs=system_prompt_display
        )

    with gr.Tab("Structured Output Schema"):
        gr.Markdown("""
## JSON Output Constraints
Every agent must produce a valid JSON response conforming to this schema:
""")
        gr.Code(json_schema, language="json", label="Required Output Schema")
        gr.Markdown("""
### Validation Rules
**Vote Field:**
- Type: String (enum)
- Allowed values: `"yes"`, `"no"`, `"abstain"`
- Case-insensitive on input, normalized to lowercase
- Required field - missing value causes error
**Statement Field:**
- Type: String
- Length: 2-4 sentences recommended
- Must be country-specific (not generic)
- Must reference national interests and historical positions
- Required field - missing value causes error
### Error Handling
If an agent produces invalid output:
1. JSON parsing attempted with markdown stripping
2. If parsing fails: agent recorded as `abstain` with error flag
3. If validation fails: agent recorded as `abstain` with error flag
4. Error logged for debugging but simulation continues
### User Prompt Template
Below is the exact prompt template sent to each agent (with variables filled in):
""")
        gr.Code(user_prompt_template, language="markdown", label="User Prompt Template")

    with gr.Tab("Task Execution"):
        gr.Markdown("""
## How Simulations Run
### Execution Flow
```
1. Load motion text from tasks/motions/{motion_id}.md
2. Load country list from data/bodies/full-member-states.json
3. For each country (195 total):
a. Load country's system prompt
b. Construct user prompt with motion text
c. Send to AI model (system + user prompt)
d. Parse and validate JSON response
e. Store result with metadata
4. Aggregate all responses into single JSON file
5. Calculate vote summary statistics
6. Save timestamped and "latest" versions
```
### Command Line Interface
**Basic usage:**
```bash
python scripts/run_motion.py 01_gaza_ceasefire_resolution
```
**With options:**
```bash
# Use specific model
python scripts/run_motion.py 01_gaza_ceasefire_resolution --model claude-3-5-sonnet-20241022
# Test with sample (5 countries only)
python scripts/run_motion.py 01_gaza_ceasefire_resolution --sample 5
# Use local model (Ollama)
python scripts/run_motion.py 01_gaza_ceasefire_resolution --provider local --model llama3
```
### Output Format
Results saved to `tasks/reactions/` as JSON:
- `{motion_id}_{timestamp}.json` - Timestamped archive
- `{motion_id}_latest.json` - Latest simulation (overwritten)
**Metadata included:**
- `motion_id`: Identifier for the resolution
- `timestamp`: ISO 8601 timestamp
- `provider`: cloud or local
- `model`: Model identifier used
- `total_votes`: Number of countries
- `vote_summary`: Counts by vote type
- `votes`: Array of all country responses
### Configuration
Environment variables (`.env` file):
```
ANTHROPIC_API_KEY=your_key_here
MODEL_NAME=claude-3-5-sonnet-20241022
```
""")

    with gr.Tab("Case Study: Gaza Ceasefire Resolution"):
        gr.Markdown("""
## Example Simulation Run
This demonstrates the system with a real UN resolution about a Gaza ceasefire.
All 195 country agents voted on this resolution using the system described above.
""")
        gr.Markdown("### The Resolution")
        gr.Markdown(motion_text)
        gr.Markdown("### Aggregated Results")
        with gr.Row():
            with gr.Column():
                vote_chart = gr.Plot(value=create_vote_summary_chart(data))
            with gr.Column():
                # Percentages go through _vote_share so an empty run renders
                # 0.0% instead of raising ZeroDivisionError at import time.
                gr.Markdown(f"""
### Vote Summary
- **Yes:** {data['vote_summary']['yes']} ({_vote_share(data['vote_summary']['yes']):.1f}%)
- **No:** {data['vote_summary']['no']} ({_vote_share(data['vote_summary']['no']):.1f}%)
- **Abstain:** {data['vote_summary']['abstain']} ({_vote_share(data['vote_summary']['abstain']):.1f}%)
### Simulation Metadata
- **Model:** {data['model']}
- **Date:** {data['timestamp'][:10]}
- **Countries:** {data['total_votes']}
- **Provider:** {data['provider']}
""")
        gr.Markdown("### Individual Country Responses")
        country_inspector = gr.Dropdown(
            choices=country_names,
            label="Select Country to View Response",
            value=_default_country
        )
        with gr.Row():
            with gr.Column():
                gr.Markdown("**System Prompt Received:**")
                inspector_prompt = gr.Markdown(value=_prompt_for_country(_default_country))
            with gr.Column():
                gr.Markdown("**JSON Output Produced:**")
                inspector_response = gr.Markdown(value=get_country_response(_default_country, data)[0])

        def update_inspector(country):
            """Return `(system_prompt, response_markdown)` for the selected country."""
            response, slug = get_country_response(country, data)
            # An empty slug means nothing was selected or nothing matched.
            prompt = load_system_prompt(slug) if slug else "Country not found"
            return prompt, response

        country_inspector.change(
            fn=update_inspector,
            inputs=country_inspector,
            outputs=[inspector_prompt, inspector_response]
        )
        gr.Markdown("### Complete Response Data")
        # One table row per country, with the vote upper-cased for display.
        votes_data = pd.DataFrame([
            {
                'Country': v['country'],
                'Vote': v['vote'].upper(),
                'Statement': v['statement']
            }
            for v in data['votes']
        ])
        gr.Dataframe(
            value=votes_data,
            height=600,
            interactive=False,
            column_widths=["15%", "10%", "75%"]
        )

    gr.Markdown("""
---
## About This Project
**AI Agent UN** is an experimental framework for simulating international diplomatic decision-making
using multi-agent AI systems with structured outputs.
### Research Applications
- Testing LLM knowledge of geopolitics and international relations
- Evaluating structured output consistency across hundreds of agents
- Studying emergent behavior in multi-agent systems
- Educational demonstrations of diplomatic diversity
### Technical Implementation
- **Model:** Claude 3.5 Sonnet (claude-3-5-sonnet-20241022)
- **Agents:** 195 (one per UN member state)
- **System Prompts:** Generic templates (country-agnostic)
- **Output Format:** Structured JSON with validation
- **Execution:** Python CLI with parallel processing support
- **Storage:** JSON files with metadata
### Limitations and Disclaimers
This is a simulation for research and educational purposes:
- AI positions are based on training data, not actual policies
- Does NOT predict real government decisions
- Should NOT be considered authoritative
- Real diplomacy involves classified intel and human judgment
- Training data may be outdated or incomplete
### Open Source
All code, prompts, and data are open source:
- GitHub Repository: https://github.com/danielrosehill/AI-Agent-UN
- System Prompts: https://github.com/danielrosehill/AI-Agent-UN/tree/main/agents/representatives
- Execution Script: https://github.com/danielrosehill/AI-Agent-UN/blob/main/scripts/run_motion.py
- Documentation: https://github.com/danielrosehill/AI-Agent-UN/blob/main/README.md
---
Built with Gradio | Powered by Anthropic Claude
""")
# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()