"""Extract characters, emotions, and relationships from text with LangExtract.

The script reads ``GEMINI_API_KEY`` from the environment (optionally loaded
from a ``.env`` file), runs a few-shot extraction over a short input text,
saves the annotated result to ``extraction_results.jsonl`` in the current
directory, and writes an HTML visualization to ``visualization.html``.
"""

import os
import textwrap

import langextract as lx
from dotenv import load_dotenv

# Step 1: Load environment variables from a .env file and expose the key
# under the name this script hands to LangExtract.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    # os.environ only accepts strings; assigning None would raise an opaque
    # TypeError, so fail early with an actionable message instead.
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in the environment or in a "
        ".env file."
    )
os.environ["LANGEXTRACT_API_KEY"] = GEMINI_API_KEY

# Step 2: Define the prompt and extraction rules.
prompt = textwrap.dedent(
    """\
    Extract characters, emotions, and relationships in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context."""
)

# Step 3: Provide a high-quality example to guide the model.
examples = [
    lx.data.ExampleData(
        text=(
            "ROMEO. But soft! What light through yonder window breaks? "
            "It is the east, and Juliet is the sun."
        ),
        extractions=[
            lx.data.Extraction(
                extraction_class="character",
                extraction_text="ROMEO",
                attributes={"emotional_state": "wonder"},
            ),
            lx.data.Extraction(
                extraction_class="emotion",
                extraction_text="But soft!",
                attributes={"feeling": "gentle awe"},
            ),
            lx.data.Extraction(
                extraction_class="relationship",
                extraction_text="Juliet is the sun",
                attributes={"type": "metaphor"},
            ),
        ],
    )
]

# Step 4: The input text to be processed.
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Step 5: Run the extraction.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-flash",
)

# Step 6: Save the results to a JSONL file.
lx.io.save_annotated_documents(
    [result], output_name="extraction_results.jsonl", output_dir="."
)

# Step 7: Generate the visualization from the saved file.
html_content = lx.visualize("extraction_results.jsonl")
with open("visualization.html", "w", encoding="utf-8") as f:
    # NOTE(review): per the LangExtract README, visualize() may return a
    # notebook HTML object (with the markup in `.data`) rather than a plain
    # string when running under Jupyter/Colab; handle both.
    if hasattr(html_content, "data"):
        f.write(html_content.data)
    else:
        f.write(html_content)