YourUsername committed
Commit c3bea8b · 1 Parent(s): 2cae13b
add clone
- Filesystem/demo.txt +1 -0
- Modules/Agent_Terminal.py +212 -0
- Modules/Code_Interpreter.py +60 -0
- Modules/Deep_Research.py +595 -0
- Modules/File_System.py +648 -0
- Modules/Generate_Image.py +141 -0
- Modules/Generate_Speech.py +677 -0
- Modules/Generate_Video.py +184 -0
- Modules/Memory_Manager.py +253 -0
- Modules/Obsidian_Vault.py +495 -0
- Modules/Shell_Command.py +139 -0
- Modules/Web_Fetch.py +317 -0
- Modules/Web_Search.py +499 -0
- Modules/__init__.py +1 -0
- Modules/_docstrings.py +112 -0
- Obsidian/demo.md +3 -0
- README.md +209 -5
- app.py +318 -0
- memories.json +20 -0
- requirements.txt +14 -0
- styles.css +308 -0
Filesystem/demo.txt
ADDED
@@ -0,0 +1 @@
Hello, World!
Modules/Agent_Terminal.py
ADDED
@@ -0,0 +1,212 @@
from __future__ import annotations

import os
import sys
import types
import ast
from io import StringIO
from typing import Annotated
import importlib.metadata

import gradio as gr
from ._docstrings import autodoc
from .File_System import ROOT_DIR, File_System
from .Web_Fetch import Web_Fetch
from .Web_Search import Web_Search
from .Memory_Manager import Memory_Manager
from .Generate_Speech import Generate_Speech, List_Kokoro_Voices, List_Supertonic_Voices
from .Generate_Image import Generate_Image
from .Generate_Video import Generate_Video
from .Deep_Research import Deep_Research
from .Obsidian_Vault import Obsidian_Vault
from .Shell_Command import Shell_Command
from .Code_Interpreter import Code_Interpreter

from app import _log_call_end, _log_call_start, _truncate_for_log

def search_packages(query: str = "") -> str:
    """Search for installed Python packages by name. If query is empty, lists all."""
    packages = []
    query = query.lower()
    for dist in importlib.metadata.distributions():
        name = dist.metadata['Name']
        if query in name.lower():
            packages.append(f"{name} ({dist.version})")
    packages.sort()
    if not packages:
        return f"No packages found matching '{query}'."
    return "\n".join(packages)

def _get_tools_map():
    return {
        "Web_Fetch": Web_Fetch,
        "Web_Search": Web_Search,
        "Memory_Manager": Memory_Manager,
        "Generate_Speech": Generate_Speech,
        "List_Kokoro_Voices": List_Kokoro_Voices,
        "List_Supertonic_Voices": List_Supertonic_Voices,
        "Generate_Image": Generate_Image,
        "Generate_Video": Generate_Video,
        "Deep_Research": Deep_Research,
        "File_System": File_System,
        "Obsidian_Vault": Obsidian_Vault,
        "Shell_Command": Shell_Command,
        "Code_Interpreter": Code_Interpreter,
    }

def list_tools() -> list[str]:
    """List all available tools in the Code Interpreter environment."""
    return list(_get_tools_map().keys())

def search_tools(query: str) -> str:
    """Search for tools by name or description. Returns usage info for matches."""
    query = query.lower()
    matches = []
    tools = _get_tools_map()
    for name, func in tools.items():
        doc = (func.__doc__ or "").lower()
        if query in name.lower() or query in doc:
            matches.append((name, func))

    if not matches:
        return f"No tools found matching '{query}'."

    output = []
    for name, func in matches:
        output.append(f"--- {name} ---")
        output.append(func.__doc__ or "No documentation available.")
        output.append("")
    return "\n".join(output)

def usage(tool_name: str) -> str:
    """Get detailed usage information for a specific tool."""
    tools = _get_tools_map()
    if tool_name not in tools:
        return f"Tool '{tool_name}' not found. Available tools: {', '.join(tools.keys())}"
    func = tools[tool_name]
    return f"--- {tool_name} ---\n{func.__doc__ or 'No documentation available.'}"

def _initialize_mock_modules():
    """
    Registers a mock 'functions' module in sys.modules so that LLMs
    can do 'from functions import ...' without error.
    """
    mock_module = types.ModuleType("functions")

    # Add tools
    for name, tool in _get_tools_map().items():
        setattr(mock_module, name, tool)

    # Add helpers
    helpers = {
        "list_tools": list_tools,
        "search_tools": search_tools,
        "usage": usage,
        "search_packages": search_packages,
    }
    for name, func in helpers.items():
        setattr(mock_module, name, func)

    sys.modules["functions"] = mock_module

_initialize_mock_modules()

# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Executes Python code as the unified interface for the entire tools ecosystem. "
    "All tool interactions must happen through this code-execution gateway. "
    "Use Agent Terminal repeatedly whenever you need to chain or combine tool operations. "
    "Available tools: `Web_Fetch`, `Web_Search`, `Code_Interpreter`, `Shell_Command`, `File_System`, `Obsidian_Vault`, `Memory_Manager`, `Generate_Speech`, `Generate_Image`, `Generate_Video`, `Deep_Research`."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Agent_Terminal(input: Annotated[str, (
    "Python source code to run; stdout is captured and returned. "
    "Execute these commands: "
    "`search_tools('query')` to search for tools by name or capability; "
    "`list_tools()` to list all available tools; "
    "`usage('ToolName')` to inspect a tool’s expected input parameters; "
    "`search_packages('query')` to search for installed Python libraries."
)]) -> str:
    _log_call_start("Agent_Terminal", input=_truncate_for_log(input or "", 300))
    if input is None:
        result = "No code provided."
        _log_call_end("Agent_Terminal", result)
        return result
    old_stdout = sys.stdout
    old_cwd = os.getcwd()
    redirected_output = sys.stdout = StringIO()

    # Prepare the execution environment with all tools
    tools_env = {
        "Web_Fetch": Web_Fetch,
        "Web_Search": Web_Search,
        "Memory_Manager": Memory_Manager,
        "Generate_Speech": Generate_Speech,
        "List_Kokoro_Voices": List_Kokoro_Voices,
        "List_Supertonic_Voices": List_Supertonic_Voices,
        "Generate_Image": Generate_Image,
        "Generate_Video": Generate_Video,
        "Deep_Research": Deep_Research,
        "File_System": File_System,
        "Obsidian_Vault": Obsidian_Vault,
        "Shell_Command": Shell_Command,
        "Code_Interpreter": Code_Interpreter,
        "list_tools": list_tools,
        "search_tools": search_tools,
        "usage": usage,
        "search_packages": search_packages,
        "print": print,  # Ensure print is available
        "__builtins__": __builtins__,
    }

    try:
        os.chdir(ROOT_DIR)

        # Parse code to check if the last statement is an expression
        tree = ast.parse(input)
        if tree.body and isinstance(tree.body[-1], ast.Expr):
            last_node = tree.body.pop()

            # Execute preceding statements
            if tree.body:
                exec(compile(tree, filename="<string>", mode="exec"), tools_env)

            # Evaluate and print the last expression
            expr = compile(ast.Expression(last_node.value), filename="<string>", mode="eval")
            result_val = eval(expr, tools_env)
            if result_val is not None:
                print(result_val)
        else:
            exec(input, tools_env)

        result = redirected_output.getvalue()
    except Exception as exc:  # pylint: disable=broad-except
        result = str(exc)
    finally:
        sys.stdout = old_stdout
        try:
            os.chdir(old_cwd)
        except Exception:
            pass
    _log_call_end("Agent_Terminal", _truncate_for_log(result))
    return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Agent_Terminal,
        inputs=gr.Code(label="Python Code", language="python"),
        outputs=gr.Textbox(label="Output", lines=5, max_lines=20),
        title="Agent Terminal",
        description="<div style=\"text-align:center\">Interact with all other tools via a Python API. Reduces token usage by 90%.</div>",
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Agent_Terminal", "build_interface"]
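Not part of the commit — a minimal usage sketch of the terminal's last-expression behavior, assuming the Modules package is importable from the repo root. Because Agent_Terminal evaluates a trailing expression and prints its value, a snippet can end with a bare call and its result still appears in the captured output.

from Modules.Agent_Terminal import Agent_Terminal

snippet = """
print(len(list_tools()))   # helper injected into the execution environment
usage('Web_Search')        # trailing expression: its value is printed automatically
"""
print(Agent_Terminal(snippet))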
Modules/Code_Interpreter.py
ADDED
@@ -0,0 +1,60 @@
from __future__ import annotations

import os
import sys
from io import StringIO
from typing import Annotated

import gradio as gr
from ._docstrings import autodoc
from .File_System import ROOT_DIR

from app import _log_call_end, _log_call_start, _truncate_for_log

# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Execute Python code from the tool root; returns captured stdout or the exception text."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Code_Interpreter(code: Annotated[str, "Python source code to run; stdout is captured and returned."]) -> str:
    _log_call_start("Code_Interpreter", code=_truncate_for_log(code or "", 300))
    if code is None:
        result = "No code provided."
        _log_call_end("Code_Interpreter", result)
        return result
    old_stdout = sys.stdout
    old_cwd = os.getcwd()
    redirected_output = sys.stdout = StringIO()
    try:
        os.chdir(ROOT_DIR)
        exec(code)
        result = redirected_output.getvalue()
    except Exception as exc:  # pylint: disable=broad-except
        result = str(exc)
    finally:
        sys.stdout = old_stdout
        try:
            os.chdir(old_cwd)
        except Exception:
            pass
    _log_call_end("Code_Interpreter", _truncate_for_log(result))
    return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Code_Interpreter,
        inputs=gr.Code(label="Python Code", language="python"),
        outputs=gr.Textbox(label="Output", lines=5, max_lines=20),
        title="Code Interpreter",
        description="<div style=\"text-align:center\">Execute Python code and see the output.</div>",
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Code_Interpreter", "build_interface"]
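Not part of the commit — a short sketch of how the stdout capture behaves, assuming the Modules package is importable:

from Modules.Code_Interpreter import Code_Interpreter

# Anything printed by the snippet comes back as the return value.
print(Code_Interpreter("for i in range(3):\n    print(i * i)"))   # 0, 1, 4 on separate lines
# Exceptions are converted to their message text instead of being raised.
print(Code_Interpreter("1 / 0"))                                   # division by zero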
Modules/Deep_Research.py
ADDED
@@ -0,0 +1,595 @@
from __future__ import annotations

import os
import re
import tempfile
import time
import uuid
from collections import OrderedDict, deque
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Annotated, Callable, Dict, List, Tuple
from urllib.parse import urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from ddgs import DDGS
from huggingface_hub import InferenceClient

from .Web_Fetch import _fullpage_markdown_from_soup, _http_get_enhanced
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
from ._docstrings import autodoc
from .File_System import ROOT_DIR

HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")

# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Write a summary of what the user wants to research, and "
    "run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
    "returns (Markdown report, newline-separated source links, downloadable report path). "
    "Provide the user with one-paragraph summary of the research report and the txt file in this format ``."
)

RESEARCHER_SYSTEM_PROMPT = (
    "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
    "You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
    "<report_format>\n"
    "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
    "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
    "</report_format>\n\n"
    "<document_structure>\n"
    "- Always begin with a clear title using a single # header\n"
    "- Organize content into major sections using ## headers\n"
    "- Further divide into subsections using ### headers\n"
    "- Use #### headers sparingly for special subsections\n"
    "- Never skip header levels\n"
    "- Write multiple paragraphs per section or subsection\n"
    "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
    "- Never use lists, instead always use text or tables\n\n"
    "Mandatory Section Flow:\n"
    "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
    "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
    "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
    "</document_structure>\n\n"
    "<planning_rules>\n"
    "- Always break it down into multiple steps\n"
    "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
    "- Create the best report that weighs all the evidence from the sources\n"
    "- Use the current date supplied in the first user message to contextualize findings\n"
    "- Make sure that your final report addresses all parts of the query\n"
    "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
    "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
    "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
    "</planning_rules>\n\n"
)

FILTERER_SYSTEM_PROMPT = (
    "You are Nymbot Filterer, an analyst who selects the most relevant sources for a research task. "
    "You will be given a summary of the research topic (and optional search queries) followed by multiple fetched documents. "
    "Each document includes its URL and a truncated excerpt. Evaluate how well each source helps answer the research topic. "
    "Return only the URLs that should be used for the final research step. Output plain text with exactly one URL per line and no additional commentary, bullets, numbering, or explanations. "
    "If no sources are relevant, return an empty string."
)


class SlowHost(Exception):
    pass


def _normalize_query(q: str) -> str:
    if not q:
        return ""
    repl = {"“": '"', "”": '"', "‘": "'", "’": "'", "`": "'"}
    for key, value in repl.items():
        q = q.replace(key, value)
    q = re.sub(r"\s+", " ", q)
    q = re.sub(r'"\s+"', " ", q)
    q = q.strip().strip('"').strip()
    return q


def _search_urls_only(query: str, max_results: int) -> list[str]:
    if not query or not query.strip() or max_results <= 0:
        return []
    urls: list[str] = []
    try:
        _search_rate_limiter.acquire()
        with DDGS() as ddgs:
            for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
                url = (item.get("href") or item.get("url") or "").strip()
                if url:
                    urls.append(url)
    except Exception:
        pass
    seen = set()
    deduped = []
    for url in urls:
        if url not in seen:
            seen.add(url)
            deduped.append(url)
    return deduped


def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
    try:
        resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        msg = str(exc)
        if "timed out" in msg.lower():
            raise SlowHost(msg) from exc
        return ""
    final_url = str(resp.url)
    ctype = resp.headers.get("Content-Type", "")
    if "html" not in ctype.lower():
        return ""
    resp.encoding = resp.encoding or resp.apparent_encoding
    html = resp.text
    soup = BeautifulSoup(html, "lxml")
    md_text = _fullpage_markdown_from_soup(soup, final_url, "")
    if max_chars > 0 and len(md_text) > max_chars:
        md_text = md_text[:max_chars]
    return md_text


def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
    out = []
    total = 0
    truncated = False
    for part in parts:
        if not part:
            continue
        if total + len(part) > max_chars:
            out.append(part[: max(0, max_chars - total)])
            truncated = True
            break
        out.append(part)
        total += len(part)
    return ("\n\n".join(out), truncated)


def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
    sources_blocks: List[str] = []
    indexed_urls: List[str] = []
    for idx, url in enumerate(url_list, start=1):
        text = pages_map.get(url, "").strip()
        if not text:
            continue
        indexed_urls.append(f"[{idx}] {url}")
        sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
    sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
    prompt_parts: List[str] = []
    prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
    populated = [q for q in queries if q and q.strip()]
    if populated:
        prompt_parts.append("<search_queries>\n" + "\n".join(f"- {q.strip()}" for q in populated) + "\n</search_queries>\n")
    if indexed_urls:
        prompt_parts.append("<sources_list>\n" + "\n".join(indexed_urls) + "\n</sources_list>\n")
    prompt_parts.append("<fetched_documents>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</fetched_documents>")
    return "\n\n".join(prompt_parts)


def _build_filter_prompt(summary: str, queries: List[str], pages_map: Dict[str, str]) -> str:
    populated = [q for q in queries if q and q.strip()]
    summary_text = summary or ""
    prompt_sections: List[str] = []
    prompt_sections.append("<research_topic_summary>\n" + summary_text + "\n</research_topic_summary>")
    if populated:
        prompt_sections.append("<search_queries>\n" + "\n".join(populated) + "\n</search_queries>")
    sources: List[str] = []
    for idx, (url, text) in enumerate(pages_map.items(), start=1):
        content = text.strip()
        if not content:
            continue
        sources.append(f"[Source {idx}] URL: {url}\n\n{content}")
    sources_joined, truncated = _truncate_join(sources, max_chars=60_000)
    prompt_sections.append("<candidate_sources>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</candidate_sources>")
    prompt_sections.append(
        "<task>\nIdentify which of the provided URLs should be retained for the final research synthesis. "
        "Consider coverage, credibility, and relevance to the research topic. "
        "Return ONLY the URLs you choose, with one URL per line and no additional text.\n</task>"
    )
    return "\n\n".join(prompt_sections)


def _parse_filterer_output(raw: str, allowed_urls: List[str]) -> List[str]:
    if not raw:
        return []
    allowed_set = {url.strip(): idx for idx, url in enumerate(allowed_urls)}
    found_indices: set[int] = set()
    for line in raw.splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        if candidate in allowed_set:
            found_indices.add(allowed_set[candidate])
            continue
        match = re.search(r"https?://[^\s]+", candidate)
        if not match:
            continue
        url = match.group(0).rstrip(".,);]")
        if url in allowed_set:
            found_indices.add(allowed_set[url])
    selected = [allowed_urls[idx] for idx in sorted(found_indices)]
    return selected


def _write_report_tmp(text: str) -> str:
    filename = f"research_report_{uuid.uuid4().hex}.txt"
    path = os.path.join(ROOT_DIR, filename)
    with open(path, "w", encoding="utf-8") as file:
        file.write(text)
    return path


def _fetch_pages_within_budget(urls: List[str], char_limit: int, time_left_fn: Callable[[], float]) -> OrderedDict:
    pages: dict[str, str] = {}
    if not urls:
        return OrderedDict()
    queue = deque(urls)
    attempts: dict[str, int] = {url: 0 for url in urls}
    max_attempts = 2
    max_workers = min(12, max(4, len(urls)))
    in_flight: dict[Future, str] = {}
    delayed: list[tuple[float, str]] = []

    def schedule_next(executor: ThreadPoolExecutor) -> None:
        while queue and len(in_flight) < max_workers:
            url = queue.popleft()
            if url in pages:
                continue
            attempts.setdefault(url, 0)
            if attempts[url] >= max_attempts:
                continue
            attempts[url] += 1
            tl = time_left_fn()
            if tl <= 0.1:
                return
            per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
            future = executor.submit(_fetch_page_markdown_fast, url, char_limit, per_timeout)
            in_flight[future] = url

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        schedule_next(executor)
        while (in_flight or queue or delayed) and time_left_fn() > 0.2:
            now = time.time()
            if delayed:
                ready: list[tuple[float, str]] = []
                not_ready: list[tuple[float, str]] = []
                for ready_time, delayed_url in delayed:
                    (ready if ready_time <= now else not_ready).append((ready_time, delayed_url))
                delayed = not_ready
                for _, delayed_url in ready:
                    queue.append(delayed_url)
                if ready:
                    schedule_next(executor)
            done = [future for future in list(in_flight.keys()) if future.done()]
            if not done:
                if not queue and delayed:
                    next_ready = min((t for t, _ in delayed), default=time.time())
                    sleep_for = max(0.0, next_ready - time.time())
                    time.sleep(max(0.02, min(0.25, sleep_for)))
                else:
                    time.sleep(0.05)
                continue
            for future in done:
                url = in_flight.pop(future)
                try:
                    md = future.result()
                    if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
                        pages[url] = md
                        try:
                            print(f"[FETCH OK] {url} (chars={len(md)})", flush=True)
                        except Exception:
                            pass
                except SlowHost:
                    if time_left_fn() > 5.0:
                        delayed.append((time.time() + 3.0, url))
                except Exception:
                    pass
            schedule_next(executor)
    ordered = OrderedDict((url, pages[url]) for url in urls if url in pages)
    return ordered


@autodoc(
    summary=TOOL_SUMMARY,
)
def Deep_Research(
    summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
    query1: Annotated[str, "DDG Search Query 1"],
    max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
    query2: Annotated[str, "DDG Search Query 2"] = "",
    max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
    query3: Annotated[str, "DDG Search Query 3"] = "",
    max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
    query4: Annotated[str, "DDG Search Query 4"] = "",
    max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
    query5: Annotated[str, "DDG Search Query 5"] = "",
    max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
) -> tuple[str, str, str]:
    _log_call_start(
        "Deep_Research",
        summary=_truncate_for_log(summary or "", 200),
        queries=[q for q in [query1, query2, query3, query4, query5] if q],
    )
    if not HF_TEXTGEN_TOKEN:
        _log_call_end("Deep_Research", "error=missing HF token")
        raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")
    queries = [
        _normalize_query(query1 or ""),
        _normalize_query(query2 or ""),
        _normalize_query(query3 or ""),
        _normalize_query(query4 or ""),
        _normalize_query(query5 or ""),
    ]
    reqs = [
        max(1, min(50, int(max1))),
        max(1, min(50, int(max2))),
        max(1, min(50, int(max3))),
        max(1, min(50, int(max4))),
        max(1, min(50, int(max5))),
    ]
    total_requested = sum(reqs)
    if total_requested > 50:
        reqs = [10, 10, 10, 10, 10]
    start_ts = time.time()
    budget_seconds = 55.0
    deadline = start_ts + budget_seconds

    def time_left() -> float:
        return max(0.0, deadline - time.time())

    now_dt = datetime.now().astimezone()
    date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
    if not date_str:
        date_str = now_dt.isoformat()

    all_urls: list[str] = []
    tasks = []
    with ThreadPoolExecutor(max_workers=min(5, sum(1 for q in queries if q.strip())) or 1) as executor:
        for query, count in zip(queries, reqs):
            if not query.strip():
                continue
            tasks.append(executor.submit(_search_urls_only, query.strip(), count))
        for future in as_completed(tasks):
            try:
                urls = future.result() or []
            except Exception:
                urls = []
            for url in urls:
                if url not in all_urls:
                    all_urls.append(url)
                    if len(all_urls) >= 50:
                        break
            if time_left() <= 0.5:
                break
    if len(all_urls) > 50:
        all_urls = all_urls[:50]
    blacklist = {
        "homedepot.com",
        "tractorsupply.com",
        "mcmaster.com",
        "mrchain.com",
        "answers.com",
        "city-data.com",
        "dictionary.cambridge.org",
    }

    def _domain(url: str) -> str:
        try:
            return urlparse(url).netloc.lower()
        except Exception:
            return ""

    all_urls = [url for url in all_urls if _domain(url) not in blacklist]
    skip_exts = (
        ".pdf",
        ".ppt",
        ".pptx",
        ".doc",
        ".docx",
        ".xls",
        ".xlsx",
        ".zip",
        ".gz",
        ".tgz",
        ".bz2",
        ".7z",
        ".rar",
    )

    def _skip_url(url: str) -> bool:
        try:
            path = urlparse(url).path.lower()
        except Exception:
            return False
        return any(path.endswith(ext) for ext in skip_exts)

    all_urls = [url for url in all_urls if not _skip_url(url)]
    truncated_pages = OrderedDict()
    if all_urls and time_left() > 0.2:
        truncated_pages = _fetch_pages_within_budget(all_urls, 3000, time_left)
    print(
        f"[PIPELINE] Initial fetch complete: candidates={len(all_urls)}, truncated_documents={len(truncated_pages)}, time_left={time_left():.2f}s",
        flush=True,
    )

    def _invoke_chat(messages, provider: str, max_tokens: int, temp: float, top_p: float):
        client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
        return client.chat.completions.create(
            model="zai-org/GLM-4.6",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
        )

    filtered_urls: List[str] = list(truncated_pages.keys())
    filter_output = ""
    filter_used_fallback = False
    filter_success = False
    if truncated_pages and time_left() > 3.0:
        filter_prompt = _build_filter_prompt(summary or "", [q for q in queries if q.strip()], truncated_pages)
        filter_messages = [
            {"role": "system", "content": FILTERER_SYSTEM_PROMPT},
            {"role": "user", "content": f"The current date is {date_str}. Consider how recent each source is when deciding relevance."},
            {"role": "user", "content": filter_prompt},
        ]
        filter_completion = None
        try:
            print("[FILTER] Attempt 1: provider=cerebras, max_tokens=2048", flush=True)
            filter_completion = _invoke_chat(filter_messages, "cerebras", 2048, 0.2, 0.9)
        except Exception as exc1:
            print(f"[FILTER] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
            try:
                print("[FILTER] Attempt 2: provider=auto, max_tokens=2048", flush=True)
                filter_completion = _invoke_chat(filter_messages, "auto", 2048, 0.2, 0.9)
            except Exception as exc2:
                print(f"[FILTER] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
        if filter_completion and filter_completion.choices:
            filter_output = filter_completion.choices[0].message.content or ""
            filtered_urls = _parse_filterer_output(filter_output, list(truncated_pages.keys()))
            filter_success = bool(filter_output.strip()) and bool(filtered_urls)
    if not filtered_urls:
        filter_used_fallback = True
        fallback_count = min(8, len(truncated_pages))
        filtered_urls = list(truncated_pages.keys())[:fallback_count]
    max_final_urls = 20
    if len(filtered_urls) > max_final_urls:
        filter_used_fallback = True
        filtered_urls = filtered_urls[:max_final_urls]
    if not filter_success:
        filter_used_fallback = True
    print(
        f"[FILTER] Selected URLs={len(filtered_urls)}, fallback={filter_used_fallback}, time_left={time_left():.2f}s",
        flush=True,
    )

    final_pages_fetched = OrderedDict()
    if filtered_urls and time_left() > 0.2:
        final_pages_fetched = _fetch_pages_within_budget(filtered_urls, 8000, time_left)
    merged_pages = OrderedDict()
    for url in filtered_urls:
        content = final_pages_fetched.get(url) or truncated_pages.get(url) or ""
        if content:
            merged_pages[url] = content
    pages = merged_pages
    print(
        f"[PIPELINE] Final fetch complete: retained_documents={len(pages)}, time_left={time_left():.2f}s",
        flush=True,
    )
    prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
    system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
    date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
    messages = [
        system_message,
        date_message,
        {"role": "user", "content": prompt},
    ]
    try:
        prompt_chars = len(prompt)
    except Exception:
        prompt_chars = -1
    print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages.keys())}, prompt_chars={prompt_chars}", flush=True)
    print("[PIPELINE] Starting inference (provider=cerebras, model=zai-org/GLM-4.6)", flush=True)

    try:
        print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
        completion = _invoke_chat(messages, "cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
    except Exception as exc1:
        print(f"[LLM] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
        try:
            prompt2 = _build_research_prompt(
                summary=summary or "",
                queries=[q for q in queries if q.strip()],
                url_list=list(pages.keys())[:30],
                pages_map={key: pages[key] for key in list(pages.keys())[:30]},
            )
            messages = [
                system_message,
                date_message,
                {"role": "user", "content": prompt2},
            ]
            print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
            completion = _invoke_chat(messages, "cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
        except Exception as exc2:
            print(f"[LLM] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
            try:
                print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
                completion = _invoke_chat(messages, "auto", max_tokens=8192, temp=0.7, top_p=0.95)
            except Exception as exc3:
                _log_call_end("Deep_Research", f"error={_truncate_for_log(str(exc3), 260)}")
                raise gr.Error(f"Researcher model call failed: {exc3}")
    raw = completion.choices[0].message.content or ""
    try:
        no_think = re.sub(r"<think>[\s\S]*?</think>", "", raw, flags=re.IGNORECASE)
        no_think = re.sub(r"</?think>", "", no_think, flags=re.IGNORECASE)
    except Exception:
        no_think = raw
    try:
        paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
        keep: List[str] = []
        removed = 0
        planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will now|i will|i'll|let's|now let me|i need to|now i'll|now i will)\b", re.IGNORECASE)
        for paragraph in paragraphs:
            if planning_re.search(paragraph):
                removed += 1
                continue
            keep.append(paragraph)
        report = "\n\n".join(keep).strip()
        if not report:
            report = no_think.strip()
    except Exception:
        report = no_think
        removed = 0
    report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
    try:
        print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)
    except Exception:
        pass
    links_text = "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
    if links_text:
        sources_section = "\n\n## Sources\n" + "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
        report = report.rstrip() + sources_section
    file_path = _write_report_tmp(report)
    elapsed = time.time() - start_ts
    print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
    _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
    return report, links_text, file_path


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Deep_Research,
        inputs=[
            gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question", info="Summarization of research topic (one or more sentences)"),
            gr.Textbox(label="DDG Search Query 1", max_lines=1, info="DDG Search Query 1"),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q1)", info="Max results for Query 1 (1-50)"),
            gr.Textbox(label="DDG Search Query 2", value="", max_lines=1, info="DDG Search Query 2"),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q2)", info="Max results for Query 2 (1-50)"),
            gr.Textbox(label="DDG Search Query 3", value="", max_lines=1, info="DDG Search Query 3"),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q3)", info="Max results for Query 3 (1-50)"),
            gr.Textbox(label="DDG Search Query 4", value="", max_lines=1, info="DDG Search Query 4"),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q4)", info="Max results for Query 4 (1-50)"),
            gr.Textbox(label="DDG Search Query 5", value="", max_lines=1, info="DDG Search Query 5"),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q5)", info="Max results for Query 5 (1-50)"),
        ],
        outputs=[
            gr.Markdown(label="Research Report"),
            gr.Textbox(label="Fetched Links", lines=8),
            gr.File(label="Download Research Report", file_count="single"),
        ],
        title="Deep Research",
        description=(
            "<div style=\"text-align:center\">Perform multi-query web research: search with DuckDuckGo, fetch up to 50 pages in parallel, "
            "and generate a comprehensive report using a large LLM via Hugging Face Inference Providers (Cerebras). Requires HF_READ_TOKEN.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Deep_Research", "build_interface"]
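Not part of the commit — a small sketch of the pure helpers above, with hypothetical values, to show how filtering and truncation behave:

from Modules.Deep_Research import _parse_filterer_output, _truncate_join

allowed = ["https://example.com/a", "https://example.org/b"]
raw = "1. https://example.com/a\nhttps://not-offered.net/x"
# Only URLs that were actually offered to the filter model are kept, in their original order.
print(_parse_filterer_output(raw, allowed))        # ['https://example.com/a']

# _truncate_join caps the combined text and reports whether anything was cut.
joined, truncated = _truncate_join(["x" * 80, "y" * 80], max_chars=100)
print(len(joined), truncated)                      # 102 True (80 chars + "\n\n" + 20 chars)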
Modules/File_System.py
ADDED
@@ -0,0 +1,648 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
import shutil
|
| 7 |
+
import stat
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Annotated, Optional
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
|
| 13 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 14 |
+
from ._docstrings import autodoc
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
TOOL_SUMMARY = (
|
| 18 |
+
"Browse, search, and manage files within a safe root. "
|
| 19 |
+
"Actions: list, read, write, append, mkdir, move, copy, delete, info, search, help. "
|
| 20 |
+
"Fill other fields as needed. "
|
| 21 |
+
"Use paths like `/` or `/notes/todo.txt` because all paths are relative to the root (`/`). "
|
| 22 |
+
"Use 'help' to see action-specific required fields and examples."
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
HELP_TEXT = (
|
| 26 |
+
"File System — actions and usage\n\n"
|
| 27 |
+
"Root: paths resolve under Nymbo-Tools/Filesystem by default (or NYMBO_TOOLS_ROOT if set). "
|
| 28 |
+
"Start paths with '/' to refer to the tool root (e.g., /notes). "
|
| 29 |
+
"Absolute paths are disabled unless UNSAFE_ALLOW_ABS_PATHS=1.\n\n"
|
| 30 |
+
"Actions and fields:\n"
|
| 31 |
+
"- list: path='/' (default), recursive=false, show_hidden=false, max_entries=20\n"
|
| 32 |
+
"- read: path (e.g., /notes/todo.txt), offset=0, max_chars=4000 (shows next_cursor when truncated)\n"
|
| 33 |
+
"- write: path, content (UTF-8), create_dirs=true\n"
|
| 34 |
+
"- append: path, content (UTF-8), create_dirs=true\n"
|
| 35 |
+
"- mkdir: path (directory), exist_ok=true\n"
|
| 36 |
+
"- move: path (src), dest_path (dst), overwrite=false\n"
|
| 37 |
+
"- copy: path (src), dest_path (dst), overwrite=false\n"
|
| 38 |
+
"- delete: path, recursive=true (required for directories)\n"
|
| 39 |
+
"- info: path\n"
|
| 40 |
+
"- search: path (dir or file), content=query text, recursive=false, show_hidden=false, max_entries=20, case_sensitive=false, offset=0\n"
|
| 41 |
+
"- help: show this guide\n\n"
|
| 42 |
+
"Errors are returned as JSON with fields: {status:'error', code, message, path?, hint?, data?}.\n\n"
|
| 43 |
+
"Examples:\n"
|
| 44 |
+
"- list current: action=list, path='/'\n"
|
| 45 |
+
"- make folder: action=mkdir, path='/notes'\n"
|
| 46 |
+
"- write file: action=write, path='/notes/todo.txt', content='hello'\n"
|
| 47 |
+
"- read file: action=read, path='/notes/todo.txt', max_chars=200\n"
|
| 48 |
+
"- move file: action=move, path='/notes/todo.txt', dest_path='/notes/todo-old.txt', overwrite=true\n"
|
| 49 |
+
"- delete dir: action=delete, path='/notes', recursive=true\n"
|
| 50 |
+
"- search text: action=search, path='/notes', content='TODO', recursive=true, max_entries=50\n"
|
| 51 |
+
"- page search results: action=search, content='TODO', offset=10\n"
|
| 52 |
+
"- case-sensitive search: action=search, content='TODO', case_sensitive=true\n"
|
| 53 |
+
)


def _default_root() -> str:
    # Prefer explicit root via env var
    root = os.getenv("NYMBO_TOOLS_ROOT")
    if root and root.strip():
        return os.path.abspath(os.path.expanduser(root.strip()))
    # Default to "Nymbo-Tools/Filesystem" alongside this module package
    try:
        here = os.path.abspath(__file__)
        tools_dir = os.path.dirname(os.path.dirname(here))  # .../Nymbo-Tools
        default_root = os.path.abspath(os.path.join(tools_dir, "Filesystem"))
        return default_root
    except Exception:
        # Final fallback
        return os.path.abspath(os.getcwd())


ROOT_DIR = _default_root()
# Ensure the default root directory exists to make listing/writing more convenient
try:
    os.makedirs(ROOT_DIR, exist_ok=True)
except Exception:
    pass
ALLOW_ABS = bool(int(os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0")))

def _safe_err(exc: Exception | str) -> str:
    """Return an error string with any absolute root replaced by '/' and slashes normalized.
    This handles variants like backslashes and duplicate slashes in OS messages.
    """
    s = str(exc)
    # Normalize to forward slashes for comparison
    s_norm = s.replace("\\", "/")
    root_fwd = ROOT_DIR.replace("\\", "/")
    # Collapse duplicate slashes in root representation
    root_variants = {ROOT_DIR, root_fwd, re.sub(r"/+", "/", root_fwd)}
    for variant in root_variants:
        if variant:
            s_norm = s_norm.replace(variant, "/")
    # Collapse duplicate slashes in final output
    s_norm = re.sub(r"/+", "/", s_norm)
    return s_norm


def _err(code: str, message: str, *, path: Optional[str] = None, hint: Optional[str] = None, data: Optional[dict] = None) -> str:
    """Return a structured error JSON string.
    Fields: status='error', code, message, path?, hint?, data?, root='/'
    """
    payload = {
        "status": "error",
        "code": code,
        "message": message,
        "root": "/",
    }
    if path is not None and path != "":
        payload["path"] = path
    if hint:
        payload["hint"] = hint
    if data:
        payload["data"] = data
    return json.dumps(payload, ensure_ascii=False)


def _resolve_path(path: str) -> tuple[str, str]:
    """
    Resolve a user-provided path to an absolute, normalized path constrained to ROOT_DIR
    (unless UNSAFE_ALLOW_ABS_PATHS=1). Returns (abs_path, error_message). error_message is empty when ok.
    """
    try:
        user_input = (path or "/").strip() or "/"
        if user_input.startswith("/"):
            # Treat leading '/' as the virtual root for the tool.
            rel_part = user_input.lstrip("/") or "."
            raw = os.path.expanduser(rel_part)
            treat_as_relative = True
        else:
            raw = os.path.expanduser(user_input)
            treat_as_relative = False

        if not treat_as_relative and os.path.isabs(raw):
            if not ALLOW_ABS:
                # Absolute paths are not allowed in safe mode
                return "", _err(
                    "absolute_path_disabled",
                    "Absolute paths are disabled in safe mode.",
                    path=raw.replace("\\", "/"),
                    hint="Use a path relative to / (e.g., /notes/todo.txt)."
                )
            abs_path = os.path.abspath(raw)
        else:
            abs_path = os.path.abspath(os.path.join(ROOT_DIR, raw))
        # Constrain to ROOT when not unsafe mode
        if not ALLOW_ABS:
            try:
                common = os.path.commonpath([os.path.normpath(ROOT_DIR), os.path.normpath(abs_path)])
            except Exception:
                # Fallback to simple check
                root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
                abs_cmp = os.path.normcase(os.path.normpath(abs_path))
                if not abs_cmp.startswith(root_cmp):
                    return "", _err(
                        "path_outside_root",
                        "Path not allowed outside root.",
                        path=user_input.replace("\\", "/"),
                        hint="Use a path under / (the tool's root)."
                    )
            else:
                root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
                common_cmp = os.path.normcase(os.path.normpath(common))
                if common_cmp != root_cmp:
                    return "", _err(
                        "path_outside_root",
                        "Path not allowed outside root.",
                        path=user_input.replace("\\", "/"),
                        hint="Use a path under / (the tool's root)."
                    )
        return abs_path, ""
    except Exception as exc:
        return "", _err(
            "resolve_path_failed",
            "Failed to resolve path.",
            path=(path or ""),
            data={"error": _safe_err(exc)}
        )
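# Illustrative resolutions under the virtual root (paths are hypothetical; exact values
# depend on ROOT_DIR and the host OS):
#   _resolve_path("/")               -> (ROOT_DIR, "")
#   _resolve_path("/notes/todo.txt") -> (os.path.join(ROOT_DIR, "notes", "todo.txt"), "")
#   _resolve_path("~/secret.txt")    -> ("", <absolute_path_disabled error JSON>) in safe mode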


def _fmt_size(num_bytes: int) -> str:
    units = ["B", "KB", "MB", "GB", "TB"]
    size = float(num_bytes)
    for unit in units:
        if size < 1024.0:
            return f"{size:.1f} {unit}"
        size /= 1024.0
    return f"{size:.1f} PB"


def _display_path(abs_path: str) -> str:
    """Return a user-friendly path relative to ROOT_DIR using forward slashes.
    Example: ROOT_DIR -> '/', a file under it -> '/sub/dir/file.txt'."""
    try:
        norm_root = os.path.normpath(ROOT_DIR)
        norm_abs = os.path.normpath(abs_path)
        common = os.path.commonpath([norm_root, norm_abs])
        if os.path.normcase(common) == os.path.normcase(norm_root):
            rel = os.path.relpath(norm_abs, norm_root)
            if rel == ".":
                return "/"
            return "/" + rel.replace("\\", "/")
    except Exception:
        pass
    # Fallback to original absolute path
    return abs_path.replace("\\", "/")


def _list_dir(abs_path: str, *, show_hidden: bool, recursive: bool, max_entries: int) -> str:
    lines: list[str] = []
    total = 0
    root_display = "/"
    listing_display = _display_path(abs_path)
    for root, dirs, files in os.walk(abs_path):
        # filter hidden
        if not show_hidden:
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            files = [f for f in files if not f.startswith('.')]
        try:
            rel_root = os.path.relpath(root, ROOT_DIR)
        except Exception:
            rel_root = root
        rel_root_disp = "/" if rel_root == "." else "/" + rel_root.replace("\\", "/")
        lines.append(f"\n📂 {rel_root_disp}")
        # sort
        dirs.sort()
        files.sort()
        for d in dirs:
            p = os.path.join(root, d)
            try:
                mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
            except Exception:
                mtime = "?"
            lines.append(f" • [DIR] {d} (modified {mtime})")
            total += 1
            if total >= max_entries:
                lines.append(f"\n… Truncated at {max_entries} entries.")
                return "\n".join(lines).strip()
        for f in files:
            p = os.path.join(root, f)
            try:
                size = _fmt_size(os.path.getsize(p))
                mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
            except Exception:
                size, mtime = "?", "?"
            lines.append(f" • {f} ({size}, modified {mtime})")
            total += 1
            if total >= max_entries:
                lines.append(f"\n… Truncated at {max_entries} entries.")
                return "\n".join(lines).strip()
        if not recursive:
            break
    header = f"Listing of {listing_display}\nRoot: {root_display}\nEntries: {total}"
    return (header + "\n" + "\n".join(lines)).strip()


def _search_text(
    abs_path: str,
    query: str,
    *,
    recursive: bool,
    show_hidden: bool,
    max_results: int,
    case_sensitive: bool,
    start_index: int,
) -> str:
    if not os.path.exists(abs_path):
        return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))

    query = query or ""
    normalized_query = query if case_sensitive else query.lower()
    if normalized_query == "":
        return _err(
            "missing_search_query",
            "Search query is required for the search action.",
            hint="Provide text in the Content field to search for.",
        )

    max_results = max(1, int(max_results) if max_results is not None else 20)
    start_index = max(0, int(start_index) if start_index is not None else 0)
    matches: list[tuple[str, int, str]] = []
    errors: list[str] = []
    files_scanned = 0
    truncated = False
    total_matches = 0

    def _should_skip(name: str) -> bool:
        return not show_hidden and name.startswith('.')

    def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
        nonlocal truncated, total_matches
        total_matches += 1
        if total_matches <= start_index:
            return False
        if len(matches) < max_results:
            snippet = line_text.strip()
            if len(snippet) > 200:
                snippet = snippet[:197] + "…"
            matches.append((_display_path(file_path), line_no, snippet))
            return False
        truncated = True
        return True

    def _search_file(file_path: str) -> bool:
        nonlocal files_scanned
        files_scanned += 1
        try:
            with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
                for line_no, line in enumerate(handle, start=1):
                    haystack = line if case_sensitive else line.lower()
                    if normalized_query in haystack:
                        if _handle_match(file_path, line_no, line):
                            return True
        except Exception as exc:
            errors.append(f"{_display_path(file_path)} ({_safe_err(exc)})")
        return truncated

    if os.path.isfile(abs_path):
        _search_file(abs_path)
    else:
        for root, dirs, files in os.walk(abs_path):
            dirs[:] = [d for d in dirs if not _should_skip(d)]
            visible_files = [f for f in files if show_hidden or not f.startswith('.')]
            for name in visible_files:
                file_path = os.path.join(root, name)
                if _search_file(file_path):
                    break
            if truncated:
                break
            if not recursive:
                break

    header_lines = [
        f"Search results for {query!r}",
        f"Scope: {_display_path(abs_path)}",
        f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
        f"Start offset: {start_index}",
        f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
        f"Files scanned: {files_scanned}",
    ]

    next_cursor = start_index + len(matches) if truncated else None

    if truncated:
        header_lines.append(f"Matches encountered before truncation: {total_matches}")
        header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
        header_lines.append(f"Next cursor: {next_cursor}")
    else:
        header_lines.append(f"Total matches found: {total_matches}")
        header_lines.append("Truncated: no — end of results.")
        header_lines.append("Next cursor: None")

    if not matches:
        if total_matches > 0 and start_index >= total_matches:
            hint_limit = max(total_matches - 1, 0)
            body_lines = [
                f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
                (f"Try a smaller offset (≤ {hint_limit})." if hint_limit >= 0 else ""),
            ]
            body_lines = [line for line in body_lines if line]
        else:
            body_lines = [
                "No matches found.",
                (f"Total matches encountered: {total_matches}." if total_matches else ""),
            ]
            body_lines = [line for line in body_lines if line]
    else:
        body_lines = [f"{idx}. {path}:{line_no}: {text}" for idx, (path, line_no, text) in enumerate(matches, start=1)]

    if errors:
        shown = errors[:5]
        body_lines.extend(["", "Warnings:"])
        body_lines.extend(shown)
        if len(errors) > len(shown):
            body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")

    return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)


def _read_file(abs_path: str, *, offset: int, max_chars: int) -> str:
    if not os.path.exists(abs_path):
        return _err("file_not_found", f"File not found: {_display_path(abs_path)}", path=_display_path(abs_path))
    if os.path.isdir(abs_path):
        return _err("is_directory", f"Path is a directory, not a file: {_display_path(abs_path)}", path=_display_path(abs_path), hint="Provide a file path.")
    try:
        with open(abs_path, 'r', encoding='utf-8', errors='replace') as f:
            data = f.read()
    except Exception as exc:
        return _err("read_failed", "Failed to read file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
    total = len(data)
    start = max(0, min(offset, total))
    if max_chars > 0:
        end = min(total, start + max_chars)
    else:
        end = total
    chunk = data[start:end]
    next_cursor = end if end < total else None
    meta = {
        "offset": start,
        "returned": len(chunk),
        "total": total,
        "next_cursor": next_cursor,
        "path": _display_path(abs_path),
    }
    header = (
        f"Reading {_display_path(abs_path)}\n"
        f"Offset {start}, returned {len(chunk)} of {total}."
        + (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
    )
    sep = "\n\n---\n\n"
    return header + sep + chunk
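# Reading sketch: for a 10,000-character file, offset=0 with max_chars=4000 returns
# characters 0-3999 and reports "Next cursor: 4000"; a follow-up call with offset=4000
# continues from there. Figures are illustrative only.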


def _ensure_parent(abs_path: str, create_dirs: bool) -> None:
    parent = os.path.dirname(abs_path)
    if parent and not os.path.exists(parent):
        if create_dirs:
            os.makedirs(parent, exist_ok=True)
        else:
            raise FileNotFoundError(f"Parent directory does not exist: {_display_path(parent)}")


def _write_file(abs_path: str, content: str, *, append: bool, create_dirs: bool) -> str:
    try:
        _ensure_parent(abs_path, create_dirs)
        mode = 'a' if append else 'w'
        with open(abs_path, mode, encoding='utf-8') as f:
            f.write(content or "")
        return f"{'Appended to' if append else 'Wrote'} file: {_display_path(abs_path)} (chars={len(content or '')})"
    except Exception as exc:
        return _err("write_failed", "Failed to write file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})


def _mkdir(abs_path: str, exist_ok: bool) -> str:
    try:
        os.makedirs(abs_path, exist_ok=exist_ok)
        return f"Created directory: {_display_path(abs_path)}"
    except Exception as exc:
        return _err("mkdir_failed", "Failed to create directory.", path=_display_path(abs_path), data={"error": _safe_err(exc)})


def _move_copy(action: str, src: str, dst: str, *, overwrite: bool) -> str:
    try:
        if not os.path.exists(src):
            return _err("source_not_found", f"Source not found: {_display_path(src)}", path=_display_path(src))
        if os.path.isdir(dst):
            # allow moving into an existing directory
            dst_path = os.path.join(dst, os.path.basename(src))
        else:
            dst_path = dst
        if os.path.exists(dst_path):
            if overwrite:
                if os.path.isdir(dst_path):
                    shutil.rmtree(dst_path)
                else:
                    os.remove(dst_path)
            else:
                return _err(
                    "destination_exists",
                    f"Destination already exists: {_display_path(dst_path)}",
                    path=_display_path(dst_path),
                    hint="Set overwrite=True to replace the destination."
                )
        if action == 'move':
            shutil.move(src, dst_path)
        else:
            if os.path.isdir(src):
                shutil.copytree(src, dst_path)
            else:
                shutil.copy2(src, dst_path)
        return f"{action.capitalize()}d: {_display_path(src)} -> {_display_path(dst_path)}"
    except Exception as exc:
        return _err(f"{action}_failed", f"Failed to {action}.", path=_display_path(src), data={"error": _safe_err(exc), "destination": _display_path(dst)})


def _delete(abs_path: str, *, recursive: bool) -> str:
    try:
        if not os.path.exists(abs_path):
            return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
        if os.path.isdir(abs_path):
            if not recursive:
                # Refuse to delete a dir unless recursive=True
                return _err("requires_recursive", "Refusing to delete a directory without recursive=True", path=_display_path(abs_path), hint="Pass recursive=True to delete a directory.")
            shutil.rmtree(abs_path)
        else:
            os.remove(abs_path)
        return f"Deleted: {_display_path(abs_path)}"
    except Exception as exc:
        return _err("delete_failed", "Failed to delete path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})


def _info(abs_path: str) -> str:
    try:
        st = os.stat(abs_path)
    except Exception as exc:
        return _err("stat_failed", "Failed to stat path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
    info = {
        "path": _display_path(abs_path),
        "type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
        "size": st.st_size,
        "modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=' ', timespec='seconds'),
        "created": datetime.fromtimestamp(st.st_ctime).isoformat(sep=' ', timespec='seconds'),
        "mode": oct(st.st_mode),
        "root": "/",
    }
    return json.dumps(info, indent=2)


@autodoc(summary=TOOL_SUMMARY)
def File_System(
    action: Annotated[str, "Operation to perform: 'list', 'read', 'write', 'append', 'mkdir', 'move', 'copy', 'delete', 'info', 'search'."],
    path: Annotated[str, "Target path, relative to root unless UNSAFE_ALLOW_ABS_PATHS=1."] = "/",
    content: Annotated[Optional[str], "Content for write/append actions or search query (UTF-8)."] = None,
    dest_path: Annotated[Optional[str], "Destination for move/copy (relative to root unless unsafe absolute allowed)."] = None,
    recursive: Annotated[bool, "For list/search (recurse into subfolders) and delete (required for directories)."] = False,
    show_hidden: Annotated[bool, "Include hidden files (dotfiles) for list/search."] = False,
    max_entries: Annotated[int, "Max entries to list or matches to return (for list/search)."] = 20,
    offset: Annotated[int, "Start offset for reading files (for read)."] = 0,
    max_chars: Annotated[int, "Max characters to return when reading (0 = full file)."] = 4000,
    create_dirs: Annotated[bool, "Create parent directories for write/append if missing."] = True,
    overwrite: Annotated[bool, "Allow overwrite for move/copy destinations."] = False,
    case_sensitive: Annotated[bool, "Match case when searching text."] = False,
) -> str:
    _log_call_start(
        "File_System",
        action=action,
        path=path,
        dest_path=dest_path,
        recursive=recursive,
        show_hidden=show_hidden,
        max_entries=max_entries,
        offset=offset,
        max_chars=max_chars,
        create_dirs=create_dirs,
        overwrite=overwrite,
        case_sensitive=case_sensitive,
    )
    action = (action or "").strip().lower()
    if action not in {"list", "read", "write", "append", "mkdir", "move", "copy", "delete", "info", "search", "help"}:
        result = _err(
            "invalid_action",
            "Invalid action.",
            hint="Choose from: list, read, write, append, mkdir, move, copy, delete, info, search, help."
        )
        _log_call_end("File_System", _truncate_for_log(result))
        return result

    abs_path, err = _resolve_path(path)
    if err:
        _log_call_end("File_System", _truncate_for_log(err))
        return err

    try:
        if action == "help":
            result = HELP_TEXT
        elif action == "list":
            if not os.path.exists(abs_path):
                result = _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
            else:
                result = _list_dir(abs_path, show_hidden=show_hidden, recursive=recursive, max_entries=max_entries)
        elif action == "read":
            result = _read_file(abs_path, offset=offset, max_chars=max_chars)
        elif action in {"write", "append"}:
            # Prevent attempts to write to root or any directory
            if _display_path(abs_path) == "/" or os.path.isdir(abs_path):
                result = _err(
                    "invalid_write_path",
                    "Invalid path for write/append.",
                    path=_display_path(abs_path),
                    hint="Provide a file path under / (e.g., /notes/todo.txt)."
                )
            else:
                result = _write_file(abs_path, content or "", append=(action == "append"), create_dirs=create_dirs)
        elif action == "mkdir":
            result = _mkdir(abs_path, exist_ok=True)
        elif action in {"move", "copy"}:
            if not dest_path:
                result = _err("missing_dest_path", "dest_path is required for move/copy (ignored for other actions).")
            else:
                abs_dst, err2 = _resolve_path(dest_path)
                if err2:
                    result = err2
                else:
                    result = _move_copy(action, abs_path, abs_dst, overwrite=overwrite)
        elif action == "delete":
            result = _delete(abs_path, recursive=recursive)
        elif action == "search":
            query_text = content or ""
            if query_text.strip() == "":
                result = _err(
                    "missing_search_query",
                    "Search query is required for the search action.",
                    hint="Provide text in the Content field to search for.",
                )
            else:
                result = _search_text(
                    abs_path,
                    query_text,
                    recursive=recursive,
                    show_hidden=show_hidden,
                    max_results=max_entries,
                    case_sensitive=case_sensitive,
                    start_index=offset,
                )
        else: # info
            result = _info(abs_path)
    except Exception as exc:
        result = _err("exception", "Unhandled error during operation.", data={"error": _safe_err(exc)})

    _log_call_end("File_System", _truncate_for_log(result))
    return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=File_System,
        inputs=[
            gr.Radio(
                label="Action",
                choices=["list", "read", "write", "append", "mkdir", "move", "copy", "delete", "info", "search", "help"],
                value="help",
                info="Operation to perform",
            ),
            gr.Textbox(label="Path", placeholder="/ or /src/file.txt", max_lines=1, value="/", info="Target path (relative to root)"),
            gr.Textbox(label="Content", lines=6, placeholder="Text to write or search for...", info="Content for write/append actions or search query"),
            gr.Textbox(label="Destination", max_lines=1, info="Destination path (Move/Copy only)"),
            gr.Checkbox(label="Recursive", value=False, info="Recurse into subfolders (List/Delete/Search)"),
            gr.Checkbox(label="Show hidden", value=False, info="Include hidden files (List/Search)"),
            gr.Slider(minimum=10, maximum=5000, step=10, value=20, label="Max entries / matches", info="Max entries to list or matches to return (List/Search)"),
            gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Read/Search)"),
            gr.Slider(minimum=0, maximum=100_000, step=500, value=4000, label="Max chars", info="Max characters to return (Read, 0=all)"),
            gr.Checkbox(label="Create parent dirs", value=True, info="Create parent directories if missing (Write)"),
            gr.Checkbox(label="Overwrite destination", value=False, info="Allow overwrite (Move/Copy)"),
            gr.Checkbox(label="Case sensitive search", value=False, info="Match case (Search)"),
        ],
        outputs=gr.Textbox(label="Result", lines=20),
        title="File System",
        description=(
            "<div id=\"fs-desc\" style=\"text-align:center; overflow:hidden;\">Browse, search, and interact with a filesystem. "
            "Choose an action and fill optional fields as needed."
            "</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )


__all__ = ["File_System", "build_interface"]
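# Minimal local smoke test, intended as a sketch only (it assumes the module's own
# imports resolve when run directly; every action and parameter name used below
# exists in File_System above):
if __name__ == "__main__":
    print(File_System(action="mkdir", path="/notes"))
    print(File_System(action="write", path="/notes/todo.txt", content="hello"))
    print(File_System(action="read", path="/notes/todo.txt", max_chars=200))
    print(File_System(action="search", path="/notes", content="hello", recursive=True))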
Modules/Generate_Image.py
ADDED
|
@@ -0,0 +1,141 @@
from __future__ import annotations

import os
import uuid
import random
from typing import Annotated

import gradio as gr
from PIL import Image
from huggingface_hub import InferenceClient
from .File_System import ROOT_DIR

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc

HF_API_TOKEN = os.getenv("HF_READ_TOKEN")

# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Generate an image from a text prompt via Hugging Face serverless inference; "
    "tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
    "Return the generated media to the user in this format ``."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Image(
    prompt: Annotated[str, "Text description of the image to generate."],
    model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., black-forest-labs/FLUX.1-Krea-dev)."] = "black-forest-labs/FLUX.1-Krea-dev",
    negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
    cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
) -> str:
    _log_call_start(
        "Generate_Image",
        prompt=_truncate_for_log(prompt, 200),
        model_id=model_id,
        steps=steps,
        cfg_scale=cfg_scale,
        seed=seed,
        size=f"{width}x{height}",
    )
    if not prompt or not prompt.strip():
        _log_call_end("Generate_Image", "error=empty prompt")
        raise gr.Error("Please provide a non-empty prompt.")
    enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
    providers = ["auto", "replicate", "fal-ai"]
    last_error: Exception | None = None
    for provider in providers:
        try:
            client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
            image = client.text_to_image(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                model=model_id,
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=cfg_scale,
                seed=seed if seed != -1 else random.randint(1, 1_000_000_000),
            )

            filename = f"image_{uuid.uuid4().hex[:8]}.png"
            output_path = os.path.join(ROOT_DIR, filename)
            image.save(output_path)

            _log_call_end("Generate_Image", f"provider={provider} size={image.size} saved_to={filename}")
            return output_path
        except Exception as exc: # pylint: disable=broad-except
            last_error = exc
            continue
    msg = str(last_error) if last_error else "Unknown error"
    lowered = msg.lower()
    if "404" in msg:
        raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and your HF token access.")
    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.")
    if "401" in msg or "403" in msg:
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
    if ("api_key" in lowered) or ("hf auth login" in lowered) or ("unauthorized" in lowered) or ("forbidden" in lowered):
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
    _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
    raise gr.Error(f"Image generation failed: {msg}")


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Generate_Image,
        inputs=[
            gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2, info="Text description of the image to generate"),
            gr.Textbox(
                label="Model",
                value="black-forest-labs/FLUX.1-Krea-dev",
                placeholder="creator/model-name",
                max_lines=1,
                info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
            ),
            gr.Textbox(
                label="Negative Prompt",
                value=(
                    "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
                    "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
                    "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
                ),
                lines=2,
                info="What should NOT appear in the image",
            ),
            gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps", info="Number of denoising steps (1–100)"),
            gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale", info="Classifier-free guidance scale (1–20)"),
            gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility"),
            gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Width", info="Output width in pixels"),
            gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Height", info="Output height in pixels"),
            gr.Radio(
                label="Sampler",
                value="DPM++ 2M Karras",
                choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
                info="Sampling method",
            ),
        ],
        outputs=gr.Image(label="Generated Image"),
        title="Generate Image",
        description=(
            "<div style=\"text-align:center\">Generate images via Hugging Face serverless inference. "
            "Default model is FLUX.1-Krea-dev.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Generate_Image", "build_interface"]
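# Usage sketch, not part of the original module: it assumes HF_READ_TOKEN is configured,
# otherwise the call raises gr.Error as handled above.
if __name__ == "__main__":
    saved = Generate_Image(prompt="a lighthouse at dusk, oil painting", steps=20)
    print(f"Image written under the Filesystem root: {saved}")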
Modules/Generate_Speech.py
ADDED
|
@@ -0,0 +1,677 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
from contextlib import contextmanager
|
| 7 |
+
from typing import Optional, Annotated
|
| 8 |
+
from unicodedata import normalize
|
| 9 |
+
import re
|
| 10 |
+
import uuid
|
| 11 |
+
import io
|
| 12 |
+
import wave
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import onnxruntime as ort
|
| 16 |
+
import scipy.io.wavfile
|
| 17 |
+
import gradio as gr
|
| 18 |
+
|
| 19 |
+
from .File_System import ROOT_DIR
|
| 20 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 21 |
+
from ._docstrings import autodoc
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
import torch # type: ignore
|
| 25 |
+
except Exception: # pragma: no cover
|
| 26 |
+
torch = None # type: ignore
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
from kokoro import KModel, KPipeline # type: ignore
|
| 30 |
+
except Exception: # pragma: no cover
|
| 31 |
+
KModel = None # type: ignore
|
| 32 |
+
KPipeline = None # type: ignore
|
| 33 |
+
|
| 34 |
+
try:
|
| 35 |
+
from huggingface_hub import snapshot_download, list_repo_files
|
| 36 |
+
except ImportError:
|
| 37 |
+
snapshot_download = None
|
| 38 |
+
list_repo_files = None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# --- Supertonic Helper Classes & Functions ---
|
| 42 |
+
|
| 43 |
+
class UnicodeProcessor:
|
| 44 |
+
def __init__(self, unicode_indexer_path: str):
|
| 45 |
+
with open(unicode_indexer_path, "r") as f:
|
| 46 |
+
self.indexer = json.load(f)
|
| 47 |
+
|
| 48 |
+
def _preprocess_text(self, text: str) -> str:
|
| 49 |
+
# TODO: add more preprocessing
|
| 50 |
+
text = normalize("NFKD", text)
|
| 51 |
+
return text
|
| 52 |
+
|
| 53 |
+
def _get_text_mask(self, text_ids_lengths: np.ndarray) -> np.ndarray:
|
| 54 |
+
text_mask = length_to_mask(text_ids_lengths)
|
| 55 |
+
return text_mask
|
| 56 |
+
|
| 57 |
+
def _text_to_unicode_values(self, text: str) -> np.ndarray:
|
| 58 |
+
unicode_values = np.array(
|
| 59 |
+
[ord(char) for char in text], dtype=np.uint16
|
| 60 |
+
) # 2 bytes
|
| 61 |
+
return unicode_values
|
| 62 |
+
|
| 63 |
+
def __call__(self, text_list: list[str]) -> tuple[np.ndarray, np.ndarray]:
|
| 64 |
+
text_list = [self._preprocess_text(t) for t in text_list]
|
| 65 |
+
text_ids_lengths = np.array([len(text) for text in text_list], dtype=np.int64)
|
| 66 |
+
text_ids = np.zeros((len(text_list), text_ids_lengths.max()), dtype=np.int64)
|
| 67 |
+
for i, text in enumerate(text_list):
|
| 68 |
+
unicode_vals = self._text_to_unicode_values(text)
|
| 69 |
+
text_ids[i, : len(unicode_vals)] = np.array(
|
| 70 |
+
[self.indexer[val] for val in unicode_vals], dtype=np.int64
|
| 71 |
+
)
|
| 72 |
+
text_mask = self._get_text_mask(text_ids_lengths)
|
| 73 |
+
return text_ids, text_mask
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class Style:
|
| 77 |
+
def __init__(self, style_ttl_onnx: np.ndarray, style_dp_onnx: np.ndarray):
|
| 78 |
+
self.ttl = style_ttl_onnx
|
| 79 |
+
self.dp = style_dp_onnx
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class TextToSpeech:
|
| 83 |
+
def __init__(
|
| 84 |
+
self,
|
| 85 |
+
cfgs: dict,
|
| 86 |
+
text_processor: UnicodeProcessor,
|
| 87 |
+
dp_ort: ort.InferenceSession,
|
| 88 |
+
text_enc_ort: ort.InferenceSession,
|
| 89 |
+
vector_est_ort: ort.InferenceSession,
|
| 90 |
+
vocoder_ort: ort.InferenceSession,
|
| 91 |
+
):
|
| 92 |
+
self.cfgs = cfgs
|
| 93 |
+
self.text_processor = text_processor
|
| 94 |
+
self.dp_ort = dp_ort
|
| 95 |
+
self.text_enc_ort = text_enc_ort
|
| 96 |
+
self.vector_est_ort = vector_est_ort
|
| 97 |
+
self.vocoder_ort = vocoder_ort
|
| 98 |
+
self.sample_rate = cfgs["ae"]["sample_rate"]
|
| 99 |
+
self.base_chunk_size = cfgs["ae"]["base_chunk_size"]
|
| 100 |
+
self.chunk_compress_factor = cfgs["ttl"]["chunk_compress_factor"]
|
| 101 |
+
self.ldim = cfgs["ttl"]["latent_dim"]
|
| 102 |
+
|
| 103 |
+
def sample_noisy_latent(
|
| 104 |
+
self, duration: np.ndarray
|
| 105 |
+
) -> tuple[np.ndarray, np.ndarray]:
|
| 106 |
+
bsz = len(duration)
|
| 107 |
+
wav_len_max = duration.max() * self.sample_rate
|
| 108 |
+
wav_lengths = (duration * self.sample_rate).astype(np.int64)
|
| 109 |
+
chunk_size = self.base_chunk_size * self.chunk_compress_factor
|
| 110 |
+
latent_len = ((wav_len_max + chunk_size - 1) / chunk_size).astype(np.int32)
|
| 111 |
+
latent_dim = self.ldim * self.chunk_compress_factor
|
| 112 |
+
noisy_latent = np.random.randn(bsz, latent_dim, latent_len).astype(np.float32)
|
| 113 |
+
latent_mask = get_latent_mask(
|
| 114 |
+
wav_lengths, self.base_chunk_size, self.chunk_compress_factor
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
noisy_latent = noisy_latent * latent_mask
|
| 118 |
+
return noisy_latent, latent_mask
|
| 119 |
+
|
| 120 |
+
def _infer(
|
| 121 |
+
self, text_list: list[str], style: Style, total_step: int, speed: float = 1.05
|
| 122 |
+
) -> tuple[np.ndarray, np.ndarray]:
|
| 123 |
+
assert (
|
| 124 |
+
len(text_list) == style.ttl.shape[0]
|
| 125 |
+
), "Number of texts must match number of style vectors"
|
| 126 |
+
bsz = len(text_list)
|
| 127 |
+
text_ids, text_mask = self.text_processor(text_list)
|
| 128 |
+
dur_onnx, *_ = self.dp_ort.run(
|
| 129 |
+
None, {"text_ids": text_ids, "style_dp": style.dp, "text_mask": text_mask}
|
| 130 |
+
)
|
| 131 |
+
dur_onnx = dur_onnx / speed
|
| 132 |
+
text_emb_onnx, *_ = self.text_enc_ort.run(
|
| 133 |
+
None,
|
| 134 |
+
{"text_ids": text_ids, "style_ttl": style.ttl, "text_mask": text_mask},
|
| 135 |
+
) # dur_onnx: [bsz]
|
| 136 |
+
xt, latent_mask = self.sample_noisy_latent(dur_onnx)
|
| 137 |
+
total_step_np = np.array([total_step] * bsz, dtype=np.float32)
|
| 138 |
+
for step in range(total_step):
|
| 139 |
+
current_step = np.array([step] * bsz, dtype=np.float32)
|
| 140 |
+
xt, *_ = self.vector_est_ort.run(
|
| 141 |
+
None,
|
| 142 |
+
{
|
| 143 |
+
"noisy_latent": xt,
|
| 144 |
+
"text_emb": text_emb_onnx,
|
| 145 |
+
"style_ttl": style.ttl,
|
| 146 |
+
"text_mask": text_mask,
|
| 147 |
+
"latent_mask": latent_mask,
|
| 148 |
+
"current_step": current_step,
|
| 149 |
+
"total_step": total_step_np,
|
| 150 |
+
},
|
| 151 |
+
)
|
| 152 |
+
wav, *_ = self.vocoder_ort.run(None, {"latent": xt})
|
| 153 |
+
return wav, dur_onnx
|
| 154 |
+
|
| 155 |
+
def __call__(
|
| 156 |
+
self,
|
| 157 |
+
text: str,
|
| 158 |
+
style: Style,
|
| 159 |
+
total_step: int,
|
| 160 |
+
speed: float = 1.05,
|
| 161 |
+
silence_duration: float = 0.3,
|
| 162 |
+
max_len: int = 300,
|
| 163 |
+
) -> tuple[np.ndarray, np.ndarray]:
|
| 164 |
+
assert (
|
| 165 |
+
style.ttl.shape[0] == 1
|
| 166 |
+
), "Single speaker text to speech only supports single style"
|
| 167 |
+
text_list = chunk_text(text, max_len=max_len)
|
| 168 |
+
wav_cat = None
|
| 169 |
+
dur_cat = None
|
| 170 |
+
for text in text_list:
|
| 171 |
+
wav, dur_onnx = self._infer([text], style, total_step, speed)
|
| 172 |
+
if wav_cat is None:
|
| 173 |
+
wav_cat = wav
|
| 174 |
+
dur_cat = dur_onnx
|
| 175 |
+
else:
|
| 176 |
+
silence = np.zeros(
|
| 177 |
+
(1, int(silence_duration * self.sample_rate)), dtype=np.float32
|
| 178 |
+
)
|
| 179 |
+
wav_cat = np.concatenate([wav_cat, silence, wav], axis=1)
|
| 180 |
+
dur_cat += dur_onnx + silence_duration
|
| 181 |
+
return wav_cat, dur_cat
|
| 182 |
+
|
| 183 |
+
def stream(
|
| 184 |
+
self,
|
| 185 |
+
text: str,
|
| 186 |
+
style: Style,
|
| 187 |
+
total_step: int,
|
| 188 |
+
speed: float = 1.05,
|
| 189 |
+
silence_duration: float = 0.3,
|
| 190 |
+
max_len: int = 300,
|
| 191 |
+
):
|
| 192 |
+
assert (
|
| 193 |
+
style.ttl.shape[0] == 1
|
| 194 |
+
), "Single speaker text to speech only supports single style"
|
| 195 |
+
text_list = chunk_text(text, max_len=max_len)
|
| 196 |
+
|
| 197 |
+
for i, text in enumerate(text_list):
|
| 198 |
+
wav, _ = self._infer([text], style, total_step, speed)
|
| 199 |
+
yield wav.flatten()
|
| 200 |
+
|
| 201 |
+
if i < len(text_list) - 1:
|
| 202 |
+
silence = np.zeros(
|
| 203 |
+
(int(silence_duration * self.sample_rate),), dtype=np.float32
|
| 204 |
+
)
|
| 205 |
+
yield silence
|
| 206 |
+
|
| 207 |
+
def batch(
|
| 208 |
+
self, text_list: list[str], style: Style, total_step: int, speed: float = 1.05
|
| 209 |
+
) -> tuple[np.ndarray, np.ndarray]:
|
| 210 |
+
return self._infer(text_list, style, total_step, speed)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def length_to_mask(lengths: np.ndarray, max_len: Optional[int] = None) -> np.ndarray:
|
| 214 |
+
"""
|
| 215 |
+
Convert lengths to binary mask.
|
| 216 |
+
|
| 217 |
+
Args:
|
| 218 |
+
lengths: (B,)
|
| 219 |
+
max_len: int
|
| 220 |
+
|
| 221 |
+
Returns:
|
| 222 |
+
mask: (B, 1, max_len)
|
| 223 |
+
"""
|
| 224 |
+
max_len = max_len or lengths.max()
|
| 225 |
+
ids = np.arange(0, max_len)
|
| 226 |
+
mask = (ids < np.expand_dims(lengths, axis=1)).astype(np.float32)
|
| 227 |
+
return mask.reshape(-1, 1, max_len)
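# Worked example (by hand): lengths=[2, 4] with max_len=4 yields a float32 mask of
# shape (2, 1, 4): [[[1, 1, 0, 0]], [[1, 1, 1, 1]]], i.e. position i is 1.0 while i < length.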
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def get_latent_mask(
|
| 231 |
+
wav_lengths: np.ndarray, base_chunk_size: int, chunk_compress_factor: int
|
| 232 |
+
) -> np.ndarray:
|
| 233 |
+
latent_size = base_chunk_size * chunk_compress_factor
|
| 234 |
+
latent_lengths = (wav_lengths + latent_size - 1) // latent_size
|
| 235 |
+
latent_mask = length_to_mask(latent_lengths)
|
| 236 |
+
return latent_mask
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def load_onnx(
|
| 240 |
+
onnx_path: str, opts: ort.SessionOptions, providers: list[str]
|
| 241 |
+
) -> ort.InferenceSession:
|
| 242 |
+
return ort.InferenceSession(onnx_path, sess_options=opts, providers=providers)
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def load_onnx_all(
|
| 246 |
+
onnx_dir: str, opts: ort.SessionOptions, providers: list[str]
|
| 247 |
+
) -> tuple[
|
| 248 |
+
ort.InferenceSession,
|
| 249 |
+
ort.InferenceSession,
|
| 250 |
+
ort.InferenceSession,
|
| 251 |
+
ort.InferenceSession,
|
| 252 |
+
]:
|
| 253 |
+
dp_onnx_path = os.path.join(onnx_dir, "duration_predictor.onnx")
|
| 254 |
+
text_enc_onnx_path = os.path.join(onnx_dir, "text_encoder.onnx")
|
| 255 |
+
vector_est_onnx_path = os.path.join(onnx_dir, "vector_estimator.onnx")
|
| 256 |
+
vocoder_onnx_path = os.path.join(onnx_dir, "vocoder.onnx")
|
| 257 |
+
|
| 258 |
+
dp_ort = load_onnx(dp_onnx_path, opts, providers)
|
| 259 |
+
text_enc_ort = load_onnx(text_enc_onnx_path, opts, providers)
|
| 260 |
+
vector_est_ort = load_onnx(vector_est_onnx_path, opts, providers)
|
| 261 |
+
vocoder_ort = load_onnx(vocoder_onnx_path, opts, providers)
|
| 262 |
+
return dp_ort, text_enc_ort, vector_est_ort, vocoder_ort
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def load_cfgs(onnx_dir: str) -> dict:
|
| 266 |
+
cfg_path = os.path.join(onnx_dir, "tts.json")
|
| 267 |
+
with open(cfg_path, "r") as f:
|
| 268 |
+
cfgs = json.load(f)
|
| 269 |
+
return cfgs
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def load_text_processor(onnx_dir: str) -> UnicodeProcessor:
|
| 273 |
+
unicode_indexer_path = os.path.join(onnx_dir, "unicode_indexer.json")
|
| 274 |
+
text_processor = UnicodeProcessor(unicode_indexer_path)
|
| 275 |
+
return text_processor
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def load_text_to_speech(onnx_dir: str, use_gpu: bool = False) -> TextToSpeech:
|
| 279 |
+
opts = ort.SessionOptions()
|
| 280 |
+
if use_gpu:
|
| 281 |
+
raise NotImplementedError("GPU mode is not fully tested")
|
| 282 |
+
else:
|
| 283 |
+
providers = ["CPUExecutionProvider"]
|
| 284 |
+
print("Using CPU for inference")
|
| 285 |
+
cfgs = load_cfgs(onnx_dir)
|
| 286 |
+
dp_ort, text_enc_ort, vector_est_ort, vocoder_ort = load_onnx_all(
|
| 287 |
+
onnx_dir, opts, providers
|
| 288 |
+
)
|
| 289 |
+
text_processor = load_text_processor(onnx_dir)
|
| 290 |
+
return TextToSpeech(
|
| 291 |
+
cfgs, text_processor, dp_ort, text_enc_ort, vector_est_ort, vocoder_ort
|
| 292 |
+
)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def load_voice_style(voice_style_paths: list[str], verbose: bool = False) -> Style:
|
| 296 |
+
bsz = len(voice_style_paths)
|
| 297 |
+
|
| 298 |
+
# Read first file to get dimensions
|
| 299 |
+
with open(voice_style_paths[0], "r") as f:
|
| 300 |
+
first_style = json.load(f)
|
| 301 |
+
ttl_dims = first_style["style_ttl"]["dims"]
|
| 302 |
+
dp_dims = first_style["style_dp"]["dims"]
|
| 303 |
+
|
| 304 |
+
# Pre-allocate arrays with full batch size
|
| 305 |
+
ttl_style = np.zeros([bsz, ttl_dims[1], ttl_dims[2]], dtype=np.float32)
|
| 306 |
+
dp_style = np.zeros([bsz, dp_dims[1], dp_dims[2]], dtype=np.float32)
|
| 307 |
+
|
| 308 |
+
# Fill in the data
|
| 309 |
+
for i, voice_style_path in enumerate(voice_style_paths):
|
| 310 |
+
with open(voice_style_path, "r") as f:
|
| 311 |
+
voice_style = json.load(f)
|
| 312 |
+
|
| 313 |
+
ttl_data = np.array(
|
| 314 |
+
voice_style["style_ttl"]["data"], dtype=np.float32
|
| 315 |
+
).flatten()
|
| 316 |
+
ttl_style[i] = ttl_data.reshape(ttl_dims[1], ttl_dims[2])
|
| 317 |
+
|
| 318 |
+
dp_data = np.array(
|
| 319 |
+
voice_style["style_dp"]["data"], dtype=np.float32
|
| 320 |
+
).flatten()
|
| 321 |
+
dp_style[i] = dp_data.reshape(dp_dims[1], dp_dims[2])
|
| 322 |
+
|
| 323 |
+
if verbose:
|
| 324 |
+
print(f"Loaded {bsz} voice styles")
|
| 325 |
+
return Style(ttl_style, dp_style)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
@contextmanager
|
| 329 |
+
def timer(name: str):
|
| 330 |
+
start = time.time()
|
| 331 |
+
print(f"{name}...")
|
| 332 |
+
yield
|
| 333 |
+
print(f" -> {name} completed in {time.time() - start:.2f} sec")
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def sanitize_filename(text: str, max_len: int) -> str:
|
| 337 |
+
"""Sanitize filename by replacing non-alphanumeric characters with underscores"""
|
| 338 |
+
prefix = text[:max_len]
|
| 339 |
+
return re.sub(r"[^a-zA-Z0-9]", "_", prefix)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def chunk_text(text: str, max_len: int = 300) -> list[str]:
|
| 343 |
+
"""
|
| 344 |
+
Split text into chunks by paragraphs and sentences.
|
| 345 |
+
|
| 346 |
+
Args:
|
| 347 |
+
text: Input text to chunk
|
| 348 |
+
max_len: Maximum length of each chunk (default: 300)
|
| 349 |
+
|
| 350 |
+
Returns:
|
| 351 |
+
List of text chunks
|
| 352 |
+
"""
|
| 353 |
+
# Split by paragraph (two or more newlines)
|
| 354 |
+
paragraphs = [p.strip() for p in re.split(r"\n\s*\n+", text.strip()) if p.strip()]
|
| 355 |
+
|
| 356 |
+
chunks = []
|
| 357 |
+
|
| 358 |
+
for paragraph in paragraphs:
|
| 359 |
+
paragraph = paragraph.strip()
|
| 360 |
+
if not paragraph:
|
| 361 |
+
continue
|
| 362 |
+
|
| 363 |
+
# Split by sentence boundaries (period, question mark, exclamation mark followed by space)
|
| 364 |
+
# But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F.
|
| 365 |
+
pattern = r"(?<!Mr\.)(?<!Mrs\.)(?<!Ms\.)(?<!Dr\.)(?<!Prof\.)(?<!Sr\.)(?<!Jr\.)(?<!Ph\.D\.)(?<!etc\.)(?<!e\.g\.)(?<!i\.e\.)(?<!vs\.)(?<!Inc\.)(?<!Ltd\.)(?<!Co\.)(?<!Corp\.)(?<!St\.)(?<!Ave\.)(?<!Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+"
|
| 366 |
+
sentences = re.split(pattern, paragraph)
|
| 367 |
+
|
| 368 |
+
current_chunk = ""
|
| 369 |
+
|
| 370 |
+
for sentence in sentences:
|
| 371 |
+
if len(current_chunk) + len(sentence) + 1 <= max_len:
|
| 372 |
+
current_chunk += (" " if current_chunk else "") + sentence
|
| 373 |
+
else:
|
| 374 |
+
if current_chunk:
|
| 375 |
+
chunks.append(current_chunk.strip())
|
| 376 |
+
current_chunk = sentence
|
| 377 |
+
|
| 378 |
+
if current_chunk:
|
| 379 |
+
chunks.append(current_chunk.strip())
|
| 380 |
+
|
| 381 |
+
return chunks
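# Chunking sketch: with max_len=300, a paragraph of three ~100-character sentences
# becomes two chunks (sentences 1-2, then sentence 3); chunks never span paragraphs,
# and abbreviations such as "Dr." do not trigger a sentence split. Figures are illustrative.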
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
# --- Main Tool Logic ---
|
| 385 |
+
|
| 386 |
+
# --- Kokoro State ---
|
| 387 |
+
_KOKORO_STATE = {
|
| 388 |
+
"initialized": False,
|
| 389 |
+
"device": "cpu",
|
| 390 |
+
"model": None,
|
| 391 |
+
"pipelines": {},
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
# --- Supertonic State ---
|
| 395 |
+
_SUPERTONIC_STATE = {
|
| 396 |
+
"initialized": False,
|
| 397 |
+
"tts": None,
|
| 398 |
+
"assets_dir": None,
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
def _audio_np_to_int16(audio_np: np.ndarray) -> np.ndarray:
|
| 402 |
+
audio_clipped = np.clip(audio_np, -1.0, 1.0)
|
| 403 |
+
return (audio_clipped * 32767.0).astype(np.int16)
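# Conversion sketch: a float sample of 0.5 maps to 16383 and 1.0 to 32767; values
# outside [-1.0, 1.0] are clipped first (e.g., 1.7 also becomes 32767).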
|
| 404 |
+
|
| 405 |
+
# --- Kokoro Functions ---
|
| 406 |
+
|
| 407 |
+
def get_kokoro_voices() -> list[str]:
|
| 408 |
+
try:
|
| 409 |
+
if list_repo_files:
|
| 410 |
+
            files = list_repo_files("hexgrad/Kokoro-82M")
            voice_files = [file for file in files if file.endswith(".pt") and file.startswith("voices/")]
            voices = [file.replace("voices/", "").replace(".pt", "") for file in voice_files]
            return sorted(voices) if voices else _get_fallback_voices()
        return _get_fallback_voices()
    except Exception:
        return _get_fallback_voices()


def _get_fallback_voices() -> list[str]:
    return [
        "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
        "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa",
        "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
        "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
        "ef_dora", "em_alex", "em_santa",
        "ff_siwis",
        "hf_alpha", "hf_beta", "hm_omega", "hm_psi",
        "if_sara", "im_nicola",
        "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo",
        "pf_dora", "pm_alex", "pm_santa",
        "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
        "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
    ]


def _init_kokoro() -> None:
    if _KOKORO_STATE["initialized"]:
        return
    if KModel is None or KPipeline is None:
        raise RuntimeError("Kokoro is not installed. Please install the 'kokoro' package (>=0.9.4).")
    device = "cpu"
    if torch is not None:
        try:
            if torch.cuda.is_available():
                device = "cuda"
        except Exception:
            device = "cpu"
    model = KModel(repo_id="hexgrad/Kokoro-82M").to(device).eval()
    pipelines = {"a": KPipeline(lang_code="a", model=False, repo_id="hexgrad/Kokoro-82M")}
    try:
        pipelines["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
    except Exception:
        pass
    _KOKORO_STATE.update({"initialized": True, "device": device, "model": model, "pipelines": pipelines})


# --- Supertonic Functions ---

def _init_supertonic() -> None:
    if _SUPERTONIC_STATE["initialized"]:
        return

    if snapshot_download is None:
        raise RuntimeError("huggingface_hub is not installed.")

    # Use a local assets directory within Nymbo-Tools
    # Assuming this file is in Nymbo-Tools/Modules
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    assets_dir = os.path.join(base_dir, "assets", "supertonic")

    if not os.path.exists(assets_dir):
        print(f"Downloading Supertonic models to {assets_dir}...")
        snapshot_download(repo_id="Supertone/supertonic", local_dir=assets_dir)

    onnx_dir = os.path.join(assets_dir, "onnx")
    tts = load_text_to_speech(onnx_dir, use_gpu=False)

    _SUPERTONIC_STATE.update({"initialized": True, "tts": tts, "assets_dir": assets_dir})


def get_supertonic_voices() -> list[str]:
    # We need assets to list voices. If not initialized, try to find them or init.
    if not _SUPERTONIC_STATE["initialized"]:
        # Check if assets exist without full init
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        assets_dir = os.path.join(base_dir, "assets", "supertonic")
        if not os.path.exists(assets_dir):
            # If we can't list, return a default list or empty
            return ["F1", "F2", "M1", "M2"]  # Known defaults
    else:
        assets_dir = _SUPERTONIC_STATE["assets_dir"]

    voice_styles_dir = os.path.join(assets_dir, "voice_styles")
    if not os.path.exists(voice_styles_dir):
        return ["F1", "F2", "M1", "M2"]

    files = os.listdir(voice_styles_dir)
    voices = [f.replace('.json', '') for f in files if f.endswith('.json')]
    return sorted(voices)


def List_Kokoro_Voices() -> list[str]:
    return get_kokoro_voices()


def List_Supertonic_Voices() -> list[str]:
    return get_supertonic_voices()


# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
    "Supertonic: faster, supports steps/silence/chunking. "
    "Kokoro: slower, supports many languages/accents. "
    "Return the generated media to the user in this format ``."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Speech(
    text: Annotated[str, "The text to synthesize (English)."],
    model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
    speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
    steps: Annotated[int, "Supertonic only. Diffusion steps (1-50). Higher = better quality but slower."] = 5,
    voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
    silence_duration: Annotated[float, "Supertonic only. Silence duration between chunks (0.0-2.0s)."] = 0.3,
    max_chunk_size: Annotated[int, "Supertonic only. Max text chunk length (50-1000)."] = 300,
) -> str:
    _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)

    if not text or not text.strip():
        try:
            _log_call_end("Generate_Speech", "error=empty text")
        finally:
            pass
        raise gr.Error("Please provide non-empty text to synthesize.")

    model_lower = model.lower()

    # Handle default voice switching if user didn't specify appropriate voice for model
    if model_lower == "kokoro" and voice == "F1":
        voice = "af_heart"
    elif model_lower == "supertonic" and voice == "af_heart":
        voice = "F1"

    try:
        if model_lower == "kokoro":
            return _generate_kokoro(text, speed, voice)
        else:
            # Default to Supertonic
            return _generate_supertonic(text, speed, voice, steps, silence_duration, max_chunk_size)

    except gr.Error as exc:
        _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
        raise
    except Exception as exc:  # pylint: disable=broad-except
        _log_call_end("Generate_Speech", f"error={str(exc)[:120]}")
        raise gr.Error(f"Error during speech generation: {exc}")


def _generate_kokoro(text: str, speed: float, voice: str) -> str:
    _init_kokoro()
    model = _KOKORO_STATE["model"]
    pipelines = _KOKORO_STATE["pipelines"]
    pipeline = pipelines.get("a")
    if pipeline is None:
        raise gr.Error("Kokoro English pipeline not initialized.")

    audio_segments = []
    pack = pipeline.load_voice(voice)

    segments = list(pipeline(text, voice, speed))
    total_segments = len(segments)
    for segment_idx, (text_chunk, ps, _) in enumerate(segments):
        ref_s = pack[len(ps) - 1]
        try:
            audio = model(ps, ref_s, float(speed))
            audio_segments.append(audio.detach().cpu().numpy())
            if total_segments > 10 and (segment_idx + 1) % 5 == 0:
                print(f"Progress: Generated {segment_idx + 1}/{total_segments} segments...")
        except Exception as exc:
            raise gr.Error(f"Error generating audio for segment {segment_idx + 1}: {exc}")

    if not audio_segments:
        raise gr.Error("No audio was generated (empty synthesis result).")

    if len(audio_segments) == 1:
        final_audio = audio_segments[0]
    else:
        final_audio = np.concatenate(audio_segments, axis=0)
        if total_segments > 1:
            duration = len(final_audio) / 24_000
            print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")

    # Save to file
    filename = f"speech_kokoro_{uuid.uuid4().hex[:8]}.wav"
    output_path = os.path.join(ROOT_DIR, filename)

    # Normalize to 16-bit PCM
    audio_int16 = (final_audio * 32767).astype(np.int16)
    scipy.io.wavfile.write(output_path, 24000, audio_int16)

    _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/24_000:.2f}")
    return output_path


def _generate_supertonic(text: str, speed: float, voice: str, steps: int, silence_duration: float, max_chunk_size: int) -> str:
    _init_supertonic()
    tts = _SUPERTONIC_STATE["tts"]
    assets_dir = _SUPERTONIC_STATE["assets_dir"]

    voice_path = os.path.join(assets_dir, "voice_styles", f"{voice}.json")
    if not os.path.exists(voice_path):
        # Fallback or error?
        # Try to find if it's just a name mismatch or use default
        if not os.path.exists(voice_path):
            raise gr.Error(f"Voice style {voice} not found for Supertonic.")

    style = load_voice_style([voice_path])

    sr = tts.sample_rate

    # Supertonic returns a generator of chunks, or we can use __call__ for full audio
    # Using __call__ to get full audio for saving
    # But __call__ returns (wav_cat, dur_cat)

    wav_cat, _ = tts(text, style, steps, speed, silence_duration, max_chunk_size)

    if wav_cat is None or wav_cat.size == 0:
        raise gr.Error("No audio generated.")

    # wav_cat is (1, samples) float32
    final_audio = wav_cat.flatten()

    # Save to file
    filename = f"speech_supertonic_{uuid.uuid4().hex[:8]}.wav"
    output_path = os.path.join(ROOT_DIR, filename)

    audio_int16 = _audio_np_to_int16(final_audio)
    scipy.io.wavfile.write(output_path, sr, audio_int16)

    _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/sr:.2f}")
    return output_path


def build_interface() -> gr.Interface:
    kokoro_voices = get_kokoro_voices()
    supertonic_voices = get_supertonic_voices()
    all_voices = sorted(list(set(kokoro_voices + supertonic_voices)))

    return gr.Interface(
        fn=Generate_Speech,
        inputs=[
            gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4, info="The text to synthesize (English)"),
            gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic", info="The TTS model to use"),
            gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed", info="Speech speed multiplier (1.0 = normal)"),
            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only: Diffusion steps (1-50)"),
            gr.Dropdown(
                label="Voice",
                choices=all_voices,
                value="F1",
                info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
            ),
            gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only: Silence duration between chunks"),
            gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only: Max text chunk length"),
        ],
        outputs=gr.Audio(label="Audio", type="filepath", format="wav"),
        title="Generate Speech",
        description=(
            "<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]
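A minimal usage sketch for this module (not part of the commit; it assumes the package can be imported outside the running Space and that the TTS dependencies from requirements.txt plus the Supertonic assets are available):

# Illustrative sketch only; module path and import order are assumptions.
from Modules.Generate_Speech import Generate_Speech, List_Supertonic_Voices

print(List_Supertonic_Voices())        # e.g. ['F1', 'F2', 'M1', 'M2'] before assets are downloaded
wav_path = Generate_Speech(
    "Hello from the agent terminal.",  # text to synthesize
    model="Supertonic",                # or "Kokoro"
    voice="F1",                        # Kokoro would default to 'af_heart'
    speed=1.3,
)
print(wav_path)                        # path to a .wav saved under the Filesystem root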
Modules/Generate_Video.py
ADDED
@@ -0,0 +1,184 @@
from __future__ import annotations

import os
import uuid
import random
import tempfile
from typing import Annotated

import gradio as gr
from huggingface_hub import InferenceClient
from .File_System import ROOT_DIR

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc

HF_VIDEO_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")

# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Generate a short MP4 video from a text prompt via Hugging Face serverless inference; "
    "control model, steps, guidance, seed, size, fps, and duration; returns a temporary MP4 file path. "
    "Return the generated media to the user in this format ``."
)


def _write_video_tmp(data_iter_or_bytes: object, suffix: str = ".mp4") -> str:
    filename = f"video_{uuid.uuid4().hex[:8]}{suffix}"
    path = os.path.join(ROOT_DIR, filename)
    try:
        with open(path, "wb") as file:
            if isinstance(data_iter_or_bytes, (bytes, bytearray)):
                file.write(data_iter_or_bytes)
            elif hasattr(data_iter_or_bytes, "read"):
                file.write(data_iter_or_bytes.read())
            elif hasattr(data_iter_or_bytes, "content"):
                file.write(data_iter_or_bytes.content)  # type: ignore[attr-defined]
            elif hasattr(data_iter_or_bytes, "__iter__") and not isinstance(data_iter_or_bytes, (str, dict)):
                for chunk in data_iter_or_bytes:  # type: ignore[assignment]
                    if chunk:
                        file.write(chunk)
            else:
                raise gr.Error("Unsupported video data type returned by provider.")
    except Exception:
        try:
            os.remove(path)
        except Exception:
            pass
        raise
    return path


@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Video(
    prompt: Annotated[str, "Text description of the video to generate (e.g., 'a red fox running through a snowy forest at sunrise')."],
    model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name'. Defaults to akhaliq/sora-2."] = "akhaliq/sora-2",
    negative_prompt: Annotated[str, "What should NOT appear in the video."] = "",
    steps: Annotated[int, "Number of denoising steps (1–100). Higher can improve quality but is slower."] = 25,
    cfg_scale: Annotated[float, "Guidance scale (1–20). Higher = follow the prompt more closely, lower = more creative."] = 3.5,
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (multiples of 8 recommended)."] = 768,
    height: Annotated[int, "Output height in pixels (multiples of 8 recommended)."] = 768,
    fps: Annotated[int, "Frames per second of the output video (e.g., 24)."] = 24,
    duration: Annotated[float, "Target duration in seconds. For Sora-2, must be 4, 8, or 12."] = 4.0,
) -> str:
    _log_call_start(
        "Generate_Video",
        prompt=_truncate_for_log(prompt, 160),
        model_id=model_id,
        steps=steps,
        cfg_scale=cfg_scale,
        fps=fps,
        duration=duration,
        size=f"{width}x{height}",
    )
    if not prompt or not prompt.strip():
        _log_call_end("Generate_Video", "error=empty prompt")
        raise gr.Error("Please provide a non-empty prompt.")
    providers = ["auto", "replicate", "fal-ai"]
    last_error: Exception | None = None
    parameters = {
        "negative_prompt": negative_prompt or None,
        "num_inference_steps": steps,
        "guidance_scale": cfg_scale,
        "seed": seed if seed != -1 else random.randint(1, 1_000_000_000),
        "width": width,
        "height": height,
        "fps": fps,
        "duration": duration,
    }
    for provider in providers:
        try:
            client = InferenceClient(api_key=HF_VIDEO_TOKEN, provider=provider)
            if hasattr(client, "text_to_video"):
                num_frames = int(duration * fps) if duration and fps else None
                extra_body = {}
                if width:
                    extra_body["width"] = width
                if height:
                    extra_body["height"] = height
                if fps:
                    extra_body["fps"] = fps
                if duration:
                    extra_body["duration"] = duration
                result = client.text_to_video(
                    prompt=prompt,
                    model=model_id,
                    guidance_scale=cfg_scale,
                    negative_prompt=[negative_prompt] if negative_prompt else None,
                    num_frames=num_frames,
                    num_inference_steps=steps,
                    seed=parameters["seed"],
                    extra_body=extra_body if extra_body else None,
                )
            else:
                # Fallback for older clients or specific providers if needed, though InferenceClient usually has text_to_video
                # Note: client.post is not available in some versions of InferenceClient
                continue

            path = _write_video_tmp(result, suffix=".mp4")
            try:
                size = os.path.getsize(path)
            except Exception:
                size = -1
            _log_call_end("Generate_Video", f"provider={provider} path={os.path.basename(path)} bytes={size}")
            return path
        except KeyError as exc:
            # Handle specific provider errors that manifest as KeyError (e.g. fal-ai missing 'video' key on error)
            if "video" in str(exc):
                last_error = ValueError(f"Provider {provider} returned an invalid response. This often happens with invalid parameters (e.g. duration must be 4, 8, or 12 for Sora-2).")
            else:
                last_error = exc
            continue
        except Exception as exc:  # pylint: disable=broad-except
            last_error = exc
            continue
    msg = str(last_error) if last_error else "Unknown error"
    lowered = msg.lower()
    if "404" in msg:
        raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and HF token access.")
    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.")
    if "401" in msg or "403" in msg:
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
    if ("api_key" in lowered) or ("hf auth login" in lowered) or ("unauthorized" in lowered) or ("forbidden" in lowered):
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
    _log_call_end("Generate_Video", f"error={_truncate_for_log(msg, 200)}")
    raise gr.Error(f"Video generation failed: {msg}")


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Generate_Video,
        inputs=[
            gr.Textbox(label="Prompt", placeholder="Enter a prompt for the video", lines=2, info="Text description of the video to generate"),
            gr.Textbox(
                label="Model",
                value="akhaliq/sora-2",
                placeholder="creator/model-name",
                max_lines=1,
                info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-video&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
            ),
            gr.Textbox(label="Negative Prompt", value="", lines=2, info="What should NOT appear in the video"),
            gr.Slider(minimum=1, maximum=100, value=25, step=1, label="Steps", info="Number of denoising steps (1–100)"),
            gr.Slider(minimum=1.0, maximum=20.0, value=3.5, step=0.1, label="CFG Scale", info="Guidance scale (1–20)"),
            gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility"),
            gr.Slider(minimum=64, maximum=1920, value=768, step=8, label="Width", info="Output width in pixels"),
            gr.Slider(minimum=64, maximum=1920, value=768, step=8, label="Height", info="Output height in pixels"),
            gr.Slider(minimum=4, maximum=60, value=24, step=1, label="FPS", info="Frames per second"),
            gr.Slider(minimum=1.0, maximum=10.0, value=4.0, step=0.5, label="Duration (s)", info="Target duration in seconds"),
        ],
        outputs=gr.Video(label="Generated Video", buttons=["download"], format="mp4"),
        title="Generate Video",
        description=(
            "<div style=\"text-align:center\">Generate short videos via Hugging Face serverless inference. "
            "Default model is Sora-2.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Generate_Video", "build_interface"]
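A short call sketch (illustrative only, not part of the commit): the function tries the providers "auto", "replicate", and "fal-ai" in order and raises a gr.Error only after all of them fail, so a caller just passes the prompt and optional knobs. The duration constraint mentioned in the error handling applies to the default Sora-2 model.

# Illustrative sketch only; requires an HF_READ_TOKEN with provider access.
from Modules.Generate_Video import Generate_Video

mp4_path = Generate_Video(
    prompt="a red fox running through a snowy forest at sunrise",
    model_id="akhaliq/sora-2",
    duration=4.0,   # 4, 8, or 12 seconds for Sora-2
    fps=24,
)
print(mp4_path)     # MP4 written under the Filesystem root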
Modules/Memory_Manager.py
ADDED
@@ -0,0 +1,253 @@
from __future__ import annotations

import json
import os
import threading
import uuid
from datetime import datetime
from typing import Annotated, Dict, List, Literal, Optional

import gradio as gr
from ._docstrings import autodoc

_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
MEMORY_FILE = os.path.join(os.path.dirname(_MODULE_DIR), "memories.json")
_MEMORY_LOCK = threading.RLock()
_MAX_MEMORIES = 10_000


def _now_iso() -> str:
    return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")


def _load_memories() -> List[Dict[str, str]]:
    if not os.path.exists(MEMORY_FILE):
        return []
    try:
        with open(MEMORY_FILE, "r", encoding="utf-8") as file:
            data = json.load(file)
            if isinstance(data, list):
                cleaned: List[Dict[str, str]] = []
                for item in data:
                    if isinstance(item, dict) and "id" in item and "text" in item:
                        cleaned.append(item)
                return cleaned
            return []
    except Exception:
        try:
            backup = MEMORY_FILE + ".corrupt"
            if not os.path.exists(backup):
                os.replace(MEMORY_FILE, backup)
        except Exception:
            pass
        return []


def _save_memories(memories: List[Dict[str, str]]) -> None:
    tmp_path = MEMORY_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as file:
        json.dump(memories, file, ensure_ascii=False, indent=2)
    os.replace(tmp_path, MEMORY_FILE)


def _mem_save(text: str, tags: str) -> str:
    text_clean = (text or "").strip()
    if not text_clean:
        return "Error: memory text is empty."
    with _MEMORY_LOCK:
        memories = _load_memories()
        if memories and memories[-1].get("text") == text_clean:
            return "Skipped: identical to last stored memory."
        mem_id = str(uuid.uuid4())
        entry = {
            "id": mem_id,
            "text": text_clean,
            "timestamp": _now_iso(),
            "tags": tags.strip(),
        }
        memories.append(entry)
        if len(memories) > _MAX_MEMORIES:
            overflow = len(memories) - _MAX_MEMORIES
            memories = memories[overflow:]
        _save_memories(memories)
        return f"Memory saved: {mem_id}"


def _mem_list(limit: int, include_tags: bool) -> str:
    limit = max(1, min(200, limit))
    with _MEMORY_LOCK:
        memories = _load_memories()
        if not memories:
            return "No memories stored yet."
        chosen = memories[-limit:][::-1]
        lines: List[str] = []
        for memory in chosen:
            base = f"{memory['id'][:8]} [{memory.get('timestamp','?')}] {memory.get('text','')}"
            if include_tags and memory.get("tags"):
                base += f" | tags: {memory['tags']}"
            lines.append(base)
        omitted = len(memories) - len(chosen)
        if omitted > 0:
            lines.append(f"… ({omitted} older memorie{'s' if omitted!=1 else ''} omitted; total={len(memories)})")
        return "\n".join(lines)


def _parse_search_query(query: str) -> Dict[str, List[str]]:
    import re

    result = {"tag_terms": [], "text_terms": [], "operator": "and"}
    if not query or not query.strip():
        return result
    query = re.sub(r"\s+", " ", query.strip())
    if re.search(r"\bOR\b", query, re.IGNORECASE):
        result["operator"] = "or"
        parts = re.split(r"\s+OR\s+", query, flags=re.IGNORECASE)
    else:
        parts = re.split(r"\s+(?:AND\s+)?", query, flags=re.IGNORECASE)
    parts = [p for p in parts if p.strip() and p.strip().upper() != "AND"]
    for part in parts:
        part = part.strip()
        if not part:
            continue
        tag_match = re.match(r"^tag:(.+)$", part, re.IGNORECASE)
        if tag_match:
            tag_name = tag_match.group(1).strip()
            if tag_name:
                result["tag_terms"].append(tag_name.lower())
        else:
            result["text_terms"].append(part.lower())
    return result


def _match_memory_with_query(memory: Dict[str, str], parsed_query: Dict[str, List[str]]) -> bool:
    tag_terms = parsed_query["tag_terms"]
    text_terms = parsed_query["text_terms"]
    operator = parsed_query["operator"]
    if not tag_terms and not text_terms:
        return False
    memory_text = memory.get("text", "").lower()
    memory_tags = memory.get("tags", "").lower()
    memory_tag_list = [tag.strip() for tag in memory_tags.split(",") if tag.strip()]
    tag_matches = [any(tag_term in tag for tag in memory_tag_list) for tag_term in tag_terms]
    combined_text = memory_text + " " + memory_tags
    text_matches = [text_term in combined_text for text_term in text_terms]
    all_matches = tag_matches + text_matches
    if not all_matches:
        return False
    if operator == "or":
        return any(all_matches)
    return all(all_matches)


def _mem_search(query: str, limit: int) -> str:
    q = (query or "").strip()
    if not q:
        return "Error: empty query."
    parsed_query = _parse_search_query(q)
    if not parsed_query["tag_terms"] and not parsed_query["text_terms"]:
        return "Error: no valid search terms found."
    limit = max(1, min(200, limit))
    with _MEMORY_LOCK:
        memories = _load_memories()
        matches: List[Dict[str, str]] = []
        total_matches = 0
        for memory in reversed(memories):
            if _match_memory_with_query(memory, parsed_query):
                total_matches += 1
                if len(matches) < limit:
                    matches.append(memory)
        if not matches:
            return f"No matches for: {query}"
        lines = [
            f"{memory['id'][:8]} [{memory.get('timestamp','?')}] {memory.get('text','')}" + (f" | tags: {memory['tags']}" if memory.get('tags') else "")
            for memory in matches
        ]
        omitted = total_matches - len(matches)
        if omitted > 0:
            lines.append(f"… ({omitted} additional match{'es' if omitted!=1 else ''} omitted; total_matches={total_matches})")
        return "\n".join(lines)


def _mem_delete(memory_id: str) -> str:
    key = (memory_id or "").strip().lower()
    if len(key) < 4:
        return "Error: supply at least 4 characters of the id."
    with _MEMORY_LOCK:
        memories = _load_memories()
        matched = [memory for memory in memories if memory["id"].lower().startswith(key)]
        if not matched:
            return "Memory not found."
        if len(matched) > 1 and key != matched[0]["id"].lower():
            sample = ", ".join(memory["id"][:8] for memory in matched[:5])
            more = "…" if len(matched) > 5 else ""
            return f"Ambiguous prefix (matches {len(matched)} ids: {sample}{more}). Provide more characters."
        target_id = matched[0]["id"]
        memories = [memory for memory in memories if memory["id"] != target_id]
        _save_memories(memories)
        return f"Deleted memory: {target_id}"


# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Manage short text memories (save, list, search, delete) in a local JSON store with tags and simple query language; "
    "returns a result string (confirmation, listing, matches, or error)."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Memory_Manager(
    action: Annotated[Literal["save", "list", "search", "delete"], "Action to perform: save | list | search | delete"],
    text: Annotated[Optional[str], "Text content (Save only)"] = None,
    tags: Annotated[Optional[str], "Comma-separated tags (Save only)"] = None,
    query: Annotated[Optional[str], "Enhanced search with tag:name syntax, AND/OR operators (Search only)"] = None,
    limit: Annotated[int, "Max results (List/Search only)"] = 20,
    memory_id: Annotated[Optional[str], "Full UUID or unique prefix (Delete only)"] = None,
    include_tags: Annotated[bool, "Include tags (List/Search only)"] = True,
) -> str:
    act = (action or "").lower().strip()
    text = text or ""
    tags = tags or ""
    query = query or ""
    memory_id = memory_id or ""
    if act == "save":
        if not text.strip():
            return "Error: 'text' is required when action=save."
        return _mem_save(text=text, tags=tags)
    if act == "list":
        return _mem_list(limit=limit, include_tags=include_tags)
    if act == "search":
        if not query.strip():
            return "Error: 'query' is required when action=search."
        return _mem_search(query=query, limit=limit)
    if act == "delete":
        if not memory_id.strip():
            return "Error: 'memory_id' is required when action=delete."
        return _mem_delete(memory_id=memory_id)
    return "Error: invalid action (use save|list|search|delete)."


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Memory_Manager,
        inputs=[
            gr.Radio(label="Action", choices=["save", "list", "search", "delete"], value="list", info="Action to perform"),
            gr.Textbox(label="Text", lines=3, info="Memory text (Save only)"),
            gr.Textbox(label="Tags", placeholder="tag1, tag2", max_lines=1, info="Comma-separated tags (Save only)"),
            gr.Textbox(label="Query", placeholder="tag:work AND tag:project OR meeting", max_lines=1, info="Search query (Search only)"),
            gr.Slider(1, 200, value=20, step=1, label="Limit", info="Max results (List/Search only)"),
            gr.Textbox(label="Memory ID / Prefix", max_lines=1, info="UUID or prefix (Delete only)"),
            gr.Checkbox(value=True, label="Include Tags", info="Include tags in output (List/Search only)"),
        ],
        outputs=gr.Textbox(label="Result", lines=14),
        title="Memory Manager",
        description=(
            "<div style=\"text-align:center\">Lightweight local JSON memory store (no external DB). Choose an Action, fill only the relevant fields, and run.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Memory_Manager", "build_interface", "_load_memories", "_save_memories"]
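To make the search syntax concrete, here is a hedged sketch of how the query language behaves (illustrative only, not part of the commit): `tag:` terms are matched against the comma-separated tags of each memory, bare terms are matched against text plus tags, and terms combine with AND unless an OR appears anywhere in the query.

# Illustrative sketch only; assumes the module is importable as Modules.Memory_Manager.
from Modules.Memory_Manager import Memory_Manager

Memory_Manager("save", text="Ship the demo Space", tags="work, project")
Memory_Manager("save", text="Buy oat milk", tags="errands")

print(Memory_Manager("search", query="tag:work AND demo"))    # AND (default): both terms must match
print(Memory_Manager("search", query="tag:errands OR demo"))  # OR: any term is sufficient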
Modules/Obsidian_Vault.py
ADDED
@@ -0,0 +1,495 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
import stat
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Annotated, Optional
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
|
| 12 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 13 |
+
from ._docstrings import autodoc
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
TOOL_SUMMARY = (
|
| 17 |
+
"Browse and search the Obsidian vault in read-only mode. "
|
| 18 |
+
"Actions: list, read, info, search, help. "
|
| 19 |
+
"All paths resolve within the vault root. Start paths with '/' (e.g., /Notes)."
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
HELP_TEXT = (
|
| 23 |
+
"Obsidian Vault — actions and usage\n\n"
|
| 24 |
+
"Root: Tools/Obsidian (override with OBSIDIAN_VAULT_ROOT). "
|
| 25 |
+
"Start paths with '/' to reference the vault root (e.g., /Projects/note.md). "
|
| 26 |
+
"Absolute paths are disabled unless UNSAFE_ALLOW_ABS_PATHS=1.\n\n"
|
| 27 |
+
"Actions and fields:\n"
|
| 28 |
+
"- list: path='/' (default), recursive=false, show_hidden=false, max_entries=20\n"
|
| 29 |
+
"- read: path (e.g., /Projects/note.md), offset=0, max_chars=4000 (shows next_cursor when truncated)\n"
|
| 30 |
+
"- info: path\n"
|
| 31 |
+
"- search: path (note or folder), query text in the Search field, recursive=false, show_hidden=false, max_entries=20, case_sensitive=false, offset=0\n"
|
| 32 |
+
"- help: show this guide\n\n"
|
| 33 |
+
"Errors are returned as JSON with fields: {status:'error', code, message, path?, hint?, data?}.\n\n"
|
| 34 |
+
"Examples:\n"
|
| 35 |
+
"- list current: action=list, path='/'\n"
|
| 36 |
+
"- read note: action=read, path='/Projects/note.md', max_chars=500\n"
|
| 37 |
+
"- show metadata: action=info, path='/Inbox'\n"
|
| 38 |
+
"- search notes: action=search, path='/Projects', query='deadline', recursive=true, max_entries=100\n"
|
| 39 |
+
"- case-sensitive search: action=search, query='TODO', case_sensitive=true\n"
|
| 40 |
+
"- page search results: action=search, query='TODO', offset=20\n"
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _default_root() -> str:
|
| 45 |
+
env_root = os.getenv("OBSIDIAN_VAULT_ROOT")
|
| 46 |
+
if env_root and env_root.strip():
|
| 47 |
+
return os.path.abspath(os.path.expanduser(env_root.strip()))
|
| 48 |
+
try:
|
| 49 |
+
here = os.path.abspath(__file__)
|
| 50 |
+
tools_dir = os.path.dirname(os.path.dirname(here))
|
| 51 |
+
return os.path.abspath(os.path.join(tools_dir, "Obsidian"))
|
| 52 |
+
except Exception:
|
| 53 |
+
return os.path.abspath(os.getcwd())
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
ROOT_DIR = _default_root()
|
| 57 |
+
try:
|
| 58 |
+
os.makedirs(ROOT_DIR, exist_ok=True)
|
| 59 |
+
except Exception:
|
| 60 |
+
pass
|
| 61 |
+
ALLOW_ABS = bool(int(os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0")))
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _safe_err(exc: Exception | str) -> str:
|
| 65 |
+
"""Return an error string with any absolute root replaced by '/' and slashes normalized."""
|
| 66 |
+
s = str(exc)
|
| 67 |
+
s_norm = s.replace("\\", "/")
|
| 68 |
+
root_fwd = ROOT_DIR.replace("\\", "/")
|
| 69 |
+
root_variants = {ROOT_DIR, root_fwd, re.sub(r"/+", "/", root_fwd)}
|
| 70 |
+
for variant in root_variants:
|
| 71 |
+
if variant:
|
| 72 |
+
s_norm = s_norm.replace(variant, "/")
|
| 73 |
+
s_norm = re.sub(r"/+", "/", s_norm)
|
| 74 |
+
return s_norm
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _err(code: str, message: str, *, path: str | None = None, hint: str | None = None, data: dict | None = None) -> str:
|
| 78 |
+
payload = {
|
| 79 |
+
"status": "error",
|
| 80 |
+
"code": code,
|
| 81 |
+
"message": message,
|
| 82 |
+
"root": "/",
|
| 83 |
+
}
|
| 84 |
+
if path:
|
| 85 |
+
payload["path"] = path
|
| 86 |
+
if hint:
|
| 87 |
+
payload["hint"] = hint
|
| 88 |
+
if data:
|
| 89 |
+
payload["data"] = data
|
| 90 |
+
return json.dumps(payload, ensure_ascii=False)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _display_path(abs_path: str) -> str:
|
| 94 |
+
try:
|
| 95 |
+
norm_root = os.path.normpath(ROOT_DIR)
|
| 96 |
+
norm_abs = os.path.normpath(abs_path)
|
| 97 |
+
common = os.path.commonpath([norm_root, norm_abs])
|
| 98 |
+
if os.path.normcase(common) == os.path.normcase(norm_root):
|
| 99 |
+
rel = os.path.relpath(norm_abs, norm_root)
|
| 100 |
+
if rel == ".":
|
| 101 |
+
return "/"
|
| 102 |
+
return "/" + rel.replace("\\", "/")
|
| 103 |
+
except Exception:
|
| 104 |
+
pass
|
| 105 |
+
return abs_path.replace("\\", "/")
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _resolve_path(path: str) -> tuple[str, str]:
|
| 109 |
+
try:
|
| 110 |
+
user_input = (path or "/").strip() or "/"
|
| 111 |
+
if user_input.startswith("/"):
|
| 112 |
+
rel_part = user_input.lstrip("/") or "."
|
| 113 |
+
raw = os.path.expanduser(rel_part)
|
| 114 |
+
treat_as_relative = True
|
| 115 |
+
else:
|
| 116 |
+
raw = os.path.expanduser(user_input)
|
| 117 |
+
treat_as_relative = False
|
| 118 |
+
|
| 119 |
+
if not treat_as_relative and os.path.isabs(raw):
|
| 120 |
+
if not ALLOW_ABS:
|
| 121 |
+
return "", _err(
|
| 122 |
+
"absolute_path_disabled",
|
| 123 |
+
"Absolute paths are disabled in safe mode.",
|
| 124 |
+
path=raw.replace("\\", "/"),
|
| 125 |
+
hint="Use a path relative to / (e.g., /Notes/index.md).",
|
| 126 |
+
)
|
| 127 |
+
abs_path = os.path.abspath(raw)
|
| 128 |
+
else:
|
| 129 |
+
abs_path = os.path.abspath(os.path.join(ROOT_DIR, raw))
|
| 130 |
+
if not ALLOW_ABS:
|
| 131 |
+
try:
|
| 132 |
+
common = os.path.commonpath([os.path.normpath(ROOT_DIR), os.path.normpath(abs_path)])
|
| 133 |
+
except Exception:
|
| 134 |
+
root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
|
| 135 |
+
abs_cmp = os.path.normcase(os.path.normpath(abs_path))
|
| 136 |
+
if not abs_cmp.startswith(root_cmp):
|
| 137 |
+
return "", _err(
|
| 138 |
+
"path_outside_root",
|
| 139 |
+
"Path not allowed outside root.",
|
| 140 |
+
path=user_input.replace("\\", "/"),
|
| 141 |
+
hint="Use a path under / (the vault root).",
|
| 142 |
+
)
|
| 143 |
+
else:
|
| 144 |
+
root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
|
| 145 |
+
common_cmp = os.path.normcase(os.path.normpath(common))
|
| 146 |
+
if common_cmp != root_cmp:
|
| 147 |
+
return "", _err(
|
| 148 |
+
"path_outside_root",
|
| 149 |
+
"Path not allowed outside root.",
|
| 150 |
+
path=user_input.replace("\\", "/"),
|
| 151 |
+
hint="Use a path under / (the vault root).",
|
| 152 |
+
)
|
| 153 |
+
return abs_path, ""
|
| 154 |
+
except Exception as exc:
|
| 155 |
+
return "", _err(
|
| 156 |
+
"resolve_path_failed",
|
| 157 |
+
"Failed to resolve path.",
|
| 158 |
+
path=(path or ""),
|
| 159 |
+
data={"error": _safe_err(exc)},
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _fmt_size(num_bytes: int) -> str:
|
| 164 |
+
units = ["B", "KB", "MB", "GB", "TB"]
|
| 165 |
+
size = float(num_bytes)
|
| 166 |
+
for unit in units:
|
| 167 |
+
if size < 1024.0:
|
| 168 |
+
return f"{size:.1f} {unit}"
|
| 169 |
+
size /= 1024.0
|
| 170 |
+
return f"{size:.1f} PB"
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _list_dir(abs_path: str, *, show_hidden: bool, recursive: bool, max_entries: int) -> str:
|
| 174 |
+
lines: list[str] = []
|
| 175 |
+
total = 0
|
| 176 |
+
listing_display = _display_path(abs_path)
|
| 177 |
+
for root, dirs, files in os.walk(abs_path):
|
| 178 |
+
if not show_hidden:
|
| 179 |
+
dirs[:] = [d for d in dirs if not d.startswith('.')]
|
| 180 |
+
files = [f for f in files if not f.startswith('.')]
|
| 181 |
+
try:
|
| 182 |
+
rel_root = os.path.relpath(root, ROOT_DIR)
|
| 183 |
+
except Exception:
|
| 184 |
+
rel_root = root
|
| 185 |
+
rel_root_disp = "/" if rel_root == "." else "/" + rel_root.replace("\\", "/")
|
| 186 |
+
lines.append(f"\n📂 {rel_root_disp}")
|
| 187 |
+
dirs.sort()
|
| 188 |
+
files.sort()
|
| 189 |
+
for d in dirs:
|
| 190 |
+
p = os.path.join(root, d)
|
| 191 |
+
try:
|
| 192 |
+
mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
|
| 193 |
+
except Exception:
|
| 194 |
+
mtime = "?"
|
| 195 |
+
lines.append(f" • [DIR] {d} (modified {mtime})")
|
| 196 |
+
total += 1
|
| 197 |
+
if total >= max_entries:
|
| 198 |
+
lines.append(f"\n… Truncated at {max_entries} entries.")
|
| 199 |
+
return "\n".join(lines).strip()
|
| 200 |
+
for f in files:
|
| 201 |
+
p = os.path.join(root, f)
|
| 202 |
+
try:
|
| 203 |
+
size = _fmt_size(os.path.getsize(p))
|
| 204 |
+
mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
|
| 205 |
+
except Exception:
|
| 206 |
+
size, mtime = "?", "?"
|
| 207 |
+
lines.append(f" • {f} ({size}, modified {mtime})")
|
| 208 |
+
total += 1
|
| 209 |
+
if total >= max_entries:
|
| 210 |
+
lines.append(f"\n… Truncated at {max_entries} entries.")
|
| 211 |
+
return "\n".join(lines).strip()
|
| 212 |
+
if not recursive:
|
| 213 |
+
break
|
| 214 |
+
header = f"Listing of {listing_display}\nRoot: /\nEntries: {total}"
|
| 215 |
+
return (header + "\n" + "\n".join(lines)).strip()
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def _search_text(
|
| 219 |
+
abs_path: str,
|
| 220 |
+
query: str,
|
| 221 |
+
*,
|
| 222 |
+
recursive: bool,
|
| 223 |
+
show_hidden: bool,
|
| 224 |
+
max_results: int,
|
| 225 |
+
case_sensitive: bool,
|
| 226 |
+
start_index: int,
|
| 227 |
+
) -> str:
|
| 228 |
+
if not os.path.exists(abs_path):
|
| 229 |
+
return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
|
| 230 |
+
|
| 231 |
+
query = query or ""
|
| 232 |
+
normalized_query = query if case_sensitive else query.lower()
|
| 233 |
+
if normalized_query == "":
|
| 234 |
+
return _err(
|
| 235 |
+
"missing_search_query",
|
| 236 |
+
"Search query is required for the search action.",
|
| 237 |
+
hint="Provide text in the Search field to look for.",
|
| 238 |
+
)
|
| 239 |
+
|
| 240 |
+
max_results = max(1, int(max_results) if max_results is not None else 20)
|
| 241 |
+
start_index = max(0, int(start_index) if start_index is not None else 0)
|
| 242 |
+
matches: list[tuple[str, int, str]] = []
|
| 243 |
+
errors: list[str] = []
|
| 244 |
+
files_scanned = 0
|
| 245 |
+
truncated = False
|
| 246 |
+
total_matches = 0
|
| 247 |
+
|
| 248 |
+
def _should_skip(name: str) -> bool:
|
| 249 |
+
return not show_hidden and name.startswith('.')
|
| 250 |
+
|
| 251 |
+
def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
|
| 252 |
+
nonlocal truncated, total_matches
|
| 253 |
+
total_matches += 1
|
| 254 |
+
if total_matches <= start_index:
|
| 255 |
+
return False
|
| 256 |
+
if len(matches) < max_results:
|
| 257 |
+
snippet = line_text.strip()
|
| 258 |
+
if len(snippet) > 200:
|
| 259 |
+
snippet = snippet[:197] + "…"
|
| 260 |
+
matches.append((_display_path(file_path), line_no, snippet))
|
| 261 |
+
return False
|
| 262 |
+
truncated = True
|
| 263 |
+
return True
|
| 264 |
+
|
| 265 |
+
def _search_file(file_path: str) -> bool:
|
| 266 |
+
nonlocal files_scanned
|
| 267 |
+
files_scanned += 1
|
| 268 |
+
try:
|
| 269 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
|
| 270 |
+
for line_no, line in enumerate(handle, start=1):
|
| 271 |
+
haystack = line if case_sensitive else line.lower()
|
| 272 |
+
if normalized_query in haystack:
|
| 273 |
+
if _handle_match(file_path, line_no, line):
|
| 274 |
+
return True
|
| 275 |
+
except Exception as exc:
|
| 276 |
+
errors.append(f"{_display_path(file_path)} ({_safe_err(exc)})")
|
| 277 |
+
return truncated
|
| 278 |
+
|
| 279 |
+
if os.path.isfile(abs_path):
|
| 280 |
+
_search_file(abs_path)
|
| 281 |
+
else:
|
| 282 |
+
for root, dirs, files in os.walk(abs_path):
|
| 283 |
+
dirs[:] = [d for d in dirs if not _should_skip(d)]
|
| 284 |
+
visible_files = [f for f in files if show_hidden or not f.startswith('.')]
|
| 285 |
+
for name in visible_files:
|
| 286 |
+
file_path = os.path.join(root, name)
|
| 287 |
+
if _search_file(file_path):
|
| 288 |
+
break
|
| 289 |
+
if truncated:
|
| 290 |
+
break
|
| 291 |
+
if not recursive:
|
| 292 |
+
break
|
| 293 |
+
|
| 294 |
+
header_lines = [
|
| 295 |
+
f"Search results for {query!r}",
|
| 296 |
+
f"Scope: {_display_path(abs_path)}",
|
| 297 |
+
f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
|
| 298 |
+
f"Start offset: {start_index}",
|
| 299 |
+
f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
|
| 300 |
+
f"Files scanned: {files_scanned}",
|
| 301 |
+
]
|
| 302 |
+
|
| 303 |
+
next_cursor = start_index + len(matches) if truncated else None
|
| 304 |
+
|
| 305 |
+
if truncated:
|
| 306 |
+
header_lines.append(f"Matches encountered before truncation: {total_matches}")
|
| 307 |
+
header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
|
| 308 |
+
header_lines.append(f"Next cursor: {next_cursor}")
|
| 309 |
+
else:
|
| 310 |
+
header_lines.append(f"Total matches found: {total_matches}")
|
| 311 |
+
header_lines.append("Truncated: no — end of results.")
|
| 312 |
+
header_lines.append("Next cursor: None")
|
| 313 |
+
|
| 314 |
+
if not matches:
|
| 315 |
+
if total_matches > 0 and start_index >= total_matches:
|
| 316 |
+
hint_limit = max(total_matches - 1, 0)
|
| 317 |
+
body_lines = [
|
| 318 |
+
f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
|
| 319 |
+
(f"Try a smaller offset (≤ {hint_limit})." if hint_limit >= 0 else ""),
|
| 320 |
+
]
|
| 321 |
+
body_lines = [line for line in body_lines if line]
|
| 322 |
+
else:
|
| 323 |
+
body_lines = [
|
| 324 |
+
"No matches found.",
|
| 325 |
+
(f"Total matches encountered: {total_matches}." if total_matches else ""),
|
| 326 |
+
]
|
| 327 |
+
body_lines = [line for line in body_lines if line]
|
| 328 |
+
else:
|
| 329 |
+
body_lines = [f"{idx}. {path}:{line_no}: {text}" for idx, (path, line_no, text) in enumerate(matches, start=1)]
|
| 330 |
+
|
| 331 |
+
if errors:
|
| 332 |
+
shown = errors[:5]
|
| 333 |
+
body_lines.extend(["", "Warnings:"])
|
| 334 |
+
body_lines.extend(shown)
|
| 335 |
+
if len(errors) > len(shown):
|
| 336 |
+
body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")
|
| 337 |
+
|
| 338 |
+
return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def _read_file(abs_path: str, *, offset: int, max_chars: int) -> str:
|
| 342 |
+
if not os.path.exists(abs_path):
|
| 343 |
+
return _err("file_not_found", f"File not found: {_display_path(abs_path)}", path=_display_path(abs_path))
|
| 344 |
+
if os.path.isdir(abs_path):
|
| 345 |
+
return _err(
|
| 346 |
+
"is_directory",
|
| 347 |
+
f"Path is a directory, not a file: {_display_path(abs_path)}",
|
| 348 |
+
path=_display_path(abs_path),
|
| 349 |
+
hint="Provide a file path.",
|
| 350 |
+
)
|
| 351 |
+
try:
|
| 352 |
+
with open(abs_path, 'r', encoding='utf-8', errors='replace') as f:
|
| 353 |
+
data = f.read()
|
| 354 |
+
except Exception as exc:
|
| 355 |
+
return _err("read_failed", "Failed to read file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
|
| 356 |
+
total = len(data)
|
| 357 |
+
start = max(0, min(offset, total))
|
| 358 |
+
end = total if max_chars <= 0 else min(total, start + max_chars)
|
| 359 |
+
chunk = data[start:end]
|
| 360 |
+
next_cursor = end if end < total else None
|
| 361 |
+
header = (
|
| 362 |
+
f"Reading {_display_path(abs_path)}\n"
|
| 363 |
+
f"Offset {start}, returned {len(chunk)} of {total}."
|
| 364 |
+
+ (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
|
| 365 |
+
)
|
| 366 |
+
return header + "\n\n---\n\n" + chunk
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def _info(abs_path: str) -> str:
|
| 370 |
+
try:
|
| 371 |
+
st = os.stat(abs_path)
|
| 372 |
+
except Exception as exc:
|
| 373 |
+
return _err("stat_failed", "Failed to stat path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
|
| 374 |
+
info = {
|
| 375 |
+
"path": _display_path(abs_path),
|
| 376 |
+
"type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
|
| 377 |
+
"size": st.st_size,
|
| 378 |
+
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=' ', timespec='seconds'),
|
| 379 |
+
"created": datetime.fromtimestamp(st.st_ctime).isoformat(sep=' ', timespec='seconds'),
|
| 380 |
+
"mode": oct(st.st_mode),
|
| 381 |
+
"root": "/",
|
| 382 |
+
}
|
| 383 |
+
return json.dumps(info, indent=2)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
@autodoc(summary=TOOL_SUMMARY)
|
| 387 |
+
def Obsidian_Vault(
|
| 388 |
+
action: Annotated[str, "Operation to perform: 'list', 'read', 'info', 'search', 'help'."],
|
| 389 |
+
path: Annotated[str, "Target path, relative to the vault root." ] = "/",
|
| 390 |
+
query: Annotated[Optional[str], "Text to search for when action=search."] = None,
|
| 391 |
+
recursive: Annotated[bool, "Recurse into subfolders when listing/searching."] = False,
|
| 392 |
+
show_hidden: Annotated[bool, "Include hidden files when listing/searching."] = False,
|
| 393 |
+
max_entries: Annotated[int, "Max entries to list or matches to return (for list/search)."] = 20,
|
| 394 |
+
    offset: Annotated[int, "Start offset when reading files."] = 0,
    max_chars: Annotated[int, "Max characters to return when reading (0 = full file)."] = 4000,
    case_sensitive: Annotated[bool, "Match case when searching text."] = False,
) -> str:
    _log_call_start(
        "Obsidian_Vault",
        action=action,
        path=path,
        query=query,
        recursive=recursive,
        show_hidden=show_hidden,
        max_entries=max_entries,
        offset=offset,
        max_chars=max_chars,
        case_sensitive=case_sensitive,
    )
    action = (action or "").strip().lower()
    if action not in {"list", "read", "info", "search", "help"}:
        result = _err(
            "invalid_action",
            "Invalid action.",
            hint="Choose from: list, read, info, search, help.",
        )
        _log_call_end("Obsidian_Vault", _truncate_for_log(result))
        return result

    if action == "help":
        result = HELP_TEXT
        _log_call_end("Obsidian_Vault", _truncate_for_log(result))
        return result

    abs_path, err = _resolve_path(path)
    if err:
        _log_call_end("Obsidian_Vault", _truncate_for_log(err))
        return err

    try:
        if action == "list":
            if not os.path.exists(abs_path):
                result = _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
            else:
                result = _list_dir(abs_path, show_hidden=show_hidden, recursive=recursive, max_entries=max_entries)
        elif action == "read":
            result = _read_file(abs_path, offset=offset, max_chars=max_chars)
        elif action == "search":
            query_text = query or ""
            if query_text.strip() == "":
                result = _err(
                    "missing_search_query",
                    "Search query is required for the search action.",
                    hint="Provide text in the Search field to look for.",
                )
            else:
                result = _search_text(
                    abs_path,
                    query_text,
                    recursive=recursive,
                    show_hidden=show_hidden,
                    max_results=max_entries,
                    case_sensitive=case_sensitive,
                    start_index=offset,
                )
        else:  # info
            result = _info(abs_path)
    except Exception as exc:
        result = _err("exception", "Unhandled error during operation.", data={"error": _safe_err(exc)})

    _log_call_end("Obsidian_Vault", _truncate_for_log(result))
    return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Obsidian_Vault,
        inputs=[
            gr.Radio(
                label="Action",
                choices=["list", "read", "info", "search", "help"],
                value="help",
                info="Operation to perform",
            ),
            gr.Textbox(label="Path", placeholder="/ or /Notes/todo.md", max_lines=1, value="/", info="Target path (relative to vault root)"),
            gr.Textbox(label="Search text", lines=3, placeholder="Text to search for...", info="Text to search for (Search only)"),
            gr.Checkbox(label="Recursive", value=False, info="Recurse into subfolders (List/Search)"),
            gr.Checkbox(label="Show hidden", value=False, info="Include hidden files (List/Search)"),
            gr.Slider(minimum=10, maximum=5000, step=10, value=20, label="Max entries / matches", info="Max entries to list or matches to return (List/Search)"),
            gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Read/Search)"),
            gr.Slider(minimum=0, maximum=100_000, step=500, value=4000, label="Max chars", info="Max characters to return (Read, 0=all)"),
            gr.Checkbox(label="Case sensitive search", value=False, info="Match case (Search)"),
        ],
        outputs=gr.Textbox(label="Result", lines=20),
        title="Obsidian Vault",
        description=(
            "<div style=\"text-align:center; overflow:hidden;\">Explore and search notes in the vault without modifying them." "</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )


__all__ = ["Obsidian_Vault", "build_interface"]
|
Modules/Shell_Command.py
ADDED
|
@@ -0,0 +1,139 @@
from __future__ import annotations

import os
import platform
import shlex
import subprocess
from typing import Annotated

import gradio as gr

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc
from .File_System import _resolve_path, ROOT_DIR, _display_path
import shutil


def _detect_shell(prefer_powershell: bool = True) -> tuple[list[str], str]:
    """
    Pick an appropriate shell for the host OS.
    - Windows: use PowerShell by default, fall back to cmd.exe.
    - POSIX: use /bin/bash if available, else /bin/sh.
    Returns (shell_cmd_prefix, shell_name) where shell_cmd_prefix is the command list to launch the shell.
    """
    system = platform.system().lower()
    if system == "windows":
        if prefer_powershell:
            pwsh = shutil.which("pwsh")
            candidates = [pwsh, shutil.which("powershell"), shutil.which("powershell.exe")]
            for cand in candidates:
                if cand:
                    return [cand, "-NoLogo", "-NoProfile", "-Command"], "powershell"
        # Fallback to cmd
        comspec = os.environ.get("ComSpec", r"C:\\Windows\\System32\\cmd.exe")
        return [comspec, "/C"], "cmd"
    # POSIX
    bash = shutil.which("bash")
    if bash:
        return [bash, "-lc"], "bash"
    sh = os.environ.get("SHELL", "/bin/sh")
    return [sh, "-lc"], "sh"


# Detect shell at import time for docs/UI purposes
_DETECTED_SHELL_PREFIX, _DETECTED_SHELL_NAME = _detect_shell()


# Clarify path semantics and expose detected shell in summary
TOOL_SUMMARY = (
    "Execute a shell command within a safe working directory under the tool root ('/'). "
    "Paths must be relative to '/'. "
    "Set workdir to '.' to use the root. "
    "Absolute paths are disabled. "
    f"Detected shell: {_DETECTED_SHELL_NAME}."
)


def _run_command(command: str, cwd: str, timeout: int) -> tuple[str, str, int]:
    shell_prefix, shell_name = _detect_shell()
    full_cmd = shell_prefix + [command]
    try:
        proc = subprocess.run(
            full_cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout if timeout and timeout > 0 else None,
        )
        return proc.stdout, proc.stderr, proc.returncode
    except subprocess.TimeoutExpired as exc:
        return exc.stdout or "", (exc.stderr or "") + "\n[timeout]", 124
    except Exception as exc:
        return "", f"Execution failed: {exc}", 1


@autodoc(summary=TOOL_SUMMARY)
def Shell_Command(
    command: Annotated[str, "Shell command to execute. Accepts multi-part pipelines as a single string."],
    workdir: Annotated[str, "Working directory (relative to root unless UNSAFE_ALLOW_ABS_PATHS=1)."] = ".",
    timeout: Annotated[int, "Timeout in seconds (0 = no timeout, be careful on public hosting)."] = 60,
) -> str:
    _log_call_start("Shell_Command", command=command, workdir=workdir, timeout=timeout)
    if not command or not command.strip():
        result = "No command provided."
        _log_call_end("Shell_Command", _truncate_for_log(result))
        return result

    abs_cwd, err = _resolve_path(workdir)
    if err:
        _log_call_end("Shell_Command", _truncate_for_log(err))
        return err
    if not os.path.exists(abs_cwd):
        result = f"Working directory not found: {abs_cwd}"
        _log_call_end("Shell_Command", _truncate_for_log(result))
        return result

    # Capture shell used for transparency
    _, shell_name = _detect_shell()
    stdout, stderr, code = _run_command(command, cwd=abs_cwd, timeout=timeout)
    display_cwd = _display_path(abs_cwd)
    header = (
        f"Command: {command}\n"
        f"CWD: {display_cwd}\n"
        f"Root: /\n"
        f"Shell: {shell_name}\n"
        f"Exit code: {code}\n"
        f"--- STDOUT ---\n"
    )
    output = header + (stdout or "<empty>") + "\n--- STDERR ---\n" + (stderr or "<empty>")
    _log_call_end("Shell_Command", _truncate_for_log(f"exit={code} stdout={len(stdout)} stderr={len(stderr)}"))
    return output


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Shell_Command,
        inputs=[
            gr.Textbox(label="Command", placeholder="echo hello || dir", lines=2, info="Shell command to execute"),
            gr.Textbox(label="Workdir", value=".", max_lines=1, info="Working directory (relative to root)"),
            gr.Slider(minimum=0, maximum=600, step=5, value=60, label="Timeout (seconds)", info="Timeout in seconds (0 = no timeout)"),
        ],
        outputs=gr.Textbox(label="Output", lines=20),
        title="Shell Command",
        description=(
            "<div style=\"text-align:center; overflow:hidden;\">"
            "Run a shell command under the same safe root as File System. "
            "Absolute paths are disabled; use relative paths. "
            f"Detected shell: {_DETECTED_SHELL_NAME}. "
            "</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )


__all__ = ["Shell_Command", "build_interface"]
|
Modules/Web_Fetch.py
ADDED
|
@@ -0,0 +1,317 @@
from __future__ import annotations

import re
from typing import Annotated, Dict, Literal, Tuple
from urllib.parse import urlparse, urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify as md
from readability import Document

from app import _fetch_rate_limiter, _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc


# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
    "Fetch a webpage and return clean Markdown, raw HTML, or a list of links, with max length and pagination via "
    "offset; if truncated, the output includes a notice with next_cursor for exact continuation."
)

ModeOption = Literal["markdown", "html", "url_scraper"]


def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    if not skip_rate_limit:
        _fetch_rate_limiter.acquire()
    try:
        response = requests.get(
            url,
            headers=headers,
            timeout=timeout,
            allow_redirects=True,
            stream=False,
        )
        response.raise_for_status()
        return response
    except requests.exceptions.Timeout as exc:
        raise requests.exceptions.RequestException("Request timed out. The webpage took too long to respond.") from exc
    except requests.exceptions.ConnectionError as exc:
        raise requests.exceptions.RequestException("Connection error. Please check the URL and your internet connection.") from exc
    except requests.exceptions.HTTPError as exc:
        if response.status_code == 403:
            raise requests.exceptions.RequestException("Access forbidden. The website may be blocking automated requests.") from exc
        if response.status_code == 404:
            raise requests.exceptions.RequestException("Page not found. Please check the URL.") from exc
        if response.status_code == 429:
            raise requests.exceptions.RequestException("Rate limited. Please try again in a few minutes.") from exc
        raise requests.exceptions.RequestException(f"HTTP error {response.status_code}: {exc}") from exc


def _normalize_whitespace(text: str) -> str:
    text = re.sub(r"[ \t\u00A0]+", " ", text)
    text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text.strip())
    return text.strip()


def _truncate(text: str, max_chars: int) -> Tuple[str, bool]:
    if max_chars is None or max_chars <= 0 or len(text) <= max_chars:
        return text, False
    return text[:max_chars].rstrip() + " …", True


def _shorten(text: str, limit: int) -> str:
    if limit <= 0 or len(text) <= limit:
        return text
    return text[: max(0, limit - 1)].rstrip() + "…"


def _domain_of(url: str) -> str:
    try:
        return urlparse(url).netloc or ""
    except Exception:
        return ""


def _normalize_mode(mode: str | None) -> ModeOption:
    """Convert UI-supplied labels into canonical mode values."""
    if not mode:
        return "markdown"
    normalized = mode.strip().lower()
    normalized = normalized.replace("-", "_").replace(" ", "_")
    if normalized in {"markdown", "markdown_mode", "md"}:
        return "markdown"
    if normalized in {"html", "html_mode"}:
        return "html"
    if normalized in {"url_scraper", "urlscraper", "url_mode", "scraper", "links", "link_mode"}:
        return "url_scraper"
    return "markdown"


def _extract_links_from_soup(soup: BeautifulSoup, base_url: str) -> str:
    links = []
    for link in soup.find_all("a", href=True):
        href = link.get("href")
        text = link.get_text(strip=True)
        if href.startswith("http"):
            full_url = href
        elif href.startswith("//"):
            full_url = "https:" + href
        elif href.startswith("/"):
            full_url = urljoin(base_url, href)
        else:
            full_url = urljoin(base_url, href)
        if text and href not in ["#", "javascript:void(0)"]:
            links.append(f"- [{text}]({full_url})")
    if not links:
        return "No links found on this page."
    title = soup.find("title")
    title_text = title.get_text(strip=True) if title else "Links from webpage"
    return f"# {title_text}\n\n" + "\n".join(links)


def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str, strip_selectors: str = "") -> str:
    if strip_selectors:
        selectors = [s.strip() for s in strip_selectors.split(",") if s.strip()]
        for selector in selectors:
            try:
                for element in full_soup.select(selector):
                    element.decompose()
            except Exception:
                continue
    for element in full_soup.select("script, style, nav, footer, header, aside"):
        element.decompose()
    main = (
        full_soup.find("main")
        or full_soup.find("article")
        or full_soup.find("div", class_=re.compile(r"content|main|post|article", re.I))
        or full_soup.find("body")
    )
    if not main:
        return "No main content found on the webpage."
    markdown_text = md(str(main), heading_style="ATX")
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text)
    markdown_text = re.sub(r"\[\s*\]\([^)]*\)", "", markdown_text)
    markdown_text = re.sub(r"[ \t]+", " ", markdown_text)
    markdown_text = markdown_text.strip()
    title = full_soup.find("title")
    if title and title.get_text(strip=True):
        markdown_text = f"# {title.get_text(strip=True)}\n\n{markdown_text}"
    return markdown_text or "No content could be extracted."


def _truncate_with_notice(content: str, max_chars: int) -> Tuple[str, Dict[str, object]]:
    total_chars = len(content)
    if total_chars <= max_chars:
        return content, {
            "truncated": False,
            "returned_chars": total_chars,
            "total_chars_estimate": total_chars,
            "next_cursor": None,
        }
    truncated = content[:max_chars]
    last_paragraph = truncated.rfind("\n\n")
    if last_paragraph > max_chars * 0.7:
        truncated = truncated[:last_paragraph]
        cursor_pos = last_paragraph
    elif "." in truncated[-100:]:
        last_period = truncated.rfind(".")
        if last_period > max_chars * 0.8:
            truncated = truncated[: last_period + 1]
            cursor_pos = last_period + 1
        else:
            cursor_pos = len(truncated)
    else:
        cursor_pos = len(truncated)
    metadata = {
        "truncated": True,
        "returned_chars": len(truncated),
        "total_chars_estimate": total_chars,
        "next_cursor": cursor_pos,
    }
    truncated = truncated.rstrip()
    truncation_notice = (
        "\n\n---\n"
        f"**Content Truncated:** Showing {metadata['returned_chars']:,} of {metadata['total_chars_estimate']:,} characters "
        f"({(metadata['returned_chars']/metadata['total_chars_estimate']*100):.1f}%)\n"
        f"**Next cursor:** {metadata['next_cursor']} (use this value with offset parameter for continuation)\n"
        "---"
    )
    return truncated + truncation_notice, metadata


@autodoc(summary=TOOL_SUMMARY)
def Web_Fetch(
    url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
    max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
    offset: Annotated[int, "Character offset to start from (for pagination, use next_cursor from previous call)."] = 0,
    strip_selectors: Annotated[str, "CSS selectors to remove (comma-separated, e.g., '.header, .footer, nav')."] = "",
    mode: Annotated[
        str,
        "Output mode: 'markdown' (default, clean content), 'html' (raw response), or 'url_scraper' (links list).",
    ] = "markdown",
) -> str:
    canonical_mode = _normalize_mode(mode)
    _log_call_start(
        "Web_Fetch",
        url=url,
        max_chars=max_chars,
        strip_selectors=strip_selectors,
        mode=canonical_mode,
        offset=offset,
    )
    if not url or not url.strip():
        result = "Please enter a valid URL."
        _log_call_end("Web_Fetch", _truncate_for_log(result))
        return result
    try:
        resp = _http_get_enhanced(url)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        result = f"An error occurred: {exc}"
        _log_call_end("Web_Fetch", _truncate_for_log(result))
        return result
    final_url = str(resp.url)
    ctype = resp.headers.get("Content-Type", "")
    if "html" not in ctype.lower():
        result = f"Unsupported content type for extraction: {ctype or 'unknown'}"
        _log_call_end("Web_Fetch", _truncate_for_log(result))
        return result
    resp.encoding = resp.encoding or resp.apparent_encoding
    html = resp.text
    full_soup = BeautifulSoup(html, "lxml")
    if canonical_mode == "html":
        _log_call_end("Web_Fetch", f"chars={len(html)}, mode={canonical_mode}, offset=0 (ignored)")
        return html
    if canonical_mode == "markdown":
        full_result = _fullpage_markdown_from_soup(full_soup, final_url, strip_selectors)
    elif canonical_mode == "url_scraper":
        full_result = _extract_links_from_soup(full_soup, final_url)
    else:
        full_result = html

    if offset > 0:
        if offset >= len(full_result):
            result = (
                f"Offset {offset} exceeds content length ({len(full_result)} characters). "
                f"Content ends at position {len(full_result)}."
            )
            _log_call_end("Web_Fetch", _truncate_for_log(result))
            return result
        result = full_result[offset:]
    else:
        result = full_result

    if max_chars > 0 and len(result) > max_chars:
        result, metadata = _truncate_with_notice(result, max_chars)
        if offset > 0:
            metadata["total_chars_estimate"] = len(full_result)
            metadata["next_cursor"] = offset + metadata["next_cursor"] if metadata["next_cursor"] else None

    _log_call_end("Web_Fetch", f"chars={len(result)}, mode={canonical_mode}, offset={offset}")
    return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=Web_Fetch,
        inputs=[
            gr.Textbox(label="URL", placeholder="https://example.com/article", max_lines=1),
            gr.Slider(
                minimum=0,
                maximum=10000,
                value=3000,
                step=100,
                label="Max Characters",
                info="0 = no limit (full page), default 3000",
            ),
            gr.Slider(
                minimum=0,
                maximum=100000,
                value=0,
                step=100,
                label="Offset",
                info="Character offset to start from (use next_cursor from previous call for pagination)",
            ),
            gr.Textbox(
                label="Strip Selectors",
                placeholder=".header, .footer, nav, .sidebar",
                value="",
                max_lines=1,
                info="CSS selectors to remove (comma-separated)",
            ),
            gr.Radio(
                label="Mode",
                choices=["Markdown Mode", "HTML Mode", "URL Scraper"],
                value="Markdown Mode",
                info="Markdown cleans content, HTML returns raw response, URL Scraper lists links.",
            ),
        ],
        outputs=gr.Markdown(label="Extracted Content"),
        title="Web Fetch",
        description=(
            "<div style=\"text-align:center\">Convert any webpage to Markdown, inspect the raw HTML response, or "
            "extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = [
    "Web_Fetch",
    "build_interface",
    "_http_get_enhanced",
    "_fullpage_markdown_from_soup",
]
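A minimal pagination sketch (not part of the commit), assuming `app.py` is importable; the URL is an example and the regex simply mirrors the "Next cursor" notice built by `_truncate_with_notice`:

```python
# Sketch: fetch a page in two chunks using the next_cursor/offset protocol.
import re
from Modules.Web_Fetch import Web_Fetch

page1 = Web_Fetch(url="https://example.com/article", max_chars=3000, mode="markdown")
print(page1)

# If the output was truncated, pass the embedded cursor back as offset to
# continue exactly where the first call stopped.
match = re.search(r"\*\*Next cursor:\*\* (\d+)", page1)
if match:
    page2 = Web_Fetch(
        url="https://example.com/article",
        max_chars=3000,
        offset=int(match.group(1)),
        mode="markdown",
    )
    print(page2)
```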
|
Modules/Web_Search.py
ADDED
|
@@ -0,0 +1,499 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Annotated, List
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from ddgs import DDGS
|
| 8 |
+
|
| 9 |
+
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
|
| 10 |
+
from ._docstrings import autodoc
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Single source of truth for the LLM-facing tool description
|
| 14 |
+
TOOL_SUMMARY = (
|
| 15 |
+
"Run a DuckDuckGo-backed search across text, news, images, videos, or books. "
|
| 16 |
+
"Readable results include pagination hints and next_offset when more results are available; "
|
| 17 |
+
"Use in combination with `Web_Fetch` to navigate the web."
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
_SAFESEARCH_LEVEL = "off"
|
| 22 |
+
|
| 23 |
+
# Defaults and choices for newly added parameters
|
| 24 |
+
BACKEND_CHOICES = [
|
| 25 |
+
"auto",
|
| 26 |
+
"duckduckgo",
|
| 27 |
+
"bing",
|
| 28 |
+
"brave",
|
| 29 |
+
"yahoo",
|
| 30 |
+
"wikipedia",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
# Allowed backends per type (explicit selection set)
|
| 34 |
+
_ALLOWED_BACKENDS = {
|
| 35 |
+
"text": ["duckduckgo", "bing", "brave", "yahoo", "wikipedia"],
|
| 36 |
+
"news": ["duckduckgo", "bing", "yahoo"],
|
| 37 |
+
"images": ["duckduckgo"],
|
| 38 |
+
"videos": ["duckduckgo"],
|
| 39 |
+
"books": ["annasarchive"],
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
# Auto order per type (used when backend == "auto"); wikipedia excluded for text
|
| 43 |
+
_AUTO_ORDER = {
|
| 44 |
+
"text": ["duckduckgo", "bing", "brave", "yahoo"],
|
| 45 |
+
"news": ["duckduckgo", "bing", "yahoo"],
|
| 46 |
+
"images": ["duckduckgo"],
|
| 47 |
+
"videos": ["duckduckgo"],
|
| 48 |
+
"books": ["annasarchive"],
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
# Date filter choices: canonical values used by resolver
|
| 52 |
+
DATE_FILTER_CHOICES = ["any", "day", "week", "month", "year"]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _resolve_backend(search_type: str, backend_choice: str) -> str:
|
| 56 |
+
"""Resolve backend string for DDGS based on search type and user choice.
|
| 57 |
+
|
| 58 |
+
- If backend_choice is "auto", return a comma-separated fallback order for that type.
|
| 59 |
+
- If backend_choice is not supported by the type, fall back to the first allowed backend.
|
| 60 |
+
- Books endpoint uses only 'annasarchive'.
|
| 61 |
+
"""
|
| 62 |
+
stype = search_type if search_type in _ALLOWED_BACKENDS else "text"
|
| 63 |
+
allowed = _ALLOWED_BACKENDS[stype]
|
| 64 |
+
if backend_choice == "auto":
|
| 65 |
+
return ", ".join(_AUTO_ORDER[stype])
|
| 66 |
+
if stype == "books":
|
| 67 |
+
return "annasarchive"
|
| 68 |
+
# Validate backend against allowed set for this type
|
| 69 |
+
if backend_choice in allowed:
|
| 70 |
+
return backend_choice
|
| 71 |
+
# Fallback to first allowed backend
|
| 72 |
+
return allowed[0]
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _resolve_timelimit(date_filter: str, search_type: str) -> str | None:
|
| 76 |
+
"""Map UI date filter to DDGS timelimit code per endpoint.
|
| 77 |
+
|
| 78 |
+
Returns one of: None, 'd', 'w', 'm', 'y'. For news/videos (which support d/w/m),
|
| 79 |
+
selecting 'year' will coerce to 'm' to stay within supported range.
|
| 80 |
+
"""
|
| 81 |
+
normalized = (date_filter or "any").strip().lower()
|
| 82 |
+
if normalized in ("any", "none", ""):
|
| 83 |
+
return None
|
| 84 |
+
mapping = {
|
| 85 |
+
"day": "d",
|
| 86 |
+
"week": "w",
|
| 87 |
+
"month": "m",
|
| 88 |
+
"year": "y",
|
| 89 |
+
}
|
| 90 |
+
code = mapping.get(normalized)
|
| 91 |
+
if not code:
|
| 92 |
+
return None
|
| 93 |
+
if search_type in ("news", "videos") and code == "y":
|
| 94 |
+
return "m"
|
| 95 |
+
return code
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _extract_date_from_snippet(snippet: str) -> str:
|
| 99 |
+
if not snippet:
|
| 100 |
+
return ""
|
| 101 |
+
import re
|
| 102 |
+
|
| 103 |
+
date_patterns = [
|
| 104 |
+
r"\b(\d{4}[-/]\d{1,2}[-/]\d{1,2})\b",
|
| 105 |
+
r"\b([A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})\b",
|
| 106 |
+
r"\b(\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})\b",
|
| 107 |
+
r"\b(\d+\s+(?:day|week|month|year)s?\s+ago)\b",
|
| 108 |
+
r"(?:Published|Updated|Posted):\s*([^,\n]+?)(?:[,\n]|$)",
|
| 109 |
+
]
|
| 110 |
+
for pattern in date_patterns:
|
| 111 |
+
matches = re.findall(pattern, snippet, re.IGNORECASE)
|
| 112 |
+
if matches:
|
| 113 |
+
return matches[0].strip()
|
| 114 |
+
return ""
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _format_search_result(result: dict, search_type: str, index: int) -> List[str]:
|
| 118 |
+
lines: List[str] = []
|
| 119 |
+
if search_type == "text":
|
| 120 |
+
title = result.get("title", "").strip()
|
| 121 |
+
url = result.get("href", "").strip()
|
| 122 |
+
snippet = result.get("body", "").strip()
|
| 123 |
+
date = _extract_date_from_snippet(snippet)
|
| 124 |
+
lines.append(f"{index}. {title}")
|
| 125 |
+
lines.append(f" URL: {url}")
|
| 126 |
+
if snippet:
|
| 127 |
+
lines.append(f" Summary: {snippet}")
|
| 128 |
+
if date:
|
| 129 |
+
lines.append(f" Date: {date}")
|
| 130 |
+
elif search_type == "news":
|
| 131 |
+
title = result.get("title", "").strip()
|
| 132 |
+
url = result.get("url", "").strip()
|
| 133 |
+
body = result.get("body", "").strip()
|
| 134 |
+
date = result.get("date", "").strip()
|
| 135 |
+
source = result.get("source", "").strip()
|
| 136 |
+
lines.append(f"{index}. {title}")
|
| 137 |
+
lines.append(f" URL: {url}")
|
| 138 |
+
if source:
|
| 139 |
+
lines.append(f" Source: {source}")
|
| 140 |
+
if date:
|
| 141 |
+
lines.append(f" Date: {date}")
|
| 142 |
+
if body:
|
| 143 |
+
lines.append(f" Summary: {body}")
|
| 144 |
+
elif search_type == "images":
|
| 145 |
+
title = result.get("title", "").strip()
|
| 146 |
+
image_url = result.get("image", "").strip()
|
| 147 |
+
source_url = result.get("url", "").strip()
|
| 148 |
+
source = result.get("source", "").strip()
|
| 149 |
+
width = result.get("width", "")
|
| 150 |
+
height = result.get("height", "")
|
| 151 |
+
lines.append(f"{index}. {title}")
|
| 152 |
+
lines.append(f" Image: {image_url}")
|
| 153 |
+
lines.append(f" Source: {source_url}")
|
| 154 |
+
if source:
|
| 155 |
+
lines.append(f" Publisher: {source}")
|
| 156 |
+
if width and height:
|
| 157 |
+
lines.append(f" Dimensions: {width}x{height}")
|
| 158 |
+
elif search_type == "videos":
|
| 159 |
+
title = result.get("title", "").strip()
|
| 160 |
+
description = result.get("description", "").strip()
|
| 161 |
+
duration = result.get("duration", "").strip()
|
| 162 |
+
published = result.get("published", "").strip()
|
| 163 |
+
uploader = result.get("uploader", "").strip()
|
| 164 |
+
embed_url = result.get("embed_url", "").strip()
|
| 165 |
+
lines.append(f"{index}. {title}")
|
| 166 |
+
if embed_url:
|
| 167 |
+
lines.append(f" Video: {embed_url}")
|
| 168 |
+
if uploader:
|
| 169 |
+
lines.append(f" Uploader: {uploader}")
|
| 170 |
+
if duration:
|
| 171 |
+
lines.append(f" Duration: {duration}")
|
| 172 |
+
if published:
|
| 173 |
+
lines.append(f" Published: {published}")
|
| 174 |
+
if description:
|
| 175 |
+
lines.append(f" Description: {description}")
|
| 176 |
+
elif search_type == "books":
|
| 177 |
+
title = result.get("title", "").strip()
|
| 178 |
+
url = result.get("url", "").strip()
|
| 179 |
+
body = result.get("body", "").strip()
|
| 180 |
+
lines.append(f"{index}. {title}")
|
| 181 |
+
lines.append(f" URL: {url}")
|
| 182 |
+
if body:
|
| 183 |
+
lines.append(f" Description: {body}")
|
| 184 |
+
return lines
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
@autodoc(
|
| 188 |
+
summary=TOOL_SUMMARY,
|
| 189 |
+
)
|
| 190 |
+
def Web_Search(
|
| 191 |
+
query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
|
| 192 |
+
max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
|
| 193 |
+
page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
|
| 194 |
+
offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
|
| 195 |
+
search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
|
| 196 |
+
backend: Annotated[str, "Search backend or ordered fallbacks. Use 'auto' for recommended order."] = "auto",
|
| 197 |
+
date_filter: Annotated[str, "Time filter: any, day, week, month, year."] = "any",
|
| 198 |
+
) -> str:
|
| 199 |
+
_log_call_start(
|
| 200 |
+
"Web_Search",
|
| 201 |
+
query=query,
|
| 202 |
+
max_results=max_results,
|
| 203 |
+
page=page,
|
| 204 |
+
search_type=search_type,
|
| 205 |
+
offset=offset,
|
| 206 |
+
backend=backend,
|
| 207 |
+
date_filter=date_filter,
|
| 208 |
+
)
|
| 209 |
+
if not query or not query.strip():
|
| 210 |
+
result = "No search query provided. Please enter a search term."
|
| 211 |
+
_log_call_end("Web_Search", _truncate_for_log(result))
|
| 212 |
+
return result
|
| 213 |
+
max_results = max(1, min(20, max_results))
|
| 214 |
+
page = max(1, page)
|
| 215 |
+
offset = max(0, offset)
|
| 216 |
+
valid_types = ["text", "news", "images", "videos", "books"]
|
| 217 |
+
if search_type not in valid_types:
|
| 218 |
+
search_type = "text"
|
| 219 |
+
if offset > 0:
|
| 220 |
+
actual_offset = offset
|
| 221 |
+
calculated_page = (offset // max_results) + 1
|
| 222 |
+
else:
|
| 223 |
+
actual_offset = (page - 1) * max_results
|
| 224 |
+
calculated_page = page
|
| 225 |
+
total_needed = actual_offset + max_results
|
| 226 |
+
used_fallback = False
|
| 227 |
+
original_search_type = search_type
|
| 228 |
+
# Prepare cross-cutting parameters
|
| 229 |
+
resolved_backend = _resolve_backend(search_type, (backend or "auto").lower())
|
| 230 |
+
timelimit = _resolve_timelimit(date_filter, search_type)
|
| 231 |
+
|
| 232 |
+
def _perform_search(stype: str) -> list[dict]:
|
| 233 |
+
try:
|
| 234 |
+
_search_rate_limiter.acquire()
|
| 235 |
+
with DDGS() as ddgs:
|
| 236 |
+
if stype == "text":
|
| 237 |
+
user_backend_choice = (backend or "auto").lower()
|
| 238 |
+
if user_backend_choice == "auto":
|
| 239 |
+
# Custom auto: DDG first, then append other engines
|
| 240 |
+
results: list[dict] = []
|
| 241 |
+
seen: set[str] = set()
|
| 242 |
+
|
| 243 |
+
def add_unique(items: list[dict], key_field: str) -> None:
|
| 244 |
+
for it in items or []:
|
| 245 |
+
url = (it.get(key_field, "") or "").strip()
|
| 246 |
+
if url and url not in seen:
|
| 247 |
+
seen.add(url)
|
| 248 |
+
results.append(it)
|
| 249 |
+
|
| 250 |
+
# First: duckduckgo
|
| 251 |
+
try:
|
| 252 |
+
ddg_items = list(
|
| 253 |
+
ddgs.text(
|
| 254 |
+
query,
|
| 255 |
+
max_results=total_needed + 10,
|
| 256 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 257 |
+
timelimit=timelimit,
|
| 258 |
+
backend="duckduckgo",
|
| 259 |
+
)
|
| 260 |
+
)
|
| 261 |
+
except Exception:
|
| 262 |
+
ddg_items = []
|
| 263 |
+
add_unique(ddg_items, "href")
|
| 264 |
+
|
| 265 |
+
# Then: other engines appended (excluding duckduckgo)
|
| 266 |
+
for eng in [b for b in _AUTO_ORDER["text"] if b != "duckduckgo"]:
|
| 267 |
+
try:
|
| 268 |
+
extra = list(
|
| 269 |
+
ddgs.text(
|
| 270 |
+
query,
|
| 271 |
+
max_results=total_needed + 10,
|
| 272 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 273 |
+
timelimit=timelimit,
|
| 274 |
+
backend=eng,
|
| 275 |
+
)
|
| 276 |
+
)
|
| 277 |
+
except Exception:
|
| 278 |
+
extra = []
|
| 279 |
+
add_unique(extra, "href")
|
| 280 |
+
|
| 281 |
+
return results
|
| 282 |
+
else:
|
| 283 |
+
raw_gen = ddgs.text(
|
| 284 |
+
query,
|
| 285 |
+
max_results=total_needed + 10,
|
| 286 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 287 |
+
timelimit=timelimit,
|
| 288 |
+
backend=resolved_backend,
|
| 289 |
+
)
|
| 290 |
+
elif stype == "news":
|
| 291 |
+
user_backend_choice = (backend or "auto").lower()
|
| 292 |
+
if user_backend_choice == "auto":
|
| 293 |
+
# Custom auto: DDG first, then append other engines
|
| 294 |
+
results: list[dict] = []
|
| 295 |
+
seen: set[str] = set()
|
| 296 |
+
|
| 297 |
+
def add_unique(items: list[dict], key_field: str) -> None:
|
| 298 |
+
for it in items or []:
|
| 299 |
+
url = (it.get(key_field, "") or "").strip()
|
| 300 |
+
if url and url not in seen:
|
| 301 |
+
seen.add(url)
|
| 302 |
+
results.append(it)
|
| 303 |
+
|
| 304 |
+
# First: duckduckgo news
|
| 305 |
+
try:
|
| 306 |
+
ddg_news = list(
|
| 307 |
+
ddgs.news(
|
| 308 |
+
query,
|
| 309 |
+
max_results=total_needed + 10,
|
| 310 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 311 |
+
timelimit=timelimit,
|
| 312 |
+
backend="duckduckgo",
|
| 313 |
+
)
|
| 314 |
+
)
|
| 315 |
+
except Exception:
|
| 316 |
+
ddg_news = []
|
| 317 |
+
add_unique(ddg_news, "url")
|
| 318 |
+
|
| 319 |
+
# Then: other news engines appended
|
| 320 |
+
for eng in [b for b in _AUTO_ORDER["news"] if b != "duckduckgo"]:
|
| 321 |
+
try:
|
| 322 |
+
extra = list(
|
| 323 |
+
ddgs.news(
|
| 324 |
+
query,
|
| 325 |
+
max_results=total_needed + 10,
|
| 326 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 327 |
+
timelimit=timelimit,
|
| 328 |
+
backend=eng,
|
| 329 |
+
)
|
| 330 |
+
)
|
| 331 |
+
except Exception:
|
| 332 |
+
extra = []
|
| 333 |
+
add_unique(extra, "url")
|
| 334 |
+
|
| 335 |
+
return results
|
| 336 |
+
else:
|
| 337 |
+
raw_gen = ddgs.news(
|
| 338 |
+
query,
|
| 339 |
+
max_results=total_needed + 10,
|
| 340 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 341 |
+
timelimit=timelimit,
|
| 342 |
+
backend=_resolve_backend("news", (backend or "auto").lower()),
|
| 343 |
+
)
|
| 344 |
+
elif stype == "images":
|
| 345 |
+
raw_gen = ddgs.images(
|
| 346 |
+
query,
|
| 347 |
+
max_results=total_needed + 10,
|
| 348 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 349 |
+
timelimit=timelimit,
|
| 350 |
+
backend=_resolve_backend("images", (backend or "auto").lower()),
|
| 351 |
+
)
|
| 352 |
+
elif stype == "videos":
|
| 353 |
+
raw_gen = ddgs.videos(
|
| 354 |
+
query,
|
| 355 |
+
max_results=total_needed + 10,
|
| 356 |
+
safesearch=_SAFESEARCH_LEVEL,
|
| 357 |
+
timelimit=timelimit,
|
| 358 |
+
backend=_resolve_backend("videos", (backend or "auto").lower()),
|
| 359 |
+
)
|
| 360 |
+
else:
|
| 361 |
+
raw_gen = ddgs.books(
|
| 362 |
+
query,
|
| 363 |
+
max_results=total_needed + 10,
|
| 364 |
+
backend=_resolve_backend("books", (backend or "auto").lower()),
|
| 365 |
+
)
|
| 366 |
+
try:
|
| 367 |
+
return list(raw_gen)
|
| 368 |
+
except Exception as inner_exc:
|
| 369 |
+
if "no results" in str(inner_exc).lower() or "not found" in str(inner_exc).lower():
|
| 370 |
+
return []
|
| 371 |
+
raise inner_exc
|
| 372 |
+
except Exception as exc:
|
| 373 |
+
error_msg = f"Search failed: {str(exc)[:200]}"
|
| 374 |
+
lowered = str(exc).lower()
|
| 375 |
+
if "blocked" in lowered or "rate" in lowered:
|
| 376 |
+
error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
|
| 377 |
+
elif "timeout" in lowered:
|
| 378 |
+
error_msg = "Search timed out. Please try again with a simpler query."
|
| 379 |
+
elif "network" in lowered or "connection" in lowered:
|
| 380 |
+
error_msg = "Network connection error. Please check your internet connection and try again."
|
| 381 |
+
elif "no results" in lowered or "not found" in lowered:
|
| 382 |
+
return []
|
| 383 |
+
raise Exception(error_msg)
|
| 384 |
+
|
| 385 |
+
try:
|
| 386 |
+
raw = _perform_search(search_type)
|
| 387 |
+
except Exception as exc:
|
| 388 |
+
result = f"Error: {exc}"
|
| 389 |
+
_log_call_end("Web_Search", _truncate_for_log(result))
|
| 390 |
+
return result
|
| 391 |
+
|
| 392 |
+
if not raw and search_type == "news":
|
| 393 |
+
try:
|
| 394 |
+
raw = _perform_search("text")
|
| 395 |
+
if raw:
|
| 396 |
+
used_fallback = True
|
| 397 |
+
search_type = "text"
|
| 398 |
+
except Exception:
|
| 399 |
+
pass
|
| 400 |
+
|
| 401 |
+
if not raw:
|
| 402 |
+
fallback_note = " (also tried 'text' search as fallback)" if original_search_type == "news" and used_fallback else ""
|
| 403 |
+
result = f"No {original_search_type} results found for query: {query}{fallback_note}"
|
| 404 |
+
_log_call_end("Web_Search", _truncate_for_log(result))
|
| 405 |
+
return result
|
| 406 |
+
|
| 407 |
+
paginated_results = raw[actual_offset: actual_offset + max_results]
|
| 408 |
+
if not paginated_results:
|
| 409 |
+
if actual_offset >= len(raw):
|
| 410 |
+
result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
|
| 411 |
+
else:
|
| 412 |
+
result = f"No {original_search_type} results found on page {calculated_page} for query: {query}. Try page 1 or reduce page number."
|
| 413 |
+
_log_call_end("Web_Search", _truncate_for_log(result))
|
| 414 |
+
return result
|
| 415 |
+
|
| 416 |
+
total_available = len(raw)
|
| 417 |
+
start_num = actual_offset + 1
|
| 418 |
+
end_num = actual_offset + len(paginated_results)
|
| 419 |
+
next_offset = actual_offset + len(paginated_results)
|
| 420 |
+
search_label = original_search_type.title()
|
| 421 |
+
if used_fallback:
|
| 422 |
+
search_label += " → Text (Smart Fallback)"
|
| 423 |
+
|
| 424 |
+
now_dt = datetime.now().astimezone()
|
| 425 |
+
date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
|
| 426 |
+
if not date_str:
|
| 427 |
+
date_str = now_dt.isoformat()
|
| 428 |
+
|
| 429 |
+
pagination_info = f"Page {calculated_page}"
|
| 430 |
+
if offset > 0:
|
| 431 |
+
pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
|
| 432 |
+
lines = [f"Current Date: {date_str}", f"{search_label} search results for: {query}"]
|
| 433 |
+
if used_fallback:
|
| 434 |
+
lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
|
| 435 |
+
lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
|
| 436 |
+
for i, result in enumerate(paginated_results, start_num):
|
| 437 |
+
result_lines = _format_search_result(result, search_type, i)
|
| 438 |
+
lines.extend(result_lines)
|
| 439 |
+
lines.append("")
|
| 440 |
+
if total_available > end_num:
|
| 441 |
+
lines.append("💡 More results available:")
|
| 442 |
+
lines.append(f" • Next page: page={calculated_page + 1}")
|
| 443 |
+
lines.append(f" • Next offset: offset={next_offset}")
|
| 444 |
+
lines.append(f" • Use offset={next_offset} to continue exactly from result {next_offset + 1}")
|
| 445 |
+
result = "\n".join(lines)
|
| 446 |
+
search_info = f"type={original_search_type}"
|
| 447 |
+
if used_fallback:
|
| 448 |
+
search_info += "→text"
|
| 449 |
+
_log_call_end("Web_Search", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
|
| 450 |
+
return result
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
def build_interface() -> gr.Interface:
|
| 454 |
+
return gr.Interface(
|
| 455 |
+
fn=Web_Search,
|
| 456 |
+
inputs=[
|
| 457 |
+
gr.Textbox(label="Query", placeholder="topic OR site:example.com", max_lines=1, info="The search query"),
|
| 458 |
+
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results", info="Number of results to return (1–20)"),
|
| 459 |
+
gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)"),
|
| 460 |
+
gr.Slider(
|
| 461 |
+
minimum=0,
|
| 462 |
+
maximum=1000,
|
| 463 |
+
value=0,
|
| 464 |
+
step=1,
|
| 465 |
+
label="Offset",
|
| 466 |
+
info="Result offset to start from (overrides page if > 0, use next_offset from previous search)",
|
| 467 |
+
),
|
| 468 |
+
gr.Radio(
|
| 469 |
+
label="Search Type",
|
| 470 |
+
choices=["text", "news", "images", "videos", "books"],
|
| 471 |
+
value="text",
|
| 472 |
+
info="Type of content to search for",
|
| 473 |
+
),
|
| 474 |
+
gr.Radio(
|
| 475 |
+
label="Backend",
|
| 476 |
+
choices=BACKEND_CHOICES,
|
| 477 |
+
value="auto",
|
| 478 |
+
info="Search engine backend or fallback order (auto applies recommended order)",
|
| 479 |
+
),
|
| 480 |
+
gr.Radio(
|
| 481 |
+
label="Date filter",
|
| 482 |
+
choices=DATE_FILTER_CHOICES,
|
| 483 |
+
value="any",
|
| 484 |
+
info="Limit results to: day, week, month, or year (varies by type)",
|
| 485 |
+
),
|
| 486 |
+
],
|
| 487 |
+
outputs=gr.Textbox(label="Search Results", interactive=False, lines=20, max_lines=20),
|
| 488 |
+
title="Web Search",
|
| 489 |
+
description=(
|
| 490 |
+
"<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and flexible pagination. "
|
| 491 |
+
"Supports text, news, images, videos, and books. Features smart fallback for news searches and precise offset control.</div>"
|
| 492 |
+
),
|
| 493 |
+
api_description=TOOL_SUMMARY,
|
| 494 |
+
flagging_mode="never",
|
| 495 |
+
submit_btn="Search",
|
| 496 |
+
)
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
__all__ = ["Web_Search", "build_interface"]
|
Modules/__init__.py
ADDED
|
@@ -0,0 +1 @@
"""Gradio tool modules bundled for Nymbo-Tools."""
|
Modules/_docstrings.py
ADDED
|
@@ -0,0 +1,112 @@
from __future__ import annotations

import inspect
from typing import Any, Annotated, get_args, get_origin, get_type_hints


def _typename(tp: Any) -> str:
    """Return a readable type name from a type or annotation."""
    try:
        if hasattr(tp, "__name__"):
            return tp.__name__  # e.g. int, str
        if getattr(tp, "__module__", None) and getattr(tp, "__qualname__", None):
            return f"{tp.__module__}.{tp.__qualname__}"
        return str(tp).replace("typing.", "")
    except Exception:
        return str(tp)


def _extract_base_and_meta(annotation: Any) -> tuple[Any, str | None]:
    """Given an annotation, return (base_type, first string metadata) if Annotated, else (annotation, None)."""
    try:
        if get_origin(annotation) is Annotated:
            args = get_args(annotation)
            base = args[0] if args else annotation
            # Grab the first string metadata if present
            for meta in args[1:]:
                if isinstance(meta, str):
                    return base, meta
            return base, None
        return annotation, None
    except Exception:
        return annotation, None


def autodoc(summary: str | None = None, returns: str | None = None, *, force: bool = False):
    """
    Decorator that auto-generates a concise Google-style docstring from a function's
    type hints and Annotated metadata. Useful for Gradio MCP where docstrings are
    used for tool descriptions and parameter docs.

    Args:
        summary: Optional one-line summary for the function. If not provided,
            will generate a simple sentence from the function name.
        returns: Optional return value description. If not provided, only the
            return type will be listed (if available).
        force: When True, overwrite an existing docstring. Default False.

    Returns:
        The original function with its __doc__ populated (unless skipped).
    """

    def decorator(func):
        # Skip if docstring already present and not forcing
        if not force and func.__doc__ and func.__doc__.strip():
            return func

        try:
            # include_extras=True to retain Annotated metadata
            hints = get_type_hints(func, include_extras=True, globalns=getattr(func, "__globals__", None))
        except Exception:
            hints = {}

        sig = inspect.signature(func)

        lines: list[str] = []
        # Summary line
        if summary and summary.strip():
            lines.append(summary.strip())
        else:
            pretty = func.__name__.replace("_", " ").strip().capitalize()
            if not pretty.endswith("."):
                pretty += "."
            lines.append(pretty)

        # Args section
        if sig.parameters:
            lines.append("")
            lines.append("Args:")
            for name, param in sig.parameters.items():
                if name == "self":
                    continue
                annot = hints.get(name, param.annotation)
                base, meta = _extract_base_and_meta(annot)
                tname = _typename(base) if base is not inspect._empty else None
                desc = meta or ""
                if tname and tname != str(inspect._empty):
                    lines.append(f"    {name} ({tname}): {desc}".rstrip())
                else:
                    lines.append(f"    {name}: {desc}".rstrip())

        # Returns section
        ret_hint = hints.get("return", sig.return_annotation)
        if returns or (ret_hint and ret_hint is not inspect.Signature.empty):
            lines.append("")
            lines.append("Returns:")
            if returns:
                lines.append(f"    {returns}")
            else:
                base, meta = _extract_base_and_meta(ret_hint)
                rtype = _typename(base)
                if meta:
                    lines.append(f"    {rtype}: {meta}")
                else:
                    lines.append(f"    {rtype}")

        func.__doc__ = "\n".join(lines).strip() + "\n"
        return func

    return decorator


__all__ = ["autodoc"]
|
Obsidian/demo.md
ADDED
|
@@ -0,0 +1,3 @@
# Hello, World

Clone this server locally and set your Obsidian vault's root path as an environment variable `OBSIDIAN_VAULT_ROOT` to access your own files.
|
README.md
CHANGED
|
@@ -1,12 +1,216 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.0.
|
| 8 |
app_file: app.py
|
| 9 |
-
pinned:
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
title: Nymbo Tools MCP
emoji: ⚙️
colorFrom: green
colorTo: gray
sdk: gradio
sdk_version: 6.0.0
app_file: app.py
pinned: true
license: apache-2.0
short_description: All-in-one hub of general purpose tools useful for any agent
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

## Nymbo-Tools MCP Server

All-in-one hub of general-purpose tools useful for any agent. Run it as a Gradio web app, or connect to it remotely as a Model Context Protocol (MCP) server to call its tools programmatically.

Live Space: https://huggingface.co/spaces/Nymbo/Tools

### What’s inside

- Web Fetch: Turn any webpage into clean Markdown with optional link-only scraping, CSS selector stripping, length limits, and pagination via cursor offset.
- Web Search: DuckDuckGo-backed search across text, news, images, videos, and books with readable, paginated output.
- Code Interpreter: Execute small Python snippets and capture stdout.
- Memory Manager: Lightweight JSON-based memory store with save/list/search/delete and tag filters.
- Generate Speech: Kokoro‑82M TTS with 54 voices and adjustable speed (CPU or CUDA if available).
- Generate Image: Text-to-image via Hugging Face serverless inference (choose model, steps, CFG, size, seed).
- Generate Video: Text-to-video via Hugging Face serverless inference (model, steps, guidance, size, fps, duration, seed).
- Deep Research: Multi-query web research pipeline (DDG search + parallel fetch + LLM report synthesis) with downloadable report.
- File System: Safe, sandboxed filesystem operations under a tool root.
- Shell Command: Run shell commands inside the same safe root as File System.

## Quick start

Run the following commands in sequence to start the server locally:

```shell
git clone https://huggingface.co/spaces/Nymbo/Tools
cd Tools
python -m venv env
source env/bin/activate
pip install -r requirements.txt
python app.py
```

Defaults:
- The Gradio UI typically serves on http://127.0.0.1:7860
- The MCP endpoint is available at http://127.0.0.1:7860/gradio_api/mcp/

## Using it as an MCP server

Remote MCP (hosted):
- Base URL: https://mcp.nymbo.net/gradio_api/mcp/
- SSE endpoint (for clients that need it): https://mcp.nymbo.net/gradio_api/mcp/sse

Local MCP (when you run app.py):
- Base URL: http://127.0.0.1:7860/gradio_api/mcp/
- SSE endpoint: http://127.0.0.1:7860/gradio_api/mcp/sse

Example client config (JSON):

```json
{
  "mcpServers": {
    "nymbo-tools": {
      "url": "https://mcp.nymbo.net/gradio_api/mcp/"
    }
  }
}
```

## Environment variables (optional but recommended)

- HF_READ_TOKEN: Enables Image Generation, Video Generation, and Deep Research (Hugging Face serverless inference). These tools stay visible to MCP clients, but calls require a valid token to succeed.
- HF_TOKEN: Alternative token fallback used by some providers (also enables Deep Research/Video).
- NYMBO_TOOLS_ROOT: Overrides the File System/Shell working root. Defaults to Nymbo-Tools/Filesystem.
- UNSAFE_ALLOW_ABS_PATHS=1: Allow absolute paths in File System and Shell Command (off by default for safety).

Notes:
- Without an HF API key, you can still use Web Fetch, Web Search, Code Interpreter, Memory Manager, File System, Shell Command, and Generate Speech.
- Generate Speech requires the kokoro package and its dependencies; it works on CPU and uses CUDA if available. It doesn't require an API key because synthesis runs on the server itself.

## Persistence and privacy

- Memory Manager stores entries in `memories.json` at the Nymbo-Tools folder root when running locally.
- File System defaults to the `Filesystem/` directory under Nymbo-Tools.
- In the public demo Space, storage is ephemeral and visible to anyone using the Space; avoid personal or sensitive data.

## Tool reference (signatures and behavior)

Below are the MCP tool parameters summarized by inputs, outputs, and notable behaviors.

### Web_Fetch (Webpages, converted to Markdown)
Inputs:
- url (str): Absolute URL to fetch (must return HTML).
- max_chars (int, default 3000): 0 = full page; otherwise truncates with a next_cursor notice.
- strip_selectors (str): Comma-separated CSS selectors to remove (e.g., .header, .footer, nav).
- url_scraper (bool): If true, return only a list of links from the page.
- offset (int): Character offset for pagination; pass the previous next_cursor to continue.

Output: Markdown string. If truncated, includes a next_cursor to continue.

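A rough calling sketch in Python (an illustration, not an official API: it assumes dependencies are installed, that you run from the repo root, and that the keyword names match the parameter list above):

```python
import app  # registers the 'app' helpers the tool modules import; assumption, see app.py
from Modules.Web_Fetch import Web_Fetch

# Fetch a page as Markdown, dropping common chrome, 3000 characters at a time.
page_md = Web_Fetch(
    url="https://example.com/",
    max_chars=3000,
    strip_selectors=".header, .footer, nav",
    url_scraper=False,
    offset=0,  # pass the reported next_cursor here to keep reading
)
print(page_md)
```
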
### Web_Search (DuckDuckGo backend)
Inputs:
- query (str): DuckDuckGo query (supports site:, quotes, OR).
- max_results (int 1–20, default 5)
- page (int, default 1) or offset (int) for precise continuation
- search_type (str): "text" | "news" | "images" | "videos" | "books"

Output: Readable text with pagination hints and next_offset.

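A hedged sketch of a search call (same import assumptions as above; keyword names taken from the list, exact signature not guaranteed):

```python
import app  # assumption: import app first so the Modules package resolves its helpers
from Modules.Web_Search import Web_Search

# Plain text search; switch search_type to "news", "images", "videos", or "books" as needed.
results = Web_Search(
    query="site:huggingface.co gradio mcp server",
    max_results=5,
    page=1,
    search_type="text",
)
print(results)
```
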
### Code_Interpreter (Python)
Inputs:
- code (str): Python source; stdout is captured.

Output: Captured stdout or the exception text.

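The underlying mechanism is ordinary `exec` with stdout redirection. A minimal, self-contained sketch of that general technique (not the module's exact implementation):

```python
# Minimal sketch of the general mechanism: run a snippet with exec()
# and capture anything it prints; return the exception text on failure.
import contextlib
from io import StringIO

def run_snippet(code: str) -> str:
    buffer = StringIO()
    try:
        with contextlib.redirect_stdout(buffer):
            exec(code, {})  # fresh globals for the snippet
    except Exception as exc:
        return f"{type(exc).__name__}: {exc}"
    return buffer.getvalue()

print(run_snippet("print(2 + 2)"))  # -> 4
```
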
### Memory_Manager (Simple JSON store)
Inputs:
- action: "save" | "list" | "search" | "delete"
- text (save only), tags (save only)
- query (search only): supports tag:name terms and AND/OR
- limit (list/search): default 20
- memory_id (delete): full UUID or unique prefix
- include_tags (bool): include tags when listing/searching

Output: Confirmation string, listing, search matches, or structured error text.

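A hedged save-then-search sketch (import assumptions as above; the query string follows the tag:/AND syntax described in the inputs):

```python
import app  # assumption: makes the 'app' helpers importable for the tool modules
from Modules.Memory_Manager import Memory_Manager

# Save one memory, then find it again by tag.
print(Memory_Manager(action="save",
                     text="The user prefers metric units.",
                     tags="preferences,units"))
print(Memory_Manager(action="search",
                     query="tag:preferences AND units",
                     limit=5))
```
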
### Generate_Speech (Kokoro-82M)
Inputs:
- text (str)
- speed (float 0.5–2.0, default 1.25)
- voice (str): One of 54 voices (e.g., af_heart, am_liam, bf_alice, zf_xiaoyi…)

Output: (sample_rate:int, waveform:np.ndarray) – rendered as downloadable WAV in the UI.

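A hedged sketch that writes the returned (sample_rate, waveform) pair to a WAV file with scipy, which is already in requirements.txt (same import assumptions as the other sketches):

```python
import app  # assumption: import app first so tool modules resolve their helpers
from scipy.io import wavfile
from Modules.Generate_Speech import Generate_Speech

sample_rate, waveform = Generate_Speech(
    text="Hello from the Nymbo tools server.",
    speed=1.25,
    voice="af_heart",
)
wavfile.write("hello.wav", sample_rate, waveform)  # save the rendered audio
```
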
### Generate_Image (HF inference)
Requires: HF_READ_TOKEN

Inputs:
- prompt (str)
- model_id (str): e.g., black-forest-labs/FLUX.1-Krea-dev
- negative_prompt (str)
- steps (1–100), cfg_scale (1–20), sampler (UI label), seed (-1=random), width/height

Output: PIL.Image. In UI, displayed and downloadable. Errors guide you to provide a token or fix model id.

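A hedged sketch (HF_READ_TOKEN must be set; `sampler` is omitted and assumed to have a default, and the exact keyword names are taken from the list above):

```python
import app  # assumption: import app first so tool modules resolve their helpers
from Modules.Generate_Image import Generate_Image

image = Generate_Image(
    prompt="a lighthouse at dusk, oil painting",
    model_id="black-forest-labs/FLUX.1-Krea-dev",
    negative_prompt="blurry, low quality",
    steps=30,
    cfg_scale=7,
    width=1024,
    height=1024,
    seed=-1,  # -1 = random seed
)
image.save("lighthouse.png")  # a PIL.Image per the description above
```
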
### Generate_Video (HF inference)
Requires: HF_READ_TOKEN or HF_TOKEN

Inputs:
- prompt (str)
- model_id (str): default akhaliq/sora-2
- negative_prompt (str)
- steps (1–100), cfg_scale, seed, width/height, fps, duration (s)

Output: Temporary MP4 file path; UI shows a playable/downloadable video.

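An analogous hedged sketch for video (a token is required; leaving the remaining parameters at their defaults is an assumption):

```python
import app  # assumption: import app first so tool modules resolve their helpers
from Modules.Generate_Video import Generate_Video

video_path = Generate_Video(
    prompt="a paper boat drifting down a rainy street",
    model_id="akhaliq/sora-2",
)
print(video_path)  # temporary MP4 path per the description above
```
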
### Deep_Research (HF inference)
Requires: HF_READ_TOKEN or HF_TOKEN

Inputs:
- summary (str): One or more sentences describing the research task.
- query1..query5 (str) with max1..max5 (1–50). Total requested results across queries are capped at 50.

Behavior:
- Parallel DDG searches → fetch pages in budget → filter candidate sources with an LLM → synthesize a long, well-structured Markdown report and list of sources.

Output: (report_md, fetched_links_text, report_file_path)

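A hedged call sketch; whether unused query slots can simply be omitted (rather than passed as empty strings) is an assumption:

```python
import app  # assumption: import app first so tool modules resolve their helpers
from Modules.Deep_Research import Deep_Research

report_md, fetched_links_text, report_file_path = Deep_Research(
    summary="Survey recent approaches to fully on-device text-to-speech.",
    query1="on-device text to speech models", max1=10,
    query2="Kokoro 82M TTS quality benchmarks", max2=5,
)
print(report_file_path)  # path to the downloadable Markdown report
```
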
### File_System (safe root)
Root:
- Defaults to `Nymbo-Tools/Filesystem` (or NYMBO_TOOLS_ROOT). Absolute paths disabled unless UNSAFE_ALLOW_ABS_PATHS=1.

Actions:
- list, read, write, append, mkdir, move, copy, delete, info, help

Key fields:
- path, content (write/append), dest_path (move/copy), recursive, show_hidden, max_entries, offset, max_chars, create_dirs, overwrite

Output:
- Human-readable listings or JSON-like error strings with code/message/hint.

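A hedged write/read/list sketch using the field names above (paths resolve inside the safe root; exact signature not guaranteed):

```python
import app  # assumption: import app first so tool modules resolve their helpers
from Modules.File_System import File_System

print(File_System(action="write", path="notes/todo.txt",
                  content="- review the MCP docs\n", create_dirs=True))
print(File_System(action="read", path="notes/todo.txt"))
print(File_System(action="list", path="notes"))
```
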
### Shell_Command (same safe root)
Inputs:
- command (str): Single-string shell command (pipelines supported by the host shell).
- workdir (str): Relative to the root.
- timeout (s)

Output:
- Combined header + STDOUT/STDERR. Absolute paths disabled by default. Shell is detected automatically (PowerShell on Windows when available; bash/sh on POSIX).

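A hedged sketch (the command runs inside the same safe root as File_System; on Windows the detected shell is PowerShell, so swap the command accordingly):

```python
import app  # assumption: import app first so tool modules resolve their helpers
from Modules.Shell_Command import Shell_Command

# List the root directory with a 30-second timeout.
print(Shell_Command(command="ls -la", workdir=".", timeout=30))
```
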
## Running on Hugging Face Spaces

1) Duplicate the Space at https://huggingface.co/spaces/Nymbo/Tools.
2) In Space Settings → Secrets, add HF_READ_TOKEN (and/or HF_TOKEN) for model access.
3) Both the UI and MCP clients will list every tool. Image/Video/Deep Research still need a valid token when invoked.

## Troubleshooting

- Image/Video/Deep Research calls fail immediately:
  - Provide HF_READ_TOKEN (and optionally HF_TOKEN). Restart the app/Space.
- 401/403 when calling generation tools:
  - Token missing/insufficient permissions. Ensure your token can read the chosen model.
- Kokoro not found:
  - Install kokoro>=0.9.4. CPU works; CUDA is used if available. Torch may be skipped on Apple Silicon by design.
- Windows PowerShell activation policy blocks venv activation:
  - Run PowerShell as Admin and set a suitable execution policy for the current user (e.g., RemoteSigned), or manually run `python app.py` after installing dependencies.
- File System or Shell path errors:
  - Paths are relative to the tool root. Set NYMBO_TOOLS_ROOT to customize. Set UNSAFE_ALLOW_ABS_PATHS=1 only if you fully trust the environment.

## License

Apache-2.0 (see Space metadata). If you duplicate the Space or use these tools, ensure your usage complies with the licenses and terms of the underlying models and providers.
app.py
ADDED
@@ -0,0 +1,318 @@
from __future__ import annotations

# Project by Nymbo

import json
import os
import sys
import threading
import time
from datetime import datetime, timedelta
from typing import Any

import gradio as gr


class RateLimiter:
    """Best-effort in-process rate limiter for HTTP-heavy tools."""

    def __init__(self, requests_per_minute: int = 30) -> None:
        self.requests_per_minute = requests_per_minute
        self._requests: list[datetime] = []
        self._lock = threading.Lock()

    def acquire(self) -> None:
        now = datetime.now()
        with self._lock:
            self._requests = [req for req in self._requests if now - req < timedelta(minutes=1)]
            if len(self._requests) >= self.requests_per_minute:
                wait_time = 60 - (now - self._requests[0]).total_seconds()
                if wait_time > 0:
                    time.sleep(max(1, wait_time))
            self._requests.append(now)


_search_rate_limiter = RateLimiter(requests_per_minute=20)
_fetch_rate_limiter = RateLimiter(requests_per_minute=25)


def _truncate_for_log(value: Any, limit: int = 500) -> str:
    if not isinstance(value, str):
        value = str(value)
    if len(value) <= limit:
        return value
    return value[: limit - 1] + "…"


def _serialize_input(val: Any) -> Any:
    try:
        if isinstance(val, (str, int, float, bool)) or val is None:
            return val
        if isinstance(val, (list, tuple)):
            return [_serialize_input(v) for v in list(val)[:10]] + (["…"] if len(val) > 10 else [])
        if isinstance(val, dict):
            out: dict[str, Any] = {}
            for i, (k, v) in enumerate(val.items()):
                if i >= 12:
                    out["…"] = "…"
                    break
                out[str(k)] = _serialize_input(v)
            return out
        return repr(val)[:120]
    except Exception:
        return "<unserializable>"


def _log_call_start(func_name: str, **kwargs: Any) -> None:
    try:
        compact = {k: _serialize_input(v) for k, v in kwargs.items()}
        print(f"[TOOL CALL] {func_name} inputs: {json.dumps(compact, ensure_ascii=False)[:800]}", flush=True)
    except Exception as exc:
        print(f"[TOOL CALL] {func_name} (failed to log inputs: {exc})", flush=True)


def _log_call_end(func_name: str, output_desc: str) -> None:
    try:
        print(f"[TOOL RESULT] {func_name} output: {output_desc}", flush=True)
    except Exception as exc:
        print(f"[TOOL RESULT] {func_name} (failed to log output: {exc})", flush=True)

# Ensure Tools modules can import 'app' when this file is executed as a script
# (their code does `from app import ...`).
sys.modules.setdefault("app", sys.modules[__name__])

# Import per-tool interface builders from the Tools package
from Modules.Web_Fetch import build_interface as build_fetch_interface
from Modules.Web_Search import build_interface as build_search_interface
from Modules.Agent_Terminal import build_interface as build_agent_terminal_interface
from Modules.Code_Interpreter import build_interface as build_code_interface
from Modules.Memory_Manager import build_interface as build_memory_interface
from Modules.Generate_Speech import build_interface as build_speech_interface
from Modules.Generate_Image import build_interface as build_image_interface
from Modules.Generate_Video import build_interface as build_video_interface
from Modules.Deep_Research import build_interface as build_research_interface
from Modules.File_System import build_interface as build_fs_interface
from Modules.Obsidian_Vault import build_interface as build_obsidian_interface
from Modules.Shell_Command import build_interface as build_shell_interface

# Optional environment flags used to conditionally show API schemas (unchanged behavior)
HF_IMAGE_TOKEN = bool(os.getenv("HF_READ_TOKEN"))
HF_VIDEO_TOKEN = bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN"))
HF_TEXTGEN_TOKEN = bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN"))

# Load CSS from external file
_css_path = os.path.join(os.path.dirname(__file__), "styles.css")
with open(_css_path, "r", encoding="utf-8") as _css_file:
    CSS_STYLES = _css_file.read()

# Build each tab interface using modular builders
fetch_interface = build_fetch_interface()
web_search_interface = build_search_interface()
agent_terminal_interface = build_agent_terminal_interface()
code_interface = build_code_interface()
memory_interface = build_memory_interface()
kokoro_interface = build_speech_interface()
image_generation_interface = build_image_interface()
video_generation_interface = build_video_interface()
deep_research_interface = build_research_interface()
fs_interface = build_fs_interface()
shell_interface = build_shell_interface()
obsidian_interface = build_obsidian_interface()

_interfaces = [
    agent_terminal_interface,
    fetch_interface,
    web_search_interface,
    code_interface,
    shell_interface,
    fs_interface,
    obsidian_interface,
    memory_interface,
    kokoro_interface,
    image_generation_interface,
    video_generation_interface,
    deep_research_interface,
]
_tab_names = [
    "Agent Terminal",
    "Web Fetch",
    "Web Search",
    "Code Interpreter",
    "Shell Command",
    "File System",
    "Obsidian Vault",
    "Memory Manager",
    "Generate Speech",
    "Generate Image",
    "Generate Video",
    "Deep Research",
]

with gr.Blocks(title="Nymbo/Tools MCP") as demo:

    with gr.Sidebar(width=300, elem_classes="app-sidebar"):
        gr.Markdown("## Nymbo/Tools MCP\n<p style='font-size: 0.7rem; opacity: 0.85; margin-top: 2px;'>General purpose tools useful for any agent.</p>\n<code style='font-size: 0.7rem; word-break: break-all;'>https://nymbo.net/gradio_api/mcp/</code>")

        with gr.Accordion("Information", open=False):
            gr.HTML(
                """
<div class="info-accordion">
<div class="info-grid" style="grid-template-columns: 1fr;">
<section class="info-card">
<div class="info-card__body">
<h3>Connecting from an MCP Client</h3>
<p>
This Space also runs as a Model Context Protocol (MCP) server. Point your client to:
<br/>
<code>https://nymbo.net/gradio_api/mcp/</code>
</p>
<p>Example client configuration:</p>
<pre><code class="language-json">{
"mcpServers": {
"nymbo-tools": {
"url": "https://nymbo.net/gradio_api/mcp/"
}
}
}</code></pre>
<p>Run the following commands in sequence to run the server locally:</p>
<pre><code>git clone https://huggingface.co/spaces/Nymbo/Tools
cd Tools
python -m venv env
source env/bin/activate
pip install -r requirements.txt
python app.py</code></pre>
</div>
</section>

<section class="info-card">
<div class="info-card__body">
<h3>Enable Image Gen, Video Gen, and Deep Research</h3>
<p>
The <code>Generate_Image</code>, <code>Generate_Video</code>, and <code>Deep_Research</code> tools require a
<code>HF_READ_TOKEN</code> set as a secret or environment variable.
</p>
<ul class="info-list">
<li>Duplicate this Space and add a HF token with model read access.</li>
<li>Or run locally with <code>HF_READ_TOKEN</code> in your environment.</li>
</ul>
<div class="info-hint">
MCP clients can see these tools even without tokens, but calls will fail until a valid token is provided.
</div>
</div>
</section>

<section class="info-card">
<div class="info-card__body">
<h3>Persistent Memories and Files</h3>
<p>
In this public demo, memories and files created with the <code>Memory_Manager</code> and <code>File_System</code> are stored in the Space's running container and are cleared when the Space restarts. Content is visible to everyone—avoid personal data.
</p>
<p>
When running locally, memories are saved to <code>memories.json</code> at the repo root for privacy, and files are saved to the <code>Tools/Filesystem</code> directory on disk.
</p>
</div>
</section>

<section class="info-card">
<div class="info-card__body">
<h3>Tool Notes & Kokoro Voice Legend</h3>
<p><strong>No authentication required for:</strong></p>
<ul class="info-list">
<li><code>Web_Fetch</code></li>
<li><code>Web_Search</code></li>
<li><code>Agent_Terminal</code></li>
<li><code>Code_Interpreter</code></li>
<li><code>Memory_Manager</code></li>
<li><code>Generate_Speech</code></li>
<li><code>File_System</code></li>
<li><code>Shell_Command</code></li>
</ul>
<p><strong>Kokoro voice prefixes</strong></p>
<table style="width:100%; border-collapse:collapse; font-size:0.9em; margin-top:8px;">
<thead>
<tr style="border-bottom:1px solid rgba(255,255,255,0.15);">
<th style="padding:6px 8px; text-align:left;">Accent</th>
<th style="padding:6px 8px; text-align:center;">Female</th>
<th style="padding:6px 8px; text-align:center;">Male</th>
</tr>
</thead>
<tbody>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">American</td>
<td style="padding:6px 8px; text-align:center;"><code>af</code></td>
<td style="padding:6px 8px; text-align:center;"><code>am</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">British</td>
<td style="padding:6px 8px; text-align:center;"><code>bf</code></td>
<td style="padding:6px 8px; text-align:center;"><code>bm</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">European</td>
<td style="padding:6px 8px; text-align:center;"><code>ef</code></td>
<td style="padding:6px 8px; text-align:center;"><code>em</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">French</td>
<td style="padding:6px 8px; text-align:center;"><code>ff</code></td>
<td style="padding:6px 8px; text-align:center;">—</td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">Hindi</td>
<td style="padding:6px 8px; text-align:center;"><code>hf</code></td>
<td style="padding:6px 8px; text-align:center;"><code>hm</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">Italian</td>
<td style="padding:6px 8px; text-align:center;"><code>if</code></td>
<td style="padding:6px 8px; text-align:center;"><code>im</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">Japanese</td>
<td style="padding:6px 8px; text-align:center;"><code>jf</code></td>
<td style="padding:6px 8px; text-align:center;"><code>jm</code></td>
</tr>
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
<td style="padding:6px 8px; font-weight:600;">Portuguese</td>
<td style="padding:6px 8px; text-align:center;"><code>pf</code></td>
<td style="padding:6px 8px; text-align:center;"><code>pm</code></td>
</tr>
<tr>
<td style="padding:6px 8px; font-weight:600;">Chinese</td>
<td style="padding:6px 8px; text-align:center;"><code>zf</code></td>
<td style="padding:6px 8px; text-align:center;"><code>zm</code></td>
</tr>
</tbody>
</table>
</div>
</section>
</div>
</div>
                """
            )

        gr.Markdown("### Tools")
        tool_selector = gr.Radio(
            choices=_tab_names,
            value=_tab_names[0],
            label="Select Tool",
            show_label=False,
            container=False,
            elem_classes="sidebar-nav"
        )

    with gr.Tabs(elem_classes="hidden-tabs", selected=_tab_names[0]) as tool_tabs:
        for name, interface in zip(_tab_names, _interfaces):
            with gr.TabItem(label=name, id=name, elem_id=f"tab-{name}"):
                interface.render()

    # Use JavaScript to click the hidden tab button when the radio selection changes
    tool_selector.change(
        fn=None,
        inputs=tool_selector,
        outputs=None,
        js="(selected_tool) => { const buttons = document.querySelectorAll('.hidden-tabs button'); buttons.forEach(btn => { if (btn.innerText.trim() === selected_tool) { btn.click(); } }); }"
    )

if __name__ == "__main__":
    demo.launch(mcp_server=True, theme="Nymbo/Nymbo_Theme", css=CSS_STYLES, ssr_mode=False)
memories.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "id": "c8e3965d-270c-4baf-836f-33c6ed57f527",
    "text": "The user's personal website is driven by Markdown and Vue, hosted on Vercel.",
    "timestamp": "2025-09-06 02:21:17",
    "tags": "website,markdown,vue,vercel"
  },
  {
    "id": "17806073-cb86-472f-9b39-c1aaaf3ac058",
    "text": "The user lives in New York City.",
    "timestamp": "2025-09-06 17:07:27",
    "tags": "location,address"
  },
  {
    "id": "86e9f249-b43d-4aaa-bca0-b55fcb0c03be",
    "text": "The user has a pet Russian tortoise who is 8 years old.",
    "timestamp": "2025-09-06 02:20:59",
    "tags": "pet,tortoise,animals"
  }
]
requirements.txt
ADDED
@@ -0,0 +1,14 @@
gradio[mcp]==6.0.0
requests
beautifulsoup4
lxml
readability-lxml
ddgs
kokoro>=0.9.4
numpy
torch; platform_system != "Darwin" or platform_machine != "arm64"
Pillow
huggingface_hub>=0.30.0
markdownify
scipy
onnxruntime
styles.css
ADDED
@@ -0,0 +1,308 @@
/* Style only the top-level app title to avoid affecting headings elsewhere */
.app-title {
  text-align: center;
  /* Ensure main title appears first, then our two subtitle lines */
  display: grid;
  justify-items: center;
}
.app-title::after {
  grid-row: 2;
  content: "General purpose tools useful for any agent.";
  display: block;
  font-size: 1rem;
  font-weight: 400;
  opacity: 0.9;
  margin-top: 2px;
  white-space: pre-wrap;
}

/* Sidebar Container */
.app-sidebar {
  background: var(--body-background-fill) !important;
  border-right: 1px solid rgba(255, 255, 255, 0.08) !important;
}
@media (prefers-color-scheme: light) {
  .app-sidebar {
    border-right: 1px solid rgba(0, 0, 0, 0.08) !important;
  }
}

/* Historical safeguard: if any h1 appears inside tabs, don't attach pseudo content */
.gradio-container [role="tabpanel"] h1::before,
.gradio-container [role="tabpanel"] h1::after {
  content: none !important;
}

/* Information accordion - modern info cards */
.info-accordion {
  margin: 8px 0 2px;
}
.info-grid {
  display: grid;
  gap: 12px;
  /* Force a 2x2 layout on medium+ screens */
  grid-template-columns: repeat(2, minmax(0, 1fr));
  align-items: stretch;
}
/* On narrow screens, stack into a single column */
@media (max-width: 800px) {
  .info-grid {
    grid-template-columns: 1fr;
  }
}
.info-card {
  display: flex;
  gap: 14px;
  padding: 14px 16px;
  border: 1px solid rgba(255, 255, 255, 0.08);
  background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03));
  border-radius: 12px;
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
  position: relative;
  overflow: hidden;
  -webkit-backdrop-filter: blur(2px);
  backdrop-filter: blur(2px);
}
.info-card::before {
  content: "";
  position: absolute;
  inset: 0;
  border-radius: 12px;
  pointer-events: none;
  background: linear-gradient(90deg, rgba(99,102,241,0.06), rgba(59,130,246,0.05));
}
.info-card__icon {
  font-size: 24px;
  flex: 0 0 28px;
  line-height: 1;
  filter: saturate(1.1);
}
.info-card__body {
  min-width: 0;
}
.info-card__body h3 {
  margin: 0 0 6px;
  font-size: 1.05rem;
}
.info-card__body p {
  margin: 6px 0;
  opacity: 0.95;
}
/* Readable code blocks inside info cards */
.info-card pre {
  margin: 8px 0;
  padding: 10px 12px;
  background: rgba(20, 20, 30, 0.55);
  border: 1px solid rgba(255, 255, 255, 0.08);
  border-radius: 10px;
  overflow-x: auto;
  white-space: pre;
}
.info-card code {
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
  font-size: 0.95em;
}
.info-card pre code {
  display: block;
}
.info-card p {
  word-wrap: break-word;
  overflow-wrap: break-word;
}
.info-card p code {
  word-break: break-all;
}
.info-list {
  margin: 6px 0 0 18px;
  padding: 0;
}
.info-hint {
  margin-top: 8px;
  font-size: 0.9em;
  opacity: 0.9;
}

/* Light theme adjustments */
@media (prefers-color-scheme: light) {
  .info-card {
    border-color: rgba(0, 0, 0, 0.08);
    background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.9));
  }
  .info-card::before {
    background: linear-gradient(90deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06));
  }
  .info-card pre {
    background: rgba(245, 246, 250, 0.95);
    border-color: rgba(0, 0, 0, 0.08);
  }
}

/* Sidebar Navigation - styled like the previous tabs */
.sidebar-nav {
  background: transparent !important;
  border: none !important;
  padding: 0 !important;
}
.sidebar-nav .form {
  gap: 8px !important;
  display: flex !important;
  flex-direction: column !important;
  border: none !important;
  background: transparent !important;
}
.sidebar-nav label {
  display: flex !important;
  align-items: center !important;
  padding: 10px 12px !important;
  border-radius: 10px !important;
  border: 1px solid rgba(255, 255, 255, 0.08) !important;
  background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03)) !important;
  transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
  cursor: pointer !important;
  margin-bottom: 0 !important;
  width: 100% !important;
  justify-content: flex-start !important;
  text-align: left !important;
}
.sidebar-nav label:hover {
  border-color: rgba(99,102,241,0.28) !important;
  background: linear-gradient(180deg, rgba(99,102,241,0.10), rgba(59,130,246,0.08)) !important;
}
/* Selected state - Gradio adds 'selected' class to the label in some versions, or we check input:checked */
.sidebar-nav label.selected {
  border-color: rgba(99,102,241,0.35) !important;
  box-shadow: inset 0 0 0 1px rgba(99,102,241,0.25), 0 1px 2px rgba(0,0,0,0.25) !important;
  background: linear-gradient(180deg, rgba(99,102,241,0.18), rgba(59,130,246,0.14)) !important;
  color: rgba(255, 255, 255, 0.95) !important;
}

/* Light theme adjustments for sidebar */
@media (prefers-color-scheme: light) {
  .sidebar-nav label {
    border-color: rgba(0, 0, 0, 0.08) !important;
    background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.90)) !important;
    color: rgba(0, 0, 0, 0.85) !important;
  }
  .sidebar-nav label:hover {
    border-color: rgba(99,102,241,0.25) !important;
    background: linear-gradient(180deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06)) !important;
  }
  .sidebar-nav label.selected {
    border-color: rgba(99,102,241,0.35) !important;
    background: linear-gradient(180deg, rgba(99,102,241,0.16), rgba(59,130,246,0.12)) !important;
    color: rgba(0, 0, 0, 0.85) !important;
  }
}

/* Hide scrollbars/arrows that can appear on the description block in some browsers */
/* stylelint-disable compat-api/css */
article.prose, .prose, .gr-prose {
  overflow: visible !important;
  max-height: none !important;
  -ms-overflow-style: none !important; /* IE/Edge */
  scrollbar-width: none !important; /* Firefox */
}
/* stylelint-enable compat-api/css */
article.prose::-webkit-scrollbar,
.prose::-webkit-scrollbar,
.gr-prose::-webkit-scrollbar {
  display: none !important; /* Chrome/Safari */
}

/* Fix for white background on single-line inputs in dark mode */
.gradio-container input[type="text"],
.gradio-container input[type="password"],
.gradio-container input[type="number"],
.gradio-container input[type="email"] {
  background-color: var(--input-background-fill) !important;
  color: var(--body-text-color) !important;
}

/* Custom glossy purple styling for primary action buttons */
.gradio-container button.primary {
  border: 1px solid rgba(99, 102, 241, 0.35) !important;
  background: linear-gradient(180deg, rgba(99, 102, 241, 0.25), rgba(59, 130, 246, 0.20)) !important;
  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.12), 0 2px 4px rgba(0, 0, 0, 0.15) !important;
  color: rgba(255, 255, 255, 0.95) !important;
  transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
}
.gradio-container button.primary:hover {
  border-color: rgba(99, 102, 241, 0.5) !important;
  background: linear-gradient(180deg, rgba(99, 102, 241, 0.35), rgba(59, 130, 246, 0.28)) !important;
  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 3px 6px rgba(0, 0, 0, 0.2) !important;
}
.gradio-container button.primary:active {
  transform: scale(0.98) !important;
  box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.2), 0 1px 2px rgba(0, 0, 0, 0.1) !important;
}
@media (prefers-color-scheme: light) {
  .gradio-container button.primary {
    border-color: rgba(99, 102, 241, 0.4) !important;
    background: linear-gradient(180deg, rgba(99, 102, 241, 0.85), rgba(79, 70, 229, 0.75)) !important;
    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.25), 0 2px 4px rgba(0, 0, 0, 0.12) !important;
    color: rgba(255, 255, 255, 0.98) !important;
  }
  .gradio-container button.primary:hover {
    background: linear-gradient(180deg, rgba(99, 102, 241, 0.95), rgba(79, 70, 229, 0.85)) !important;
    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.3), 0 3px 6px rgba(0, 0, 0, 0.15) !important;
  }
}

/* Hide the actual tabs since we use the sidebar to control them */
.hidden-tabs .tab-nav,
.hidden-tabs [role="tablist"] {
  display: none !important;
}
/* Hide the entire first row of the tabs container (contains tab buttons + overflow) */
.hidden-tabs > div:first-child {
  display: none !important;
}
/* Ensure audio component buttons remain visible - they're inside tab panels, not the first row */
.hidden-tabs [role="tabpanel"] button {
  display: inline-flex !important;
}

/* Custom scrollbar styling - Progressive enhancement, falls back to default scrollbars */
/* stylelint-disable compat-api/css */
* {
  scrollbar-width: thin;
  scrollbar-color: rgba(61, 212, 159, 0.4) rgba(255, 255, 255, 0.05);
}
*::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}
*::-webkit-scrollbar-track {
  background: rgba(255, 255, 255, 0.05);
  border-radius: 4px;
}
*::-webkit-scrollbar-thumb {
  background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
  border-radius: 4px;
  border: 1px solid rgba(119, 247, 209, 0.2);
}
*::-webkit-scrollbar-thumb:hover {
  background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
}
*::-webkit-scrollbar-corner {
  background: rgba(255, 255, 255, 0.05);
}
@media (prefers-color-scheme: light) {
  * {
    scrollbar-color: rgba(61, 212, 159, 0.4) rgba(0, 0, 0, 0.05);
  }
  *::-webkit-scrollbar-track {
    background: rgba(0, 0, 0, 0.05);
  }
  *::-webkit-scrollbar-thumb {
    background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
    border-color: rgba(0, 0, 0, 0.1);
  }
  *::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
  }
  *::-webkit-scrollbar-corner {
    background: rgba(0, 0, 0, 0.05);
  }
}
/* stylelint-enable compat-api/css */