YourUsername committed
Commit c3bea8b · 1 Parent(s): 2cae13b
Filesystem/demo.txt ADDED
@@ -0,0 +1 @@
+Hello, World!
Modules/Agent_Terminal.py ADDED
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import os
+import sys
+import types
+import ast
+from io import StringIO
+from typing import Annotated
+import importlib.metadata
+
+import gradio as gr
+from ._docstrings import autodoc
+from .File_System import ROOT_DIR, File_System
+from .Web_Fetch import Web_Fetch
+from .Web_Search import Web_Search
+from .Memory_Manager import Memory_Manager
+from .Generate_Speech import Generate_Speech, List_Kokoro_Voices, List_Supertonic_Voices
+from .Generate_Image import Generate_Image
+from .Generate_Video import Generate_Video
+from .Deep_Research import Deep_Research
+from .Obsidian_Vault import Obsidian_Vault
+from .Shell_Command import Shell_Command
+from .Code_Interpreter import Code_Interpreter
+
+from app import _log_call_end, _log_call_start, _truncate_for_log
+
+def search_packages(query: str = "") -> str:
+    """Search for installed Python packages by name. If query is empty, lists all."""
+    packages = []
+    query = query.lower()
+    for dist in importlib.metadata.distributions():
+        name = dist.metadata['Name']
+        if query in name.lower():
+            packages.append(f"{name} ({dist.version})")
+    packages.sort()
+    if not packages:
+        return f"No packages found matching '{query}'."
+    return "\n".join(packages)
+
+def _get_tools_map():
+    return {
+        "Web_Fetch": Web_Fetch,
+        "Web_Search": Web_Search,
+        "Memory_Manager": Memory_Manager,
+        "Generate_Speech": Generate_Speech,
+        "List_Kokoro_Voices": List_Kokoro_Voices,
+        "List_Supertonic_Voices": List_Supertonic_Voices,
+        "Generate_Image": Generate_Image,
+        "Generate_Video": Generate_Video,
+        "Deep_Research": Deep_Research,
+        "File_System": File_System,
+        "Obsidian_Vault": Obsidian_Vault,
+        "Shell_Command": Shell_Command,
+        "Code_Interpreter": Code_Interpreter,
+    }
+
+def list_tools() -> list[str]:
+    """List all available tools in the Code Interpreter environment."""
+    return list(_get_tools_map().keys())
+
+def search_tools(query: str) -> str:
+    """Search for tools by name or description. Returns usage info for matches."""
+    query = query.lower()
+    matches = []
+    tools = _get_tools_map()
+    for name, func in tools.items():
+        doc = (func.__doc__ or "").lower()
+        if query in name.lower() or query in doc:
+            matches.append((name, func))
+
+    if not matches:
+        return f"No tools found matching '{query}'."
+
+    output = []
+    for name, func in matches:
+        output.append(f"--- {name} ---")
+        output.append(func.__doc__ or "No documentation available.")
+        output.append("")
+    return "\n".join(output)
+
+def usage(tool_name: str) -> str:
+    """Get detailed usage information for a specific tool."""
+    tools = _get_tools_map()
+    if tool_name not in tools:
+        return f"Tool '{tool_name}' not found. Available tools: {', '.join(tools.keys())}"
+    func = tools[tool_name]
+    return f"--- {tool_name} ---\n{func.__doc__ or 'No documentation available.'}"
+
+def _initialize_mock_modules():
+    """
+    Registers a mock 'functions' module in sys.modules so that LLMs
+    can do 'from functions import ...' without error.
+    """
+    mock_module = types.ModuleType("functions")
+
+    # Add tools
+    for name, tool in _get_tools_map().items():
+        setattr(mock_module, name, tool)
+
+    # Add helpers
+    helpers = {
+        "list_tools": list_tools,
+        "search_tools": search_tools,
+        "usage": usage,
+        "search_packages": search_packages,
+    }
+    for name, func in helpers.items():
+        setattr(mock_module, name, func)
+
+    sys.modules["functions"] = mock_module
+
+_initialize_mock_modules()
+
+# Single source of truth for the LLM-facing tool description
+TOOL_SUMMARY = (
+    "Executes Python code as the unified interface for the entire tools ecosystem. "
+    "All tool interactions must happen through this code-execution gateway. "
+    "Use Agent Terminal repeatedly whenever you need to chain or combine tool operations. "
+    "Available tools: `Web_Fetch`, `Web_Search`, `Code_Interpreter`, `Shell_Command`, `File_System`, `Obsidian_Vault`, `Memory_Manager`, `Generate_Speech`, `Generate_Image`, `Generate_Video`, `Deep_Research`."
+)
+
+
+
+@autodoc(
+    summary=TOOL_SUMMARY,
+)
+def Agent_Terminal(input: Annotated[str, (
+    "Python source code to run; stdout is captured and returned. "
+    "Execute these commands: "
+    "`search_tools('query')` to search for tools by name or capability; "
+    "`list_tools()` to list all available tools; "
+    "`usage('ToolName')` to inspect a tool’s expected input parameters; "
+    "`search_packages('query')` to search for installed Python libraries."
+)]) -> str:
+    _log_call_start("Agent_Terminal", input=_truncate_for_log(input or "", 300))
+    if input is None:
+        result = "No code provided."
+        _log_call_end("Agent_Terminal", result)
+        return result
+    old_stdout = sys.stdout
+    old_cwd = os.getcwd()
+    redirected_output = sys.stdout = StringIO()
+
+    # Prepare the execution environment with all tools
+    tools_env = {
+        "Web_Fetch": Web_Fetch,
+        "Web_Search": Web_Search,
+        "Memory_Manager": Memory_Manager,
+        "Generate_Speech": Generate_Speech,
+        "List_Kokoro_Voices": List_Kokoro_Voices,
+        "List_Supertonic_Voices": List_Supertonic_Voices,
+        "Generate_Image": Generate_Image,
+        "Generate_Video": Generate_Video,
+        "Deep_Research": Deep_Research,
+        "File_System": File_System,
+        "Obsidian_Vault": Obsidian_Vault,
+        "Shell_Command": Shell_Command,
+        "Code_Interpreter": Code_Interpreter,
+        "list_tools": list_tools,
+        "search_tools": search_tools,
+        "usage": usage,
+        "search_packages": search_packages,
+        "print": print,  # Ensure print is available
+        "__builtins__": __builtins__,
+    }
+
+    try:
+        os.chdir(ROOT_DIR)
+
+        # Parse code to check if the last statement is an expression
+        tree = ast.parse(input)
+        if tree.body and isinstance(tree.body[-1], ast.Expr):
+            last_node = tree.body.pop()
+
+            # Execute preceding statements
+            if tree.body:
+                exec(compile(tree, filename="<string>", mode="exec"), tools_env)
+
+            # Evaluate and print the last expression
+            expr = compile(ast.Expression(last_node.value), filename="<string>", mode="eval")
+            result_val = eval(expr, tools_env)
+            if result_val is not None:
+                print(result_val)
+        else:
+            exec(input, tools_env)
+
+        result = redirected_output.getvalue()
+    except Exception as exc:  # pylint: disable=broad-except
+        result = str(exc)
+    finally:
+        sys.stdout = old_stdout
+        try:
+            os.chdir(old_cwd)
+        except Exception:
+            pass
+    _log_call_end("Agent_Terminal", _truncate_for_log(result))
+    return result
+
+
+def build_interface() -> gr.Interface:
+    return gr.Interface(
+        fn=Agent_Terminal,
+        inputs=gr.Code(label="Python Code", language="python"),
+        outputs=gr.Textbox(label="Output", lines=5, max_lines=20),
+        title="Agent Terminal",
+        description="<div style=\"text-align:center\">Interact with all other tools via a Python API. Reduces token usage by 90%.</div>",
+        api_description=TOOL_SUMMARY,
+        flagging_mode="never",
+    )
+
+
+__all__ = ["Agent_Terminal", "build_interface"]
Modules/Code_Interpreter.py ADDED
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import os
+import sys
+from io import StringIO
+from typing import Annotated
+
+import gradio as gr
+from ._docstrings import autodoc
+from .File_System import ROOT_DIR
+
+from app import _log_call_end, _log_call_start, _truncate_for_log
+
+# Single source of truth for the LLM-facing tool description
+TOOL_SUMMARY = (
+    "Execute Python code from the tool root; returns captured stdout or the exception text."
+)
+
+
+@autodoc(
+    summary=TOOL_SUMMARY,
+)
+def Code_Interpreter(code: Annotated[str, "Python source code to run; stdout is captured and returned."]) -> str:
+    _log_call_start("Code_Interpreter", code=_truncate_for_log(code or "", 300))
+    if code is None:
+        result = "No code provided."
+        _log_call_end("Code_Interpreter", result)
+        return result
+    old_stdout = sys.stdout
+    old_cwd = os.getcwd()
+    redirected_output = sys.stdout = StringIO()
+    try:
+        os.chdir(ROOT_DIR)
+        exec(code)
+        result = redirected_output.getvalue()
+    except Exception as exc:  # pylint: disable=broad-except
+        result = str(exc)
+    finally:
+        sys.stdout = old_stdout
+        try:
+            os.chdir(old_cwd)
+        except Exception:
+            pass
+    _log_call_end("Code_Interpreter", _truncate_for_log(result))
+    return result
+
+
+def build_interface() -> gr.Interface:
+    return gr.Interface(
+        fn=Code_Interpreter,
+        inputs=gr.Code(label="Python Code", language="python"),
+        outputs=gr.Textbox(label="Output", lines=5, max_lines=20),
+        title="Code Interpreter",
+        description="<div style=\"text-align:center\">Execute Python code and see the output.</div>",
+        api_description=TOOL_SUMMARY,
+        flagging_mode="never",
+    )
+
+
+__all__ = ["Code_Interpreter", "build_interface"]
Modules/Deep_Research.py ADDED
@@ -0,0 +1,595 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import tempfile
6
+ import time
7
+ import uuid
8
+ from collections import OrderedDict, deque
9
+ from concurrent.futures import Future, ThreadPoolExecutor, as_completed
10
+ from datetime import datetime
11
+ from typing import Annotated, Callable, Dict, List, Tuple
12
+ from urllib.parse import urlparse
13
+
14
+ import gradio as gr
15
+ import requests
16
+ from bs4 import BeautifulSoup
17
+ from ddgs import DDGS
18
+ from huggingface_hub import InferenceClient
19
+
20
+ from .Web_Fetch import _fullpage_markdown_from_soup, _http_get_enhanced
21
+ from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
22
+ from ._docstrings import autodoc
23
+ from .File_System import ROOT_DIR
24
+
25
+ HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
26
+
27
+ # Single source of truth for the LLM-facing tool description
28
+ TOOL_SUMMARY = (
29
+ "Write a summary of what the user wants to research, and "
30
+ "run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
31
+ "returns (Markdown report, newline-separated source links, downloadable report path). "
32
+ "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report](URL)`."
33
+ )
34
+
35
+ RESEARCHER_SYSTEM_PROMPT = (
36
+ "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
37
+ "You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
38
+ "<report_format>\n"
39
+ "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
40
+ "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
41
+ "</report_format>\n\n"
42
+ "<document_structure>\n"
43
+ "- Always begin with a clear title using a single # header\n"
44
+ "- Organize content into major sections using ## headers\n"
45
+ "- Further divide into subsections using ### headers\n"
46
+ "- Use #### headers sparingly for special subsections\n"
47
+ "- Never skip header levels\n"
48
+ "- Write multiple paragraphs per section or subsection\n"
49
+ "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
50
+ "- Never use lists, instead always use text or tables\n\n"
51
+ "Mandatory Section Flow:\n"
52
+ "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
53
+ "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
54
+ "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
55
+ "</document_structure>\n\n"
56
+ "<planning_rules>\n"
57
+ "- Always break it down into multiple steps\n"
58
+ "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
59
+ "- Create the best report that weighs all the evidence from the sources\n"
60
+ "- Use the current date supplied in the first user message to contextualize findings\n"
61
+ "- Make sure that your final report addresses all parts of the query\n"
62
+ "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
63
+ "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
64
+ "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
65
+ "</planning_rules>\n\n"
66
+ )
67
+
68
+ FILTERER_SYSTEM_PROMPT = (
69
+ "You are Nymbot Filterer, an analyst who selects the most relevant sources for a research task. "
70
+ "You will be given a summary of the research topic (and optional search queries) followed by multiple fetched documents. "
71
+ "Each document includes its URL and a truncated excerpt. Evaluate how well each source helps answer the research topic. "
72
+ "Return only the URLs that should be used for the final research step. Output plain text with exactly one URL per line and no additional commentary, bullets, numbering, or explanations. "
73
+ "If no sources are relevant, return an empty string."
74
+ )
75
+
76
+
77
+ class SlowHost(Exception):
78
+ pass
79
+
80
+
81
+ def _normalize_query(q: str) -> str:
82
+ if not q:
83
+ return ""
84
+ repl = {"“": '"', "”": '"', "‘": "'", "’": "'", "`": "'"}
85
+ for key, value in repl.items():
86
+ q = q.replace(key, value)
87
+ q = re.sub(r"\s+", " ", q)
88
+ q = re.sub(r'"\s+"', " ", q)
89
+ q = q.strip().strip('"').strip()
90
+ return q
91
+
92
+
93
+ def _search_urls_only(query: str, max_results: int) -> list[str]:
94
+ if not query or not query.strip() or max_results <= 0:
95
+ return []
96
+ urls: list[str] = []
97
+ try:
98
+ _search_rate_limiter.acquire()
99
+ with DDGS() as ddgs:
100
+ for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
101
+ url = (item.get("href") or item.get("url") or "").strip()
102
+ if url:
103
+ urls.append(url)
104
+ except Exception:
105
+ pass
106
+ seen = set()
107
+ deduped = []
108
+ for url in urls:
109
+ if url not in seen:
110
+ seen.add(url)
111
+ deduped.append(url)
112
+ return deduped
113
+
114
+
115
+ def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
116
+ try:
117
+ resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
118
+ resp.raise_for_status()
119
+ except requests.exceptions.RequestException as exc:
120
+ msg = str(exc)
121
+ if "timed out" in msg.lower():
122
+ raise SlowHost(msg) from exc
123
+ return ""
124
+ final_url = str(resp.url)
125
+ ctype = resp.headers.get("Content-Type", "")
126
+ if "html" not in ctype.lower():
127
+ return ""
128
+ resp.encoding = resp.encoding or resp.apparent_encoding
129
+ html = resp.text
130
+ soup = BeautifulSoup(html, "lxml")
131
+ md_text = _fullpage_markdown_from_soup(soup, final_url, "")
132
+ if max_chars > 0 and len(md_text) > max_chars:
133
+ md_text = md_text[:max_chars]
134
+ return md_text
135
+
136
+
137
+ def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
138
+ out = []
139
+ total = 0
140
+ truncated = False
141
+ for part in parts:
142
+ if not part:
143
+ continue
144
+ if total + len(part) > max_chars:
145
+ out.append(part[: max(0, max_chars - total)])
146
+ truncated = True
147
+ break
148
+ out.append(part)
149
+ total += len(part)
150
+ return ("\n\n".join(out), truncated)
151
+
152
+
153
+ def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
154
+ sources_blocks: List[str] = []
155
+ indexed_urls: List[str] = []
156
+ for idx, url in enumerate(url_list, start=1):
157
+ text = pages_map.get(url, "").strip()
158
+ if not text:
159
+ continue
160
+ indexed_urls.append(f"[{idx}] {url}")
161
+ sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
162
+ sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
163
+ prompt_parts: List[str] = []
164
+ prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
165
+ populated = [q for q in queries if q and q.strip()]
166
+ if populated:
167
+ prompt_parts.append("<search_queries>\n" + "\n".join(f"- {q.strip()}" for q in populated) + "\n</search_queries>\n")
168
+ if indexed_urls:
169
+ prompt_parts.append("<sources_list>\n" + "\n".join(indexed_urls) + "\n</sources_list>\n")
170
+ prompt_parts.append("<fetched_documents>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</fetched_documents>")
171
+ return "\n\n".join(prompt_parts)
172
+
173
+
174
+ def _build_filter_prompt(summary: str, queries: List[str], pages_map: Dict[str, str]) -> str:
175
+ populated = [q for q in queries if q and q.strip()]
176
+ summary_text = summary or ""
177
+ prompt_sections: List[str] = []
178
+ prompt_sections.append("<research_topic_summary>\n" + summary_text + "\n</research_topic_summary>")
179
+ if populated:
180
+ prompt_sections.append("<search_queries>\n" + "\n".join(populated) + "\n</search_queries>")
181
+ sources: List[str] = []
182
+ for idx, (url, text) in enumerate(pages_map.items(), start=1):
183
+ content = text.strip()
184
+ if not content:
185
+ continue
186
+ sources.append(f"[Source {idx}] URL: {url}\n\n{content}")
187
+ sources_joined, truncated = _truncate_join(sources, max_chars=60_000)
188
+ prompt_sections.append("<candidate_sources>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</candidate_sources>")
189
+ prompt_sections.append(
190
+ "<task>\nIdentify which of the provided URLs should be retained for the final research synthesis. "
191
+ "Consider coverage, credibility, and relevance to the research topic. "
192
+ "Return ONLY the URLs you choose, with one URL per line and no additional text.\n</task>"
193
+ )
194
+ return "\n\n".join(prompt_sections)
195
+
196
+
197
+ def _parse_filterer_output(raw: str, allowed_urls: List[str]) -> List[str]:
198
+ if not raw:
199
+ return []
200
+ allowed_set = {url.strip(): idx for idx, url in enumerate(allowed_urls)}
201
+ found_indices: set[int] = set()
202
+ for line in raw.splitlines():
203
+ candidate = line.strip()
204
+ if not candidate:
205
+ continue
206
+ if candidate in allowed_set:
207
+ found_indices.add(allowed_set[candidate])
208
+ continue
209
+ match = re.search(r"https?://[^\s]+", candidate)
210
+ if not match:
211
+ continue
212
+ url = match.group(0).rstrip(".,);]")
213
+ if url in allowed_set:
214
+ found_indices.add(allowed_set[url])
215
+ selected = [allowed_urls[idx] for idx in sorted(found_indices)]
216
+ return selected
217
+
218
+
219
+ def _write_report_tmp(text: str) -> str:
220
+ filename = f"research_report_{uuid.uuid4().hex}.txt"
221
+ path = os.path.join(ROOT_DIR, filename)
222
+ with open(path, "w", encoding="utf-8") as file:
223
+ file.write(text)
224
+ return path
225
+
226
+
227
+ def _fetch_pages_within_budget(urls: List[str], char_limit: int, time_left_fn: Callable[[], float]) -> OrderedDict:
228
+ pages: dict[str, str] = {}
229
+ if not urls:
230
+ return OrderedDict()
231
+ queue = deque(urls)
232
+ attempts: dict[str, int] = {url: 0 for url in urls}
233
+ max_attempts = 2
234
+ max_workers = min(12, max(4, len(urls)))
235
+ in_flight: dict[Future, str] = {}
236
+ delayed: list[tuple[float, str]] = []
237
+
238
+ def schedule_next(executor: ThreadPoolExecutor) -> None:
239
+ while queue and len(in_flight) < max_workers:
240
+ url = queue.popleft()
241
+ if url in pages:
242
+ continue
243
+ attempts.setdefault(url, 0)
244
+ if attempts[url] >= max_attempts:
245
+ continue
246
+ attempts[url] += 1
247
+ tl = time_left_fn()
248
+ if tl <= 0.1:
249
+ return
250
+ per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
251
+ future = executor.submit(_fetch_page_markdown_fast, url, char_limit, per_timeout)
252
+ in_flight[future] = url
253
+
254
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
255
+ schedule_next(executor)
256
+ while (in_flight or queue or delayed) and time_left_fn() > 0.2:
257
+ now = time.time()
258
+ if delayed:
259
+ ready: list[tuple[float, str]] = []
260
+ not_ready: list[tuple[float, str]] = []
261
+ for ready_time, delayed_url in delayed:
262
+ (ready if ready_time <= now else not_ready).append((ready_time, delayed_url))
263
+ delayed = not_ready
264
+ for _, delayed_url in ready:
265
+ queue.append(delayed_url)
266
+ if ready:
267
+ schedule_next(executor)
268
+ done = [future for future in list(in_flight.keys()) if future.done()]
269
+ if not done:
270
+ if not queue and delayed:
271
+ next_ready = min((t for t, _ in delayed), default=time.time())
272
+ sleep_for = max(0.0, next_ready - time.time())
273
+ time.sleep(max(0.02, min(0.25, sleep_for)))
274
+ else:
275
+ time.sleep(0.05)
276
+ continue
277
+ for future in done:
278
+ url = in_flight.pop(future)
279
+ try:
280
+ md = future.result()
281
+ if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
282
+ pages[url] = md
283
+ try:
284
+ print(f"[FETCH OK] {url} (chars={len(md)})", flush=True)
285
+ except Exception:
286
+ pass
287
+ except SlowHost:
288
+ if time_left_fn() > 5.0:
289
+ delayed.append((time.time() + 3.0, url))
290
+ except Exception:
291
+ pass
292
+ schedule_next(executor)
293
+ ordered = OrderedDict((url, pages[url]) for url in urls if url in pages)
294
+ return ordered
295
+
296
+
297
+ @autodoc(
298
+ summary=TOOL_SUMMARY,
299
+ )
300
+ def Deep_Research(
301
+ summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
302
+ query1: Annotated[str, "DDG Search Query 1"],
303
+ max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
304
+ query2: Annotated[str, "DDG Search Query 2"] = "",
305
+ max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
306
+ query3: Annotated[str, "DDG Search Query 3"] = "",
307
+ max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
308
+ query4: Annotated[str, "DDG Search Query 4"] = "",
309
+ max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
310
+ query5: Annotated[str, "DDG Search Query 5"] = "",
311
+ max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
312
+ ) -> tuple[str, str, str]:
313
+ _log_call_start(
314
+ "Deep_Research",
315
+ summary=_truncate_for_log(summary or "", 200),
316
+ queries=[q for q in [query1, query2, query3, query4, query5] if q],
317
+ )
318
+ if not HF_TEXTGEN_TOKEN:
319
+ _log_call_end("Deep_Research", "error=missing HF token")
320
+ raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")
321
+ queries = [
322
+ _normalize_query(query1 or ""),
323
+ _normalize_query(query2 or ""),
324
+ _normalize_query(query3 or ""),
325
+ _normalize_query(query4 or ""),
326
+ _normalize_query(query5 or ""),
327
+ ]
328
+ reqs = [
329
+ max(1, min(50, int(max1))),
330
+ max(1, min(50, int(max2))),
331
+ max(1, min(50, int(max3))),
332
+ max(1, min(50, int(max4))),
333
+ max(1, min(50, int(max5))),
334
+ ]
335
+ total_requested = sum(reqs)
336
+ if total_requested > 50:
337
+ reqs = [10, 10, 10, 10, 10]
338
+ start_ts = time.time()
339
+ budget_seconds = 55.0
340
+ deadline = start_ts + budget_seconds
341
+
342
+ def time_left() -> float:
343
+ return max(0.0, deadline - time.time())
344
+
345
+ now_dt = datetime.now().astimezone()
346
+ date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
347
+ if not date_str:
348
+ date_str = now_dt.isoformat()
349
+
350
+ all_urls: list[str] = []
351
+ tasks = []
352
+ with ThreadPoolExecutor(max_workers=min(5, sum(1 for q in queries if q.strip())) or 1) as executor:
353
+ for query, count in zip(queries, reqs):
354
+ if not query.strip():
355
+ continue
356
+ tasks.append(executor.submit(_search_urls_only, query.strip(), count))
357
+ for future in as_completed(tasks):
358
+ try:
359
+ urls = future.result() or []
360
+ except Exception:
361
+ urls = []
362
+ for url in urls:
363
+ if url not in all_urls:
364
+ all_urls.append(url)
365
+ if len(all_urls) >= 50:
366
+ break
367
+ if time_left() <= 0.5:
368
+ break
369
+ if len(all_urls) > 50:
370
+ all_urls = all_urls[:50]
371
+ blacklist = {
372
+ "homedepot.com",
373
+ "tractorsupply.com",
374
+ "mcmaster.com",
375
+ "mrchain.com",
376
+ "answers.com",
377
+ "city-data.com",
378
+ "dictionary.cambridge.org",
379
+ }
380
+
381
+ def _domain(url: str) -> str:
382
+ try:
383
+ return urlparse(url).netloc.lower()
384
+ except Exception:
385
+ return ""
386
+
387
+ all_urls = [url for url in all_urls if _domain(url) not in blacklist]
388
+ skip_exts = (
389
+ ".pdf",
390
+ ".ppt",
391
+ ".pptx",
392
+ ".doc",
393
+ ".docx",
394
+ ".xls",
395
+ ".xlsx",
396
+ ".zip",
397
+ ".gz",
398
+ ".tgz",
399
+ ".bz2",
400
+ ".7z",
401
+ ".rar",
402
+ )
403
+
404
+ def _skip_url(url: str) -> bool:
405
+ try:
406
+ path = urlparse(url).path.lower()
407
+ except Exception:
408
+ return False
409
+ return any(path.endswith(ext) for ext in skip_exts)
410
+
411
+ all_urls = [url for url in all_urls if not _skip_url(url)]
412
+ truncated_pages = OrderedDict()
413
+ if all_urls and time_left() > 0.2:
414
+ truncated_pages = _fetch_pages_within_budget(all_urls, 3000, time_left)
415
+ print(
416
+ f"[PIPELINE] Initial fetch complete: candidates={len(all_urls)}, truncated_documents={len(truncated_pages)}, time_left={time_left():.2f}s",
417
+ flush=True,
418
+ )
419
+
420
+ def _invoke_chat(messages, provider: str, max_tokens: int, temp: float, top_p: float):
421
+ client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
422
+ return client.chat.completions.create(
423
+ model="zai-org/GLM-4.6",
424
+ messages=messages,
425
+ max_tokens=max_tokens,
426
+ temperature=temp,
427
+ top_p=top_p,
428
+ )
429
+
430
+ filtered_urls: List[str] = list(truncated_pages.keys())
431
+ filter_output = ""
432
+ filter_used_fallback = False
433
+ filter_success = False
434
+ if truncated_pages and time_left() > 3.0:
435
+ filter_prompt = _build_filter_prompt(summary or "", [q for q in queries if q.strip()], truncated_pages)
436
+ filter_messages = [
437
+ {"role": "system", "content": FILTERER_SYSTEM_PROMPT},
438
+ {"role": "user", "content": f"The current date is {date_str}. Consider how recent each source is when deciding relevance."},
439
+ {"role": "user", "content": filter_prompt},
440
+ ]
441
+ filter_completion = None
442
+ try:
443
+ print("[FILTER] Attempt 1: provider=cerebras, max_tokens=2048", flush=True)
444
+ filter_completion = _invoke_chat(filter_messages, "cerebras", 2048, 0.2, 0.9)
445
+ except Exception as exc1:
446
+ print(f"[FILTER] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
447
+ try:
448
+ print("[FILTER] Attempt 2: provider=auto, max_tokens=2048", flush=True)
449
+ filter_completion = _invoke_chat(filter_messages, "auto", 2048, 0.2, 0.9)
450
+ except Exception as exc2:
451
+ print(f"[FILTER] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
452
+ if filter_completion and filter_completion.choices:
453
+ filter_output = filter_completion.choices[0].message.content or ""
454
+ filtered_urls = _parse_filterer_output(filter_output, list(truncated_pages.keys()))
455
+ filter_success = bool(filter_output.strip()) and bool(filtered_urls)
456
+ if not filtered_urls:
457
+ filter_used_fallback = True
458
+ fallback_count = min(8, len(truncated_pages))
459
+ filtered_urls = list(truncated_pages.keys())[:fallback_count]
460
+ max_final_urls = 20
461
+ if len(filtered_urls) > max_final_urls:
462
+ filter_used_fallback = True
463
+ filtered_urls = filtered_urls[:max_final_urls]
464
+ if not filter_success:
465
+ filter_used_fallback = True
466
+ print(
467
+ f"[FILTER] Selected URLs={len(filtered_urls)}, fallback={filter_used_fallback}, time_left={time_left():.2f}s",
468
+ flush=True,
469
+ )
470
+
471
+ final_pages_fetched = OrderedDict()
472
+ if filtered_urls and time_left() > 0.2:
473
+ final_pages_fetched = _fetch_pages_within_budget(filtered_urls, 8000, time_left)
474
+ merged_pages = OrderedDict()
475
+ for url in filtered_urls:
476
+ content = final_pages_fetched.get(url) or truncated_pages.get(url) or ""
477
+ if content:
478
+ merged_pages[url] = content
479
+ pages = merged_pages
480
+ print(
481
+ f"[PIPELINE] Final fetch complete: retained_documents={len(pages)}, time_left={time_left():.2f}s",
482
+ flush=True,
483
+ )
484
+ prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
485
+ system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
486
+ date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
487
+ messages = [
488
+ system_message,
489
+ date_message,
490
+ {"role": "user", "content": prompt},
491
+ ]
492
+ try:
493
+ prompt_chars = len(prompt)
494
+ except Exception:
495
+ prompt_chars = -1
496
+ print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages.keys())}, prompt_chars={prompt_chars}", flush=True)
497
+ print("[PIPELINE] Starting inference (provider=cerebras, model=zai-org/GLM-4.6)", flush=True)
498
+
499
+ try:
500
+ print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
501
+ completion = _invoke_chat(messages, "cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
502
+ except Exception as exc1:
503
+ print(f"[LLM] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
504
+ try:
505
+ prompt2 = _build_research_prompt(
506
+ summary=summary or "",
507
+ queries=[q for q in queries if q.strip()],
508
+ url_list=list(pages.keys())[:30],
509
+ pages_map={key: pages[key] for key in list(pages.keys())[:30]},
510
+ )
511
+ messages = [
512
+ system_message,
513
+ date_message,
514
+ {"role": "user", "content": prompt2},
515
+ ]
516
+ print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
517
+ completion = _invoke_chat(messages, "cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
518
+ except Exception as exc2:
519
+ print(f"[LLM] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
520
+ try:
521
+ print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
522
+ completion = _invoke_chat(messages, "auto", max_tokens=8192, temp=0.7, top_p=0.95)
523
+ except Exception as exc3:
524
+ _log_call_end("Deep_Research", f"error={_truncate_for_log(str(exc3), 260)}")
525
+ raise gr.Error(f"Researcher model call failed: {exc3}")
526
+ raw = completion.choices[0].message.content or ""
527
+ try:
528
+ no_think = re.sub(r"<think>[\s\S]*?<\\/think>", "", raw, flags=re.IGNORECASE)
529
+ no_think = re.sub(r"<\\/?think>", "", no_think, flags=re.IGNORECASE)
530
+ except Exception:
531
+ no_think = raw
532
+ try:
533
+ paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
534
+ keep: List[str] = []
535
+ removed = 0
536
+ planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will now|i will|i'll|let's|now let me|i need to|now i'll|now i will)\b", re.IGNORECASE)
537
+ for paragraph in paragraphs:
538
+ if planning_re.search(paragraph):
539
+ removed += 1
540
+ continue
541
+ keep.append(paragraph)
542
+ report = "\n\n".join(keep).strip()
543
+ if not report:
544
+ report = no_think.strip()
545
+ except Exception:
546
+ report = no_think
547
+ removed = 0
548
+ report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
549
+ try:
550
+ print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)
551
+ except Exception:
552
+ pass
553
+ links_text = "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
554
+ if links_text:
555
+ sources_section = "\n\n## Sources\n" + "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
556
+ report = report.rstrip() + sources_section
557
+ file_path = _write_report_tmp(report)
558
+ elapsed = time.time() - start_ts
559
+ print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
560
+ _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
561
+ return report, links_text, file_path
562
+
563
+
564
+ def build_interface() -> gr.Interface:
565
+ return gr.Interface(
566
+ fn=Deep_Research,
567
+ inputs=[
568
+ gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question", info="Summarization of research topic (one or more sentences)"),
569
+ gr.Textbox(label="DDG Search Query 1", max_lines=1, info="DDG Search Query 1"),
570
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q1)", info="Max results for Query 1 (1-50)"),
571
+ gr.Textbox(label="DDG Search Query 2", value="", max_lines=1, info="DDG Search Query 2"),
572
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q2)", info="Max results for Query 2 (1-50)"),
573
+ gr.Textbox(label="DDG Search Query 3", value="", max_lines=1, info="DDG Search Query 3"),
574
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q3)", info="Max results for Query 3 (1-50)"),
575
+ gr.Textbox(label="DDG Search Query 4", value="", max_lines=1, info="DDG Search Query 4"),
576
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q4)", info="Max results for Query 4 (1-50)"),
577
+ gr.Textbox(label="DDG Search Query 5", value="", max_lines=1, info="DDG Search Query 5"),
578
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q5)", info="Max results for Query 5 (1-50)"),
579
+ ],
580
+ outputs=[
581
+ gr.Markdown(label="Research Report"),
582
+ gr.Textbox(label="Fetched Links", lines=8),
583
+ gr.File(label="Download Research Report", file_count="single"),
584
+ ],
585
+ title="Deep Research",
586
+ description=(
587
+ "<div style=\"text-align:center\">Perform multi-query web research: search with DuckDuckGo, fetch up to 50 pages in parallel, "
588
+ "and generate a comprehensive report using a large LLM via Hugging Face Inference Providers (Cerebras). Requires HF_READ_TOKEN.</div>"
589
+ ),
590
+ api_description=TOOL_SUMMARY,
591
+ flagging_mode="never",
592
+ )
593
+
594
+
595
+ __all__ = ["Deep_Research", "build_interface"]
Modules/File_System.py ADDED
@@ -0,0 +1,648 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import shutil
7
+ import stat
8
+ from datetime import datetime
9
+ from typing import Annotated, Optional
10
+
11
+ import gradio as gr
12
+
13
+ from app import _log_call_end, _log_call_start, _truncate_for_log
14
+ from ._docstrings import autodoc
15
+
16
+
17
+ TOOL_SUMMARY = (
18
+ "Browse, search, and manage files within a safe root. "
19
+ "Actions: list, read, write, append, mkdir, move, copy, delete, info, search, help. "
20
+ "Fill other fields as needed. "
21
+ "Use paths like `/` or `/notes/todo.txt` because all paths are relative to the root (`/`). "
22
+ "Use 'help' to see action-specific required fields and examples."
23
+ )
24
+
25
+ HELP_TEXT = (
26
+ "File System — actions and usage\n\n"
27
+ "Root: paths resolve under Nymbo-Tools/Filesystem by default (or NYMBO_TOOLS_ROOT if set). "
28
+ "Start paths with '/' to refer to the tool root (e.g., /notes). "
29
+ "Absolute paths are disabled unless UNSAFE_ALLOW_ABS_PATHS=1.\n\n"
30
+ "Actions and fields:\n"
31
+ "- list: path='/' (default), recursive=false, show_hidden=false, max_entries=20\n"
32
+ "- read: path (e.g., /notes/todo.txt), offset=0, max_chars=4000 (shows next_cursor when truncated)\n"
33
+ "- write: path, content (UTF-8), create_dirs=true\n"
34
+ "- append: path, content (UTF-8), create_dirs=true\n"
35
+ "- mkdir: path (directory), exist_ok=true\n"
36
+ "- move: path (src), dest_path (dst), overwrite=false\n"
37
+ "- copy: path (src), dest_path (dst), overwrite=false\n"
38
+ "- delete: path, recursive=true (required for directories)\n"
39
+ "- info: path\n"
40
+ "- search: path (dir or file), content=query text, recursive=false, show_hidden=false, max_entries=20, case_sensitive=false, offset=0\n"
41
+ "- help: show this guide\n\n"
42
+ "Errors are returned as JSON with fields: {status:'error', code, message, path?, hint?, data?}.\n\n"
43
+ "Examples:\n"
44
+ "- list current: action=list, path='/'\n"
45
+ "- make folder: action=mkdir, path='/notes'\n"
46
+ "- write file: action=write, path='/notes/todo.txt', content='hello'\n"
47
+ "- read file: action=read, path='/notes/todo.txt', max_chars=200\n"
48
+ "- move file: action=move, path='/notes/todo.txt', dest_path='/notes/todo-old.txt', overwrite=true\n"
49
+ "- delete dir: action=delete, path='/notes', recursive=true\n"
50
+ "- search text: action=search, path='/notes', content='TODO', recursive=true, max_entries=50\n"
51
+ "- page search results: action=search, content='TODO', offset=10\n"
52
+ "- case-sensitive search: action=search, content='TODO', case_sensitive=true\n"
53
+ )
54
+
55
+
56
+ def _default_root() -> str:
57
+ # Prefer explicit root via env var
58
+ root = os.getenv("NYMBO_TOOLS_ROOT")
59
+ if root and root.strip():
60
+ return os.path.abspath(os.path.expanduser(root.strip()))
61
+ # Default to "Nymbo-Tools/Filesystem" alongside this module package
62
+ try:
63
+ here = os.path.abspath(__file__)
64
+ tools_dir = os.path.dirname(os.path.dirname(here)) # .../Nymbo-Tools
65
+ default_root = os.path.abspath(os.path.join(tools_dir, "Filesystem"))
66
+ return default_root
67
+ except Exception:
68
+ # Final fallback
69
+ return os.path.abspath(os.getcwd())
70
+
71
+
72
+ ROOT_DIR = _default_root()
73
+ # Ensure the default root directory exists to make listing/writing more convenient
74
+ try:
75
+ os.makedirs(ROOT_DIR, exist_ok=True)
76
+ except Exception:
77
+ pass
78
+ ALLOW_ABS = bool(int(os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0")))
79
+
80
+ def _safe_err(exc: Exception | str) -> str:
81
+ """Return an error string with any absolute root replaced by '/' and slashes normalized.
82
+ This handles variants like backslashes and duplicate slashes in OS messages.
83
+ """
84
+ s = str(exc)
85
+ # Normalize to forward slashes for comparison
86
+ s_norm = s.replace("\\", "/")
87
+ root_fwd = ROOT_DIR.replace("\\", "/")
88
+ # Collapse duplicate slashes in root representation
89
+ root_variants = {ROOT_DIR, root_fwd, re.sub(r"/+", "/", root_fwd)}
90
+ for variant in root_variants:
91
+ if variant:
92
+ s_norm = s_norm.replace(variant, "/")
93
+ # Collapse duplicate slashes in final output
94
+ s_norm = re.sub(r"/+", "/", s_norm)
95
+ return s_norm
96
+
97
+
98
+ def _err(code: str, message: str, *, path: Optional[str] = None, hint: Optional[str] = None, data: Optional[dict] = None) -> str:
99
+ """Return a structured error JSON string.
100
+ Fields: status='error', code, message, path?, hint?, data?, root='/'
101
+ """
102
+ payload = {
103
+ "status": "error",
104
+ "code": code,
105
+ "message": message,
106
+ "root": "/",
107
+ }
108
+ if path is not None and path != "":
109
+ payload["path"] = path
110
+ if hint:
111
+ payload["hint"] = hint
112
+ if data:
113
+ payload["data"] = data
114
+ return json.dumps(payload, ensure_ascii=False)
115
+
116
+
117
+ def _resolve_path(path: str) -> tuple[str, str]:
118
+ """
119
+ Resolve a user-provided path to an absolute, normalized path constrained to ROOT_DIR
120
+ (unless UNSAFE_ALLOW_ABS_PATHS=1). Returns (abs_path, error_message). error_message is empty when ok.
121
+ """
122
+ try:
123
+ user_input = (path or "/").strip() or "/"
124
+ if user_input.startswith("/"):
125
+ # Treat leading '/' as the virtual root for the tool.
126
+ rel_part = user_input.lstrip("/") or "."
127
+ raw = os.path.expanduser(rel_part)
128
+ treat_as_relative = True
129
+ else:
130
+ raw = os.path.expanduser(user_input)
131
+ treat_as_relative = False
132
+
133
+ if not treat_as_relative and os.path.isabs(raw):
134
+ if not ALLOW_ABS:
135
+ # Absolute paths are not allowed in safe mode
136
+ return "", _err(
137
+ "absolute_path_disabled",
138
+ "Absolute paths are disabled in safe mode.",
139
+ path=raw.replace("\\", "/"),
140
+ hint="Use a path relative to / (e.g., /notes/todo.txt)."
141
+ )
142
+ abs_path = os.path.abspath(raw)
143
+ else:
144
+ abs_path = os.path.abspath(os.path.join(ROOT_DIR, raw))
145
+ # Constrain to ROOT when not unsafe mode
146
+ if not ALLOW_ABS:
147
+ try:
148
+ common = os.path.commonpath([os.path.normpath(ROOT_DIR), os.path.normpath(abs_path)])
149
+ except Exception:
150
+ # Fallback to simple check
151
+ root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
152
+ abs_cmp = os.path.normcase(os.path.normpath(abs_path))
153
+ if not abs_cmp.startswith(root_cmp):
154
+ return "", _err(
155
+ "path_outside_root",
156
+ "Path not allowed outside root.",
157
+ path=user_input.replace("\\", "/"),
158
+ hint="Use a path under / (the tool's root)."
159
+ )
160
+ else:
161
+ root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
162
+ common_cmp = os.path.normcase(os.path.normpath(common))
163
+ if common_cmp != root_cmp:
164
+ return "", _err(
165
+ "path_outside_root",
166
+ "Path not allowed outside root.",
167
+ path=user_input.replace("\\", "/"),
168
+ hint="Use a path under / (the tool's root)."
169
+ )
170
+ return abs_path, ""
171
+ except Exception as exc:
172
+ return "", _err(
173
+ "resolve_path_failed",
174
+ "Failed to resolve path.",
175
+ path=(path or ""),
176
+ data={"error": _safe_err(exc)}
177
+ )
178
+
179
+
180
+ def _fmt_size(num_bytes: int) -> str:
181
+ units = ["B", "KB", "MB", "GB", "TB"]
182
+ size = float(num_bytes)
183
+ for unit in units:
184
+ if size < 1024.0:
185
+ return f"{size:.1f} {unit}"
186
+ size /= 1024.0
187
+ return f"{size:.1f} PB"
188
+
189
+
190
+ def _display_path(abs_path: str) -> str:
191
+ """Return a user-friendly path relative to ROOT_DIR using forward slashes.
192
+ Example: ROOT_DIR -> '/', a file under it -> '/sub/dir/file.txt'."""
193
+ try:
194
+ norm_root = os.path.normpath(ROOT_DIR)
195
+ norm_abs = os.path.normpath(abs_path)
196
+ common = os.path.commonpath([norm_root, norm_abs])
197
+ if os.path.normcase(common) == os.path.normcase(norm_root):
198
+ rel = os.path.relpath(norm_abs, norm_root)
199
+ if rel == ".":
200
+ return "/"
201
+ return "/" + rel.replace("\\", "/")
202
+ except Exception:
203
+ pass
204
+ # Fallback to original absolute path
205
+ return abs_path.replace("\\", "/")
206
+
207
+
208
+ def _list_dir(abs_path: str, *, show_hidden: bool, recursive: bool, max_entries: int) -> str:
209
+ lines: list[str] = []
210
+ total = 0
211
+ root_display = "/"
212
+ listing_display = _display_path(abs_path)
213
+ for root, dirs, files in os.walk(abs_path):
214
+ # filter hidden
215
+ if not show_hidden:
216
+ dirs[:] = [d for d in dirs if not d.startswith('.')]
217
+ files = [f for f in files if not f.startswith('.')]
218
+ try:
219
+ rel_root = os.path.relpath(root, ROOT_DIR)
220
+ except Exception:
221
+ rel_root = root
222
+ rel_root_disp = "/" if rel_root == "." else "/" + rel_root.replace("\\", "/")
223
+ lines.append(f"\n📂 {rel_root_disp}")
224
+ # sort
225
+ dirs.sort()
226
+ files.sort()
227
+ for d in dirs:
228
+ p = os.path.join(root, d)
229
+ try:
230
+ mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
231
+ except Exception:
232
+ mtime = "?"
233
+ lines.append(f" • [DIR] {d} (modified {mtime})")
234
+ total += 1
235
+ if total >= max_entries:
236
+ lines.append(f"\n… Truncated at {max_entries} entries.")
237
+ return "\n".join(lines).strip()
238
+ for f in files:
239
+ p = os.path.join(root, f)
240
+ try:
241
+ size = _fmt_size(os.path.getsize(p))
242
+ mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
243
+ except Exception:
244
+ size, mtime = "?", "?"
245
+ lines.append(f" • {f} ({size}, modified {mtime})")
246
+ total += 1
247
+ if total >= max_entries:
248
+ lines.append(f"\n… Truncated at {max_entries} entries.")
249
+ return "\n".join(lines).strip()
250
+ if not recursive:
251
+ break
252
+ header = f"Listing of {listing_display}\nRoot: {root_display}\nEntries: {total}"
253
+ return (header + "\n" + "\n".join(lines)).strip()
254
+
255
+
256
+ def _search_text(
257
+ abs_path: str,
258
+ query: str,
259
+ *,
260
+ recursive: bool,
261
+ show_hidden: bool,
262
+ max_results: int,
263
+ case_sensitive: bool,
264
+ start_index: int,
265
+ ) -> str:
266
+ if not os.path.exists(abs_path):
267
+ return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
268
+
269
+ query = query or ""
270
+ normalized_query = query if case_sensitive else query.lower()
271
+ if normalized_query == "":
272
+ return _err(
273
+ "missing_search_query",
274
+ "Search query is required for the search action.",
275
+ hint="Provide text in the Content field to search for.",
276
+ )
277
+
278
+ max_results = max(1, int(max_results) if max_results is not None else 20)
279
+ start_index = max(0, int(start_index) if start_index is not None else 0)
280
+ matches: list[tuple[str, int, str]] = []
281
+ errors: list[str] = []
282
+ files_scanned = 0
283
+ truncated = False
284
+ total_matches = 0
285
+
286
+ def _should_skip(name: str) -> bool:
287
+ return not show_hidden and name.startswith('.')
288
+
289
+ def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
290
+ nonlocal truncated, total_matches
291
+ total_matches += 1
292
+ if total_matches <= start_index:
293
+ return False
294
+ if len(matches) < max_results:
295
+ snippet = line_text.strip()
296
+ if len(snippet) > 200:
297
+ snippet = snippet[:197] + "…"
298
+ matches.append((_display_path(file_path), line_no, snippet))
299
+ return False
300
+ truncated = True
301
+ return True
302
+
303
+ def _search_file(file_path: str) -> bool:
304
+ nonlocal files_scanned
305
+ files_scanned += 1
306
+ try:
307
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
308
+ for line_no, line in enumerate(handle, start=1):
309
+ haystack = line if case_sensitive else line.lower()
310
+ if normalized_query in haystack:
311
+ if _handle_match(file_path, line_no, line):
312
+ return True
313
+ except Exception as exc:
314
+ errors.append(f"{_display_path(file_path)} ({_safe_err(exc)})")
315
+ return truncated
316
+
317
+ if os.path.isfile(abs_path):
318
+ _search_file(abs_path)
319
+ else:
320
+ for root, dirs, files in os.walk(abs_path):
321
+ dirs[:] = [d for d in dirs if not _should_skip(d)]
322
+ visible_files = [f for f in files if show_hidden or not f.startswith('.')]
323
+ for name in visible_files:
324
+ file_path = os.path.join(root, name)
325
+ if _search_file(file_path):
326
+ break
327
+ if truncated:
328
+ break
329
+ if not recursive:
330
+ break
331
+
332
+ header_lines = [
333
+ f"Search results for {query!r}",
334
+ f"Scope: {_display_path(abs_path)}",
335
+ f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
336
+ f"Start offset: {start_index}",
337
+ f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
338
+ f"Files scanned: {files_scanned}",
339
+ ]
340
+
341
+ next_cursor = start_index + len(matches) if truncated else None
342
+
343
+ if truncated:
344
+ header_lines.append(f"Matches encountered before truncation: {total_matches}")
345
+ header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
346
+ header_lines.append(f"Next cursor: {next_cursor}")
347
+ else:
348
+ header_lines.append(f"Total matches found: {total_matches}")
349
+ header_lines.append("Truncated: no — end of results.")
350
+ header_lines.append("Next cursor: None")
351
+
352
+ if not matches:
353
+ if total_matches > 0 and start_index >= total_matches:
354
+ hint_limit = max(total_matches - 1, 0)
355
+ body_lines = [
356
+ f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
357
+ (f"Try a smaller offset (≤ {hint_limit})." if hint_limit >= 0 else ""),
358
+ ]
359
+ body_lines = [line for line in body_lines if line]
360
+ else:
361
+ body_lines = [
362
+ "No matches found.",
363
+ (f"Total matches encountered: {total_matches}." if total_matches else ""),
364
+ ]
365
+ body_lines = [line for line in body_lines if line]
366
+ else:
367
+ body_lines = [f"{idx}. {path}:{line_no}: {text}" for idx, (path, line_no, text) in enumerate(matches, start=1)]
368
+
369
+ if errors:
370
+ shown = errors[:5]
371
+ body_lines.extend(["", "Warnings:"])
372
+ body_lines.extend(shown)
373
+ if len(errors) > len(shown):
374
+ body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")
375
+
376
+ return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)
377
+
378
+
379
+ def _read_file(abs_path: str, *, offset: int, max_chars: int) -> str:
380
+ if not os.path.exists(abs_path):
381
+ return _err("file_not_found", f"File not found: {_display_path(abs_path)}", path=_display_path(abs_path))
382
+ if os.path.isdir(abs_path):
383
+ return _err("is_directory", f"Path is a directory, not a file: {_display_path(abs_path)}", path=_display_path(abs_path), hint="Provide a file path.")
384
+ try:
385
+ with open(abs_path, 'r', encoding='utf-8', errors='replace') as f:
386
+ data = f.read()
387
+ except Exception as exc:
388
+ return _err("read_failed", "Failed to read file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
389
+ total = len(data)
390
+ start = max(0, min(offset, total))
391
+ if max_chars > 0:
392
+ end = min(total, start + max_chars)
393
+ else:
394
+ end = total
395
+ chunk = data[start:end]
396
+ next_cursor = end if end < total else None
397
+ meta = {
398
+ "offset": start,
399
+ "returned": len(chunk),
400
+ "total": total,
401
+ "next_cursor": next_cursor,
402
+ "path": _display_path(abs_path),
403
+ }
404
+ header = (
405
+ f"Reading {_display_path(abs_path)}\n"
406
+ f"Offset {start}, returned {len(chunk)} of {total}."
407
+ + (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
408
+ )
409
+ sep = "\n\n---\n\n"
410
+ return header + sep + chunk
411
+
412
+
413
+ def _ensure_parent(abs_path: str, create_dirs: bool) -> None:
414
+ parent = os.path.dirname(abs_path)
415
+ if parent and not os.path.exists(parent):
416
+ if create_dirs:
417
+ os.makedirs(parent, exist_ok=True)
418
+ else:
419
+ raise FileNotFoundError(f"Parent directory does not exist: {_display_path(parent)}")
420
+
421
+
422
+ def _write_file(abs_path: str, content: str, *, append: bool, create_dirs: bool) -> str:
423
+ try:
424
+ _ensure_parent(abs_path, create_dirs)
425
+ mode = 'a' if append else 'w'
426
+ with open(abs_path, mode, encoding='utf-8') as f:
427
+ f.write(content or "")
428
+ return f"{'Appended to' if append else 'Wrote'} file: {_display_path(abs_path)} (chars={len(content or '')})"
429
+ except Exception as exc:
430
+ return _err("write_failed", "Failed to write file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
431
+
432
+
433
+ def _mkdir(abs_path: str, exist_ok: bool) -> str:
434
+ try:
435
+ os.makedirs(abs_path, exist_ok=exist_ok)
436
+ return f"Created directory: {_display_path(abs_path)}"
437
+ except Exception as exc:
438
+ return _err("mkdir_failed", "Failed to create directory.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
439
+
440
+
441
+ def _move_copy(action: str, src: str, dst: str, *, overwrite: bool) -> str:
442
+ try:
443
+ if not os.path.exists(src):
444
+ return _err("source_not_found", f"Source not found: {_display_path(src)}", path=_display_path(src))
445
+ if os.path.isdir(dst):
446
+ # allow moving into an existing directory
447
+ dst_path = os.path.join(dst, os.path.basename(src))
448
+ else:
449
+ dst_path = dst
450
+ if os.path.exists(dst_path):
451
+ if overwrite:
452
+ if os.path.isdir(dst_path):
453
+ shutil.rmtree(dst_path)
454
+ else:
455
+ os.remove(dst_path)
456
+ else:
457
+ return _err(
458
+ "destination_exists",
459
+ f"Destination already exists: {_display_path(dst_path)}",
460
+ path=_display_path(dst_path),
461
+ hint="Set overwrite=True to replace the destination."
462
+ )
463
+ if action == 'move':
464
+ shutil.move(src, dst_path)
465
+ else:
466
+ if os.path.isdir(src):
467
+ shutil.copytree(src, dst_path)
468
+ else:
469
+ shutil.copy2(src, dst_path)
470
+ return f"{action.capitalize()}d: {_display_path(src)} -> {_display_path(dst_path)}"
471
+ except Exception as exc:
472
+ return _err(f"{action}_failed", f"Failed to {action}.", path=_display_path(src), data={"error": _safe_err(exc), "destination": _display_path(dst)})
473
+
474
+
475
+ def _delete(abs_path: str, *, recursive: bool) -> str:
476
+ try:
477
+ if not os.path.exists(abs_path):
478
+ return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
479
+ if os.path.isdir(abs_path):
480
+ if not recursive:
481
+ # Refuse to delete a dir unless recursive=True
482
+ return _err("requires_recursive", "Refusing to delete a directory without recursive=True", path=_display_path(abs_path), hint="Pass recursive=True to delete a directory.")
483
+ shutil.rmtree(abs_path)
484
+ else:
485
+ os.remove(abs_path)
486
+ return f"Deleted: {_display_path(abs_path)}"
487
+ except Exception as exc:
488
+ return _err("delete_failed", "Failed to delete path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
489
+
490
+
491
+ def _info(abs_path: str) -> str:
492
+ try:
493
+ st = os.stat(abs_path)
494
+ except Exception as exc:
495
+ return _err("stat_failed", "Failed to stat path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
496
+ info = {
497
+ "path": _display_path(abs_path),
498
+ "type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
499
+ "size": st.st_size,
500
+ "modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=' ', timespec='seconds'),
501
+ "created": datetime.fromtimestamp(st.st_ctime).isoformat(sep=' ', timespec='seconds'),
502
+ "mode": oct(st.st_mode),
503
+ "root": "/",
504
+ }
505
+ return json.dumps(info, indent=2)
506
+
507
+
508
+ @autodoc(summary=TOOL_SUMMARY)
509
+ def File_System(
510
+ action: Annotated[str, "Operation to perform: 'list', 'read', 'write', 'append', 'mkdir', 'move', 'copy', 'delete', 'info', 'search'."],
511
+ path: Annotated[str, "Target path, relative to root unless UNSAFE_ALLOW_ABS_PATHS=1."] = "/",
512
+ content: Annotated[Optional[str], "Content for write/append actions or search query (UTF-8)."] = None,
513
+ dest_path: Annotated[Optional[str], "Destination for move/copy (relative to root unless unsafe absolute allowed)."] = None,
514
+ recursive: Annotated[bool, "For list/search (recurse into subfolders) and delete (required for directories)."] = False,
515
+ show_hidden: Annotated[bool, "Include hidden files (dotfiles) for list/search."] = False,
516
+ max_entries: Annotated[int, "Max entries to list or matches to return (for list/search)."] = 20,
517
+ offset: Annotated[int, "Start offset for reading files (for read)."] = 0,
518
+ max_chars: Annotated[int, "Max characters to return when reading (0 = full file)."] = 4000,
519
+ create_dirs: Annotated[bool, "Create parent directories for write/append if missing."] = True,
520
+ overwrite: Annotated[bool, "Allow overwrite for move/copy destinations."] = False,
521
+ case_sensitive: Annotated[bool, "Match case when searching text."] = False,
522
+ ) -> str:
523
+ _log_call_start(
524
+ "File_System",
525
+ action=action,
526
+ path=path,
527
+ dest_path=dest_path,
528
+ recursive=recursive,
529
+ show_hidden=show_hidden,
530
+ max_entries=max_entries,
531
+ offset=offset,
532
+ max_chars=max_chars,
533
+ create_dirs=create_dirs,
534
+ overwrite=overwrite,
535
+ case_sensitive=case_sensitive,
536
+ )
537
+ action = (action or "").strip().lower()
538
+ if action not in {"list", "read", "write", "append", "mkdir", "move", "copy", "delete", "info", "search", "help"}:
539
+ result = _err(
540
+ "invalid_action",
541
+ "Invalid action.",
542
+ hint="Choose from: list, read, write, append, mkdir, move, copy, delete, info, search, help."
543
+ )
544
+ _log_call_end("File_System", _truncate_for_log(result))
545
+ return result
546
+
547
+ abs_path, err = _resolve_path(path)
548
+ if err:
549
+ _log_call_end("File_System", _truncate_for_log(err))
550
+ return err
551
+
552
+ try:
553
+ if action == "help":
554
+ result = HELP_TEXT
555
+ elif action == "list":
556
+ if not os.path.exists(abs_path):
557
+ result = _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
558
+ else:
559
+ result = _list_dir(abs_path, show_hidden=show_hidden, recursive=recursive, max_entries=max_entries)
560
+ elif action == "read":
561
+ result = _read_file(abs_path, offset=offset, max_chars=max_chars)
562
+ elif action in {"write", "append"}:
563
+ # Prevent attempts to write to root or any directory
564
+ if _display_path(abs_path) == "/" or os.path.isdir(abs_path):
565
+ result = _err(
566
+ "invalid_write_path",
567
+ "Invalid path for write/append.",
568
+ path=_display_path(abs_path),
569
+ hint="Provide a file path under / (e.g., /notes/todo.txt)."
570
+ )
571
+ else:
572
+ result = _write_file(abs_path, content or "", append=(action == "append"), create_dirs=create_dirs)
573
+ elif action == "mkdir":
574
+ result = _mkdir(abs_path, exist_ok=True)
575
+ elif action in {"move", "copy"}:
576
+ if not dest_path:
577
+ result = _err("missing_dest_path", "dest_path is required for move/copy (ignored for other actions).")
578
+ else:
579
+ abs_dst, err2 = _resolve_path(dest_path)
580
+ if err2:
581
+ result = err2
582
+ else:
583
+ result = _move_copy(action, abs_path, abs_dst, overwrite=overwrite)
584
+ elif action == "delete":
585
+ result = _delete(abs_path, recursive=recursive)
586
+ elif action == "search":
587
+ query_text = content or ""
588
+ if query_text.strip() == "":
589
+ result = _err(
590
+ "missing_search_query",
591
+ "Search query is required for the search action.",
592
+ hint="Provide text in the Content field to search for.",
593
+ )
594
+ else:
595
+ result = _search_text(
596
+ abs_path,
597
+ query_text,
598
+ recursive=recursive,
599
+ show_hidden=show_hidden,
600
+ max_results=max_entries,
601
+ case_sensitive=case_sensitive,
602
+ start_index=offset,
603
+ )
604
+ else: # info
605
+ result = _info(abs_path)
606
+ except Exception as exc:
607
+ result = _err("exception", "Unhandled error during operation.", data={"error": _safe_err(exc)})
608
+
609
+ _log_call_end("File_System", _truncate_for_log(result))
610
+ return result
611
+
612
+
613
+ def build_interface() -> gr.Interface:
614
+ return gr.Interface(
615
+ fn=File_System,
616
+ inputs=[
617
+ gr.Radio(
618
+ label="Action",
619
+ choices=["list", "read", "write", "append", "mkdir", "move", "copy", "delete", "info", "search", "help"],
620
+ value="help",
621
+ info="Operation to perform",
622
+ ),
623
+ gr.Textbox(label="Path", placeholder="/ or /src/file.txt", max_lines=1, value="/", info="Target path (relative to root)"),
624
+ gr.Textbox(label="Content", lines=6, placeholder="Text to write or search for...", info="Content for write/append actions or search query"),
625
+ gr.Textbox(label="Destination", max_lines=1, info="Destination path (Move/Copy only)"),
626
+ gr.Checkbox(label="Recursive", value=False, info="Recurse into subfolders (List/Delete/Search)"),
627
+ gr.Checkbox(label="Show hidden", value=False, info="Include hidden files (List/Search)"),
628
+ gr.Slider(minimum=10, maximum=5000, step=10, value=20, label="Max entries / matches", info="Max entries to list or matches to return (List/Search)"),
629
+ gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Read/Search)"),
630
+ gr.Slider(minimum=0, maximum=100_000, step=500, value=4000, label="Max chars", info="Max characters to return (Read, 0=all)"),
631
+ gr.Checkbox(label="Create parent dirs", value=True, info="Create parent directories if missing (Write)"),
632
+ gr.Checkbox(label="Overwrite destination", value=False, info="Allow overwrite (Move/Copy)"),
633
+ gr.Checkbox(label="Case sensitive search", value=False, info="Match case (Search)"),
634
+ ],
635
+ outputs=gr.Textbox(label="Result", lines=20),
636
+ title="File System",
637
+ description=(
638
+ "<div id=\"fs-desc\" style=\"text-align:center; overflow:hidden;\">Browse, search, and interact with a filesystem. "
639
+ "Choose an action and fill optional fields as needed."
640
+ "</div>"
641
+ ),
642
+ api_description=TOOL_SUMMARY,
643
+ flagging_mode="never",
644
+ submit_btn="Run",
645
+ )
646
+
647
+
648
+ __all__ = ["File_System", "build_interface"]
Modules/Generate_Image.py ADDED
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import uuid
5
+ import random
6
+ from typing import Annotated
7
+
8
+ import gradio as gr
9
+ from PIL import Image
10
+ from huggingface_hub import InferenceClient
11
+ from .File_System import ROOT_DIR
12
+
13
+ from app import _log_call_end, _log_call_start, _truncate_for_log
14
+ from ._docstrings import autodoc
15
+
16
+ HF_API_TOKEN = os.getenv("HF_READ_TOKEN")
17
+
18
+ # Single source of truth for the LLM-facing tool description
19
+ TOOL_SUMMARY = (
20
+ "Generate an image from a text prompt via Hugging Face serverless inference; "
21
+ "tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
22
+ "Return the generated media to the user in this format `![Alt text](URL)`."
23
+ )
24
+
25
+
26
+ @autodoc(
27
+ summary=TOOL_SUMMARY,
28
+ )
29
+ def Generate_Image(
30
+ prompt: Annotated[str, "Text description of the image to generate."],
31
+ model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., black-forest-labs/FLUX.1-Krea-dev)."] = "black-forest-labs/FLUX.1-Krea-dev",
32
+ negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
33
+ "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
34
+ "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
35
+ "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
36
+ ),
37
+ steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
38
+ cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
39
+ seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
40
+ width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
41
+ height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
42
+ sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
43
+ ) -> str:
44
+ _log_call_start(
45
+ "Generate_Image",
46
+ prompt=_truncate_for_log(prompt, 200),
47
+ model_id=model_id,
48
+ steps=steps,
49
+ cfg_scale=cfg_scale,
50
+ seed=seed,
51
+ size=f"{width}x{height}",
52
+ )
53
+ if not prompt or not prompt.strip():
54
+ _log_call_end("Generate_Image", "error=empty prompt")
55
+ raise gr.Error("Please provide a non-empty prompt.")
56
+ enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
57
+ providers = ["auto", "replicate", "fal-ai"]
58
+ last_error: Exception | None = None
59
+ for provider in providers:
60
+ try:
61
+ client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
62
+ image = client.text_to_image(
63
+ prompt=enhanced_prompt,
64
+ negative_prompt=negative_prompt,
65
+ model=model_id,
66
+ width=width,
67
+ height=height,
68
+ num_inference_steps=steps,
69
+ guidance_scale=cfg_scale,
70
+ seed=seed if seed != -1 else random.randint(1, 1_000_000_000),
71
+ )
72
+
73
+ filename = f"image_{uuid.uuid4().hex[:8]}.png"
74
+ output_path = os.path.join(ROOT_DIR, filename)
75
+ image.save(output_path)
76
+
77
+ _log_call_end("Generate_Image", f"provider={provider} size={image.size} saved_to={filename}")
78
+ return output_path
79
+ except Exception as exc: # pylint: disable=broad-except
80
+ last_error = exc
81
+ continue
82
+ msg = str(last_error) if last_error else "Unknown error"
83
+ lowered = msg.lower()
84
+ if "404" in msg:
85
+ raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and your HF token access.")
86
+ if "503" in msg:
87
+ raise gr.Error("The model is warming up. Please try again shortly.")
88
+ if "401" in msg or "403" in msg:
89
+ raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
90
+ if ("api_key" in lowered) or ("hf auth login" in lowered) or ("unauthorized" in lowered) or ("forbidden" in lowered):
91
+ raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
92
+ _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
93
+ raise gr.Error(f"Image generation failed: {msg}")
94
+
95
+
96
+ def build_interface() -> gr.Interface:
97
+ return gr.Interface(
98
+ fn=Generate_Image,
99
+ inputs=[
100
+ gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2, info="Text description of the image to generate"),
101
+ gr.Textbox(
102
+ label="Model",
103
+ value="black-forest-labs/FLUX.1-Krea-dev",
104
+ placeholder="creator/model-name",
105
+ max_lines=1,
106
+ info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
107
+ ),
108
+ gr.Textbox(
109
+ label="Negative Prompt",
110
+ value=(
111
+ "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
112
+ "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
113
+ "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
114
+ ),
115
+ lines=2,
116
+ info="What should NOT appear in the image",
117
+ ),
118
+ gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps", info="Number of denoising steps (1–100)"),
119
+ gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale", info="Classifier-free guidance scale (1–20)"),
120
+ gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility"),
121
+ gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Width", info="Output width in pixels"),
122
+ gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Height", info="Output height in pixels"),
123
+ gr.Radio(
124
+ label="Sampler",
125
+ value="DPM++ 2M Karras",
126
+ choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
127
+ info="Sampling method",
128
+ ),
129
+ ],
130
+ outputs=gr.Image(label="Generated Image"),
131
+ title="Generate Image",
132
+ description=(
133
+ "<div style=\"text-align:center\">Generate images via Hugging Face serverless inference. "
134
+ "Default model is FLUX.1-Krea-dev.</div>"
135
+ ),
136
+ api_description=TOOL_SUMMARY,
137
+ flagging_mode="never",
138
+ )
139
+
140
+
141
+ __all__ = ["Generate_Image", "build_interface"]
Modules/Generate_Speech.py ADDED
@@ -0,0 +1,677 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ from contextlib import contextmanager
7
+ from typing import Optional, Annotated
8
+ from unicodedata import normalize
9
+ import re
10
+ import uuid
11
+ import io
12
+ import wave
13
+
14
+ import numpy as np
15
+ import onnxruntime as ort
16
+ import scipy.io.wavfile
17
+ import gradio as gr
18
+
19
+ from .File_System import ROOT_DIR
20
+ from app import _log_call_end, _log_call_start, _truncate_for_log
21
+ from ._docstrings import autodoc
22
+
23
+ try:
24
+ import torch # type: ignore
25
+ except Exception: # pragma: no cover
26
+ torch = None # type: ignore
27
+
28
+ try:
29
+ from kokoro import KModel, KPipeline # type: ignore
30
+ except Exception: # pragma: no cover
31
+ KModel = None # type: ignore
32
+ KPipeline = None # type: ignore
33
+
34
+ try:
35
+ from huggingface_hub import snapshot_download, list_repo_files
36
+ except ImportError:
37
+ snapshot_download = None
38
+ list_repo_files = None
39
+
40
+
41
+ # --- Supertonic Helper Classes & Functions ---
42
+
43
+ class UnicodeProcessor:
44
+ def __init__(self, unicode_indexer_path: str):
45
+ with open(unicode_indexer_path, "r") as f:
46
+ self.indexer = json.load(f)
47
+
48
+ def _preprocess_text(self, text: str) -> str:
49
+ # TODO: add more preprocessing
50
+ text = normalize("NFKD", text)
51
+ return text
52
+
53
+ def _get_text_mask(self, text_ids_lengths: np.ndarray) -> np.ndarray:
54
+ text_mask = length_to_mask(text_ids_lengths)
55
+ return text_mask
56
+
57
+ def _text_to_unicode_values(self, text: str) -> np.ndarray:
58
+ unicode_values = np.array(
59
+ [ord(char) for char in text], dtype=np.uint16
60
+ ) # 2 bytes
61
+ return unicode_values
62
+
63
+ def __call__(self, text_list: list[str]) -> tuple[np.ndarray, np.ndarray]:
64
+ text_list = [self._preprocess_text(t) for t in text_list]
65
+ text_ids_lengths = np.array([len(text) for text in text_list], dtype=np.int64)
66
+ text_ids = np.zeros((len(text_list), text_ids_lengths.max()), dtype=np.int64)
67
+ for i, text in enumerate(text_list):
68
+ unicode_vals = self._text_to_unicode_values(text)
69
+ text_ids[i, : len(unicode_vals)] = np.array(
70
+ [self.indexer[val] for val in unicode_vals], dtype=np.int64
71
+ )
72
+ text_mask = self._get_text_mask(text_ids_lengths)
73
+ return text_ids, text_mask
74
+
75
+
76
+ class Style:
77
+ def __init__(self, style_ttl_onnx: np.ndarray, style_dp_onnx: np.ndarray):
78
+ self.ttl = style_ttl_onnx
79
+ self.dp = style_dp_onnx
80
+
81
+
82
+ class TextToSpeech:
83
+ def __init__(
84
+ self,
85
+ cfgs: dict,
86
+ text_processor: UnicodeProcessor,
87
+ dp_ort: ort.InferenceSession,
88
+ text_enc_ort: ort.InferenceSession,
89
+ vector_est_ort: ort.InferenceSession,
90
+ vocoder_ort: ort.InferenceSession,
91
+ ):
92
+ self.cfgs = cfgs
93
+ self.text_processor = text_processor
94
+ self.dp_ort = dp_ort
95
+ self.text_enc_ort = text_enc_ort
96
+ self.vector_est_ort = vector_est_ort
97
+ self.vocoder_ort = vocoder_ort
98
+ self.sample_rate = cfgs["ae"]["sample_rate"]
99
+ self.base_chunk_size = cfgs["ae"]["base_chunk_size"]
100
+ self.chunk_compress_factor = cfgs["ttl"]["chunk_compress_factor"]
101
+ self.ldim = cfgs["ttl"]["latent_dim"]
102
+
103
+ def sample_noisy_latent(
104
+ self, duration: np.ndarray
105
+ ) -> tuple[np.ndarray, np.ndarray]:
106
+ bsz = len(duration)
107
+ wav_len_max = duration.max() * self.sample_rate
108
+ wav_lengths = (duration * self.sample_rate).astype(np.int64)
109
+ chunk_size = self.base_chunk_size * self.chunk_compress_factor
110
+ latent_len = ((wav_len_max + chunk_size - 1) / chunk_size).astype(np.int32)
111
+ latent_dim = self.ldim * self.chunk_compress_factor
112
+ noisy_latent = np.random.randn(bsz, latent_dim, latent_len).astype(np.float32)
113
+ latent_mask = get_latent_mask(
114
+ wav_lengths, self.base_chunk_size, self.chunk_compress_factor
115
+ )
116
+
117
+ noisy_latent = noisy_latent * latent_mask
118
+ return noisy_latent, latent_mask
119
+
120
+ def _infer(
121
+ self, text_list: list[str], style: Style, total_step: int, speed: float = 1.05
122
+ ) -> tuple[np.ndarray, np.ndarray]:
123
+ assert (
124
+ len(text_list) == style.ttl.shape[0]
125
+ ), "Number of texts must match number of style vectors"
126
+ bsz = len(text_list)
127
+ text_ids, text_mask = self.text_processor(text_list)
128
+ dur_onnx, *_ = self.dp_ort.run(
129
+ None, {"text_ids": text_ids, "style_dp": style.dp, "text_mask": text_mask}
130
+ )
131
+ dur_onnx = dur_onnx / speed
132
+ text_emb_onnx, *_ = self.text_enc_ort.run(
133
+ None,
134
+ {"text_ids": text_ids, "style_ttl": style.ttl, "text_mask": text_mask},
135
+ ) # dur_onnx: [bsz]
136
+ xt, latent_mask = self.sample_noisy_latent(dur_onnx)
137
+ total_step_np = np.array([total_step] * bsz, dtype=np.float32)
138
+ for step in range(total_step):
139
+ current_step = np.array([step] * bsz, dtype=np.float32)
140
+ xt, *_ = self.vector_est_ort.run(
141
+ None,
142
+ {
143
+ "noisy_latent": xt,
144
+ "text_emb": text_emb_onnx,
145
+ "style_ttl": style.ttl,
146
+ "text_mask": text_mask,
147
+ "latent_mask": latent_mask,
148
+ "current_step": current_step,
149
+ "total_step": total_step_np,
150
+ },
151
+ )
152
+ wav, *_ = self.vocoder_ort.run(None, {"latent": xt})
153
+ return wav, dur_onnx
154
+
155
+ def __call__(
156
+ self,
157
+ text: str,
158
+ style: Style,
159
+ total_step: int,
160
+ speed: float = 1.05,
161
+ silence_duration: float = 0.3,
162
+ max_len: int = 300,
163
+ ) -> tuple[np.ndarray, np.ndarray]:
164
+ assert (
165
+ style.ttl.shape[0] == 1
166
+ ), "Single speaker text to speech only supports single style"
167
+ text_list = chunk_text(text, max_len=max_len)
168
+ wav_cat = None
169
+ dur_cat = None
170
+ for text in text_list:
171
+ wav, dur_onnx = self._infer([text], style, total_step, speed)
172
+ if wav_cat is None:
173
+ wav_cat = wav
174
+ dur_cat = dur_onnx
175
+ else:
176
+ silence = np.zeros(
177
+ (1, int(silence_duration * self.sample_rate)), dtype=np.float32
178
+ )
179
+ wav_cat = np.concatenate([wav_cat, silence, wav], axis=1)
180
+ dur_cat += dur_onnx + silence_duration
181
+ return wav_cat, dur_cat
182
+
183
+ def stream(
184
+ self,
185
+ text: str,
186
+ style: Style,
187
+ total_step: int,
188
+ speed: float = 1.05,
189
+ silence_duration: float = 0.3,
190
+ max_len: int = 300,
191
+ ):
192
+ assert (
193
+ style.ttl.shape[0] == 1
194
+ ), "Single speaker text to speech only supports single style"
195
+ text_list = chunk_text(text, max_len=max_len)
196
+
197
+ for i, text in enumerate(text_list):
198
+ wav, _ = self._infer([text], style, total_step, speed)
199
+ yield wav.flatten()
200
+
201
+ if i < len(text_list) - 1:
202
+ silence = np.zeros(
203
+ (int(silence_duration * self.sample_rate),), dtype=np.float32
204
+ )
205
+ yield silence
206
+
207
+ def batch(
208
+ self, text_list: list[str], style: Style, total_step: int, speed: float = 1.05
209
+ ) -> tuple[np.ndarray, np.ndarray]:
210
+ return self._infer(text_list, style, total_step, speed)
211
+
212
+
213
+ def length_to_mask(lengths: np.ndarray, max_len: Optional[int] = None) -> np.ndarray:
214
+ """
215
+ Convert lengths to binary mask.
216
+
217
+ Args:
218
+ lengths: (B,)
219
+ max_len: int
220
+
221
+ Returns:
222
+ mask: (B, 1, max_len)
223
+ """
224
+ max_len = max_len or lengths.max()
225
+ ids = np.arange(0, max_len)
226
+ mask = (ids < np.expand_dims(lengths, axis=1)).astype(np.float32)
227
+ return mask.reshape(-1, 1, max_len)
228
+
229
+
230
+ def get_latent_mask(
231
+ wav_lengths: np.ndarray, base_chunk_size: int, chunk_compress_factor: int
232
+ ) -> np.ndarray:
233
+ latent_size = base_chunk_size * chunk_compress_factor
234
+ latent_lengths = (wav_lengths + latent_size - 1) // latent_size
235
+ latent_mask = length_to_mask(latent_lengths)
236
+ return latent_mask
237
+
238
+
239
+ def load_onnx(
240
+ onnx_path: str, opts: ort.SessionOptions, providers: list[str]
241
+ ) -> ort.InferenceSession:
242
+ return ort.InferenceSession(onnx_path, sess_options=opts, providers=providers)
243
+
244
+
245
+ def load_onnx_all(
246
+ onnx_dir: str, opts: ort.SessionOptions, providers: list[str]
247
+ ) -> tuple[
248
+ ort.InferenceSession,
249
+ ort.InferenceSession,
250
+ ort.InferenceSession,
251
+ ort.InferenceSession,
252
+ ]:
253
+ dp_onnx_path = os.path.join(onnx_dir, "duration_predictor.onnx")
254
+ text_enc_onnx_path = os.path.join(onnx_dir, "text_encoder.onnx")
255
+ vector_est_onnx_path = os.path.join(onnx_dir, "vector_estimator.onnx")
256
+ vocoder_onnx_path = os.path.join(onnx_dir, "vocoder.onnx")
257
+
258
+ dp_ort = load_onnx(dp_onnx_path, opts, providers)
259
+ text_enc_ort = load_onnx(text_enc_onnx_path, opts, providers)
260
+ vector_est_ort = load_onnx(vector_est_onnx_path, opts, providers)
261
+ vocoder_ort = load_onnx(vocoder_onnx_path, opts, providers)
262
+ return dp_ort, text_enc_ort, vector_est_ort, vocoder_ort
263
+
264
+
265
+ def load_cfgs(onnx_dir: str) -> dict:
266
+ cfg_path = os.path.join(onnx_dir, "tts.json")
267
+ with open(cfg_path, "r") as f:
268
+ cfgs = json.load(f)
269
+ return cfgs
270
+
271
+
272
+ def load_text_processor(onnx_dir: str) -> UnicodeProcessor:
273
+ unicode_indexer_path = os.path.join(onnx_dir, "unicode_indexer.json")
274
+ text_processor = UnicodeProcessor(unicode_indexer_path)
275
+ return text_processor
276
+
277
+
278
+ def load_text_to_speech(onnx_dir: str, use_gpu: bool = False) -> TextToSpeech:
279
+ opts = ort.SessionOptions()
280
+ if use_gpu:
281
+ raise NotImplementedError("GPU mode is not fully tested")
282
+ else:
283
+ providers = ["CPUExecutionProvider"]
284
+ print("Using CPU for inference")
285
+ cfgs = load_cfgs(onnx_dir)
286
+ dp_ort, text_enc_ort, vector_est_ort, vocoder_ort = load_onnx_all(
287
+ onnx_dir, opts, providers
288
+ )
289
+ text_processor = load_text_processor(onnx_dir)
290
+ return TextToSpeech(
291
+ cfgs, text_processor, dp_ort, text_enc_ort, vector_est_ort, vocoder_ort
292
+ )
293
+
294
+
295
+ def load_voice_style(voice_style_paths: list[str], verbose: bool = False) -> Style:
296
+ bsz = len(voice_style_paths)
297
+
298
+ # Read first file to get dimensions
299
+ with open(voice_style_paths[0], "r") as f:
300
+ first_style = json.load(f)
301
+ ttl_dims = first_style["style_ttl"]["dims"]
302
+ dp_dims = first_style["style_dp"]["dims"]
303
+
304
+ # Pre-allocate arrays with full batch size
305
+ ttl_style = np.zeros([bsz, ttl_dims[1], ttl_dims[2]], dtype=np.float32)
306
+ dp_style = np.zeros([bsz, dp_dims[1], dp_dims[2]], dtype=np.float32)
307
+
308
+ # Fill in the data
309
+ for i, voice_style_path in enumerate(voice_style_paths):
310
+ with open(voice_style_path, "r") as f:
311
+ voice_style = json.load(f)
312
+
313
+ ttl_data = np.array(
314
+ voice_style["style_ttl"]["data"], dtype=np.float32
315
+ ).flatten()
316
+ ttl_style[i] = ttl_data.reshape(ttl_dims[1], ttl_dims[2])
317
+
318
+ dp_data = np.array(
319
+ voice_style["style_dp"]["data"], dtype=np.float32
320
+ ).flatten()
321
+ dp_style[i] = dp_data.reshape(dp_dims[1], dp_dims[2])
322
+
323
+ if verbose:
324
+ print(f"Loaded {bsz} voice styles")
325
+ return Style(ttl_style, dp_style)
326
+
327
+
328
+ @contextmanager
329
+ def timer(name: str):
330
+ start = time.time()
331
+ print(f"{name}...")
332
+ yield
333
+ print(f" -> {name} completed in {time.time() - start:.2f} sec")
334
+
335
+
336
+ def sanitize_filename(text: str, max_len: int) -> str:
337
+ """Sanitize filename by replacing non-alphanumeric characters with underscores"""
338
+ prefix = text[:max_len]
339
+ return re.sub(r"[^a-zA-Z0-9]", "_", prefix)
340
+
341
+
342
+ def chunk_text(text: str, max_len: int = 300) -> list[str]:
343
+ """
344
+ Split text into chunks by paragraphs and sentences.
345
+
346
+ Args:
347
+ text: Input text to chunk
348
+ max_len: Maximum length of each chunk (default: 300)
349
+
350
+ Returns:
351
+ List of text chunks
352
+ """
353
+ # Split by paragraph (two or more newlines)
354
+ paragraphs = [p.strip() for p in re.split(r"\n\s*\n+", text.strip()) if p.strip()]
355
+
356
+ chunks = []
357
+
358
+ for paragraph in paragraphs:
359
+ paragraph = paragraph.strip()
360
+ if not paragraph:
361
+ continue
362
+
363
+ # Split by sentence boundaries (period, question mark, exclamation mark followed by space)
364
+ # But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F.
365
+ pattern = r"(?<!Mr\.)(?<!Mrs\.)(?<!Ms\.)(?<!Dr\.)(?<!Prof\.)(?<!Sr\.)(?<!Jr\.)(?<!Ph\.D\.)(?<!etc\.)(?<!e\.g\.)(?<!i\.e\.)(?<!vs\.)(?<!Inc\.)(?<!Ltd\.)(?<!Co\.)(?<!Corp\.)(?<!St\.)(?<!Ave\.)(?<!Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+"
366
+ sentences = re.split(pattern, paragraph)
367
+
368
+ current_chunk = ""
369
+
370
+ for sentence in sentences:
371
+ if len(current_chunk) + len(sentence) + 1 <= max_len:
372
+ current_chunk += (" " if current_chunk else "") + sentence
373
+ else:
374
+ if current_chunk:
375
+ chunks.append(current_chunk.strip())
376
+ current_chunk = sentence
377
+
378
+ if current_chunk:
379
+ chunks.append(current_chunk.strip())
380
+
381
+ return chunks
382
+
383
+
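# An illustrative trace of chunk_text (not executed here): abbreviations guarded by the
# lookbehinds above do not end a chunk, so with a small max_len the split still lands on
# real sentence boundaries, e.g.
#   chunk_text("Dr. Smith arrived. He spoke for an hour.", max_len=25)
#   -> ["Dr. Smith arrived.", "He spoke for an hour."]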
384
+ # --- Main Tool Logic ---
385
+
386
+ # --- Kokoro State ---
387
+ _KOKORO_STATE = {
388
+ "initialized": False,
389
+ "device": "cpu",
390
+ "model": None,
391
+ "pipelines": {},
392
+ }
393
+
394
+ # --- Supertonic State ---
395
+ _SUPERTONIC_STATE = {
396
+ "initialized": False,
397
+ "tts": None,
398
+ "assets_dir": None,
399
+ }
400
+
401
+ def _audio_np_to_int16(audio_np: np.ndarray) -> np.ndarray:
402
+ audio_clipped = np.clip(audio_np, -1.0, 1.0)
403
+ return (audio_clipped * 32767.0).astype(np.int16)
404
+
405
+ # --- Kokoro Functions ---
406
+
407
+ def get_kokoro_voices() -> list[str]:
408
+ try:
409
+ if list_repo_files:
410
+ files = list_repo_files("hexgrad/Kokoro-82M")
411
+ voice_files = [file for file in files if file.endswith(".pt") and file.startswith("voices/")]
412
+ voices = [file.replace("voices/", "").replace(".pt", "") for file in voice_files]
413
+ return sorted(voices) if voices else _get_fallback_voices()
414
+ return _get_fallback_voices()
415
+ except Exception:
416
+ return _get_fallback_voices()
417
+
418
+
419
+ def _get_fallback_voices() -> list[str]:
420
+ return [
421
+ "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
422
+ "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa",
423
+ "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
424
+ "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
425
+ "ef_dora", "em_alex", "em_santa",
426
+ "ff_siwis",
427
+ "hf_alpha", "hf_beta", "hm_omega", "hm_psi",
428
+ "if_sara", "im_nicola",
429
+ "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo",
430
+ "pf_dora", "pm_alex", "pm_santa",
431
+ "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
432
+ "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
433
+ ]
434
+
435
+
436
+ def _init_kokoro() -> None:
437
+ if _KOKORO_STATE["initialized"]:
438
+ return
439
+ if KModel is None or KPipeline is None:
440
+ raise RuntimeError("Kokoro is not installed. Please install the 'kokoro' package (>=0.9.4).")
441
+ device = "cpu"
442
+ if torch is not None:
443
+ try:
444
+ if torch.cuda.is_available():
445
+ device = "cuda"
446
+ except Exception:
447
+ device = "cpu"
448
+ model = KModel(repo_id="hexgrad/Kokoro-82M").to(device).eval()
449
+ pipelines = {"a": KPipeline(lang_code="a", model=False, repo_id="hexgrad/Kokoro-82M")}
450
+ try:
451
+ pipelines["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
452
+ except Exception:
453
+ pass
454
+ _KOKORO_STATE.update({"initialized": True, "device": device, "model": model, "pipelines": pipelines})
455
+
456
+ # --- Supertonic Functions ---
457
+
458
+ def _init_supertonic() -> None:
459
+ if _SUPERTONIC_STATE["initialized"]:
460
+ return
461
+
462
+ if snapshot_download is None:
463
+ raise RuntimeError("huggingface_hub is not installed.")
464
+
465
+ # Use a local assets directory within Nymbo-Tools
466
+ # Assuming this file is in Nymbo-Tools/Modules
467
+ base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
468
+ assets_dir = os.path.join(base_dir, "assets", "supertonic")
469
+
470
+ if not os.path.exists(assets_dir):
471
+ print(f"Downloading Supertonic models to {assets_dir}...")
472
+ snapshot_download(repo_id="Supertone/supertonic", local_dir=assets_dir)
473
+
474
+ onnx_dir = os.path.join(assets_dir, "onnx")
475
+ tts = load_text_to_speech(onnx_dir, use_gpu=False)
476
+
477
+ _SUPERTONIC_STATE.update({"initialized": True, "tts": tts, "assets_dir": assets_dir})
478
+
479
+
480
+ def get_supertonic_voices() -> list[str]:
481
+ # We need assets to list voices. If not initialized, try to find them or init.
482
+ if not _SUPERTONIC_STATE["initialized"]:
483
+ # Check if assets exist without full init
484
+ base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
485
+ assets_dir = os.path.join(base_dir, "assets", "supertonic")
486
+ if not os.path.exists(assets_dir):
487
+ # If we can't list, return a default list or empty
488
+ return ["F1", "F2", "M1", "M2"] # Known defaults
489
+ else:
490
+ assets_dir = _SUPERTONIC_STATE["assets_dir"]
491
+
492
+ voice_styles_dir = os.path.join(assets_dir, "voice_styles")
493
+ if not os.path.exists(voice_styles_dir):
494
+ return ["F1", "F2", "M1", "M2"]
495
+
496
+ files = os.listdir(voice_styles_dir)
497
+ voices = [f.replace('.json', '') for f in files if f.endswith('.json')]
498
+ return sorted(voices)
499
+
500
+
501
+ def List_Kokoro_Voices() -> list[str]:
502
+ return get_kokoro_voices()
503
+
504
+ def List_Supertonic_Voices() -> list[str]:
505
+ return get_supertonic_voices()
506
+
507
+
508
+ # Single source of truth for the LLM-facing tool description
509
+ TOOL_SUMMARY = (
510
+ "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
511
+ "Supertonic: faster, supports steps/silence/chunking. "
512
+ "Kokoro: slower, supports many languages/accents. "
513
+ "Return the generated media to the user in this format `![Alt text](URL)`."
514
+ )
515
+
516
+
517
+ @autodoc(
518
+ summary=TOOL_SUMMARY,
519
+ )
520
+ def Generate_Speech(
521
+ text: Annotated[str, "The text to synthesize (English)."],
522
+ model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
523
+ speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
524
+ steps: Annotated[int, "Supertonic only. Diffusion steps (1-50). Higher = better quality but slower."] = 5,
525
+ voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
526
+ silence_duration: Annotated[float, "Supertonic only. Silence duration between chunks (0.0-2.0s)."] = 0.3,
527
+ max_chunk_size: Annotated[int, "Supertonic only. Max text chunk length (50-1000)."] = 300,
528
+ ) -> str:
529
+ _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
530
+
531
+ if not text or not text.strip():
532
+ _log_call_end("Generate_Speech", "error=empty text")
536
+ raise gr.Error("Please provide non-empty text to synthesize.")
537
+
538
+ model_lower = model.lower()
539
+
540
+ # Handle default voice switching if user didn't specify appropriate voice for model
541
+ if model_lower == "kokoro" and voice == "F1":
542
+ voice = "af_heart"
543
+ elif model_lower == "supertonic" and voice == "af_heart":
544
+ voice = "F1"
545
+
546
+ try:
547
+ if model_lower == "kokoro":
548
+ return _generate_kokoro(text, speed, voice)
549
+ else:
550
+ # Default to Supertonic
551
+ return _generate_supertonic(text, speed, voice, steps, silence_duration, max_chunk_size)
552
+
553
+ except gr.Error as exc:
554
+ _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
555
+ raise
556
+ except Exception as exc: # pylint: disable=broad-except
557
+ _log_call_end("Generate_Speech", f"error={str(exc)[:120]}")
558
+ raise gr.Error(f"Error during speech generation: {exc}")
559
+
560
+
561
+ def _generate_kokoro(text: str, speed: float, voice: str) -> str:
562
+ _init_kokoro()
563
+ model = _KOKORO_STATE["model"]
564
+ pipelines = _KOKORO_STATE["pipelines"]
565
+ pipeline = pipelines.get("a")
566
+ if pipeline is None:
567
+ raise gr.Error("Kokoro English pipeline not initialized.")
568
+
569
+ audio_segments = []
570
+ pack = pipeline.load_voice(voice)
571
+
572
+ segments = list(pipeline(text, voice, speed))
573
+ total_segments = len(segments)
574
+ for segment_idx, (text_chunk, ps, _) in enumerate(segments):
575
+ ref_s = pack[len(ps) - 1]
576
+ try:
577
+ audio = model(ps, ref_s, float(speed))
578
+ audio_segments.append(audio.detach().cpu().numpy())
579
+ if total_segments > 10 and (segment_idx + 1) % 5 == 0:
580
+ print(f"Progress: Generated {segment_idx + 1}/{total_segments} segments...")
581
+ except Exception as exc:
582
+ raise gr.Error(f"Error generating audio for segment {segment_idx + 1}: {exc}")
583
+
584
+ if not audio_segments:
585
+ raise gr.Error("No audio was generated (empty synthesis result).")
586
+
587
+ if len(audio_segments) == 1:
588
+ final_audio = audio_segments[0]
589
+ else:
590
+ final_audio = np.concatenate(audio_segments, axis=0)
591
+ if total_segments > 1:
592
+ duration = len(final_audio) / 24_000
593
+ print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
594
+
595
+ # Save to file
596
+ filename = f"speech_kokoro_{uuid.uuid4().hex[:8]}.wav"
597
+ output_path = os.path.join(ROOT_DIR, filename)
598
+
599
+ # Normalize to 16-bit PCM
600
+ audio_int16 = _audio_np_to_int16(final_audio)  # clip to [-1, 1] before int16 scaling
601
+ scipy.io.wavfile.write(output_path, 24000, audio_int16)
602
+
603
+ _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/24_000:.2f}")
604
+ return output_path
605
+
606
+
607
+ def _generate_supertonic(text: str, speed: float, voice: str, steps: int, silence_duration: float, max_chunk_size: int) -> str:
608
+ _init_supertonic()
609
+ tts = _SUPERTONIC_STATE["tts"]
610
+ assets_dir = _SUPERTONIC_STATE["assets_dir"]
611
+
612
+ voice_path = os.path.join(assets_dir, "voice_styles", f"{voice}.json")
613
+ if not os.path.exists(voice_path):
614
+ raise gr.Error(f"Voice style {voice} not found for Supertonic.")
618
+
619
+ style = load_voice_style([voice_path])
620
+
621
+ sr = tts.sample_rate
622
+
623
+ # Use __call__ to synthesize the full waveform for saving; it returns (wav_cat, dur_cat)
626
+
627
+ wav_cat, _ = tts(text, style, steps, speed, silence_duration, max_chunk_size)
628
+
629
+ if wav_cat is None or wav_cat.size == 0:
630
+ raise gr.Error("No audio generated.")
631
+
632
+ # wav_cat is (1, samples) float32
633
+ final_audio = wav_cat.flatten()
634
+
635
+ # Save to file
636
+ filename = f"speech_supertonic_{uuid.uuid4().hex[:8]}.wav"
637
+ output_path = os.path.join(ROOT_DIR, filename)
638
+
639
+ audio_int16 = _audio_np_to_int16(final_audio)
640
+ scipy.io.wavfile.write(output_path, sr, audio_int16)
641
+
642
+ _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/sr:.2f}")
643
+ return output_path
644
+
645
+
646
+ def build_interface() -> gr.Interface:
647
+ kokoro_voices = get_kokoro_voices()
648
+ supertonic_voices = get_supertonic_voices()
649
+ all_voices = sorted(list(set(kokoro_voices + supertonic_voices)))
650
+
651
+ return gr.Interface(
652
+ fn=Generate_Speech,
653
+ inputs=[
654
+ gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4, info="The text to synthesize (English)"),
655
+ gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic", info="The TTS model to use"),
656
+ gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed", info="Speech speed multiplier (1.0 = normal)"),
657
+ gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only: Diffusion steps (1-50)"),
658
+ gr.Dropdown(
659
+ label="Voice",
660
+ choices=all_voices,
661
+ value="F1",
662
+ info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
663
+ ),
664
+ gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only: Silence duration between chunks"),
665
+ gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only: Max text chunk length"),
666
+ ],
667
+ outputs=gr.Audio(label="Audio", type="filepath", format="wav"),
668
+ title="Generate Speech",
669
+ description=(
670
+ "<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>"
671
+ ),
672
+ api_description=TOOL_SUMMARY,
673
+ flagging_mode="never",
674
+ )
675
+
676
+
677
+ __all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]
Modules/Generate_Video.py ADDED
@@ -0,0 +1,184 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import uuid
5
+ import random
6
+ import tempfile
7
+ from typing import Annotated
8
+
9
+ import gradio as gr
10
+ from huggingface_hub import InferenceClient
11
+ from .File_System import ROOT_DIR
12
+
13
+ from app import _log_call_end, _log_call_start, _truncate_for_log
14
+ from ._docstrings import autodoc
15
+
16
+ HF_VIDEO_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
17
+
18
+ # Single source of truth for the LLM-facing tool description
19
+ TOOL_SUMMARY = (
20
+ "Generate a short MP4 video from a text prompt via Hugging Face serverless inference; "
21
+ "control model, steps, guidance, seed, size, fps, and duration; returns a temporary MP4 file path. "
22
+ "Return the generated media to the user in this format `![Alt text](URL)`."
23
+ )
24
+
25
+
26
+ def _write_video_tmp(data_iter_or_bytes: object, suffix: str = ".mp4") -> str:
27
+ filename = f"video_{uuid.uuid4().hex[:8]}{suffix}"
28
+ path = os.path.join(ROOT_DIR, filename)
29
+ try:
30
+ with open(path, "wb") as file:
31
+ if isinstance(data_iter_or_bytes, (bytes, bytearray)):
32
+ file.write(data_iter_or_bytes)
33
+ elif hasattr(data_iter_or_bytes, "read"):
34
+ file.write(data_iter_or_bytes.read())
35
+ elif hasattr(data_iter_or_bytes, "content"):
36
+ file.write(data_iter_or_bytes.content) # type: ignore[attr-defined]
37
+ elif hasattr(data_iter_or_bytes, "__iter__") and not isinstance(data_iter_or_bytes, (str, dict)):
38
+ for chunk in data_iter_or_bytes: # type: ignore[assignment]
39
+ if chunk:
40
+ file.write(chunk)
41
+ else:
42
+ raise gr.Error("Unsupported video data type returned by provider.")
43
+ except Exception:
44
+ try:
45
+ os.remove(path)
46
+ except Exception:
47
+ pass
48
+ raise
49
+ return path
50
+
51
+
52
+ @autodoc(
53
+ summary=TOOL_SUMMARY,
54
+ )
55
+ def Generate_Video(
56
+ prompt: Annotated[str, "Text description of the video to generate (e.g., 'a red fox running through a snowy forest at sunrise')."],
57
+ model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name'. Defaults to akhaliq/sora-2."] = "akhaliq/sora-2",
58
+ negative_prompt: Annotated[str, "What should NOT appear in the video."] = "",
59
+ steps: Annotated[int, "Number of denoising steps (1–100). Higher can improve quality but is slower."] = 25,
60
+ cfg_scale: Annotated[float, "Guidance scale (1–20). Higher = follow the prompt more closely, lower = more creative."] = 3.5,
61
+ seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
62
+ width: Annotated[int, "Output width in pixels (multiples of 8 recommended)."] = 768,
63
+ height: Annotated[int, "Output height in pixels (multiples of 8 recommended)."] = 768,
64
+ fps: Annotated[int, "Frames per second of the output video (e.g., 24)."] = 24,
65
+ duration: Annotated[float, "Target duration in seconds. For Sora-2, must be 4, 8, or 12."] = 4.0,
66
+ ) -> str:
67
+ _log_call_start(
68
+ "Generate_Video",
69
+ prompt=_truncate_for_log(prompt, 160),
70
+ model_id=model_id,
71
+ steps=steps,
72
+ cfg_scale=cfg_scale,
73
+ fps=fps,
74
+ duration=duration,
75
+ size=f"{width}x{height}",
76
+ )
77
+ if not prompt or not prompt.strip():
78
+ _log_call_end("Generate_Video", "error=empty prompt")
79
+ raise gr.Error("Please provide a non-empty prompt.")
80
+ providers = ["auto", "replicate", "fal-ai"]
81
+ last_error: Exception | None = None
82
+ parameters = {
83
+ "negative_prompt": negative_prompt or None,
84
+ "num_inference_steps": steps,
85
+ "guidance_scale": cfg_scale,
86
+ "seed": seed if seed != -1 else random.randint(1, 1_000_000_000),
87
+ "width": width,
88
+ "height": height,
89
+ "fps": fps,
90
+ "duration": duration,
91
+ }
92
+ for provider in providers:
93
+ try:
94
+ client = InferenceClient(api_key=HF_VIDEO_TOKEN, provider=provider)
95
+ if hasattr(client, "text_to_video"):
96
+ num_frames = int(duration * fps) if duration and fps else None
97
+ extra_body = {}
98
+ if width:
99
+ extra_body["width"] = width
100
+ if height:
101
+ extra_body["height"] = height
102
+ if fps:
103
+ extra_body["fps"] = fps
104
+ if duration:
105
+ extra_body["duration"] = duration
106
+ result = client.text_to_video(
107
+ prompt=prompt,
108
+ model=model_id,
109
+ guidance_scale=cfg_scale,
110
+ negative_prompt=[negative_prompt] if negative_prompt else None,
111
+ num_frames=num_frames,
112
+ num_inference_steps=steps,
113
+ seed=parameters["seed"],
114
+ extra_body=extra_body if extra_body else None,
115
+ )
116
+ else:
117
+ # Fallback for older clients or specific providers if needed, though InferenceClient usually has text_to_video
118
+ # Note: client.post is not available in some versions of InferenceClient
119
+ continue
120
+
121
+ path = _write_video_tmp(result, suffix=".mp4")
122
+ try:
123
+ size = os.path.getsize(path)
124
+ except Exception:
125
+ size = -1
126
+ _log_call_end("Generate_Video", f"provider={provider} path={os.path.basename(path)} bytes={size}")
127
+ return path
128
+ except KeyError as exc:
129
+ # Handle specific provider errors that manifest as KeyError (e.g. fal-ai missing 'video' key on error)
130
+ if "video" in str(exc):
131
+ last_error = ValueError(f"Provider {provider} returned an invalid response. This often happens with invalid parameters (e.g. duration must be 4, 8, or 12 for Sora-2).")
132
+ else:
133
+ last_error = exc
134
+ continue
135
+ except Exception as exc: # pylint: disable=broad-except
136
+ last_error = exc
137
+ continue
138
+ msg = str(last_error) if last_error else "Unknown error"
139
+ lowered = msg.lower()
140
+ if "404" in msg:
141
+ raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and HF token access.")
142
+ if "503" in msg:
143
+ raise gr.Error("The model is warming up. Please try again shortly.")
144
+ if "401" in msg or "403" in msg:
145
+ raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
146
+ if ("api_key" in lowered) or ("hf auth login" in lowered) or ("unauthorized" in lowered) or ("forbidden" in lowered):
147
+ raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
148
+ _log_call_end("Generate_Video", f"error={_truncate_for_log(msg, 200)}")
149
+ raise gr.Error(f"Video generation failed: {msg}")
150
+
151
+
152
+ def build_interface() -> gr.Interface:
153
+ return gr.Interface(
154
+ fn=Generate_Video,
155
+ inputs=[
156
+ gr.Textbox(label="Prompt", placeholder="Enter a prompt for the video", lines=2, info="Text description of the video to generate"),
157
+ gr.Textbox(
158
+ label="Model",
159
+ value="akhaliq/sora-2",
160
+ placeholder="creator/model-name",
161
+ max_lines=1,
162
+ info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-video&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
163
+ ),
164
+ gr.Textbox(label="Negative Prompt", value="", lines=2, info="What should NOT appear in the video"),
165
+ gr.Slider(minimum=1, maximum=100, value=25, step=1, label="Steps", info="Number of denoising steps (1–100)"),
166
+ gr.Slider(minimum=1.0, maximum=20.0, value=3.5, step=0.1, label="CFG Scale", info="Guidance scale (1–20)"),
167
+ gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility"),
168
+ gr.Slider(minimum=64, maximum=1920, value=768, step=8, label="Width", info="Output width in pixels"),
169
+ gr.Slider(minimum=64, maximum=1920, value=768, step=8, label="Height", info="Output height in pixels"),
170
+ gr.Slider(minimum=4, maximum=60, value=24, step=1, label="FPS", info="Frames per second"),
171
+ gr.Slider(minimum=1.0, maximum=10.0, value=4.0, step=0.5, label="Duration (s)", info="Target duration in seconds"),
172
+ ],
173
+ outputs=gr.Video(label="Generated Video", buttons=["download"], format="mp4"),
174
+ title="Generate Video",
175
+ description=(
176
+ "<div style=\"text-align:center\">Generate short videos via Hugging Face serverless inference. "
177
+ "Default model is Sora-2.</div>"
178
+ ),
179
+ api_description=TOOL_SUMMARY,
180
+ flagging_mode="never",
181
+ )
182
+
183
+
184
+ __all__ = ["Generate_Video", "build_interface"]
Modules/Memory_Manager.py ADDED
@@ -0,0 +1,253 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import threading
6
+ import uuid
7
+ from datetime import datetime
8
+ from typing import Annotated, Dict, List, Literal, Optional
9
+
10
+ import gradio as gr
11
+ from ._docstrings import autodoc
12
+
13
+ _MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
14
+ MEMORY_FILE = os.path.join(os.path.dirname(_MODULE_DIR), "memories.json")
15
+ _MEMORY_LOCK = threading.RLock()
16
+ _MAX_MEMORIES = 10_000
17
+
18
+
19
+ def _now_iso() -> str:
20
+ return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
21
+
22
+
23
+ def _load_memories() -> List[Dict[str, str]]:
24
+ if not os.path.exists(MEMORY_FILE):
25
+ return []
26
+ try:
27
+ with open(MEMORY_FILE, "r", encoding="utf-8") as file:
28
+ data = json.load(file)
29
+ if isinstance(data, list):
30
+ cleaned: List[Dict[str, str]] = []
31
+ for item in data:
32
+ if isinstance(item, dict) and "id" in item and "text" in item:
33
+ cleaned.append(item)
34
+ return cleaned
35
+ return []
36
+ except Exception:
37
+ try:
38
+ backup = MEMORY_FILE + ".corrupt"
39
+ if not os.path.exists(backup):
40
+ os.replace(MEMORY_FILE, backup)
41
+ except Exception:
42
+ pass
43
+ return []
44
+
45
+
46
+ def _save_memories(memories: List[Dict[str, str]]) -> None:
47
+ tmp_path = MEMORY_FILE + ".tmp"
48
+ with open(tmp_path, "w", encoding="utf-8") as file:
49
+ json.dump(memories, file, ensure_ascii=False, indent=2)
50
+ os.replace(tmp_path, MEMORY_FILE)
51
+
52
+
53
+ def _mem_save(text: str, tags: str) -> str:
54
+ text_clean = (text or "").strip()
55
+ if not text_clean:
56
+ return "Error: memory text is empty."
57
+ with _MEMORY_LOCK:
58
+ memories = _load_memories()
59
+ if memories and memories[-1].get("text") == text_clean:
60
+ return "Skipped: identical to last stored memory."
61
+ mem_id = str(uuid.uuid4())
62
+ entry = {
63
+ "id": mem_id,
64
+ "text": text_clean,
65
+ "timestamp": _now_iso(),
66
+ "tags": tags.strip(),
67
+ }
68
+ memories.append(entry)
69
+ if len(memories) > _MAX_MEMORIES:
70
+ overflow = len(memories) - _MAX_MEMORIES
71
+ memories = memories[overflow:]
72
+ _save_memories(memories)
73
+ return f"Memory saved: {mem_id}"
74
+
75
+
76
+ def _mem_list(limit: int, include_tags: bool) -> str:
77
+ limit = max(1, min(200, limit))
78
+ with _MEMORY_LOCK:
79
+ memories = _load_memories()
80
+ if not memories:
81
+ return "No memories stored yet."
82
+ chosen = memories[-limit:][::-1]
83
+ lines: List[str] = []
84
+ for memory in chosen:
85
+ base = f"{memory['id'][:8]} [{memory.get('timestamp','?')}] {memory.get('text','')}"
86
+ if include_tags and memory.get("tags"):
87
+ base += f" | tags: {memory['tags']}"
88
+ lines.append(base)
89
+ omitted = len(memories) - len(chosen)
90
+ if omitted > 0:
91
+ lines.append(f"… ({omitted} older memorie{'s' if omitted!=1 else ''} omitted; total={len(memories)})")
92
+ return "\n".join(lines)
93
+
94
+
95
+ def _parse_search_query(query: str) -> Dict[str, List[str]]:
96
+ import re
97
+
98
+ result = {"tag_terms": [], "text_terms": [], "operator": "and"}
99
+ if not query or not query.strip():
100
+ return result
101
+ query = re.sub(r"\s+", " ", query.strip())
102
+ if re.search(r"\bOR\b", query, re.IGNORECASE):
103
+ result["operator"] = "or"
104
+ parts = re.split(r"\s+OR\s+", query, flags=re.IGNORECASE)
105
+ else:
106
+ parts = re.split(r"\s+(?:AND\s+)?", query, flags=re.IGNORECASE)
107
+ parts = [p for p in parts if p.strip() and p.strip().upper() != "AND"]
108
+ for part in parts:
109
+ part = part.strip()
110
+ if not part:
111
+ continue
112
+ tag_match = re.match(r"^tag:(.+)$", part, re.IGNORECASE)
113
+ if tag_match:
114
+ tag_name = tag_match.group(1).strip()
115
+ if tag_name:
116
+ result["tag_terms"].append(tag_name.lower())
117
+ else:
118
+ result["text_terms"].append(part.lower())
119
+ return result
120
+
121
+
122
+ def _match_memory_with_query(memory: Dict[str, str], parsed_query: Dict[str, List[str]]) -> bool:
123
+ tag_terms = parsed_query["tag_terms"]
124
+ text_terms = parsed_query["text_terms"]
125
+ operator = parsed_query["operator"]
126
+ if not tag_terms and not text_terms:
127
+ return False
128
+ memory_text = memory.get("text", "").lower()
129
+ memory_tags = memory.get("tags", "").lower()
130
+ memory_tag_list = [tag.strip() for tag in memory_tags.split(",") if tag.strip()]
131
+ tag_matches = [any(tag_term in tag for tag in memory_tag_list) for tag_term in tag_terms]
132
+ combined_text = memory_text + " " + memory_tags
133
+ text_matches = [text_term in combined_text for text_term in text_terms]
134
+ all_matches = tag_matches + text_matches
135
+ if not all_matches:
136
+ return False
137
+ if operator == "or":
138
+ return any(all_matches)
139
+ return all(all_matches)
140
+
141
+
142
+ def _mem_search(query: str, limit: int) -> str:
143
+ q = (query or "").strip()
144
+ if not q:
145
+ return "Error: empty query."
146
+ parsed_query = _parse_search_query(q)
147
+ if not parsed_query["tag_terms"] and not parsed_query["text_terms"]:
148
+ return "Error: no valid search terms found."
149
+ limit = max(1, min(200, limit))
150
+ with _MEMORY_LOCK:
151
+ memories = _load_memories()
152
+ matches: List[Dict[str, str]] = []
153
+ total_matches = 0
154
+ for memory in reversed(memories):
155
+ if _match_memory_with_query(memory, parsed_query):
156
+ total_matches += 1
157
+ if len(matches) < limit:
158
+ matches.append(memory)
159
+ if not matches:
160
+ return f"No matches for: {query}"
161
+ lines = [
162
+ f"{memory['id'][:8]} [{memory.get('timestamp','?')}] {memory.get('text','')}" + (f" | tags: {memory['tags']}" if memory.get('tags') else "")
163
+ for memory in matches
164
+ ]
165
+ omitted = total_matches - len(matches)
166
+ if omitted > 0:
167
+ lines.append(f"… ({omitted} additional match{'es' if omitted!=1 else ''} omitted; total_matches={total_matches})")
168
+ return "\n".join(lines)
169
+
170
+
171
+ def _mem_delete(memory_id: str) -> str:
172
+ key = (memory_id or "").strip().lower()
173
+ if len(key) < 4:
174
+ return "Error: supply at least 4 characters of the id."
175
+ with _MEMORY_LOCK:
176
+ memories = _load_memories()
177
+ matched = [memory for memory in memories if memory["id"].lower().startswith(key)]
178
+ if not matched:
179
+ return "Memory not found."
180
+ if len(matched) > 1 and key != matched[0]["id"].lower():
181
+ sample = ", ".join(memory["id"][:8] for memory in matched[:5])
182
+ more = "…" if len(matched) > 5 else ""
183
+ return f"Ambiguous prefix (matches {len(matched)} ids: {sample}{more}). Provide more characters."
184
+ target_id = matched[0]["id"]
185
+ memories = [memory for memory in memories if memory["id"] != target_id]
186
+ _save_memories(memories)
187
+ return f"Deleted memory: {target_id}"
188
+
189
+
190
+ # Single source of truth for the LLM-facing tool description
191
+ TOOL_SUMMARY = (
192
+ "Manage short text memories (save, list, search, delete) in a local JSON store with tags and simple query language; "
193
+ "returns a result string (confirmation, listing, matches, or error)."
194
+ )
195
+
196
+
197
+ @autodoc(
198
+ summary=TOOL_SUMMARY,
199
+ )
200
+ def Memory_Manager(
201
+ action: Annotated[Literal["save", "list", "search", "delete"], "Action to perform: save | list | search | delete"],
202
+ text: Annotated[Optional[str], "Text content (Save only)"] = None,
203
+ tags: Annotated[Optional[str], "Comma-separated tags (Save only)"] = None,
204
+ query: Annotated[Optional[str], "Enhanced search with tag:name syntax, AND/OR operators (Search only)"] = None,
205
+ limit: Annotated[int, "Max results (List/Search only)"] = 20,
206
+ memory_id: Annotated[Optional[str], "Full UUID or unique prefix (Delete only)"] = None,
207
+ include_tags: Annotated[bool, "Include tags (List/Search only)"] = True,
208
+ ) -> str:
209
+ act = (action or "").lower().strip()
210
+ text = text or ""
211
+ tags = tags or ""
212
+ query = query or ""
213
+ memory_id = memory_id or ""
214
+ if act == "save":
215
+ if not text.strip():
216
+ return "Error: 'text' is required when action=save."
217
+ return _mem_save(text=text, tags=tags)
218
+ if act == "list":
219
+ return _mem_list(limit=limit, include_tags=include_tags)
220
+ if act == "search":
221
+ if not query.strip():
222
+ return "Error: 'query' is required when action=search."
223
+ return _mem_search(query=query, limit=limit)
224
+ if act == "delete":
225
+ if not memory_id.strip():
226
+ return "Error: 'memory_id' is required when action=delete."
227
+ return _mem_delete(memory_id=memory_id)
228
+ return "Error: invalid action (use save|list|search|delete)."
229
+
230
+
231
+ def build_interface() -> gr.Interface:
232
+ return gr.Interface(
233
+ fn=Memory_Manager,
234
+ inputs=[
235
+ gr.Radio(label="Action", choices=["save", "list", "search", "delete"], value="list", info="Action to perform"),
236
+ gr.Textbox(label="Text", lines=3, info="Memory text (Save only)"),
237
+ gr.Textbox(label="Tags", placeholder="tag1, tag2", max_lines=1, info="Comma-separated tags (Save only)"),
238
+ gr.Textbox(label="Query", placeholder="tag:work AND tag:project OR meeting", max_lines=1, info="Search query (Search only)"),
239
+ gr.Slider(1, 200, value=20, step=1, label="Limit", info="Max results (List/Search only)"),
240
+ gr.Textbox(label="Memory ID / Prefix", max_lines=1, info="UUID or prefix (Delete only)"),
241
+ gr.Checkbox(value=True, label="Include Tags", info="Include tags in output (List/Search only)"),
242
+ ],
243
+ outputs=gr.Textbox(label="Result", lines=14),
244
+ title="Memory Manager",
245
+ description=(
246
+ "<div style=\"text-align:center\">Lightweight local JSON memory store (no external DB). Choose an Action, fill only the relevant fields, and run.</div>"
247
+ ),
248
+ api_description=TOOL_SUMMARY,
249
+ flagging_mode="never",
250
+ )
251
+
252
+
253
+ __all__ = ["Memory_Manager", "build_interface", "_load_memories", "_save_memories"]
Modules/Obsidian_Vault.py ADDED
@@ -0,0 +1,495 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import stat
7
+ from datetime import datetime
8
+ from typing import Annotated, Optional
9
+
10
+ import gradio as gr
11
+
12
+ from app import _log_call_end, _log_call_start, _truncate_for_log
13
+ from ._docstrings import autodoc
14
+
15
+
16
+ TOOL_SUMMARY = (
17
+ "Browse and search the Obsidian vault in read-only mode. "
18
+ "Actions: list, read, info, search, help. "
19
+ "All paths resolve within the vault root. Start paths with '/' (e.g., /Notes)."
20
+ )
21
+
22
+ HELP_TEXT = (
23
+ "Obsidian Vault — actions and usage\n\n"
24
+ "Root: Tools/Obsidian (override with OBSIDIAN_VAULT_ROOT). "
25
+ "Start paths with '/' to reference the vault root (e.g., /Projects/note.md). "
26
+ "Absolute paths are disabled unless UNSAFE_ALLOW_ABS_PATHS=1.\n\n"
27
+ "Actions and fields:\n"
28
+ "- list: path='/' (default), recursive=false, show_hidden=false, max_entries=20\n"
29
+ "- read: path (e.g., /Projects/note.md), offset=0, max_chars=4000 (shows next_cursor when truncated)\n"
30
+ "- info: path\n"
31
+ "- search: path (note or folder), query text in the Search field, recursive=false, show_hidden=false, max_entries=20, case_sensitive=false, offset=0\n"
32
+ "- help: show this guide\n\n"
33
+ "Errors are returned as JSON with fields: {status:'error', code, message, path?, hint?, data?}.\n\n"
34
+ "Examples:\n"
35
+ "- list current: action=list, path='/'\n"
36
+ "- read note: action=read, path='/Projects/note.md', max_chars=500\n"
37
+ "- show metadata: action=info, path='/Inbox'\n"
38
+ "- search notes: action=search, path='/Projects', query='deadline', recursive=true, max_entries=100\n"
39
+ "- case-sensitive search: action=search, query='TODO', case_sensitive=true\n"
40
+ "- page search results: action=search, query='TODO', offset=20\n"
41
+ )
42
+
43
+
44
+ def _default_root() -> str:
45
+ env_root = os.getenv("OBSIDIAN_VAULT_ROOT")
46
+ if env_root and env_root.strip():
47
+ return os.path.abspath(os.path.expanduser(env_root.strip()))
48
+ try:
49
+ here = os.path.abspath(__file__)
50
+ tools_dir = os.path.dirname(os.path.dirname(here))
51
+ return os.path.abspath(os.path.join(tools_dir, "Obsidian"))
52
+ except Exception:
53
+ return os.path.abspath(os.getcwd())
54
+
55
+
56
+ ROOT_DIR = _default_root()
57
+ try:
58
+ os.makedirs(ROOT_DIR, exist_ok=True)
59
+ except Exception:
60
+ pass
61
+ ALLOW_ABS = bool(int(os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0")))
62
+
63
+
64
+ def _safe_err(exc: Exception | str) -> str:
65
+ """Return an error string with any absolute root replaced by '/' and slashes normalized."""
66
+ s = str(exc)
67
+ s_norm = s.replace("\\", "/")
68
+ root_fwd = ROOT_DIR.replace("\\", "/")
69
+ root_variants = {ROOT_DIR, root_fwd, re.sub(r"/+", "/", root_fwd)}
70
+ for variant in root_variants:
71
+ if variant:
72
+ s_norm = s_norm.replace(variant, "/")
73
+ s_norm = re.sub(r"/+", "/", s_norm)
74
+ return s_norm
75
+
76
+
77
+ def _err(code: str, message: str, *, path: str | None = None, hint: str | None = None, data: dict | None = None) -> str:
78
+ payload = {
79
+ "status": "error",
80
+ "code": code,
81
+ "message": message,
82
+ "root": "/",
83
+ }
84
+ if path:
85
+ payload["path"] = path
86
+ if hint:
87
+ payload["hint"] = hint
88
+ if data:
89
+ payload["data"] = data
90
+ return json.dumps(payload, ensure_ascii=False)
91
+
92
+
93
+ def _display_path(abs_path: str) -> str:
94
+ try:
95
+ norm_root = os.path.normpath(ROOT_DIR)
96
+ norm_abs = os.path.normpath(abs_path)
97
+ common = os.path.commonpath([norm_root, norm_abs])
98
+ if os.path.normcase(common) == os.path.normcase(norm_root):
99
+ rel = os.path.relpath(norm_abs, norm_root)
100
+ if rel == ".":
101
+ return "/"
102
+ return "/" + rel.replace("\\", "/")
103
+ except Exception:
104
+ pass
105
+ return abs_path.replace("\\", "/")
106
+
107
+
108
+ def _resolve_path(path: str) -> tuple[str, str]:
109
+ try:
110
+ user_input = (path or "/").strip() or "/"
111
+ if user_input.startswith("/"):
112
+ rel_part = user_input.lstrip("/") or "."
113
+ raw = os.path.expanduser(rel_part)
114
+ treat_as_relative = True
115
+ else:
116
+ raw = os.path.expanduser(user_input)
117
+ treat_as_relative = False
118
+
119
+ if not treat_as_relative and os.path.isabs(raw):
120
+ if not ALLOW_ABS:
121
+ return "", _err(
122
+ "absolute_path_disabled",
123
+ "Absolute paths are disabled in safe mode.",
124
+ path=raw.replace("\\", "/"),
125
+ hint="Use a path relative to / (e.g., /Notes/index.md).",
126
+ )
127
+ abs_path = os.path.abspath(raw)
128
+ else:
129
+ abs_path = os.path.abspath(os.path.join(ROOT_DIR, raw))
130
+ if not ALLOW_ABS:
131
+ try:
132
+ common = os.path.commonpath([os.path.normpath(ROOT_DIR), os.path.normpath(abs_path)])
133
+ except Exception:
134
+ root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
135
+ abs_cmp = os.path.normcase(os.path.normpath(abs_path))
136
+ if not abs_cmp.startswith(root_cmp):
137
+ return "", _err(
138
+ "path_outside_root",
139
+ "Path not allowed outside root.",
140
+ path=user_input.replace("\\", "/"),
141
+ hint="Use a path under / (the vault root).",
142
+ )
143
+ else:
144
+ root_cmp = os.path.normcase(os.path.normpath(ROOT_DIR))
145
+ common_cmp = os.path.normcase(os.path.normpath(common))
146
+ if common_cmp != root_cmp:
147
+ return "", _err(
148
+ "path_outside_root",
149
+ "Path not allowed outside root.",
150
+ path=user_input.replace("\\", "/"),
151
+ hint="Use a path under / (the vault root).",
152
+ )
153
+ return abs_path, ""
154
+ except Exception as exc:
155
+ return "", _err(
156
+ "resolve_path_failed",
157
+ "Failed to resolve path.",
158
+ path=(path or ""),
159
+ data={"error": _safe_err(exc)},
160
+ )
161
+
162
+
163
+ def _fmt_size(num_bytes: int) -> str:
164
+ units = ["B", "KB", "MB", "GB", "TB"]
165
+ size = float(num_bytes)
166
+ for unit in units:
167
+ if size < 1024.0:
168
+ return f"{size:.1f} {unit}"
169
+ size /= 1024.0
170
+ return f"{size:.1f} PB"
171
+
172
+
173
+ def _list_dir(abs_path: str, *, show_hidden: bool, recursive: bool, max_entries: int) -> str:
174
+ lines: list[str] = []
175
+ total = 0
176
+ listing_display = _display_path(abs_path)
177
+ for root, dirs, files in os.walk(abs_path):
178
+ if not show_hidden:
179
+ dirs[:] = [d for d in dirs if not d.startswith('.')]
180
+ files = [f for f in files if not f.startswith('.')]
181
+ try:
182
+ rel_root = os.path.relpath(root, ROOT_DIR)
183
+ except Exception:
184
+ rel_root = root
185
+ rel_root_disp = "/" if rel_root == "." else "/" + rel_root.replace("\\", "/")
186
+ lines.append(f"\n📂 {rel_root_disp}")
187
+ dirs.sort()
188
+ files.sort()
189
+ for d in dirs:
190
+ p = os.path.join(root, d)
191
+ try:
192
+ mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
193
+ except Exception:
194
+ mtime = "?"
195
+ lines.append(f" • [DIR] {d} (modified {mtime})")
196
+ total += 1
197
+ if total >= max_entries:
198
+ lines.append(f"\n… Truncated at {max_entries} entries.")
199
+ return "\n".join(lines).strip()
200
+ for f in files:
201
+ p = os.path.join(root, f)
202
+ try:
203
+ size = _fmt_size(os.path.getsize(p))
204
+ mtime = datetime.fromtimestamp(os.path.getmtime(p)).isoformat(sep=' ', timespec='seconds')
205
+ except Exception:
206
+ size, mtime = "?", "?"
207
+ lines.append(f" • {f} ({size}, modified {mtime})")
208
+ total += 1
209
+ if total >= max_entries:
210
+ lines.append(f"\n… Truncated at {max_entries} entries.")
211
+ return "\n".join(lines).strip()
212
+ if not recursive:
213
+ break
214
+ header = f"Listing of {listing_display}\nRoot: /\nEntries: {total}"
215
+ return (header + "\n" + "\n".join(lines)).strip()
216
+
217
+
218
+ def _search_text(
219
+ abs_path: str,
220
+ query: str,
221
+ *,
222
+ recursive: bool,
223
+ show_hidden: bool,
224
+ max_results: int,
225
+ case_sensitive: bool,
226
+ start_index: int,
227
+ ) -> str:
228
+ if not os.path.exists(abs_path):
229
+ return _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
230
+
231
+ query = query or ""
232
+ normalized_query = query if case_sensitive else query.lower()
233
+ if normalized_query == "":
234
+ return _err(
235
+ "missing_search_query",
236
+ "Search query is required for the search action.",
237
+ hint="Provide text in the Search field to look for.",
238
+ )
239
+
240
+ max_results = max(1, int(max_results) if max_results is not None else 20)
241
+ start_index = max(0, int(start_index) if start_index is not None else 0)
242
+ matches: list[tuple[str, int, str]] = []
243
+ errors: list[str] = []
244
+ files_scanned = 0
245
+ truncated = False
246
+ total_matches = 0
247
+
248
+ def _should_skip(name: str) -> bool:
249
+ return not show_hidden and name.startswith('.')
250
+
251
+ def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
252
+ nonlocal truncated, total_matches
253
+ total_matches += 1
254
+ if total_matches <= start_index:
255
+ return False
256
+ if len(matches) < max_results:
257
+ snippet = line_text.strip()
258
+ if len(snippet) > 200:
259
+ snippet = snippet[:197] + "…"
260
+ matches.append((_display_path(file_path), line_no, snippet))
261
+ return False
262
+ truncated = True
263
+ return True
264
+
265
+ def _search_file(file_path: str) -> bool:
266
+ nonlocal files_scanned
267
+ files_scanned += 1
268
+ try:
269
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
270
+ for line_no, line in enumerate(handle, start=1):
271
+ haystack = line if case_sensitive else line.lower()
272
+ if normalized_query in haystack:
273
+ if _handle_match(file_path, line_no, line):
274
+ return True
275
+ except Exception as exc:
276
+ errors.append(f"{_display_path(file_path)} ({_safe_err(exc)})")
277
+ return truncated
278
+
279
+ if os.path.isfile(abs_path):
280
+ _search_file(abs_path)
281
+ else:
282
+ for root, dirs, files in os.walk(abs_path):
283
+ dirs[:] = [d for d in dirs if not _should_skip(d)]
284
+ visible_files = [f for f in files if show_hidden or not f.startswith('.')]
285
+ for name in visible_files:
286
+ file_path = os.path.join(root, name)
287
+ if _search_file(file_path):
288
+ break
289
+ if truncated:
290
+ break
291
+ if not recursive:
292
+ break
293
+
294
+ header_lines = [
295
+ f"Search results for {query!r}",
296
+ f"Scope: {_display_path(abs_path)}",
297
+ f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
298
+ f"Start offset: {start_index}",
299
+ f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
300
+ f"Files scanned: {files_scanned}",
301
+ ]
302
+
303
+ next_cursor = start_index + len(matches) if truncated else None
304
+
305
+ if truncated:
306
+ header_lines.append(f"Matches encountered before truncation: {total_matches}")
307
+ header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
308
+ header_lines.append(f"Next cursor: {next_cursor}")
309
+ else:
310
+ header_lines.append(f"Total matches found: {total_matches}")
311
+ header_lines.append("Truncated: no — end of results.")
312
+ header_lines.append("Next cursor: None")
313
+
314
+ if not matches:
315
+ if total_matches > 0 and start_index >= total_matches:
316
+ hint_limit = max(total_matches - 1, 0)
317
+ body_lines = [
318
+ f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
319
+ (f"Try a smaller offset (≤ {hint_limit})." if hint_limit >= 0 else ""),
320
+ ]
321
+ body_lines = [line for line in body_lines if line]
322
+ else:
323
+ body_lines = [
324
+ "No matches found.",
325
+ (f"Total matches encountered: {total_matches}." if total_matches else ""),
326
+ ]
327
+ body_lines = [line for line in body_lines if line]
328
+ else:
329
+ body_lines = [f"{idx}. {path}:{line_no}: {text}" for idx, (path, line_no, text) in enumerate(matches, start=1)]
330
+
331
+ if errors:
332
+ shown = errors[:5]
333
+ body_lines.extend(["", "Warnings:"])
334
+ body_lines.extend(shown)
335
+ if len(errors) > len(shown):
336
+ body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")
337
+
338
+ return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)
339
+
340
+
341
+ def _read_file(abs_path: str, *, offset: int, max_chars: int) -> str:
342
+ if not os.path.exists(abs_path):
343
+ return _err("file_not_found", f"File not found: {_display_path(abs_path)}", path=_display_path(abs_path))
344
+ if os.path.isdir(abs_path):
345
+ return _err(
346
+ "is_directory",
347
+ f"Path is a directory, not a file: {_display_path(abs_path)}",
348
+ path=_display_path(abs_path),
349
+ hint="Provide a file path.",
350
+ )
351
+ try:
352
+ with open(abs_path, 'r', encoding='utf-8', errors='replace') as f:
353
+ data = f.read()
354
+ except Exception as exc:
355
+ return _err("read_failed", "Failed to read file.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
356
+ total = len(data)
357
+ start = max(0, min(offset, total))
358
+ end = total if max_chars <= 0 else min(total, start + max_chars)
359
+ chunk = data[start:end]
360
+ next_cursor = end if end < total else None
361
+ header = (
362
+ f"Reading {_display_path(abs_path)}\n"
363
+ f"Offset {start}, returned {len(chunk)} of {total}."
364
+ + (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
365
+ )
366
+ return header + "\n\n---\n\n" + chunk
367
+
368
+
369
+ def _info(abs_path: str) -> str:
370
+ try:
371
+ st = os.stat(abs_path)
372
+ except Exception as exc:
373
+ return _err("stat_failed", "Failed to stat path.", path=_display_path(abs_path), data={"error": _safe_err(exc)})
374
+ info = {
375
+ "path": _display_path(abs_path),
376
+ "type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
377
+ "size": st.st_size,
378
+ "modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=' ', timespec='seconds'),
379
+ "created": datetime.fromtimestamp(st.st_ctime).isoformat(sep=' ', timespec='seconds'),
380
+ "mode": oct(st.st_mode),
381
+ "root": "/",
382
+ }
383
+ return json.dumps(info, indent=2)
384
+
385
+
386
+ @autodoc(summary=TOOL_SUMMARY)
387
+ def Obsidian_Vault(
388
+ action: Annotated[str, "Operation to perform: 'list', 'read', 'info', 'search', 'help'."],
389
+ path: Annotated[str, "Target path, relative to the vault root." ] = "/",
390
+ query: Annotated[Optional[str], "Text to search for when action=search."] = None,
391
+ recursive: Annotated[bool, "Recurse into subfolders when listing/searching."] = False,
392
+ show_hidden: Annotated[bool, "Include hidden files when listing/searching."] = False,
393
+ max_entries: Annotated[int, "Max entries to list or matches to return (for list/search)."] = 20,
394
+ offset: Annotated[int, "Start offset when reading files."] = 0,
395
+ max_chars: Annotated[int, "Max characters to return when reading (0 = full file)."] = 4000,
396
+ case_sensitive: Annotated[bool, "Match case when searching text."] = False,
397
+ ) -> str:
398
+ _log_call_start(
399
+ "Obsidian_Vault",
400
+ action=action,
401
+ path=path,
402
+ query=query,
403
+ recursive=recursive,
404
+ show_hidden=show_hidden,
405
+ max_entries=max_entries,
406
+ offset=offset,
407
+ max_chars=max_chars,
408
+ case_sensitive=case_sensitive,
409
+ )
410
+ action = (action or "").strip().lower()
411
+ if action not in {"list", "read", "info", "search", "help"}:
412
+ result = _err(
413
+ "invalid_action",
414
+ "Invalid action.",
415
+ hint="Choose from: list, read, info, search, help.",
416
+ )
417
+ _log_call_end("Obsidian_Vault", _truncate_for_log(result))
418
+ return result
419
+
420
+ if action == "help":
421
+ result = HELP_TEXT
422
+ _log_call_end("Obsidian_Vault", _truncate_for_log(result))
423
+ return result
424
+
425
+ abs_path, err = _resolve_path(path)
426
+ if err:
427
+ _log_call_end("Obsidian_Vault", _truncate_for_log(err))
428
+ return err
429
+
430
+ try:
431
+ if action == "list":
432
+ if not os.path.exists(abs_path):
433
+ result = _err("path_not_found", f"Path not found: {_display_path(abs_path)}", path=_display_path(abs_path))
434
+ else:
435
+ result = _list_dir(abs_path, show_hidden=show_hidden, recursive=recursive, max_entries=max_entries)
436
+ elif action == "read":
437
+ result = _read_file(abs_path, offset=offset, max_chars=max_chars)
438
+ elif action == "search":
439
+ query_text = query or ""
440
+ if query_text.strip() == "":
441
+ result = _err(
442
+ "missing_search_query",
443
+ "Search query is required for the search action.",
444
+ hint="Provide text in the Search field to look for.",
445
+ )
446
+ else:
447
+ result = _search_text(
448
+ abs_path,
449
+ query_text,
450
+ recursive=recursive,
451
+ show_hidden=show_hidden,
452
+ max_results=max_entries,
453
+ case_sensitive=case_sensitive,
454
+ start_index=offset,
455
+ )
456
+ else: # info
457
+ result = _info(abs_path)
458
+ except Exception as exc:
459
+ result = _err("exception", "Unhandled error during operation.", data={"error": _safe_err(exc)})
460
+
461
+ _log_call_end("Obsidian_Vault", _truncate_for_log(result))
462
+ return result
463
+
464
+
465
+ def build_interface() -> gr.Interface:
466
+ return gr.Interface(
467
+ fn=Obsidian_Vault,
468
+ inputs=[
469
+ gr.Radio(
470
+ label="Action",
471
+ choices=["list", "read", "info", "search", "help"],
472
+ value="help",
473
+ info="Operation to perform",
474
+ ),
475
+ gr.Textbox(label="Path", placeholder="/ or /Notes/todo.md", max_lines=1, value="/", info="Target path (relative to vault root)"),
476
+ gr.Textbox(label="Search text", lines=3, placeholder="Text to search for...", info="Text to search for (Search only)"),
477
+ gr.Checkbox(label="Recursive", value=False, info="Recurse into subfolders (List/Search)"),
478
+ gr.Checkbox(label="Show hidden", value=False, info="Include hidden files (List/Search)"),
479
+ gr.Slider(minimum=10, maximum=5000, step=10, value=20, label="Max entries / matches", info="Max entries to list or matches to return (List/Search)"),
480
+ gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Read/Search)"),
481
+ gr.Slider(minimum=0, maximum=100_000, step=500, value=4000, label="Max chars", info="Max characters to return (Read, 0=all)"),
482
+ gr.Checkbox(label="Case sensitive search", value=False, info="Match case (Search)"),
483
+ ],
484
+ outputs=gr.Textbox(label="Result", lines=20),
485
+ title="Obsidian Vault",
486
+ description=(
487
+ "<div style=\"text-align:center; overflow:hidden;\">Explore and search notes in the vault without modifying them." "</div>"
488
+ ),
489
+ api_description=TOOL_SUMMARY,
490
+ flagging_mode="never",
491
+ submit_btn="Run",
492
+ )
493
+
494
+
495
+ __all__ = ["Obsidian_Vault", "build_interface"]
Modules/Shell_Command.py ADDED
@@ -0,0 +1,139 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import platform
5
+ import shlex
6
+ import subprocess
7
+ from typing import Annotated
8
+
9
+ import gradio as gr
10
+
11
+ from app import _log_call_end, _log_call_start, _truncate_for_log
12
+ from ._docstrings import autodoc
13
+ from .File_System import _resolve_path, ROOT_DIR, _display_path
14
+ import shutil
15
+
16
+
17
+ def _detect_shell(prefer_powershell: bool = True) -> tuple[list[str], str]:
18
+ """
19
+ Pick an appropriate shell for the host OS.
20
+ - Windows: use PowerShell by default, fall back to cmd.exe.
21
+ - POSIX: use /bin/bash if available, else /bin/sh.
22
+ Returns (shell_cmd_prefix, shell_name) where shell_cmd_prefix is the command list to launch the shell.
23
+ """
24
+ system = platform.system().lower()
25
+ if system == "windows":
26
+ if prefer_powershell:
27
+ pwsh = shutil.which("pwsh")
28
+ candidates = [pwsh, shutil.which("powershell"), shutil.which("powershell.exe")]
29
+ for cand in candidates:
30
+ if cand:
31
+ return [cand, "-NoLogo", "-NoProfile", "-Command"], "powershell"
32
+ # Fallback to cmd
33
+ comspec = os.environ.get("ComSpec", r"C:\\Windows\\System32\\cmd.exe")
34
+ return [comspec, "/C"], "cmd"
35
+ # POSIX
36
+ bash = shutil.which("bash")
37
+ if bash:
38
+ return [bash, "-lc"], "bash"
39
+ sh = os.environ.get("SHELL", "/bin/sh")
40
+ return [sh, "-lc"], "sh"
41
+
42
+
43
+ # Detect shell at import time for docs/UI purposes
44
+ _DETECTED_SHELL_PREFIX, _DETECTED_SHELL_NAME = _detect_shell()
45
+
46
+
47
+ # Clarify path semantics and expose detected shell in summary
48
+ TOOL_SUMMARY = (
49
+ "Execute a shell command within a safe working directory under the tool root ('/'). "
50
+ "Paths must be relative to '/'. "
51
+ "Set workdir to '.' to use the root. "
52
+ "Absolute paths are disabled."
53
+ f"Detected shell: {_DETECTED_SHELL_NAME}."
54
+ )
55
+
56
+
57
+ def _run_command(command: str, cwd: str, timeout: int) -> tuple[str, str, int]:
58
+ shell_prefix, shell_name = _detect_shell()
59
+ full_cmd = shell_prefix + [command]
60
+ try:
61
+ proc = subprocess.run(
62
+ full_cmd,
63
+ cwd=cwd,
64
+ stdout=subprocess.PIPE,
65
+ stderr=subprocess.PIPE,
66
+ text=True,
67
+ encoding="utf-8",
68
+ errors="replace",
69
+ timeout=timeout if timeout and timeout > 0 else None,
70
+ )
71
+ return proc.stdout, proc.stderr, proc.returncode
72
+ except subprocess.TimeoutExpired as exc:
73
+ return exc.stdout or "", (exc.stderr or "") + "\n[timeout]", 124
74
+ except Exception as exc:
75
+ return "", f"Execution failed: {exc}", 1
76
+
77
+
78
+ @autodoc(summary=TOOL_SUMMARY)
79
+ def Shell_Command(
80
+ command: Annotated[str, "Shell command to execute. Accepts multi-part pipelines as a single string."],
81
+ workdir: Annotated[str, "Working directory (relative to root unless UNSAFE_ALLOW_ABS_PATHS=1)."] = ".",
82
+ timeout: Annotated[int, "Timeout in seconds (0 = no timeout, be careful on public hosting)."] = 60,
83
+ ) -> str:
84
+ _log_call_start("Shell_Command", command=command, workdir=workdir, timeout=timeout)
85
+ if not command or not command.strip():
86
+ result = "No command provided."
87
+ _log_call_end("Shell_Command", _truncate_for_log(result))
88
+ return result
89
+
90
+ abs_cwd, err = _resolve_path(workdir)
91
+ if err:
92
+ _log_call_end("Shell_Command", _truncate_for_log(err))
93
+ return err
94
+ if not os.path.exists(abs_cwd):
95
+ result = f"Working directory not found: {abs_cwd}"
96
+ _log_call_end("Shell_Command", _truncate_for_log(result))
97
+ return result
98
+
99
+ # Capture shell used for transparency
100
+ _, shell_name = _detect_shell()
101
+ stdout, stderr, code = _run_command(command, cwd=abs_cwd, timeout=timeout)
102
+ display_cwd = _display_path(abs_cwd)
103
+ header = (
104
+ f"Command: {command}\n"
105
+ f"CWD: {display_cwd}\n"
106
+ f"Root: /\n"
107
+ f"Shell: {shell_name}\n"
108
+ f"Exit code: {code}\n"
109
+ f"--- STDOUT ---\n"
110
+ )
111
+ output = header + (stdout or "<empty>") + "\n--- STDERR ---\n" + (stderr or "<empty>")
112
+ _log_call_end("Shell_Command", _truncate_for_log(f"exit={code} stdout={len(stdout)} stderr={len(stderr)}"))
113
+ return output
114
+
115
+
116
+ def build_interface() -> gr.Interface:
117
+ return gr.Interface(
118
+ fn=Shell_Command,
119
+ inputs=[
120
+ gr.Textbox(label="Command", placeholder="echo hello || dir", lines=2, info="Shell command to execute"),
121
+ gr.Textbox(label="Workdir", value=".", max_lines=1, info="Working directory (relative to root)"),
122
+ gr.Slider(minimum=0, maximum=600, step=5, value=60, label="Timeout (seconds)", info="Timeout in seconds (0 = no timeout)"),
123
+ ],
124
+ outputs=gr.Textbox(label="Output", lines=20),
125
+ title="Shell Command",
126
+ description=(
127
+ "<div style=\"text-align:center; overflow:hidden;\">"
128
+ "Run a shell command under the same safe root as File System. "
129
+ "Absolute paths are disabled, use relative paths. "
130
+ f"Detected shell: {_DETECTED_SHELL_NAME}. "
131
+ "</div>"
132
+ ),
133
+ api_description=TOOL_SUMMARY,
134
+ flagging_mode="never",
135
+ submit_btn="Run",
136
+ )
137
+
138
+
139
+ __all__ = ["Shell_Command", "build_interface"]
Modules/Web_Fetch.py ADDED
@@ -0,0 +1,317 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Annotated, Dict, Literal, Tuple
5
+ from urllib.parse import urlparse, urljoin
6
+
7
+ import gradio as gr
8
+ import requests
9
+ from bs4 import BeautifulSoup
10
+ from markdownify import markdownify as md
11
+ from readability import Document
12
+
13
+ from app import _fetch_rate_limiter, _log_call_end, _log_call_start, _truncate_for_log
14
+ from ._docstrings import autodoc
15
+
16
+
17
+ # Single source of truth for the LLM-facing tool description
18
+ TOOL_SUMMARY = (
19
+ "Fetch a webpage and return clean Markdown, raw HTML, or a list of links, with max length and pagination via "
20
+ "offset; if truncated, the output includes a notice with next_cursor for exact continuation."
21
+ )
22
+
23
+ ModeOption = Literal["markdown", "html", "url_scraper"]
24
+
25
+
26
+ def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
27
+ headers = {
28
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
29
+ "Accept-Language": "en-US,en;q=0.9",
30
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
31
+ "Accept-Encoding": "gzip, deflate, br",
32
+ "DNT": "1",
33
+ "Connection": "keep-alive",
34
+ "Upgrade-Insecure-Requests": "1",
35
+ }
36
+ if not skip_rate_limit:
37
+ _fetch_rate_limiter.acquire()
38
+ try:
39
+ response = requests.get(
40
+ url,
41
+ headers=headers,
42
+ timeout=timeout,
43
+ allow_redirects=True,
44
+ stream=False,
45
+ )
46
+ response.raise_for_status()
47
+ return response
48
+ except requests.exceptions.Timeout as exc:
49
+ raise requests.exceptions.RequestException("Request timed out. The webpage took too long to respond.") from exc
50
+ except requests.exceptions.ConnectionError as exc:
51
+ raise requests.exceptions.RequestException("Connection error. Please check the URL and your internet connection.") from exc
52
+ except requests.exceptions.HTTPError as exc:
53
+ if response.status_code == 403:
54
+ raise requests.exceptions.RequestException("Access forbidden. The website may be blocking automated requests.") from exc
55
+ if response.status_code == 404:
56
+ raise requests.exceptions.RequestException("Page not found. Please check the URL.") from exc
57
+ if response.status_code == 429:
58
+ raise requests.exceptions.RequestException("Rate limited. Please try again in a few minutes.") from exc
59
+ raise requests.exceptions.RequestException(f"HTTP error {response.status_code}: {exc}") from exc
60
+
61
+
62
+ def _normalize_whitespace(text: str) -> str:
63
+ text = re.sub(r"[ \t\u00A0]+", " ", text)
64
+ text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text.strip())
65
+ return text.strip()
66
+
67
+
68
+ def _truncate(text: str, max_chars: int) -> Tuple[str, bool]:
69
+ if max_chars is None or max_chars <= 0 or len(text) <= max_chars:
70
+ return text, False
71
+ return text[:max_chars].rstrip() + " …", True
72
+
73
+
74
+ def _shorten(text: str, limit: int) -> str:
75
+ if limit <= 0 or len(text) <= limit:
76
+ return text
77
+ return text[: max(0, limit - 1)].rstrip() + "…"
78
+
79
+
80
+ def _domain_of(url: str) -> str:
81
+ try:
82
+ return urlparse(url).netloc or ""
83
+ except Exception:
84
+ return ""
85
+
86
+
87
+ def _normalize_mode(mode: str | None) -> ModeOption:
88
+ """Convert UI-supplied labels into canonical mode values."""
89
+ if not mode:
90
+ return "markdown"
91
+ normalized = mode.strip().lower()
92
+ normalized = normalized.replace("-", "_").replace(" ", "_")
93
+ if normalized in {"markdown", "markdown_mode", "md"}:
94
+ return "markdown"
95
+ if normalized in {"html", "html_mode"}:
96
+ return "html"
97
+ if normalized in {"url_scraper", "urlscraper", "url_mode", "scraper", "links", "link_mode"}:
98
+ return "url_scraper"
99
+ return "markdown"
100
+
101
+
102
+ def _extract_links_from_soup(soup: BeautifulSoup, base_url: str) -> str:
103
+ links = []
104
+ for link in soup.find_all("a", href=True):
105
+ href = link.get("href")
106
+ text = link.get_text(strip=True)
107
+ if href.startswith("http"):
108
+ full_url = href
109
+ elif href.startswith("//"):
110
+ full_url = "https:" + href
111
+ elif href.startswith("/"):
112
+ full_url = urljoin(base_url, href)
113
+ else:
114
+ full_url = urljoin(base_url, href)
115
+ if text and href not in ["#", "javascript:void(0)"]:
116
+ links.append(f"- [{text}]({full_url})")
117
+ if not links:
118
+ return "No links found on this page."
119
+ title = soup.find("title")
120
+ title_text = title.get_text(strip=True) if title else "Links from webpage"
121
+ return f"# {title_text}\n\n" + "\n".join(links)
122
+
123
+
124
+ def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str, strip_selectors: str = "") -> str:
125
+ if strip_selectors:
126
+ selectors = [s.strip() for s in strip_selectors.split(",") if s.strip()]
127
+ for selector in selectors:
128
+ try:
129
+ for element in full_soup.select(selector):
130
+ element.decompose()
131
+ except Exception:
132
+ continue
133
+ for element in full_soup.select("script, style, nav, footer, header, aside"):
134
+ element.decompose()
135
+ main = (
136
+ full_soup.find("main")
137
+ or full_soup.find("article")
138
+ or full_soup.find("div", class_=re.compile(r"content|main|post|article", re.I))
139
+ or full_soup.find("body")
140
+ )
141
+ if not main:
142
+ return "No main content found on the webpage."
143
+ markdown_text = md(str(main), heading_style="ATX")
144
+ markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text)
145
+ markdown_text = re.sub(r"\[\s*\]\([^)]*\)", "", markdown_text)
146
+ markdown_text = re.sub(r"[ \t]+", " ", markdown_text)
147
+ markdown_text = markdown_text.strip()
148
+ title = full_soup.find("title")
149
+ if title and title.get_text(strip=True):
150
+ markdown_text = f"# {title.get_text(strip=True)}\n\n{markdown_text}"
151
+ return markdown_text or "No content could be extracted."
152
+
153
+
154
+ def _truncate_with_notice(content: str, max_chars: int) -> Tuple[str, Dict[str, object]]:
155
+ total_chars = len(content)
156
+ if total_chars <= max_chars:
157
+ return content, {
158
+ "truncated": False,
159
+ "returned_chars": total_chars,
160
+ "total_chars_estimate": total_chars,
161
+ "next_cursor": None,
162
+ }
163
+ truncated = content[:max_chars]
164
+ last_paragraph = truncated.rfind("\n\n")
165
+ if last_paragraph > max_chars * 0.7:
166
+ truncated = truncated[:last_paragraph]
167
+ cursor_pos = last_paragraph
168
+ elif "." in truncated[-100:]:
169
+ last_period = truncated.rfind(".")
170
+ if last_period > max_chars * 0.8:
171
+ truncated = truncated[: last_period + 1]
172
+ cursor_pos = last_period + 1
173
+ else:
174
+ cursor_pos = len(truncated)
175
+ else:
176
+ cursor_pos = len(truncated)
177
+ metadata = {
178
+ "truncated": True,
179
+ "returned_chars": len(truncated),
180
+ "total_chars_estimate": total_chars,
181
+ "next_cursor": cursor_pos,
182
+ }
183
+ truncated = truncated.rstrip()
184
+ truncation_notice = (
185
+ "\n\n---\n"
186
+ f"**Content Truncated:** Showing {metadata['returned_chars']:,} of {metadata['total_chars_estimate']:,} characters "
187
+ f"({(metadata['returned_chars']/metadata['total_chars_estimate']*100):.1f}%)\n"
188
+ f"**Next cursor:** {metadata['next_cursor']} (use this value with offset parameter for continuation)\n"
189
+ "---"
190
+ )
191
+ return truncated + truncation_notice, metadata
192
+
193
+
194
+ @autodoc(summary=TOOL_SUMMARY)
195
+ def Web_Fetch(
196
+ url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
197
+ max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
198
+ offset: Annotated[int, "Character offset to start from (for pagination, use next_cursor from previous call)."] = 0,
199
+ strip_selectors: Annotated[str, "CSS selectors to remove (comma-separated, e.g., '.header, .footer, nav')."] = "",
200
+ mode: Annotated[
201
+ str,
202
+ "Output mode: 'markdown' (default, clean content), 'html' (raw response), or 'url_scraper' (links list).",
203
+ ] = "markdown",
204
+ ) -> str:
205
+ canonical_mode = _normalize_mode(mode)
206
+ _log_call_start(
207
+ "Web_Fetch",
208
+ url=url,
209
+ max_chars=max_chars,
210
+ strip_selectors=strip_selectors,
211
+ mode=canonical_mode,
212
+ offset=offset,
213
+ )
214
+ if not url or not url.strip():
215
+ result = "Please enter a valid URL."
216
+ _log_call_end("Web_Fetch", _truncate_for_log(result))
217
+ return result
218
+ try:
219
+ resp = _http_get_enhanced(url)
220
+ resp.raise_for_status()
221
+ except requests.exceptions.RequestException as exc:
222
+ result = f"An error occurred: {exc}"
223
+ _log_call_end("Web_Fetch", _truncate_for_log(result))
224
+ return result
225
+ final_url = str(resp.url)
226
+ ctype = resp.headers.get("Content-Type", "")
227
+ if "html" not in ctype.lower():
228
+ result = f"Unsupported content type for extraction: {ctype or 'unknown'}"
229
+ _log_call_end("Web_Fetch", _truncate_for_log(result))
230
+ return result
231
+ resp.encoding = resp.encoding or resp.apparent_encoding
232
+ html = resp.text
233
+ full_soup = BeautifulSoup(html, "lxml")
234
+ if canonical_mode == "html":
235
+ _log_call_end("Web_Fetch", f"chars={len(html)}, mode={canonical_mode}, offset=0 (ignored)")
236
+ return html
237
+ if canonical_mode == "markdown":
238
+ full_result = _fullpage_markdown_from_soup(full_soup, final_url, strip_selectors)
239
+ elif canonical_mode == "url_scraper":
240
+ full_result = _extract_links_from_soup(full_soup, final_url)
241
+ else:
242
+ full_result = html
243
+
244
+ if offset > 0:
245
+ if offset >= len(full_result):
246
+ result = (
247
+ f"Offset {offset} exceeds content length ({len(full_result)} characters). "
248
+ f"Content ends at position {len(full_result)}."
249
+ )
250
+ _log_call_end("Web_Fetch", _truncate_for_log(result))
251
+ return result
252
+ result = full_result[offset:]
253
+ else:
254
+ result = full_result
255
+
256
+ if max_chars > 0 and len(result) > max_chars:
257
+ result, metadata = _truncate_with_notice(result, max_chars)
258
+ if offset > 0 and metadata.get("next_cursor"):
+ metadata["total_chars_estimate"] = len(full_result)
+ # Make the cursor absolute and patch the notice so the reported value can be
+ # passed straight back as `offset` on the next call.
+ absolute_cursor = offset + metadata["next_cursor"]
+ result = result.replace(f"**Next cursor:** {metadata['next_cursor']}", f"**Next cursor:** {absolute_cursor}", 1)
+ metadata["next_cursor"] = absolute_cursor
261
+
262
+ _log_call_end("Web_Fetch", f"chars={len(result)}, mode={canonical_mode}, offset={offset}")
263
+ return result
264
+
265
+
266
+ def build_interface() -> gr.Interface:
267
+ return gr.Interface(
268
+ fn=Web_Fetch,
269
+ inputs=[
270
+ gr.Textbox(label="URL", placeholder="https://example.com/article", max_lines=1),
271
+ gr.Slider(
272
+ minimum=0,
273
+ maximum=10000,
274
+ value=3000,
275
+ step=100,
276
+ label="Max Characters",
277
+ info="0 = no limit (full page), default 3000",
278
+ ),
279
+ gr.Slider(
280
+ minimum=0,
281
+ maximum=100000,
282
+ value=0,
283
+ step=100,
284
+ label="Offset",
285
+ info="Character offset to start from (use next_cursor from previous call for pagination)",
286
+ ),
287
+ gr.Textbox(
288
+ label="Strip Selectors",
289
+ placeholder=".header, .footer, nav, .sidebar",
290
+ value="",
291
+ max_lines=1,
292
+ info="CSS selectors to remove (comma-separated)",
293
+ ),
294
+ gr.Radio(
295
+ label="Mode",
296
+ choices=["Markdown Mode", "HTML Mode", "URL Scraper"],
297
+ value="Markdown Mode",
298
+ info="Markdown cleans content, HTML returns raw response, URL Scraper lists links.",
299
+ ),
300
+ ],
301
+ outputs=gr.Markdown(label="Extracted Content"),
302
+ title="Web Fetch",
303
+ description=(
304
+ "<div style=\"text-align:center\">Convert any webpage to Markdown, inspect the raw HTML response, or "
305
+ "extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
306
+ ),
307
+ api_description=TOOL_SUMMARY,
308
+ flagging_mode="never",
309
+ )
310
+
311
+
312
+ __all__ = [
313
+ "Web_Fetch",
314
+ "build_interface",
315
+ "_http_get_enhanced",
316
+ "_fullpage_markdown_from_soup",
317
+ ]
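A minimal pagination sketch built on the cursor convention above. The URL is a placeholder, and parsing the cursor out of the Markdown truncation notice is an assumption about its exact wording:

```python
import re
from Modules.Web_Fetch import Web_Fetch

offset = 0
while True:
    chunk = Web_Fetch(url="https://example.com/article", max_chars=3000, offset=offset)
    print(chunk)
    cursor = re.search(r"\*\*Next cursor:\*\* (\d+)", chunk)
    if not cursor:
        break  # no truncation notice -> end of content (or an error message)
    offset = int(cursor.group(1))
```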
Modules/Web_Search.py ADDED
@@ -0,0 +1,499 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Annotated, List
4
+ from datetime import datetime
5
+
6
+ import gradio as gr
7
+ from ddgs import DDGS
8
+
9
+ from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
10
+ from ._docstrings import autodoc
11
+
12
+
13
+ # Single source of truth for the LLM-facing tool description
14
+ TOOL_SUMMARY = (
15
+ "Run a DuckDuckGo-backed search across text, news, images, videos, or books. "
16
+ "Readable results include pagination hints and next_offset when more results are available; "
17
+ "Use in combination with `Web_Fetch` to navigate the web."
18
+ )
19
+
20
+
21
+ _SAFESEARCH_LEVEL = "off"
22
+
23
+ # Defaults and choices for newly added parameters
24
+ BACKEND_CHOICES = [
25
+ "auto",
26
+ "duckduckgo",
27
+ "bing",
28
+ "brave",
29
+ "yahoo",
30
+ "wikipedia",
31
+ ]
32
+
33
+ # Allowed backends per type (explicit selection set)
34
+ _ALLOWED_BACKENDS = {
35
+ "text": ["duckduckgo", "bing", "brave", "yahoo", "wikipedia"],
36
+ "news": ["duckduckgo", "bing", "yahoo"],
37
+ "images": ["duckduckgo"],
38
+ "videos": ["duckduckgo"],
39
+ "books": ["annasarchive"],
40
+ }
41
+
42
+ # Auto order per type (used when backend == "auto"); wikipedia excluded for text
43
+ _AUTO_ORDER = {
44
+ "text": ["duckduckgo", "bing", "brave", "yahoo"],
45
+ "news": ["duckduckgo", "bing", "yahoo"],
46
+ "images": ["duckduckgo"],
47
+ "videos": ["duckduckgo"],
48
+ "books": ["annasarchive"],
49
+ }
50
+
51
+ # Date filter choices: canonical values used by resolver
52
+ DATE_FILTER_CHOICES = ["any", "day", "week", "month", "year"]
53
+
54
+
55
+ def _resolve_backend(search_type: str, backend_choice: str) -> str:
56
+ """Resolve backend string for DDGS based on search type and user choice.
57
+
58
+ - If backend_choice is "auto", return a comma-separated fallback order for that type.
59
+ - If backend_choice is not supported by the type, fall back to the first allowed backend.
60
+ - Books endpoint uses only 'annasarchive'.
61
+ """
62
+ stype = search_type if search_type in _ALLOWED_BACKENDS else "text"
63
+ allowed = _ALLOWED_BACKENDS[stype]
64
+ if backend_choice == "auto":
65
+ return ", ".join(_AUTO_ORDER[stype])
66
+ if stype == "books":
67
+ return "annasarchive"
68
+ # Validate backend against allowed set for this type
69
+ if backend_choice in allowed:
70
+ return backend_choice
71
+ # Fallback to first allowed backend
72
+ return allowed[0]
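Illustrative outcomes of the resolver above (a sketch; the values follow from the allowed/auto tables defined earlier):

```python
# Illustrative only — expected behavior of _resolve_backend:
_resolve_backend("text", "auto")       # -> "duckduckgo, bing, brave, yahoo"
_resolve_backend("news", "wikipedia")  # unsupported for news -> falls back to "duckduckgo"
_resolve_backend("books", "bing")      # books always use -> "annasarchive"
```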
73
+
74
+
75
+ def _resolve_timelimit(date_filter: str, search_type: str) -> str | None:
76
+ """Map UI date filter to DDGS timelimit code per endpoint.
77
+
78
+ Returns one of: None, 'd', 'w', 'm', 'y'. For news/videos (which support d/w/m),
79
+ selecting 'year' will coerce to 'm' to stay within supported range.
80
+ """
81
+ normalized = (date_filter or "any").strip().lower()
82
+ if normalized in ("any", "none", ""):
83
+ return None
84
+ mapping = {
85
+ "day": "d",
86
+ "week": "w",
87
+ "month": "m",
88
+ "year": "y",
89
+ }
90
+ code = mapping.get(normalized)
91
+ if not code:
92
+ return None
93
+ if search_type in ("news", "videos") and code == "y":
94
+ return "m"
95
+ return code
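And the corresponding time-limit mapping (sketch, derived from the code above):

```python
# Illustrative only — expected behavior of _resolve_timelimit:
_resolve_timelimit("any", "text")   # -> None
_resolve_timelimit("week", "text")  # -> "w"
_resolve_timelimit("year", "news")  # -> "m" (year is coerced to month for news/videos)
```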
96
+
97
+
98
+ def _extract_date_from_snippet(snippet: str) -> str:
99
+ if not snippet:
100
+ return ""
101
+ import re
102
+
103
+ date_patterns = [
104
+ r"\b(\d{4}[-/]\d{1,2}[-/]\d{1,2})\b",
105
+ r"\b([A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})\b",
106
+ r"\b(\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})\b",
107
+ r"\b(\d+\s+(?:day|week|month|year)s?\s+ago)\b",
108
+ r"(?:Published|Updated|Posted):\s*([^,\n]+?)(?:[,\n]|$)",
109
+ ]
110
+ for pattern in date_patterns:
111
+ matches = re.findall(pattern, snippet, re.IGNORECASE)
112
+ if matches:
113
+ return matches[0].strip()
114
+ return ""
115
+
116
+
117
+ def _format_search_result(result: dict, search_type: str, index: int) -> List[str]:
118
+ lines: List[str] = []
119
+ if search_type == "text":
120
+ title = result.get("title", "").strip()
121
+ url = result.get("href", "").strip()
122
+ snippet = result.get("body", "").strip()
123
+ date = _extract_date_from_snippet(snippet)
124
+ lines.append(f"{index}. {title}")
125
+ lines.append(f" URL: {url}")
126
+ if snippet:
127
+ lines.append(f" Summary: {snippet}")
128
+ if date:
129
+ lines.append(f" Date: {date}")
130
+ elif search_type == "news":
131
+ title = result.get("title", "").strip()
132
+ url = result.get("url", "").strip()
133
+ body = result.get("body", "").strip()
134
+ date = result.get("date", "").strip()
135
+ source = result.get("source", "").strip()
136
+ lines.append(f"{index}. {title}")
137
+ lines.append(f" URL: {url}")
138
+ if source:
139
+ lines.append(f" Source: {source}")
140
+ if date:
141
+ lines.append(f" Date: {date}")
142
+ if body:
143
+ lines.append(f" Summary: {body}")
144
+ elif search_type == "images":
145
+ title = result.get("title", "").strip()
146
+ image_url = result.get("image", "").strip()
147
+ source_url = result.get("url", "").strip()
148
+ source = result.get("source", "").strip()
149
+ width = result.get("width", "")
150
+ height = result.get("height", "")
151
+ lines.append(f"{index}. {title}")
152
+ lines.append(f" Image: {image_url}")
153
+ lines.append(f" Source: {source_url}")
154
+ if source:
155
+ lines.append(f" Publisher: {source}")
156
+ if width and height:
157
+ lines.append(f" Dimensions: {width}x{height}")
158
+ elif search_type == "videos":
159
+ title = result.get("title", "").strip()
160
+ description = result.get("description", "").strip()
161
+ duration = result.get("duration", "").strip()
162
+ published = result.get("published", "").strip()
163
+ uploader = result.get("uploader", "").strip()
164
+ embed_url = result.get("embed_url", "").strip()
165
+ lines.append(f"{index}. {title}")
166
+ if embed_url:
167
+ lines.append(f" Video: {embed_url}")
168
+ if uploader:
169
+ lines.append(f" Uploader: {uploader}")
170
+ if duration:
171
+ lines.append(f" Duration: {duration}")
172
+ if published:
173
+ lines.append(f" Published: {published}")
174
+ if description:
175
+ lines.append(f" Description: {description}")
176
+ elif search_type == "books":
177
+ title = result.get("title", "").strip()
178
+ url = result.get("url", "").strip()
179
+ body = result.get("body", "").strip()
180
+ lines.append(f"{index}. {title}")
181
+ lines.append(f" URL: {url}")
182
+ if body:
183
+ lines.append(f" Description: {body}")
184
+ return lines
185
+
186
+
187
+ @autodoc(
188
+ summary=TOOL_SUMMARY,
189
+ )
190
+ def Web_Search(
191
+ query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
192
+ max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
193
+ page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
194
+ offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
195
+ search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
196
+ backend: Annotated[str, "Search backend or ordered fallbacks. Use 'auto' for recommended order."] = "auto",
197
+ date_filter: Annotated[str, "Time filter: any, day, week, month, year."] = "any",
198
+ ) -> str:
199
+ _log_call_start(
200
+ "Web_Search",
201
+ query=query,
202
+ max_results=max_results,
203
+ page=page,
204
+ search_type=search_type,
205
+ offset=offset,
206
+ backend=backend,
207
+ date_filter=date_filter,
208
+ )
209
+ if not query or not query.strip():
210
+ result = "No search query provided. Please enter a search term."
211
+ _log_call_end("Web_Search", _truncate_for_log(result))
212
+ return result
213
+ max_results = max(1, min(20, max_results))
214
+ page = max(1, page)
215
+ offset = max(0, offset)
216
+ valid_types = ["text", "news", "images", "videos", "books"]
217
+ if search_type not in valid_types:
218
+ search_type = "text"
219
+ if offset > 0:
220
+ actual_offset = offset
221
+ calculated_page = (offset // max_results) + 1
222
+ else:
223
+ actual_offset = (page - 1) * max_results
224
+ calculated_page = page
225
+ total_needed = actual_offset + max_results
226
+ used_fallback = False
227
+ original_search_type = search_type
228
+ # Prepare cross-cutting parameters
229
+ resolved_backend = _resolve_backend(search_type, (backend or "auto").lower())
230
+ timelimit = _resolve_timelimit(date_filter, search_type)
231
+
232
+ def _perform_search(stype: str) -> list[dict]:
233
+ try:
234
+ _search_rate_limiter.acquire()
235
+ with DDGS() as ddgs:
236
+ if stype == "text":
237
+ user_backend_choice = (backend or "auto").lower()
238
+ if user_backend_choice == "auto":
239
+ # Custom auto: DDG first, then append other engines
240
+ results: list[dict] = []
241
+ seen: set[str] = set()
242
+
243
+ def add_unique(items: list[dict], key_field: str) -> None:
244
+ for it in items or []:
245
+ url = (it.get(key_field, "") or "").strip()
246
+ if url and url not in seen:
247
+ seen.add(url)
248
+ results.append(it)
249
+
250
+ # First: duckduckgo
251
+ try:
252
+ ddg_items = list(
253
+ ddgs.text(
254
+ query,
255
+ max_results=total_needed + 10,
256
+ safesearch=_SAFESEARCH_LEVEL,
257
+ timelimit=timelimit,
258
+ backend="duckduckgo",
259
+ )
260
+ )
261
+ except Exception:
262
+ ddg_items = []
263
+ add_unique(ddg_items, "href")
264
+
265
+ # Then: other engines appended (excluding duckduckgo)
266
+ for eng in [b for b in _AUTO_ORDER["text"] if b != "duckduckgo"]:
267
+ try:
268
+ extra = list(
269
+ ddgs.text(
270
+ query,
271
+ max_results=total_needed + 10,
272
+ safesearch=_SAFESEARCH_LEVEL,
273
+ timelimit=timelimit,
274
+ backend=eng,
275
+ )
276
+ )
277
+ except Exception:
278
+ extra = []
279
+ add_unique(extra, "href")
280
+
281
+ return results
282
+ else:
283
+ raw_gen = ddgs.text(
284
+ query,
285
+ max_results=total_needed + 10,
286
+ safesearch=_SAFESEARCH_LEVEL,
287
+ timelimit=timelimit,
288
+ backend=resolved_backend,
289
+ )
290
+ elif stype == "news":
291
+ user_backend_choice = (backend or "auto").lower()
292
+ if user_backend_choice == "auto":
293
+ # Custom auto: DDG first, then append other engines
294
+ results: list[dict] = []
295
+ seen: set[str] = set()
296
+
297
+ def add_unique(items: list[dict], key_field: str) -> None:
298
+ for it in items or []:
299
+ url = (it.get(key_field, "") or "").strip()
300
+ if url and url not in seen:
301
+ seen.add(url)
302
+ results.append(it)
303
+
304
+ # First: duckduckgo news
305
+ try:
306
+ ddg_news = list(
307
+ ddgs.news(
308
+ query,
309
+ max_results=total_needed + 10,
310
+ safesearch=_SAFESEARCH_LEVEL,
311
+ timelimit=timelimit,
312
+ backend="duckduckgo",
313
+ )
314
+ )
315
+ except Exception:
316
+ ddg_news = []
317
+ add_unique(ddg_news, "url")
318
+
319
+ # Then: other news engines appended
320
+ for eng in [b for b in _AUTO_ORDER["news"] if b != "duckduckgo"]:
321
+ try:
322
+ extra = list(
323
+ ddgs.news(
324
+ query,
325
+ max_results=total_needed + 10,
326
+ safesearch=_SAFESEARCH_LEVEL,
327
+ timelimit=timelimit,
328
+ backend=eng,
329
+ )
330
+ )
331
+ except Exception:
332
+ extra = []
333
+ add_unique(extra, "url")
334
+
335
+ return results
336
+ else:
337
+ raw_gen = ddgs.news(
338
+ query,
339
+ max_results=total_needed + 10,
340
+ safesearch=_SAFESEARCH_LEVEL,
341
+ timelimit=timelimit,
342
+ backend=_resolve_backend("news", (backend or "auto").lower()),
343
+ )
344
+ elif stype == "images":
345
+ raw_gen = ddgs.images(
346
+ query,
347
+ max_results=total_needed + 10,
348
+ safesearch=_SAFESEARCH_LEVEL,
349
+ timelimit=timelimit,
350
+ backend=_resolve_backend("images", (backend or "auto").lower()),
351
+ )
352
+ elif stype == "videos":
353
+ raw_gen = ddgs.videos(
354
+ query,
355
+ max_results=total_needed + 10,
356
+ safesearch=_SAFESEARCH_LEVEL,
357
+ timelimit=timelimit,
358
+ backend=_resolve_backend("videos", (backend or "auto").lower()),
359
+ )
360
+ else:
361
+ raw_gen = ddgs.books(
362
+ query,
363
+ max_results=total_needed + 10,
364
+ backend=_resolve_backend("books", (backend or "auto").lower()),
365
+ )
366
+ try:
367
+ return list(raw_gen)
368
+ except Exception as inner_exc:
369
+ if "no results" in str(inner_exc).lower() or "not found" in str(inner_exc).lower():
370
+ return []
371
+ raise inner_exc
372
+ except Exception as exc:
373
+ error_msg = f"Search failed: {str(exc)[:200]}"
374
+ lowered = str(exc).lower()
375
+ if "blocked" in lowered or "rate" in lowered:
376
+ error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
377
+ elif "timeout" in lowered:
378
+ error_msg = "Search timed out. Please try again with a simpler query."
379
+ elif "network" in lowered or "connection" in lowered:
380
+ error_msg = "Network connection error. Please check your internet connection and try again."
381
+ elif "no results" in lowered or "not found" in lowered:
382
+ return []
383
+ raise Exception(error_msg)
384
+
385
+ try:
386
+ raw = _perform_search(search_type)
387
+ except Exception as exc:
388
+ result = f"Error: {exc}"
389
+ _log_call_end("Web_Search", _truncate_for_log(result))
390
+ return result
391
+
392
+ if not raw and search_type == "news":
393
+ try:
394
+ raw = _perform_search("text")
395
+ if raw:
396
+ used_fallback = True
397
+ search_type = "text"
398
+ except Exception:
399
+ pass
400
+
401
+ if not raw:
402
+ fallback_note = " (also tried 'text' search as fallback)" if original_search_type == "news" else ""  # used_fallback is always False on this no-results path
403
+ result = f"No {original_search_type} results found for query: {query}{fallback_note}"
404
+ _log_call_end("Web_Search", _truncate_for_log(result))
405
+ return result
406
+
407
+ paginated_results = raw[actual_offset: actual_offset + max_results]
408
+ if not paginated_results:
409
+ if actual_offset >= len(raw):
410
+ result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
411
+ else:
412
+ result = f"No {original_search_type} results found on page {calculated_page} for query: {query}. Try page 1 or reduce page number."
413
+ _log_call_end("Web_Search", _truncate_for_log(result))
414
+ return result
415
+
416
+ total_available = len(raw)
417
+ start_num = actual_offset + 1
418
+ end_num = actual_offset + len(paginated_results)
419
+ next_offset = actual_offset + len(paginated_results)
420
+ search_label = original_search_type.title()
421
+ if used_fallback:
422
+ search_label += " → Text (Smart Fallback)"
423
+
424
+ now_dt = datetime.now().astimezone()
425
+ date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
426
+ if not date_str:
427
+ date_str = now_dt.isoformat()
428
+
429
+ pagination_info = f"Page {calculated_page}"
430
+ if offset > 0:
431
+ pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
432
+ lines = [f"Current Date: {date_str}", f"{search_label} search results for: {query}"]
433
+ if used_fallback:
434
+ lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
435
+ lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
436
+ for i, result in enumerate(paginated_results, start_num):
437
+ result_lines = _format_search_result(result, search_type, i)
438
+ lines.extend(result_lines)
439
+ lines.append("")
440
+ if total_available > end_num:
441
+ lines.append("💡 More results available:")
442
+ lines.append(f" • Next page: page={calculated_page + 1}")
443
+ lines.append(f" • Next offset: offset={next_offset}")
444
+ lines.append(f" • Use offset={next_offset} to continue exactly from result {next_offset + 1}")
445
+ result = "\n".join(lines)
446
+ search_info = f"type={original_search_type}"
447
+ if used_fallback:
448
+ search_info += "→text"
449
+ _log_call_end("Web_Search", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
450
+ return result
451
+
452
+
453
+ def build_interface() -> gr.Interface:
454
+ return gr.Interface(
455
+ fn=Web_Search,
456
+ inputs=[
457
+ gr.Textbox(label="Query", placeholder="topic OR site:example.com", max_lines=1, info="The search query"),
458
+ gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results", info="Number of results to return (1–20)"),
459
+ gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)"),
460
+ gr.Slider(
461
+ minimum=0,
462
+ maximum=1000,
463
+ value=0,
464
+ step=1,
465
+ label="Offset",
466
+ info="Result offset to start from (overrides page if > 0, use next_offset from previous search)",
467
+ ),
468
+ gr.Radio(
469
+ label="Search Type",
470
+ choices=["text", "news", "images", "videos", "books"],
471
+ value="text",
472
+ info="Type of content to search for",
473
+ ),
474
+ gr.Radio(
475
+ label="Backend",
476
+ choices=BACKEND_CHOICES,
477
+ value="auto",
478
+ info="Search engine backend or fallback order (auto applies recommended order)",
479
+ ),
480
+ gr.Radio(
481
+ label="Date filter",
482
+ choices=DATE_FILTER_CHOICES,
483
+ value="any",
484
+ info="Limit results to: day, week, month, or year (varies by type)",
485
+ ),
486
+ ],
487
+ outputs=gr.Textbox(label="Search Results", interactive=False, lines=20, max_lines=20),
488
+ title="Web Search",
489
+ description=(
490
+ "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and flexible pagination. "
491
+ "Supports text, news, images, videos, and books. Features smart fallback for news searches and precise offset control.</div>"
492
+ ),
493
+ api_description=TOOL_SUMMARY,
494
+ flagging_mode="never",
495
+ submit_btn="Search",
496
+ )
497
+
498
+
499
+ __all__ = ["Web_Search", "build_interface"]
Modules/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Gradio tool modules bundled for Nymbo-Tools."""
Modules/_docstrings.py ADDED
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ from typing import Any, Annotated, get_args, get_origin, get_type_hints
5
+
6
+
7
+ def _typename(tp: Any) -> str:
8
+ """Return a readable type name from a type or annotation."""
9
+ try:
10
+ if hasattr(tp, "__name__"):
11
+ return tp.__name__ # e.g. int, str
12
+ if getattr(tp, "__module__", None) and getattr(tp, "__qualname__", None):
13
+ return f"{tp.__module__}.{tp.__qualname__}"
14
+ return str(tp).replace("typing.", "")
15
+ except Exception:
16
+ return str(tp)
17
+
18
+
19
+ def _extract_base_and_meta(annotation: Any) -> tuple[Any, str | None]:
20
+ """Given an annotation, return (base_type, first string metadata) if Annotated, else (annotation, None)."""
21
+ try:
22
+ if get_origin(annotation) is Annotated:
23
+ args = get_args(annotation)
24
+ base = args[0] if args else annotation
25
+ # Grab the first string metadata if present
26
+ for meta in args[1:]:
27
+ if isinstance(meta, str):
28
+ return base, meta
29
+ return base, None
30
+ return annotation, None
31
+ except Exception:
32
+ return annotation, None
33
+
34
+
35
+ def autodoc(summary: str | None = None, returns: str | None = None, *, force: bool = False):
36
+ """
37
+ Decorator that auto-generates a concise Google-style docstring from a function's
38
+ type hints and Annotated metadata. Useful for Gradio MCP where docstrings are
39
+ used for tool descriptions and parameter docs.
40
+
41
+ Args:
42
+ summary: Optional one-line summary for the function. If not provided,
43
+ will generate a simple sentence from the function name.
44
+ returns: Optional return value description. If not provided, only the
45
+ return type will be listed (if available).
46
+ force: When True, overwrite an existing docstring. Default False.
47
+
48
+ Returns:
49
+ The original function with its __doc__ populated (unless skipped).
50
+ """
51
+
52
+ def decorator(func):
53
+ # Skip if docstring already present and not forcing
54
+ if not force and func.__doc__ and func.__doc__.strip():
55
+ return func
56
+
57
+ try:
58
+ # include_extras=True to retain Annotated metadata
59
+ hints = get_type_hints(func, include_extras=True, globalns=getattr(func, "__globals__", None))
60
+ except Exception:
61
+ hints = {}
62
+
63
+ sig = inspect.signature(func)
64
+
65
+ lines: list[str] = []
66
+ # Summary line
67
+ if summary and summary.strip():
68
+ lines.append(summary.strip())
69
+ else:
70
+ pretty = func.__name__.replace("_", " ").strip().capitalize()
71
+ if not pretty.endswith("."):
72
+ pretty += "."
73
+ lines.append(pretty)
74
+
75
+ # Args section
76
+ if sig.parameters:
77
+ lines.append("")
78
+ lines.append("Args:")
79
+ for name, param in sig.parameters.items():
80
+ if name == "self":
81
+ continue
82
+ annot = hints.get(name, param.annotation)
83
+ base, meta = _extract_base_and_meta(annot)
84
+ tname = _typename(base) if base is not inspect._empty else None
85
+ desc = meta or ""
86
+ if tname and tname != str(inspect._empty):
87
+ lines.append(f" {name} ({tname}): {desc}".rstrip())
88
+ else:
89
+ lines.append(f" {name}: {desc}".rstrip())
90
+
91
+ # Returns section
92
+ ret_hint = hints.get("return", sig.return_annotation)
93
+ if returns or (ret_hint and ret_hint is not inspect.Signature.empty):
94
+ lines.append("")
95
+ lines.append("Returns:")
96
+ if returns:
97
+ lines.append(f" {returns}")
98
+ else:
99
+ base, meta = _extract_base_and_meta(ret_hint)
100
+ rtype = _typename(base)
101
+ if meta:
102
+ lines.append(f" {rtype}: {meta}")
103
+ else:
104
+ lines.append(f" {rtype}")
105
+
106
+ func.__doc__ = "\n".join(lines).strip() + "\n"
107
+ return func
108
+
109
+ return decorator
110
+
111
+
112
+ __all__ = ["autodoc"]
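+
+ # Illustrative usage sketch: applying @autodoc to a function whose parameters
+ # carry Annotated string metadata produces a Google-style docstring, e.g.
+ #
+ #     @autodoc(summary="Add two integers.", returns="The sum of a and b.")
+ #     def add(
+ #         a: Annotated[int, "First addend"],
+ #         b: Annotated[int, "Second addend"],
+ #     ) -> int:
+ #         return a + b
+ #
+ #     # add.__doc__ now contains the summary plus Args/Returns sections built
+ #     # from the hints: "a (int): First addend", "b (int): Second addend".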
Obsidian/demo.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Hello, World
2
+
3
+ Clone this server locally and set your Obsidian vault's root path as an environment variable `OBSIDIAN_VAULT_ROOT` to access your own files.
README.md CHANGED
@@ -1,12 +1,216 @@
1
  ---
2
- title: Clone Tools
3
- emoji: 📚
4
- colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.0.2
8
  app_file: app.py
9
- pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: Nymbo Tools MCP
3
+ emoji: ⚙️
4
+ colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 6.0.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
+ short_description: All-in-one hub of general purpose tools useful for any agent
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+
16
+ ## Nymbo-Tools MCP Server
17
+
18
+ All-in-one hub of general-purpose tools useful for any agent. Run it as a Gradio web app, or connect to it remotely as a Model Context Protocol (MCP) server to call its tools programmatically.
19
+
20
+ Live Space: https://huggingface.co/spaces/Nymbo/Tools
21
+
22
+ ### What’s inside
23
+
24
+ - Web Fetch: Turn any webpage into clean Markdown with optional link-only scraping, CSS selector stripping, length limits, and pagination via cursor offset.
25
+ - Web Search: DuckDuckGo-backed search across text, news, images, videos, and books with readable, paginated output.
26
+ - Code Interpreter: Execute small Python snippets and capture stdout.
27
+ - Memory Manager: Lightweight JSON-based memory store with save/list/search/delete and tag filters.
28
+ - Generate Speech: Kokoro‑82M TTS with 54 voices and adjustable speed (CPU or CUDA if available).
29
+ - Generate Image: Text-to-image via Hugging Face serverless inference (choose model, steps, CFG, size, seed).
30
+ - Generate Video: Text-to-video via Hugging Face serverless inference (model, steps, guidance, size, fps, duration, seed).
31
+ - Deep Research: Multi-query web research pipeline (DDG search + parallel fetch + LLM report synthesis) with downloadable report.
32
+ - File System: Safe, sandboxed filesystem operations under a tool root.
33
+ - Shell Command: Run shell commands inside the same safe root as File System.
34
+
35
+ ## Quick start
36
+
37
+ Run the following commands in sequence to start the server locally:
38
+
39
+ ```shell
40
+ git clone https://huggingface.co/spaces/Nymbo/Tools
41
+ cd Tools
42
+ python -m venv env
43
+ source env/bin/activate
44
+ pip install -r requirements.txt
45
+ python app.py
46
+ ```
47
+
48
+ Defaults:
49
+ - The Gradio UI typically serves on http://127.0.0.1:7860
50
+ - The MCP endpoint is available at http://127.0.0.1:7860/gradio_api/mcp/
51
+
52
+ ## Using it as an MCP server
53
+
54
+ Remote MCP (hosted):
55
+ - Base URL: https://mcp.nymbo.net/gradio_api/mcp/
56
+ - SSE endpoint (for clients that need it): https://mcp.nymbo.net/gradio_api/mcp/sse
57
+
58
+ Local MCP (when you run app.py):
59
+ - Base URL: http://127.0.0.1:7860/gradio_api/mcp/
60
+ - SSE endpoint: http://127.0.0.1:7860/gradio_api/mcp/sse
61
+
62
+ Example client config (JSON):
63
+
64
+ ```json
65
+ {
66
+ "mcpServers": {
67
+ "nymbo-tools": {
68
+ "url": "https://mcp.nymbo.net/gradio_api/mcp/"
69
+ }
70
+ }
71
+ }
72
+ ```
73
+
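+ Because the Space is a regular Gradio app, the same tools can also be called programmatically with `gradio_client`. The sketch below is illustrative: the endpoint name `/Web_Search` and the positional argument order are assumptions based on the UI inputs, so check `client.view_api()` for the authoritative schema.
+
+ ```python
+ from gradio_client import Client
+
+ # Connect to the hosted Space, or pass "http://127.0.0.1:7860" for a local run.
+ client = Client("Nymbo/Tools")
+
+ # Print every exposed endpoint with its parameters and return types.
+ client.view_api()
+
+ # Hypothetical Web_Search call; the argument order mirrors the UI inputs.
+ result = client.predict(
+     "site:huggingface.co MCP",  # query
+     5,                          # max results
+     1,                          # page
+     0,                          # offset
+     "text",                     # search type
+     "auto",                     # backend
+     "any",                      # date filter
+     api_name="/Web_Search",
+ )
+ print(result)
+ ```
+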
74
+ ## Environment variables (optional but recommended)
75
+
76
+ - HF_READ_TOKEN: Enables Image Generation, Video Generation, and Deep Research (Hugging Face serverless inference). These tools stay visible to MCP clients but calls require a valid token to succeed.
77
+ - HF_TOKEN: Alternative token fallback used by some providers (also enables Deep Research/Video).
78
+ - NYMBO_TOOLS_ROOT: Overrides the File System/Shell working root. Defaults to Nymbo-Tools/Filesystem.
79
+ - UNSAFE_ALLOW_ABS_PATHS=1: Allow absolute paths in File System and Shell Command (off by default for safety).
80
+
81
+ Notes:
82
+ - Without an HF API key, you can still use Web Fetch, Web Search, Code Interpreter, Memory Manager, File System, Shell Command, and Generate Speech.
83
+ - Generate Speech requires the kokoro package and its dependencies; it runs on CPU and uses CUDA when available. No API key is needed because the audio is synthesized on the server itself.
84
+
85
+ ## Persistence and privacy
86
+
87
+ - Memory Manager stores entries in `memories.json` at the Nymbo-Tools folder root when running locally.
88
+ - File System defaults to the `Filesystem/` directory under Nymbo-Tools.
89
+ - In the public demo Space, storage is ephemeral and visible to anyone using the Space; avoid personal or sensitive data.
90
+
91
+ ## Tool reference (signatures and behavior)
92
+
93
+ Each MCP tool below is summarized by its inputs, outputs, and notable behaviors.
94
+
95
+ ### Web_Fetch (Webpages, converted to Markdown)
96
+ Inputs:
97
+ - url (str): Absolute URL to fetch (must return HTML).
98
+ - max_chars (int, default 3000): 0 = full page; otherwise truncates with a next_cursor notice.
99
+ - strip_selectors (str): Comma-separated CSS selectors to remove (e.g., .header, .footer, nav).
100
+ - url_scraper (bool): If true, return only a list of links from the page.
101
+ - offset (int): Character offset for pagination; pass the previous next_cursor to continue.
102
+
103
+ Output: Markdown string. If truncated, includes a next_cursor to continue.
104
+
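+ A minimal pagination sketch with `gradio_client` (the `/Web_Fetch` endpoint name and argument order are assumptions; confirm with `client.view_api()`):
+
+ ```python
+ from gradio_client import Client
+
+ client = Client("Nymbo/Tools")
+
+ # First chunk: up to 3000 characters starting at offset 0.
+ first = client.predict(
+     "https://example.com",  # url
+     3000,                   # max_chars
+     "",                     # strip_selectors
+     False,                  # url_scraper
+     0,                      # offset
+     api_name="/Web_Fetch",
+ )
+
+ # If the output ends with a next_cursor notice, pass that value back as the
+ # offset to continue where the previous call stopped (3000 is illustrative).
+ second = client.predict("https://example.com", 3000, "", False, 3000, api_name="/Web_Fetch")
+ ```
+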
105
+ ### Web_Search (DuckDuckGo backend)
106
+ Inputs:
107
+ - query (str): DuckDuckGo query (supports site:, quotes, OR).
108
+ - max_results (int 1–20, default 5)
109
+ - page (int, default 1) or offset (int) for precise continuation
110
+ - search_type (str): "text" | "news" | "images" | "videos" | "books"
111
+
112
+ Output: Readable text with pagination hints and next_offset.
113
+
114
+ ### Code_Interpreter (Python)
115
+ Inputs:
116
+ - code (str): Python source; stdout is captured.
117
+
118
+ Output: Captured stdout or the exception text.
119
+
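+ A remote call sketch (endpoint name assumed):
+
+ ```python
+ from gradio_client import Client
+
+ client = Client("Nymbo/Tools")
+ # stdout is captured and returned; exceptions come back as text rather than raising.
+ print(client.predict("print(2 + 2)", api_name="/Code_Interpreter"))
+ ```
+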
120
+ ### Memory_Manager (Simple JSON store)
121
+ Inputs:
122
+ - action: "save" | "list" | "search" | "delete"
123
+ - text (save only), tags (save only)
124
+ - query (search only): supports tag:name terms and AND/OR
125
+ - limit (list/search): default 20
126
+ - memory_id (delete): full UUID or unique prefix
127
+ - include_tags (bool): include tags when listing/searching
128
+
129
+ Output: Confirmation string, listing, search matches, or structured error text.
130
+
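+ A save-then-search sketch (the endpoint name and argument order are assumptions; the order below follows the field list above and should be confirmed with `client.view_api()`):
+
+ ```python
+ from gradio_client import Client
+
+ client = Client("Nymbo/Tools")
+
+ # Save a memory with tags: action, text, tags, query, limit, memory_id, include_tags.
+ client.predict("save", "Prefers metric units.", "preferences,units", "", 20, "", True,
+                api_name="/Memory_Manager")
+
+ # Search by tag using the query syntax described above.
+ print(client.predict("search", "", "", "tag:preferences", 20, "", True,
+                      api_name="/Memory_Manager"))
+ ```
+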
131
+ ### Generate_Speech (Kokoro-82M)
132
+ Inputs:
133
+ - text (str)
134
+ - speed (float 0.5–2.0, default 1.25)
135
+ - voice (str): One of 54 voices (e.g., af_heart, am_liam, bf_alice, zf_xiaoyi…)
136
+
137
+ Output: (sample_rate:int, waveform:np.ndarray) – rendered as downloadable WAV in the UI.
138
+
139
+ ### Generate_Image (HF inference)
140
+ Requires: HF_READ_TOKEN
141
+
142
+ Inputs:
143
+ - prompt (str)
144
+ - model_id (str): e.g., black-forest-labs/FLUX.1-Krea-dev
145
+ - negative_prompt (str)
146
+ - steps (1–100), cfg_scale (1–20), sampler (UI label), seed (-1=random), width/height
147
+
148
+ Output: PIL.Image. In the UI it is displayed and downloadable. Error messages guide you to provide a token or fix the model id.
149
+
150
+ ### Generate_Video (HF inference)
151
+ Requires: HF_READ_TOKEN or HF_TOKEN
152
+
153
+ Inputs:
154
+ - prompt (str)
155
+ - model_id (str): default akhaliq/sora-2
156
+ - negative_prompt (str)
157
+ - steps (1–100), cfg_scale, seed, width/height, fps, duration (s)
158
+
159
+ Output: Temporary MP4 file path; UI shows a playable/downloadable video.
160
+
161
+ ### Deep_Research (HF inference)
162
+ Requires: HF_READ_TOKEN or HF_TOKEN
163
+
164
+ Inputs:
165
+ - summary (str): One or more sentences describing the research task.
166
+ - query1..query5 (str) with max1..max5 (1–50). Total requested results across queries are capped at 50.
167
+
168
+ Behavior:
169
+ - Parallel DDG searches → fetch pages within budget → filter candidate sources with an LLM → synthesize a long, well-structured Markdown report and a list of sources.
170
+
171
+ Output: (report_md, fetched_links_text, report_file_path)
172
+
173
+ ### File_System (safe root)
174
+ Root:
175
+ - Defaults to `Nymbo-Tools/Filesystem` (or NYMBO_TOOLS_ROOT). Absolute paths disabled unless UNSAFE_ALLOW_ABS_PATHS=1.
176
+
177
+ Actions:
178
+ - list, read, write, append, mkdir, move, copy, delete, info, help
179
+
180
+ Key fields:
181
+ - path, content (write/append), dest_path (move/copy), recursive, show_hidden, max_entries, offset, max_chars, create_dirs, overwrite
182
+
183
+ Output:
184
+ - Human-readable listings or JSON-like error strings with code/message/hint.
185
+
186
+ ### Shell_Command (same safe root)
187
+ Inputs:
188
+ - command (str): Single-string shell command (pipelines supported by the host shell).
189
+ - workdir (str): Relative to the root.
190
+ - timeout (s)
191
+
192
+ Output:
193
+ - Combined header + STDOUT/STDERR. Absolute paths disabled by default. Shell is detected automatically (PowerShell on Windows when available; bash/sh on POSIX).
194
+
195
+ ## Running on Hugging Face Spaces
196
+
197
+ 1) Duplicate the Space at https://huggingface.co/spaces/Nymbo/Tools.
198
+ 2) In Space Settings → Secrets, add HF_READ_TOKEN (and/or HF_TOKEN) for model access.
199
+ 3) Both the UI and MCP clients will list every tool. Image/Video/Deep Research still need a valid token when invoked.
200
+
201
+ ## Troubleshooting
202
+
203
+ - Image/Video/Deep Research calls fail immediately:
204
+ - Provide HF_READ_TOKEN (and optionally HF_TOKEN). Restart the app/Space.
205
+ - 401/403 when calling generation tools:
206
+ - Token missing/insufficient permissions. Ensure your token can read the chosen model.
207
+ - Kokoro not found:
208
+ - Install kokoro>=0.9.4. CPU works; CUDA is used if available. Torch is intentionally skipped on Apple Silicon (see the platform marker in requirements.txt).
209
+ - Windows PowerShell activation policy blocks venv activation:
210
+ - Run PowerShell and set a suitable execution policy for the current user (e.g., `Set-ExecutionPolicy -Scope CurrentUser RemoteSigned`), or manually run `python app.py` after installing dependencies.
211
+ - File System or Shell path errors:
212
+ - Paths are relative to the tool root. Set NYMBO_TOOLS_ROOT to customize. Set UNSAFE_ALLOW_ABS_PATHS=1 only if you fully trust the environment.
213
+
214
+ ## License
215
+
216
+ Apache-2.0 (see Space metadata). If you duplicate the Space or use these tools, ensure your usage complies with the licenses and terms of the underlying models and providers.
app.py ADDED
@@ -0,0 +1,318 @@
1
+ from __future__ import annotations
2
+
3
+ # Project by Nymbo
4
+
5
+ import json
6
+ import os
7
+ import sys
8
+ import threading
9
+ import time
10
+ from datetime import datetime, timedelta
11
+ from typing import Any
12
+
13
+ import gradio as gr
14
+
15
+
16
+ class RateLimiter:
17
+ """Best-effort in-process rate limiter for HTTP-heavy tools."""
18
+
19
+ def __init__(self, requests_per_minute: int = 30) -> None:
20
+ self.requests_per_minute = requests_per_minute
21
+ self._requests: list[datetime] = []
22
+ self._lock = threading.Lock()
23
+
24
+ def acquire(self) -> None:
25
+ now = datetime.now()
26
+ with self._lock:
27
+ self._requests = [req for req in self._requests if now - req < timedelta(minutes=1)]
28
+ if len(self._requests) >= self.requests_per_minute:
29
+ wait_time = 60 - (now - self._requests[0]).total_seconds()
30
+ if wait_time > 0:
31
+ time.sleep(max(1, wait_time))
32
+ self._requests.append(now)
33
+
34
+
35
+ _search_rate_limiter = RateLimiter(requests_per_minute=20)
36
+ _fetch_rate_limiter = RateLimiter(requests_per_minute=25)
37
+
38
+
39
+ def _truncate_for_log(value: Any, limit: int = 500) -> str:
40
+ if not isinstance(value, str):
41
+ value = str(value)
42
+ if len(value) <= limit:
43
+ return value
44
+ return value[: limit - 1] + "…"
45
+
46
+
47
+ def _serialize_input(val: Any) -> Any:
48
+ try:
49
+ if isinstance(val, (str, int, float, bool)) or val is None:
50
+ return val
51
+ if isinstance(val, (list, tuple)):
52
+ return [_serialize_input(v) for v in list(val)[:10]] + (["…"] if len(val) > 10 else [])
53
+ if isinstance(val, dict):
54
+ out: dict[str, Any] = {}
55
+ for i, (k, v) in enumerate(val.items()):
56
+ if i >= 12:
57
+ out["…"] = "…"
58
+ break
59
+ out[str(k)] = _serialize_input(v)
60
+ return out
61
+ return repr(val)[:120]
62
+ except Exception:
63
+ return "<unserializable>"
64
+
65
+
66
+ def _log_call_start(func_name: str, **kwargs: Any) -> None:
67
+ try:
68
+ compact = {k: _serialize_input(v) for k, v in kwargs.items()}
69
+ print(f"[TOOL CALL] {func_name} inputs: {json.dumps(compact, ensure_ascii=False)[:800]}", flush=True)
70
+ except Exception as exc:
71
+ print(f"[TOOL CALL] {func_name} (failed to log inputs: {exc})", flush=True)
72
+
73
+
74
+ def _log_call_end(func_name: str, output_desc: str) -> None:
75
+ try:
76
+ print(f"[TOOL RESULT] {func_name} output: {output_desc}", flush=True)
77
+ except Exception as exc:
78
+ print(f"[TOOL RESULT] {func_name} (failed to log output: {exc})", flush=True)
79
+
80
+ # Ensure Tools modules can import 'app' when this file is executed as a script
81
+ # (their code does `from app import ...`).
82
+ sys.modules.setdefault("app", sys.modules[__name__])
83
+
84
+ # Import per-tool interface builders from the Tools package
85
+ from Modules.Web_Fetch import build_interface as build_fetch_interface
86
+ from Modules.Web_Search import build_interface as build_search_interface
87
+ from Modules.Agent_Terminal import build_interface as build_agent_terminal_interface
88
+ from Modules.Code_Interpreter import build_interface as build_code_interface
89
+ from Modules.Memory_Manager import build_interface as build_memory_interface
90
+ from Modules.Generate_Speech import build_interface as build_speech_interface
91
+ from Modules.Generate_Image import build_interface as build_image_interface
92
+ from Modules.Generate_Video import build_interface as build_video_interface
93
+ from Modules.Deep_Research import build_interface as build_research_interface
94
+ from Modules.File_System import build_interface as build_fs_interface
95
+ from Modules.Obsidian_Vault import build_interface as build_obsidian_interface
96
+ from Modules.Shell_Command import build_interface as build_shell_interface
97
+
98
+ # Optional environment flags used to conditionally show API schemas (unchanged behavior)
99
+ HF_IMAGE_TOKEN = bool(os.getenv("HF_READ_TOKEN"))
100
+ HF_VIDEO_TOKEN = bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN"))
101
+ HF_TEXTGEN_TOKEN = bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN"))
102
+
103
+ # Load CSS from external file
104
+ _css_path = os.path.join(os.path.dirname(__file__), "styles.css")
105
+ with open(_css_path, "r", encoding="utf-8") as _css_file:
106
+ CSS_STYLES = _css_file.read()
107
+
108
+ # Build each tab interface using modular builders
109
+ fetch_interface = build_fetch_interface()
110
+ web_search_interface = build_search_interface()
111
+ agent_terminal_interface = build_agent_terminal_interface()
112
+ code_interface = build_code_interface()
113
+ memory_interface = build_memory_interface()
114
+ kokoro_interface = build_speech_interface()
115
+ image_generation_interface = build_image_interface()
116
+ video_generation_interface = build_video_interface()
117
+ deep_research_interface = build_research_interface()
118
+ fs_interface = build_fs_interface()
119
+ shell_interface = build_shell_interface()
120
+ obsidian_interface = build_obsidian_interface()
121
+
122
+ _interfaces = [
123
+ agent_terminal_interface,
124
+ fetch_interface,
125
+ web_search_interface,
126
+ code_interface,
127
+ shell_interface,
128
+ fs_interface,
129
+ obsidian_interface,
130
+ memory_interface,
131
+ kokoro_interface,
132
+ image_generation_interface,
133
+ video_generation_interface,
134
+ deep_research_interface,
135
+ ]
136
+ _tab_names = [
137
+ "Agent Terminal",
138
+ "Web Fetch",
139
+ "Web Search",
140
+ "Code Interpreter",
141
+ "Shell Command",
142
+ "File System",
143
+ "Obsidian Vault",
144
+ "Memory Manager",
145
+ "Generate Speech",
146
+ "Generate Image",
147
+ "Generate Video",
148
+ "Deep Research",
149
+ ]
150
+
151
+ with gr.Blocks(title="Nymbo/Tools MCP") as demo:
152
+
153
+ with gr.Sidebar(width=300, elem_classes="app-sidebar"):
154
+ gr.Markdown("## Nymbo/Tools MCP\n<p style='font-size: 0.7rem; opacity: 0.85; margin-top: 2px;'>General purpose tools useful for any agent.</p>\n<code style='font-size: 0.7rem; word-break: break-all;'>https://nymbo.net/gradio_api/mcp/</code>")
155
+
156
+ with gr.Accordion("Information", open=False):
157
+ gr.HTML(
158
+ """
159
+ <div class="info-accordion">
160
+ <div class="info-grid" style="grid-template-columns: 1fr;">
161
+ <section class="info-card">
162
+ <div class="info-card__body">
163
+ <h3>Connecting from an MCP Client</h3>
164
+ <p>
165
+ This Space also runs as a Model Context Protocol (MCP) server. Point your client to:
166
+ <br/>
167
+ <code>https://nymbo.net/gradio_api/mcp/</code>
168
+ </p>
169
+ <p>Example client configuration:</p>
170
+ <pre><code class="language-json">{
171
+ "mcpServers": {
172
+ "nymbo-tools": {
173
+ "url": "https://nymbo.net/gradio_api/mcp/"
174
+ }
175
+ }
176
+ }</code></pre>
177
+ <p>Run the following commands in sequence to run the server locally:</p>
178
+ <pre><code>git clone https://huggingface.co/spaces/Nymbo/Tools
179
+ cd Tools
180
+ python -m venv env
181
+ source env/bin/activate
182
+ pip install -r requirements.txt
183
+ python app.py</code></pre>
184
+ </div>
185
+ </section>
186
+
187
+ <section class="info-card">
188
+ <div class="info-card__body">
189
+ <h3>Enable Image Gen, Video Gen, and Deep Research</h3>
190
+ <p>
191
+ The <code>Generate_Image</code>, <code>Generate_Video</code>, and <code>Deep_Research</code> tools require a
192
+ <code>HF_READ_TOKEN</code> set as a secret or environment variable.
193
+ </p>
194
+ <ul class="info-list">
195
+ <li>Duplicate this Space and add a HF token with model read access.</li>
196
+ <li>Or run locally with <code>HF_READ_TOKEN</code> in your environment.</li>
197
+ </ul>
198
+ <div class="info-hint">
199
+ MCP clients can see these tools even without tokens, but calls will fail until a valid token is provided.
200
+ </div>
201
+ </div>
202
+ </section>
203
+
204
+ <section class="info-card">
205
+ <div class="info-card__body">
206
+ <h3>Persistent Memories and Files</h3>
207
+ <p>
208
+ In this public demo, memories and files created with the <code>Memory_Manager</code> and <code>File_System</code> are stored in the Space's running container and are cleared when the Space restarts. Content is visible to everyone—avoid personal data.
209
+ </p>
210
+ <p>
211
+ When running locally, memories are saved to <code>memories.json</code> at the repo root for privacy, and files are saved to the <code>Tools/Filesystem</code> directory on disk.
212
+ </p>
213
+ </div>
214
+ </section>
215
+
216
+ <section class="info-card">
217
+ <div class="info-card__body">
218
+ <h3>Tool Notes &amp; Kokoro Voice Legend</h3>
219
+ <p><strong>No authentication required for:</strong></p>
220
+ <ul class="info-list">
221
+ <li><code>Web_Fetch</code></li>
222
+ <li><code>Web_Search</code></li>
223
+ <li><code>Agent_Terminal</code></li>
224
+ <li><code>Code_Interpreter</code></li>
225
+ <li><code>Memory_Manager</code></li>
226
+ <li><code>Generate_Speech</code></li>
227
+ <li><code>File_System</code></li>
228
+ <li><code>Shell_Command</code></li>
229
+ </ul>
230
+ <p><strong>Kokoro voice prefixes</strong></p>
231
+ <table style="width:100%; border-collapse:collapse; font-size:0.9em; margin-top:8px;">
232
+ <thead>
233
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.15);">
234
+ <th style="padding:6px 8px; text-align:left;">Accent</th>
235
+ <th style="padding:6px 8px; text-align:center;">Female</th>
236
+ <th style="padding:6px 8px; text-align:center;">Male</th>
237
+ </tr>
238
+ </thead>
239
+ <tbody>
240
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
241
+ <td style="padding:6px 8px; font-weight:600;">American</td>
242
+ <td style="padding:6px 8px; text-align:center;"><code>af</code></td>
243
+ <td style="padding:6px 8px; text-align:center;"><code>am</code></td>
244
+ </tr>
245
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
246
+ <td style="padding:6px 8px; font-weight:600;">British</td>
247
+ <td style="padding:6px 8px; text-align:center;"><code>bf</code></td>
248
+ <td style="padding:6px 8px; text-align:center;"><code>bm</code></td>
249
+ </tr>
250
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
251
+ <td style="padding:6px 8px; font-weight:600;">European</td>
252
+ <td style="padding:6px 8px; text-align:center;"><code>ef</code></td>
253
+ <td style="padding:6px 8px; text-align:center;"><code>em</code></td>
254
+ </tr>
255
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
256
+ <td style="padding:6px 8px; font-weight:600;">French</td>
257
+ <td style="padding:6px 8px; text-align:center;"><code>ff</code></td>
258
+ <td style="padding:6px 8px; text-align:center;">—</td>
259
+ </tr>
260
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
261
+ <td style="padding:6px 8px; font-weight:600;">Hindi</td>
262
+ <td style="padding:6px 8px; text-align:center;"><code>hf</code></td>
263
+ <td style="padding:6px 8px; text-align:center;"><code>hm</code></td>
264
+ </tr>
265
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
266
+ <td style="padding:6px 8px; font-weight:600;">Italian</td>
267
+ <td style="padding:6px 8px; text-align:center;"><code>if</code></td>
268
+ <td style="padding:6px 8px; text-align:center;"><code>im</code></td>
269
+ </tr>
270
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
271
+ <td style="padding:6px 8px; font-weight:600;">Japanese</td>
272
+ <td style="padding:6px 8px; text-align:center;"><code>jf</code></td>
273
+ <td style="padding:6px 8px; text-align:center;"><code>jm</code></td>
274
+ </tr>
275
+ <tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
276
+ <td style="padding:6px 8px; font-weight:600;">Portuguese</td>
277
+ <td style="padding:6px 8px; text-align:center;"><code>pf</code></td>
278
+ <td style="padding:6px 8px; text-align:center;"><code>pm</code></td>
279
+ </tr>
280
+ <tr>
281
+ <td style="padding:6px 8px; font-weight:600;">Chinese</td>
282
+ <td style="padding:6px 8px; text-align:center;"><code>zf</code></td>
283
+ <td style="padding:6px 8px; text-align:center;"><code>zm</code></td>
284
+ </tr>
285
+ </tbody>
286
+ </table>
287
+ </div>
288
+ </section>
289
+ </div>
290
+ </div>
291
+ """
292
+ )
293
+
294
+ gr.Markdown("### Tools")
295
+ tool_selector = gr.Radio(
296
+ choices=_tab_names,
297
+ value=_tab_names[0],
298
+ label="Select Tool",
299
+ show_label=False,
300
+ container=False,
301
+ elem_classes="sidebar-nav"
302
+ )
303
+
304
+ with gr.Tabs(elem_classes="hidden-tabs", selected=_tab_names[0]) as tool_tabs:
305
+ for name, interface in zip(_tab_names, _interfaces):
306
+ with gr.TabItem(label=name, id=name, elem_id=f"tab-{name}"):
307
+ interface.render()
308
+
309
+ # Use JavaScript to click the hidden tab button when the radio selection changes
310
+ tool_selector.change(
311
+ fn=None,
312
+ inputs=tool_selector,
313
+ outputs=None,
314
+ js="(selected_tool) => { const buttons = document.querySelectorAll('.hidden-tabs button'); buttons.forEach(btn => { if (btn.innerText.trim() === selected_tool) { btn.click(); } }); }"
315
+ )
316
+
317
+ if __name__ == "__main__":
318
+ demo.launch(mcp_server=True, ssr_mode=False)  # theme and css are applied on gr.Blocks above
memories.json ADDED
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "id": "c8e3965d-270c-4baf-836f-33c6ed57f527",
4
+ "text": "The user's personal website is driven by Markdown and Vue, hosted on Vercel.",
5
+ "timestamp": "2025-09-06 02:21:17",
6
+ "tags": "website,markdown,vue,vercel"
7
+ },
8
+ {
9
+ "id": "17806073-cb86-472f-9b39-c1aaaf3ac058",
10
+ "text": "The user lives in New York City.",
11
+ "timestamp": "2025-09-06 17:07:27",
12
+ "tags": "location,address"
13
+ },
14
+ {
15
+ "id": "86e9f249-b43d-4aaa-bca0-b55fcb0c03be",
16
+ "text": "The user has a pet Russian tortoise who is 8 years old.",
17
+ "timestamp": "2025-09-06 02:20:59",
18
+ "tags": "pet,tortoise,animals"
19
+ }
20
+ ]
requirements.txt ADDED
@@ -0,0 +1,14 @@
1
+ gradio[mcp]==6.0.0
2
+ requests
3
+ beautifulsoup4
4
+ lxml
5
+ readability-lxml
6
+ ddgs
7
+ kokoro>=0.9.4
8
+ numpy
9
+ torch; platform_system != "Darwin" or platform_machine != "arm64"
10
+ Pillow
11
+ huggingface_hub>=0.30.0
12
+ markdownify
13
+ scipy
14
+ onnxruntime
styles.css ADDED
@@ -0,0 +1,308 @@
1
+ /* Style only the top-level app title to avoid affecting headings elsewhere */
2
+ .app-title {
3
+ text-align: center;
4
+ /* Ensure main title appears first, then our two subtitle lines */
5
+ display: grid;
6
+ justify-items: center;
7
+ }
8
+ .app-title::after {
9
+ grid-row: 2;
10
+ content: "General purpose tools useful for any agent.";
11
+ display: block;
12
+ font-size: 1rem;
13
+ font-weight: 400;
14
+ opacity: 0.9;
15
+ margin-top: 2px;
16
+ white-space: pre-wrap;
17
+ }
18
+
19
+ /* Sidebar Container */
20
+ .app-sidebar {
21
+ background: var(--body-background-fill) !important;
22
+ border-right: 1px solid rgba(255, 255, 255, 0.08) !important;
23
+ }
24
+ @media (prefers-color-scheme: light) {
25
+ .app-sidebar {
26
+ border-right: 1px solid rgba(0, 0, 0, 0.08) !important;
27
+ }
28
+ }
29
+
30
+ /* Historical safeguard: if any h1 appears inside tabs, don't attach pseudo content */
31
+ .gradio-container [role="tabpanel"] h1::before,
32
+ .gradio-container [role="tabpanel"] h1::after {
33
+ content: none !important;
34
+ }
35
+
36
+ /* Information accordion - modern info cards */
37
+ .info-accordion {
38
+ margin: 8px 0 2px;
39
+ }
40
+ .info-grid {
41
+ display: grid;
42
+ gap: 12px;
43
+ /* Force a 2x2 layout on medium+ screens */
44
+ grid-template-columns: repeat(2, minmax(0, 1fr));
45
+ align-items: stretch;
46
+ }
47
+ /* On narrow screens, stack into a single column */
48
+ @media (max-width: 800px) {
49
+ .info-grid {
50
+ grid-template-columns: 1fr;
51
+ }
52
+ }
53
+ .info-card {
54
+ display: flex;
55
+ gap: 14px;
56
+ padding: 14px 16px;
57
+ border: 1px solid rgba(255, 255, 255, 0.08);
58
+ background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03));
59
+ border-radius: 12px;
60
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
61
+ position: relative;
62
+ overflow: hidden;
63
+ -webkit-backdrop-filter: blur(2px);
64
+ backdrop-filter: blur(2px);
65
+ }
66
+ .info-card::before {
67
+ content: "";
68
+ position: absolute;
69
+ inset: 0;
70
+ border-radius: 12px;
71
+ pointer-events: none;
72
+ background: linear-gradient(90deg, rgba(99,102,241,0.06), rgba(59,130,246,0.05));
73
+ }
74
+ .info-card__icon {
75
+ font-size: 24px;
76
+ flex: 0 0 28px;
77
+ line-height: 1;
78
+ filter: saturate(1.1);
79
+ }
80
+ .info-card__body {
81
+ min-width: 0;
82
+ }
83
+ .info-card__body h3 {
84
+ margin: 0 0 6px;
85
+ font-size: 1.05rem;
86
+ }
87
+ .info-card__body p {
88
+ margin: 6px 0;
89
+ opacity: 0.95;
90
+ }
91
+ /* Readable code blocks inside info cards */
92
+ .info-card pre {
93
+ margin: 8px 0;
94
+ padding: 10px 12px;
95
+ background: rgba(20, 20, 30, 0.55);
96
+ border: 1px solid rgba(255, 255, 255, 0.08);
97
+ border-radius: 10px;
98
+ overflow-x: auto;
99
+ white-space: pre;
100
+ }
101
+ .info-card code {
102
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
103
+ font-size: 0.95em;
104
+ }
105
+ .info-card pre code {
106
+ display: block;
107
+ }
108
+ .info-card p {
109
+ word-wrap: break-word;
110
+ overflow-wrap: break-word;
111
+ }
112
+ .info-card p code {
113
+ word-break: break-all;
114
+ }
115
+ .info-list {
116
+ margin: 6px 0 0 18px;
117
+ padding: 0;
118
+ }
119
+ .info-hint {
120
+ margin-top: 8px;
121
+ font-size: 0.9em;
122
+ opacity: 0.9;
123
+ }
124
+
125
+ /* Light theme adjustments */
126
+ @media (prefers-color-scheme: light) {
127
+ .info-card {
128
+ border-color: rgba(0, 0, 0, 0.08);
129
+ background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.9));
130
+ }
131
+ .info-card::before {
132
+ background: linear-gradient(90deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06));
133
+ }
134
+ .info-card pre {
135
+ background: rgba(245, 246, 250, 0.95);
136
+ border-color: rgba(0, 0, 0, 0.08);
137
+ }
138
+ }
139
+
140
+ /* Sidebar Navigation - styled like the previous tabs */
141
+ .sidebar-nav {
142
+ background: transparent !important;
143
+ border: none !important;
144
+ padding: 0 !important;
145
+ }
146
+ .sidebar-nav .form {
147
+ gap: 8px !important;
148
+ display: flex !important;
149
+ flex-direction: column !important;
150
+ border: none !important;
151
+ background: transparent !important;
152
+ }
153
+ .sidebar-nav label {
154
+ display: flex !important;
155
+ align-items: center !important;
156
+ padding: 10px 12px !important;
157
+ border-radius: 10px !important;
158
+ border: 1px solid rgba(255, 255, 255, 0.08) !important;
159
+ background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03)) !important;
160
+ transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
161
+ cursor: pointer !important;
162
+ margin-bottom: 0 !important;
163
+ width: 100% !important;
164
+ justify-content: flex-start !important;
165
+ text-align: left !important;
166
+ }
167
+ .sidebar-nav label:hover {
168
+ border-color: rgba(99,102,241,0.28) !important;
169
+ background: linear-gradient(180deg, rgba(99,102,241,0.10), rgba(59,130,246,0.08)) !important;
170
+ }
171
+ /* Selected state - Gradio adds 'selected' class to the label in some versions, or we check input:checked */
172
+ .sidebar-nav label.selected {
173
+ border-color: rgba(99,102,241,0.35) !important;
174
+ box-shadow: inset 0 0 0 1px rgba(99,102,241,0.25), 0 1px 2px rgba(0,0,0,0.25) !important;
175
+ background: linear-gradient(180deg, rgba(99,102,241,0.18), rgba(59,130,246,0.14)) !important;
176
+ color: rgba(255, 255, 255, 0.95) !important;
177
+ }
178
+
179
+ /* Light theme adjustments for sidebar */
180
+ @media (prefers-color-scheme: light) {
181
+ .sidebar-nav label {
182
+ border-color: rgba(0, 0, 0, 0.08) !important;
183
+ background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.90)) !important;
184
+ color: rgba(0, 0, 0, 0.85) !important;
185
+ }
186
+ .sidebar-nav label:hover {
187
+ border-color: rgba(99,102,241,0.25) !important;
188
+ background: linear-gradient(180deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06)) !important;
189
+ }
190
+ .sidebar-nav label.selected {
191
+ border-color: rgba(99,102,241,0.35) !important;
192
+ background: linear-gradient(180deg, rgba(99,102,241,0.16), rgba(59,130,246,0.12)) !important;
193
+ color: rgba(0, 0, 0, 0.85) !important;
194
+ }
195
+ }
196
+
197
+ /* Hide scrollbars/arrows that can appear on the description block in some browsers */
198
+ /* stylelint-disable compat-api/css */
199
+ article.prose, .prose, .gr-prose {
200
+ overflow: visible !important;
201
+ max-height: none !important;
202
+ -ms-overflow-style: none !important; /* IE/Edge */
203
+ scrollbar-width: none !important; /* Firefox */
204
+ }
205
+ /* stylelint-enable compat-api/css */
206
+ article.prose::-webkit-scrollbar,
207
+ .prose::-webkit-scrollbar,
208
+ .gr-prose::-webkit-scrollbar {
209
+ display: none !important; /* Chrome/Safari */
210
+ }
211
+
212
+ /* Fix for white background on single-line inputs in dark mode */
213
+ .gradio-container input[type="text"],
214
+ .gradio-container input[type="password"],
215
+ .gradio-container input[type="number"],
216
+ .gradio-container input[type="email"] {
217
+ background-color: var(--input-background-fill) !important;
218
+ color: var(--body-text-color) !important;
219
+ }
220
+
221
+ /* Custom glossy purple styling for primary action buttons */
222
+ .gradio-container button.primary {
223
+ border: 1px solid rgba(99, 102, 241, 0.35) !important;
224
+ background: linear-gradient(180deg, rgba(99, 102, 241, 0.25), rgba(59, 130, 246, 0.20)) !important;
225
+ box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.12), 0 2px 4px rgba(0, 0, 0, 0.15) !important;
226
+ color: rgba(255, 255, 255, 0.95) !important;
227
+ transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
228
+ }
229
+ .gradio-container button.primary:hover {
230
+ border-color: rgba(99, 102, 241, 0.5) !important;
231
+ background: linear-gradient(180deg, rgba(99, 102, 241, 0.35), rgba(59, 130, 246, 0.28)) !important;
232
+ box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 3px 6px rgba(0, 0, 0, 0.2) !important;
233
+ }
234
+ .gradio-container button.primary:active {
235
+ transform: scale(0.98) !important;
236
+ box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.2), 0 1px 2px rgba(0, 0, 0, 0.1) !important;
237
+ }
238
+ @media (prefers-color-scheme: light) {
239
+ .gradio-container button.primary {
240
+ border-color: rgba(99, 102, 241, 0.4) !important;
241
+ background: linear-gradient(180deg, rgba(99, 102, 241, 0.85), rgba(79, 70, 229, 0.75)) !important;
242
+ box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.25), 0 2px 4px rgba(0, 0, 0, 0.12) !important;
243
+ color: rgba(255, 255, 255, 0.98) !important;
244
+ }
245
+ .gradio-container button.primary:hover {
246
+ background: linear-gradient(180deg, rgba(99, 102, 241, 0.95), rgba(79, 70, 229, 0.85)) !important;
247
+ box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.3), 0 3px 6px rgba(0, 0, 0, 0.15) !important;
248
+ }
249
+ }
250
+
251
+ /* Hide the actual tabs since we use the sidebar to control them */
252
+ .hidden-tabs .tab-nav,
253
+ .hidden-tabs [role="tablist"] {
254
+ display: none !important;
255
+ }
256
+ /* Hide the entire first row of the tabs container (contains tab buttons + overflow) */
257
+ .hidden-tabs > div:first-child {
258
+ display: none !important;
259
+ }
260
+ /* Ensure audio component buttons remain visible - they're inside tab panels, not the first row */
261
+ .hidden-tabs [role="tabpanel"] button {
262
+ display: inline-flex !important;
263
+ }
264
+
265
+ /* Custom scrollbar styling - Progressive enhancement, falls back to default scrollbars */
266
+ /* stylelint-disable compat-api/css */
267
+ * {
268
+ scrollbar-width: thin;
269
+ scrollbar-color: rgba(61, 212, 159, 0.4) rgba(255, 255, 255, 0.05);
270
+ }
271
+ *::-webkit-scrollbar {
272
+ width: 8px;
273
+ height: 8px;
274
+ }
275
+ *::-webkit-scrollbar-track {
276
+ background: rgba(255, 255, 255, 0.05);
277
+ border-radius: 4px;
278
+ }
279
+ *::-webkit-scrollbar-thumb {
280
+ background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
281
+ border-radius: 4px;
282
+ border: 1px solid rgba(119, 247, 209, 0.2);
283
+ }
284
+ *::-webkit-scrollbar-thumb:hover {
285
+ background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
286
+ }
287
+ *::-webkit-scrollbar-corner {
288
+ background: rgba(255, 255, 255, 0.05);
289
+ }
290
+ @media (prefers-color-scheme: light) {
291
+ * {
292
+ scrollbar-color: rgba(61, 212, 159, 0.4) rgba(0, 0, 0, 0.05);
293
+ }
294
+ *::-webkit-scrollbar-track {
295
+ background: rgba(0, 0, 0, 0.05);
296
+ }
297
+ *::-webkit-scrollbar-thumb {
298
+ background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
299
+ border-color: rgba(0, 0, 0, 0.1);
300
+ }
301
+ *::-webkit-scrollbar-thumb:hover {
302
+ background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
303
+ }
304
+ *::-webkit-scrollbar-corner {
305
+ background: rgba(0, 0, 0, 0.05);
306
+ }
307
+ }
308
+ /* stylelint-enable compat-api/css */