revert: remove accidentally merged video-quote-finder skill (#18550)
This commit is contained in:
parent
a78839e60c
commit
f79cf3a01d
13
README.md
13
README.md
@ -262,19 +262,6 @@ ClawHub is a minimal skill registry. With ClawHub enabled, the agent can search
|
|||||||
|
|
||||||
[ClawHub](https://clawhub.com)
|
[ClawHub](https://clawhub.com)
|
||||||
|
|
||||||
### Example skill: video-quote-finder
|
|
||||||
|
|
||||||
Use `skills/video-quote-finder` to locate where a quote appears in a YouTube video and return timestamp links.
|
|
||||||
|
|
||||||
Original prompt:
|
|
||||||
|
|
||||||
> "OK I want to make a PR with this skill back to openclaw... make a new one that
|
|
||||||
> will search for a point on the video and give me the timestamp. For example I
|
|
||||||
> want to find the timestamp in this video where Peter says 'I think vibe coding
|
|
||||||
> is a slur'"
|
|
||||||
>
|
|
||||||
> Video: [https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV](https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV)
|
|
||||||
|
|
||||||
## Chat commands
|
## Chat commands
|
||||||
|
|
||||||
Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only):
|
Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only):
|
||||||
|
|||||||
@ -1,35 +0,0 @@
|
|||||||
---
|
|
||||||
name: video-quote-finder
|
|
||||||
description: Find where a quote appears in a YouTube video and return timestamped links. Use when users ask "where in this video does X say Y", "find the timestamp for this line", or "locate quote in this YouTube video".
|
|
||||||
---
|
|
||||||
|
|
||||||
# Video Quote Finder
|
|
||||||
|
|
||||||
Find quote timestamps in YouTube videos using the `summarize` CLI transcript extraction with timestamps.
|
|
||||||
|
|
||||||
## Quick start
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \
|
|
||||||
"https://youtu.be/YFjfBk8HI5o" \
|
|
||||||
"I think vibe coding is a slur"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
1. Extract transcript with timestamps via `summarize --extract --timestamps`.
|
|
||||||
2. Score transcript lines against the requested quote.
|
|
||||||
3. Return best match + top alternatives.
|
|
||||||
4. Include direct YouTube links with `t=<seconds>`.
|
|
||||||
|
|
||||||
## Output format
|
|
||||||
|
|
||||||
- `best_match` timestamp + line + score
|
|
||||||
- `best_link` with timestamp
|
|
||||||
- up to 5 candidate timestamps with links
|
|
||||||
|
|
||||||
## Notes
|
|
||||||
|
|
||||||
- Requires `summarize` CLI (`@steipete/summarize`) in PATH.
|
|
||||||
- Works best when YouTube captions are available.
|
|
||||||
- If no exact match is found, uses fuzzy matching and suggests alternatives.
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
# Usage
|
|
||||||
|
|
||||||
## Find timestamp for a quote
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \
|
|
||||||
"https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV" \
|
|
||||||
"I think vibe coding is a slur"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Tips
|
|
||||||
|
|
||||||
- Start with exact quote text.
|
|
||||||
- If no match, use a distinctive 3-8 word fragment.
|
|
||||||
- Prefer phrase fragments unlikely to repeat frequently.
|
|
||||||
@ -1,96 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import argparse
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from difflib import SequenceMatcher
|
|
||||||
|
|
||||||
TS_LINE = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*(.*)$")
|
|
||||||
|
|
||||||
|
|
||||||
def ts_to_seconds(ts: str) -> int:
|
|
||||||
parts = [int(x) for x in ts.split(':')]
|
|
||||||
if len(parts) == 2:
|
|
||||||
m, s = parts
|
|
||||||
return m * 60 + s
|
|
||||||
h, m, s = parts
|
|
||||||
return h * 3600 + m * 60 + s
|
|
||||||
|
|
||||||
|
|
||||||
def with_timestamp_url(url: str, ts: str) -> str:
|
|
||||||
sec = ts_to_seconds(ts)
|
|
||||||
base_url = url.split('#', 1)[0] # drop fragment so query params are honored
|
|
||||||
joiner = '&' if '?' in base_url else '?'
|
|
||||||
return f"{base_url}{joiner}t={sec}s"
|
|
||||||
|
|
||||||
|
|
||||||
def run_extract(url: str) -> str:
|
|
||||||
cmd = ["summarize", url, "--extract", "--timestamps"]
|
|
||||||
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
|
|
||||||
if p.returncode != 0:
|
|
||||||
raise RuntimeError(p.stderr.strip() or "summarize failed")
|
|
||||||
return p.stdout
|
|
||||||
|
|
||||||
|
|
||||||
def normalize(s: str) -> str:
|
|
||||||
return re.sub(r"\s+", " ", s.lower()).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def score(quote: str, line: str) -> float:
|
|
||||||
q = normalize(quote)
|
|
||||||
l = normalize(line)
|
|
||||||
if not q or not l:
|
|
||||||
return 0.0
|
|
||||||
if q in l:
|
|
||||||
return 1.0
|
|
||||||
|
|
||||||
q_words = set(q.split())
|
|
||||||
l_words = set(l.split())
|
|
||||||
overlap = len(q_words & l_words) / max(1, len(q_words))
|
|
||||||
ratio = SequenceMatcher(None, q, l).ratio()
|
|
||||||
return 0.6 * overlap + 0.4 * ratio
|
|
||||||
|
|
||||||
|
|
||||||
def find_matches(text: str, quote: str):
|
|
||||||
matches = []
|
|
||||||
for line in text.splitlines():
|
|
||||||
m = TS_LINE.match(line)
|
|
||||||
if not m:
|
|
||||||
continue
|
|
||||||
ts, body = m.group(1), m.group(2)
|
|
||||||
s = score(quote, body)
|
|
||||||
if s >= 0.35:
|
|
||||||
matches.append((s, ts, body))
|
|
||||||
matches.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
return matches[:5]
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
ap = argparse.ArgumentParser(description="Find quote timestamp in YouTube transcript")
|
|
||||||
ap.add_argument("url")
|
|
||||||
ap.add_argument("quote")
|
|
||||||
args = ap.parse_args()
|
|
||||||
|
|
||||||
try:
|
|
||||||
text = run_extract(args.url)
|
|
||||||
matches = find_matches(text, args.quote)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"ERROR: {e}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if not matches:
|
|
||||||
print("No matches found. Try a shorter quote fragment.")
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
best = matches[0]
|
|
||||||
best_link = with_timestamp_url(args.url, best[1])
|
|
||||||
print(f"best_match: [{best[1]}] score={best[0]:.2f} :: {best[2]}")
|
|
||||||
print(f"best_link: {best_link}")
|
|
||||||
print("candidates:")
|
|
||||||
for s, ts, body in matches:
|
|
||||||
print(f"- [{ts}] score={s:.2f} :: {body}")
|
|
||||||
print(f" link: {with_timestamp_url(args.url, ts)}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
Loading…
x
Reference in New Issue
Block a user