# fetch_youtube_video_metadata.py
import json
import re
from argparse import ArgumentParser
from time import sleep

import requests
  6. # function to extract video title and description using regex
  7. def get_youtube_video_info(video_url):
  8. response = requests.get(video_url)
  9. if response.status_code == 200:
  10. html = response.text
  11. # extract the title using a regular expression
  12. title_search = re.search(r'"title":"(.*?)"', html)
  13. title = title_search.group(1) if title_search else "No title found"
  14. # extract the description using a regular expression
  15. description_search = re.search(r'"shortDescription":"(.*?)"', html)
  16. description = description_search.group(1).replace('\\n', '\n') if description_search else "No description found"
  17. # cleanup the description
  18. description = re.sub('\\n', ' ', description)
  19. # extract author using a regular expression
  20. author_search = re.search(r'"ownerChannelName":"(.*?)"', html)
  21. author = author_search.group(1) if author_search else "No author found"
  22. # prepare the JSON structure
  23. video_info = {
  24. "path": video_url,
  25. "embed": f"https://www.youtube.com/embed/{video_url.split('=')[1]}",
  26. "name": title,
  27. "author": author,
  28. "description": description
  29. }
  30. return json.dumps(video_info, indent=4)
  31. else:
  32. return f"Failed to retrieve video info, status code: {response.status_code}"
  33. parser = ArgumentParser()
  34. parser.add_argument("url", nargs="+", help="YouTube video URL(s)")
  35. args = parser.parse_args()
  36. # process each URL passed in the arguments
  37. for video_url in args.url:
  38. # call the function with the provided URL
  39. video_json = get_youtube_video_info(video_url)
  40. print("{},".format(video_json))
  41. sleep(1) # sleep for 1 second to avoid being blocked by YouTube