From 54ec76037403d914e50f867c910f1bd5564474ef Mon Sep 17 00:00:00 2001
From: Admin
Date: Mon, 18 May 2026 19:14:39 +0100
Subject: [PATCH] BDA capstone activity

---
 reports/sequential_report.md | 20 ++++++++
 solutions/library.py         | 60 +++++++++++++++++++++++
 solutions/main.py            | 93 ++++++++++++++++++++++++++++++++++++
 solutions/test_yt_dlp.py     |  2 +
 4 files changed, 175 insertions(+)
 create mode 100644 reports/sequential_report.md
 create mode 100644 solutions/library.py
 create mode 100644 solutions/main.py
 create mode 100644 solutions/test_yt_dlp.py

diff --git a/reports/sequential_report.md b/reports/sequential_report.md
new file mode 100644
index 0000000..dd618fa
--- /dev/null
+++ b/reports/sequential_report.md
@@ -0,0 +1,20 @@
+# Report
+
+## Serial execution
+
+Total time: 9.01 seconds
+
+## Complexity
+
+Time complexity: O(n), because the program downloads each video one by one.
+
+Space complexity: O(n), because the program stores the list of video URLs from the CSV file.
+
+
+## Parallel execution
+
+Total time: 4.28 seconds
+
+## Comparison
+
+Speed improvement: 52.5%
diff --git a/solutions/library.py b/solutions/library.py
new file mode 100644
index 0000000..b056180
--- /dev/null
+++ b/solutions/library.py
@@ -0,0 +1,60 @@
+# # import yt_dlp
+
+
+# # def download_video(url):
+# #     ydl_opts = {
+# #         "outtmpl": "videos/%(title)s.%(ext)s"
+# #     }
+
+# #     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+# #         ydl.download([url])
+
+# import csv
+# import yt_dlp
+
+
+# def download_video(url):
+#     ydl_opts = {
+#         "outtmpl": "videos/%(title)s.%(ext)s"
+#     }
+
+#     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+#         ydl.download([url])
+
+
+# def read_video_urls(csv_path):
+#     urls = []
+
+#     with open(csv_path, newline="") as file:
+#         reader = csv.DictReader(file)
+
+#         for row in reader:
+#             urls.append(row["url"])
+
+#     return urls
+
+import csv
+import yt_dlp
+
+
+def read_video_urls(csv_path):
+    """Return the "url" column of the CSV file at csv_path as a list, in file order."""
+    urls = []
+
+    with open(csv_path, newline="") as file:
+        reader = csv.DictReader(file)
+
+        for row in reader:
+            urls.append(row["url"])
+
+    return urls
+
+
+def download_video(url):
+    """Download the video at url with yt-dlp, saving it as videos/<title>.<ext>."""
+    ydl_opts = {
+        "outtmpl": "videos/%(title)s.%(ext)s"
+    }
+
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([url])
\ No newline at end of file
diff --git a/solutions/main.py b/solutions/main.py
new file mode 100644
index 0000000..7ce983c
--- /dev/null
+++ b/solutions/main.py
@@ -0,0 +1,93 @@
+# # # from library import download_video
+
+
+# # # def main():
+# # #     url = input("Enter YouTube URL: ")
+# # #     download_video(url)
+
+
+# # # if __name__ == "__main__":
+# # #     main()
+
+# # from library import read_video_urls
+
+
+# # def main():
+# #     urls = read_video_urls("data/video_urls.csv")
+
+# #     print(urls)
+
+
+# # if __name__ == "__main__":
+# #     main()
+
+# import time
+# from library import read_video_urls, download_video
+
+
+# def main():
+#     urls = read_video_urls("data/video_urls.csv")
+
+#     total_start = time.perf_counter()
+
+#     for url in urls:
+#         start = time.perf_counter()
+#         download_video(url)
+#         end = time.perf_counter()
+
+#         elapsed = round(end - start, 2)
+#         print(f"Downloaded one video in: {elapsed}")
+
+#     total_end = time.perf_counter()
+#     serial_time = round(total_end - total_start, 2)
+
+#     print(f"Serial execution: {serial_time}")
+
+#     with open("reports/sequential_report.md", "w") as file:
+#         file.write("# Report\n\n")
+#         file.write("## Serial execution\n\n")
+#         file.write(f"Total time: {serial_time} seconds\n\n")
+#         file.write("## Complexity\n\n")
+#         file.write("Time complexity: O(n), because the program downloads each video one by one.\n\n")
+#         file.write("Space complexity: O(n), because the program stores the list of video URLs from the CSV file.\n")
+
+
+# if __name__ == "__main__":
+#     main()
+
+import time
+from multiprocessing import Pool
+from library import read_video_urls, download_video
+
+
+def main():
+    """Download every listed video in parallel and append the timing comparison to the report."""
+    urls = read_video_urls("data/video_urls.csv")
+
+    # Serial total measured in Phase 05 (the "Serial execution" figure in the report).
+    serial_time = 9.01  # seconds; leaving this at 0 forces the reported improvement to 0%
+
+    start = time.perf_counter()
+
+    with Pool() as pool:
+        pool.map(download_video, urls)
+
+    end = time.perf_counter()
+    parallel_time = round(end - start, 2)
+
+    print(f"Parallel execution: {parallel_time}")
+
+    if serial_time > 0:
+        speed_improvement = round(((serial_time - parallel_time) / serial_time) * 100, 2)
+    else:
+        speed_improvement = 0
+
+    with open("reports/sequential_report.md", "a") as file:
+        file.write("\n\n## Parallel execution\n\n")
+        file.write(f"Total time: {parallel_time} seconds\n\n")
+        file.write("## Comparison\n\n")
+        file.write(f"Speed improvement: {speed_improvement}%\n")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/solutions/test_yt_dlp.py b/solutions/test_yt_dlp.py
new file mode 100644
index 0000000..cf1d381
--- /dev/null
+++ b/solutions/test_yt_dlp.py
@@ -0,0 +1,2 @@
+import yt_dlp
+print("Import successful")
\ No newline at end of file