From 54ec76037403d914e50f867c910f1bd5564474ef Mon Sep 17 00:00:00 2001
From: Admin <admin@MacBook-Pro.local>
Date: Mon, 18 May 2026 19:14:39 +0100
Subject: [PATCH] BDA capstone activity

---
 reports/sequential_report.md | 20 ++++++++
 solutions/library.py         | 58 +++++++++++++++++++++++
 solutions/main.py            | 92 ++++++++++++++++++++++++++++++++++++
 solutions/test_yt_dlp.py     |  2 +
 4 files changed, 172 insertions(+)
 create mode 100644 reports/sequential_report.md
 create mode 100644 solutions/library.py
 create mode 100644 solutions/main.py
 create mode 100644 solutions/test_yt_dlp.py

diff --git a/reports/sequential_report.md b/reports/sequential_report.md
new file mode 100644
index 0000000..dd618fa
--- /dev/null
+++ b/reports/sequential_report.md
@@ -0,0 +1,20 @@
+# Report
+
+## Serial execution
+
+Total time: 9.01 seconds
+
+## Complexity
+
+Time complexity: O(n), because the program downloads each video one by one.
+
+Space complexity: O(n), because the program stores the list of video URLs from the CSV file.
+
+
+## Parallel execution
+
+Total time: 4.28 seconds
+
+## Comparison
+
+Speed improvement: 52.5%
diff --git a/solutions/library.py b/solutions/library.py
new file mode 100644
index 0000000..b056180
--- /dev/null
+++ b/solutions/library.py
@@ -0,0 +1,58 @@
+# # import yt_dlp
+
+
+# # def download_video(url):
+# #     ydl_opts = {
+# #         "outtmpl": "videos/%(title)s.%(ext)s"
+# #     }
+
+# #     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+# #         ydl.download([url])
+
+# import csv
+# import yt_dlp
+
+
+# def download_video(url):
+#     ydl_opts = {
+#         "outtmpl": "videos/%(title)s.%(ext)s"
+#     }
+
+#     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+#         ydl.download([url])
+
+
+# def read_video_urls(csv_path):
+#     urls = []
+
+#     with open(csv_path, newline="") as file:
+#         reader = csv.DictReader(file)
+
+#         for row in reader:
+#             urls.append(row["url"])
+
+#     return urls
+
+import csv
+import yt_dlp
+
+
+def read_video_urls(csv_path):
+    """Return the "url" value of every data row in the CSV at csv_path.
+
+    The file is parsed with csv.DictReader, so its first row is the header.
+    Values are returned in file order; a file with no data rows gives [].
+    """
+    # newline="" is what the csv module asks for when opening files.
+    with open(csv_path, newline="") as handle:
+        rows = csv.DictReader(handle)
+        return [row["url"] for row in rows]
+
+
+def download_video(url):
+    """Download the video at url with yt_dlp, saving it under videos/."""
+    # outtmpl: yt-dlp output template; the file is named "<title>.<ext>".
+    options = {"outtmpl": "videos/%(title)s.%(ext)s"}
+
+    with yt_dlp.YoutubeDL(options) as downloader:
+        downloader.download([url])
\ No newline at end of file
diff --git a/solutions/main.py b/solutions/main.py
new file mode 100644
index 0000000..7ce983c
--- /dev/null
+++ b/solutions/main.py
@@ -0,0 +1,92 @@
+# # # from library import download_video
+
+
+# # # def main():
+# # #     url = input("Enter YouTube URL: ")
+# # #     download_video(url)
+
+
+# # # if __name__ == "__main__":
+# # #     main()
+
+# # from library import read_video_urls
+
+
+# # def main():
+# #     urls = read_video_urls("data/video_urls.csv")
+
+# #     print(urls)
+
+
+# # if __name__ == "__main__":
+# #     main()
+
+# import time
+# from library import read_video_urls, download_video
+
+
+# def main():
+#     urls = read_video_urls("data/video_urls.csv")
+
+#     total_start = time.perf_counter()
+
+#     for url in urls:
+#         start = time.perf_counter()
+#         download_video(url)
+#         end = time.perf_counter()
+
+#         elapsed = round(end - start, 2)
+#         print(f"Downloaded one video in: {elapsed}")
+
+#     total_end = time.perf_counter()
+#     serial_time = round(total_end - total_start, 2)
+
+#     print(f"Serial execution: {serial_time}")
+
+#     with open("reports/sequential_report.md", "w") as file:
+#         file.write("# Report\n\n")
+#         file.write("## Serial execution\n\n")
+#         file.write(f"Total time: {serial_time} seconds\n\n")
+#         file.write("## Complexity\n\n")
+#         file.write("Time complexity: O(n), because the program downloads each video one by one.\n\n")
+#         file.write("Space complexity: O(n), because the program stores the list of video URLs from the CSV file.\n")
+
+
+# if __name__ == "__main__":
+#     main()
+
+import time
+from multiprocessing import Pool
+from library import read_video_urls, download_video
+
+
+def main():
+    """Download all listed videos in parallel and append timings to the report."""
+
+    urls = read_video_urls("data/video_urls.csv")
+
+    # Phase 05 serial total, as recorded under "Serial execution" in the report.
+    serial_time = 9.01
+
+    start = time.perf_counter()
+    with Pool() as pool:
+        pool.map(download_video, urls)
+    parallel_time = round(time.perf_counter() - start, 2)
+    print(f"Parallel execution: {parallel_time}")
+
+    # With no serial baseline there is nothing to compare against.
+    if serial_time > 0:
+        speed_improvement = round(((serial_time - parallel_time) / serial_time) * 100, 2)
+    else:
+        speed_improvement = 0
+
+    # Append: the serial section already in the report must be kept.
+    with open("reports/sequential_report.md", "a") as file:
+        file.write("\n\n## Parallel execution\n\n")
+        file.write(f"Total time: {parallel_time} seconds\n\n")
+        file.write("## Comparison\n\n")
+        file.write(f"Speed improvement: {speed_improvement}%\n")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/solutions/test_yt_dlp.py b/solutions/test_yt_dlp.py
new file mode 100644
index 0000000..cf1d381
--- /dev/null
+++ b/solutions/test_yt_dlp.py
@@ -0,0 +1,2 @@
+import yt_dlp  # smoke test: fails with ImportError if yt_dlp is not installed
+print("Import successful")  # reached only when the import above succeeded
\ No newline at end of file