ChatGPT Tutorial
Run on Google Colab | View source on GitHub | Download notebook |
Connect to EvaDB
%pip install --quiet "evadb[document,notebook]"
import evadb
cursor = evadb.connect().cursor()
Note: you may need to restart the kernel to use updated packages.
Download News Video and ChatGPT UDF
# Download News Video
!wget -nc "https://www.dropbox.com/s/rfm1kds2mv77pca/russia_ukraine.mp4?dl=0" -O russia_ukraine.mp4
# Download ChatGPT UDF if needed
!wget -nc https://raw.githubusercontent.com/georgia-tech-db/eva/master/evadb/udfs/chatgpt.py -O chatgpt.py
File ‘russia_ukraine.mp4’ already there; not retrieving.
File ‘chatgpt.py’ already there; not retrieving.
Visualize Video Frame
import cv2
from matplotlib import pyplot as plt
def show_video_frame(input_video_path, show_frame_number=100):
    """Display one frame of a video file with matplotlib.

    Args:
        input_video_path: Path of the video file to open.
        show_frame_number: Index of the frame to seek to before reading.

    Raises:
        ValueError: If the requested frame cannot be read (for example the
            file is missing, unreadable, or shorter than the requested index).
    """
    vcap = cv2.VideoCapture(input_video_path)
    try:
        # Seek to the requested frame, then decode it.
        vcap.set(cv2.CAP_PROP_POS_FRAMES, show_frame_number)
        ret, frame = vcap.read()
    finally:
        # Always free the capture handle, even if seeking/reading raised.
        vcap.release()

    if not ret:
        raise ValueError(
            f"Could not read frame {show_frame_number} from {input_video_path!r}"
        )

    # OpenCV decodes frames in BGR channel order, while matplotlib expects RGB.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    plt.imshow(frame)
    plt.show()
show_video_frame('russia_ukraine.mp4')
Set your OpenAI API key here
# Set your OpenAI key as an environment variable
import os
# Uncomment the next line and paste your own key to set it from inside the notebook.
#os.environ['OPENAI_KEY'] = 'sk-....................'
# Read the key back from the environment; the result is None when OPENAI_KEY is unset.
# NOTE(review): open_ai_key is not referenced by the cells visible here -- presumably
# the ChatGPT UDF looks up OPENAI_KEY on its own; confirm against chatgpt.py.
open_ai_key = os.environ.get('OPENAI_KEY')
# Drop the UDF if it already exists, so this cell can be re-run
cursor.query("DROP UDF IF EXISTS ChatGPT;").df()
# Register the ChatGPT UDF in EvaDB from the downloaded implementation file.
# The query has no placeholders, so a plain (non-f) string is used.
create_udf_query = """CREATE UDF ChatGPT
IMPL 'chatgpt.py' """
cursor.query(create_udf_query).df()
0 | |
---|---|
0 | UDF ChatGPT successfully added to the database. |
Run the ChatGPT UDF
# Load the news video into the VIDEOS table.
# Any existing VIDEOS table is dropped first so this cell can be re-run.
cursor.drop_table("VIDEOS", if_exists=True).df()
cursor.query("LOAD VIDEO 'russia_ukraine.mp4' INTO VIDEOS;").df()
0 | |
---|---|
0 | Number of loaded VIDEO: 1 |
# Drop the SpeechRecognizer UDF if it already exists
cursor.query("DROP UDF IF EXISTS SpeechRecognizer;").df()
# Create a speech-recognition UDF using Hugging Face
# (task: automatic-speech-recognition, model: openai/whisper-base).
# NOTE: despite the variable name, this registers a speech recognizer,
# not a text summarizer.
text_summarizer_udf_creation = """
CREATE UDF SpeechRecognizer
TYPE HuggingFace
'task' 'automatic-speech-recognition'
'model' 'openai/whisper-base';
"""
cursor.query(text_summarizer_udf_creation).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
0 | |
---|---|
0 | UDF SpeechRecognizer successfully added to the... |
Configure Pandas Display
import pandas as pd
# A max_colwidth of None disables column truncation, so long text values
# are displayed in full.
pd.set_option('display.max_colwidth', None)
# Drop the TEXT_SUMMARY table if it already exists, so this cell can be re-run
cursor.query("DROP TABLE IF EXISTS TEXT_SUMMARY;").df()
# Materialize the SpeechRecognizer output for the audio of every row in VIDEOS
# into the TEXT_SUMMARY table.
# NOTE: despite the "summary" naming, the query stores speech-recognition
# output; no summarization step is applied here.
text_summarization_query = """
CREATE TABLE
TEXT_SUMMARY AS
SELECT SpeechRecognizer(audio) FROM VIDEOS;
"""
cursor.query(text_summarization_query).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
# Run the ChatGPT UDF over the `text` column of TEXT_SUMMARY (the table
# populated from the SpeechRecognizer output), passing the question below
# as the first argument.
# NOTE: chatgpt_udf holds the query string, not the UDF itself.
chatgpt_udf = """
SELECT ChatGPT('Is this video summary related to Ukraine russia war',text)
FROM TEXT_SUMMARY;
"""
cursor.query(chatgpt_udf).df()
chatgpt.response | |
---|---|
0 | Yes, the video summary is related to the Ukraine-Russia war as it discusses how US oil companies are profiting from the conflict. |
Check if it works on an SNL Video
# Download Entertainment Video
!wget -nc "https://www.dropbox.com/s/u66im8jw2s1dmuw/snl.mp4?dl=0" -O snl.mp4
cursor.query("DROP TABLE IF EXISTS SNL_VIDEO;").df()
cursor.query("LOAD VIDEO 'snl.mp4' INTO SNL_VIDEO;").df()
File ‘snl.mp4’ already there; not retrieving.
0 | |
---|---|
0 | Number of loaded VIDEO: 1 |
show_video_frame('snl.mp4')
# Drop the SNL_TEXT_SUMMARY table if it already exists, so this cell can be re-run
cursor.query("DROP TABLE IF EXISTS SNL_TEXT_SUMMARY;").df()
# Materialize the SpeechRecognizer output for the audio of every row in
# SNL_VIDEO into the SNL_TEXT_SUMMARY table.
# NOTE: this rebinds text_summarization_query, replacing any earlier value.
text_summarization_query = """
CREATE TABLE
SNL_TEXT_SUMMARY AS
SELECT SpeechRecognizer(audio) FROM SNL_VIDEO;
"""
cursor.query(text_summarization_query).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(