ChatGPT Tutorial#

Run on Google Colab View source on GitHub Download notebook


Connect to EvaDB#

%pip install --quiet "evadb[document,notebook]"
import evadb
cursor = evadb.connect().cursor()
Note: you may need to restart the kernel to use updated packages.

Download News Video and ChatGPT UDF#

# Download News Video
!wget -nc "https://www.dropbox.com/s/rfm1kds2mv77pca/russia_ukraine.mp4?dl=0" -O russia_ukraine.mp4

# Download ChatGPT UDF if needed
!wget -nc https://raw.githubusercontent.com/georgia-tech-db/eva/master/evadb/udfs/chatgpt.py -O chatgpt.py
File ‘russia_ukraine.mp4’ already there; not retrieving.
File ‘chatgpt.py’ already there; not retrieving.

Visualize Video Frame#

import cv2
from matplotlib import pyplot as plt

def show_video_frame(input_video_path, show_frame_number=100):
    """Display one frame of a video file inline with matplotlib.

    Args:
        input_video_path: Path of the video file passed to ``cv2.VideoCapture``.
        show_frame_number: Index of the frame to seek to before reading
            (the value written to ``cv2.CAP_PROP_POS_FRAMES``). Defaults to 100.

    Raises:
        RuntimeError: If the video cannot be opened, or if no frame can be
            read at the requested position (e.g. the index is past the end).
    """
    vcap = cv2.VideoCapture(input_video_path)
    try:
        if not vcap.isOpened():
            raise RuntimeError(f"Could not open video: {input_video_path!r}")
        # Seek by named property instead of the bare constant 1.
        vcap.set(cv2.CAP_PROP_POS_FRAMES, show_frame_number)
        ret, frame = vcap.read()  # ret is False when no frame was decoded
    finally:
        # Always free the capture handle, even when opening/reading fails.
        vcap.release()

    if not ret:
        raise RuntimeError(
            f"Could not read frame {show_frame_number} from {input_video_path!r}"
        )

    # OpenCV decodes frames as BGR; matplotlib expects RGB.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    plt.imshow(frame)
    plt.show()
show_video_frame('russia_ukraine.mp4')
../../_images/5240433cd8ce1881c5855cd5bfab7159b08cb17e230895b5b6f39e9268e37a83.png

Set your OpenAI API key here#

# Provide your OpenAI key through the OPENAI_KEY environment variable.
# Either export it before starting the notebook, or uncomment the next line
# and paste your key.
import os
#os.environ['OPENAI_KEY'] = 'sk-....................'
# open_ai_key is None when OPENAI_KEY is not set.
# NOTE(review): open_ai_key is not referenced again in this section; presumably
# the ChatGPT UDF reads OPENAI_KEY from the environment itself -- confirm.
open_ai_key = os.environ.get('OPENAI_KEY')
# Drop the UDF if it already exists
cursor.query("DROP UDF IF EXISTS ChatGPT;").df()

# Register the ChatGPT UDF in EvaDB, implemented by the local file chatgpt.py.
# (Plain string: the query has no placeholders, so no f-string is needed.)
create_udf_query = """CREATE UDF ChatGPT
                       IMPL 'chatgpt.py' """
cursor.query(create_udf_query).df()
0
0 UDF ChatGPT successfully added to the database.

Run the ChatGPT UDF#

OPENAI UDF

# Load the news video into the VIDEOS table. Any existing VIDEOS table is
# dropped first (if_exists=True), so the LOAD below starts from a fresh table.
cursor.drop_table("VIDEOS", if_exists=True).df()
cursor.query("LOAD VIDEO 'russia_ukraine.mp4' INTO VIDEOS;").df()
0
0 Number of loaded VIDEO: 1
# Drop the SpeechRecognizer UDF if it already exists
cursor.query("DROP UDF IF EXISTS SpeechRecognizer;").df()

# Create a speech-recognition UDF using Hugging Face
# (task 'automatic-speech-recognition', model 'openai/whisper-base').
# NOTE(review): the variable name says "text_summarizer", but the query below
# registers a speech recognizer, not a summarizer.
text_summarizer_udf_creation = """
        CREATE UDF SpeechRecognizer 
        TYPE HuggingFace 
        'task' 'automatic-speech-recognition' 
        'model' 'openai/whisper-base';
        """
cursor.query(text_summarizer_udf_creation).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
  warnings.warn(
0
0 UDF SpeechRecognizer successfully added to the...

Configure Pandas Display#

import pandas as pd
pd.set_option('display.max_colwidth', None)
# Drop the table if needed
cursor.query("DROP TABLE IF EXISTS TEXT_SUMMARY;").df()


# Store the output of SpeechRecognizer, applied to the audio of VIDEOS, in the
# TEXT_SUMMARY table.
# NOTE(review): despite the "summary"/"summarization" naming, this query only
# applies the SpeechRecognizer UDF; no summarization step is applied here.
text_summarization_query = """
    CREATE TABLE
    TEXT_SUMMARY AS 
    SELECT SpeechRecognizer(audio) FROM VIDEOS; 
    """
cursor.query(text_summarization_query).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
  warnings.warn(
# Run ChatGPT over the text stored in TEXT_SUMMARY (the SpeechRecognizer output).
# The UDF is called with a question string and the table's `text` column.
chatgpt_udf = """
      SELECT ChatGPT('Is this video summary related to Ukraine russia war',text) 
      FROM TEXT_SUMMARY;
      """
cursor.query(chatgpt_udf).df()
chatgpt.response
0 Yes, the video summary is related to the Ukraine-Russia war as it discusses how US oil companies are profiting from the conflict.

Check if it works on an SNL Video#

# Download Entertainment Video
!wget -nc "https://www.dropbox.com/s/u66im8jw2s1dmuw/snl.mp4?dl=0" -O snl.mp4

cursor.query("DROP TABLE IF EXISTS SNL_VIDEO;").df()

cursor.query("LOAD VIDEO 'snl.mp4' INTO SNL_VIDEO;").df()
File ‘snl.mp4’ already there; not retrieving.
0
0 Number of loaded VIDEO: 1
show_video_frame('snl.mp4')
../../_images/2f573e5d593bda947352b089e1d34581f82aec048f6590ea3532a5b4513d1794.png
# Drop the table if needed
cursor.query("DROP TABLE IF EXISTS SNL_TEXT_SUMMARY;").df()


# Store the output of SpeechRecognizer, applied to the audio of SNL_VIDEO, in
# the SNL_TEXT_SUMMARY table.
# NOTE(review): despite the "summary"/"summarization" naming, this query only
# applies the SpeechRecognizer UDF; no summarization step is applied here.
text_summarization_query = """
    CREATE TABLE
    SNL_TEXT_SUMMARY AS 
    SELECT SpeechRecognizer(audio) FROM SNL_VIDEO;
    """
cursor.query(text_summarization_query).df()
/home/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
  warnings.warn(