ChatGPT Tutorial

Run on Google Colab View source on GitHub Download notebook


Connect to EvaDB

# Install the evadb package from within the notebook (IPython %pip magic).
%pip install --quiet evadb
import evadb
# Keep a single cursor; every later cell issues its queries through it.
cursor = evadb.connect().cursor()

Download News Video and ChatGPT UDF

# Download News Video
# (-nc: skip the download when the target file is already present)
!wget -nc "https://www.dropbox.com/s/rfm1kds2mv77pca/russia_ukraine.mp4?dl=0" -O russia_ukraine.mp4

# Download ChatGPT UDF if needed; the CREATE UDF ... IMPL 'chatgpt.py'
# statement later in this notebook registers the UDF from this local file.
!wget -nc https://raw.githubusercontent.com/georgia-tech-db/eva/master/evadb/udfs/chatgpt.py -O chatgpt.py
--2023-06-04 16:33:58--  https://www.dropbox.com/s/rfm1kds2mv77pca/russia_ukraine.mp4?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.81.18, 2620:100:6031:18::a27d:5112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.81.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/rfm1kds2mv77pca/russia_ukraine.mp4 [following]
--2023-06-04 16:33:58--  https://www.dropbox.com/s/raw/rfm1kds2mv77pca/russia_ukraine.mp4
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com/cd/0/inline/B9WbTfMnot4ocyTi1x4NyHyJX7D9SCSL9ZWhHTqQu0aHbqLcPPn0OUMN0DjN2yAZ3AJqTwe9JTxAc-tn4B83OPFhyk9z9uw6kYgU3gjnirTDcBaQEGrEgcO9barLYe9r_m4aV3PInAEKmQfPVZcYuH5UEngNpxm027bp7-Tr5zAkGg/file# [following]
--2023-06-04 16:33:59--  https://uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com/cd/0/inline/B9WbTfMnot4ocyTi1x4NyHyJX7D9SCSL9ZWhHTqQu0aHbqLcPPn0OUMN0DjN2yAZ3AJqTwe9JTxAc-tn4B83OPFhyk9z9uw6kYgU3gjnirTDcBaQEGrEgcO9barLYe9r_m4aV3PInAEKmQfPVZcYuH5UEngNpxm027bp7-Tr5zAkGg/file
Resolving uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com (uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com)... 162.125.81.15, 2620:100:6031:15::a27d:510f
Connecting to uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com (uc56e71d89460f5d56c763d04279.dl.dropboxusercontent.com)|162.125.81.15|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5341582 (5.1M) [video/mp4]
Saving to: ‘russia_ukraine.mp4’

russia_ukraine.mp4  100%[===================>]   5.09M  6.21MB/s    in 0.8s    

2023-06-04 16:34:01 (6.21 MB/s) - ‘russia_ukraine.mp4’ saved [5341582/5341582]

--2023-06-04 16:34:01--  https://raw.githubusercontent.com/georgia-tech-db/eva/master/eva/udfs/chatgpt.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3102 (3.0K) [text/plain]
Saving to: ‘chatgpt.py’

chatgpt.py          100%[===================>]   3.03K  --.-KB/s    in 0s      

2023-06-04 16:34:01 (29.7 MB/s) - ‘chatgpt.py’ saved [3102/3102]

Set your OpenAI API key here

import os

# The package installed and imported by this notebook is `evadb`, so the
# configuration manager must be imported from `evadb` as well (the older
# `eva` package name is not what `%pip install evadb` provides).
from evadb.configuration.configuration_manager import ConfigurationManager

# Assuming that the key is stored as an environment variable.
# NOTE(review): os.environ.get returns None when OPENAI_KEY is unset, in which
# case None is what gets written to the configuration -- confirm the variable
# is exported before running the ChatGPT query below.
open_ai_key = os.environ.get('OPENAI_KEY')
ConfigurationManager().update_value("third_party", "openai_api_key", open_ai_key)

# Drop the UDF if it already exists
cursor.query("DROP UDF IF EXISTS ChatGPT;").df()

# Register the ChatGPT UDF in EvaDB from the downloaded 'chatgpt.py'.
# (Plain string: the statement has no placeholders, so no f-prefix is needed.)
create_udf_query = """CREATE UDF ChatGPT
                       IMPL 'chatgpt.py' """
cursor.query(create_udf_query).df()
                                  0
0  UDF ChatGPT successfully dropped
0
0 UDF ChatGPT successfully added to the database.


Run the ChatGPT UDF

OPENAI UDF

# Load the video: first drop the VIDEOS table if it exists, then load
# russia_ukraine.mp4 into a table of that name.
cursor.drop(item_name="VIDEOS", item_type="TABLE", if_exists=True).df()
cursor.query("LOAD VIDEO 'russia_ukraine.mp4' INTO VIDEOS;").df()
0
0 Number of loaded VIDEO: 1


# Drop the SpeechRecognizer UDF if it already exists
cursor.query("DROP UDF IF EXISTS SpeechRecognizer;").df()

# Create a speech-recognition UDF using Hugging Face
# (task 'automatic-speech-recognition', model 'openai/whisper-base').
# NOTE: despite the variable name, this statement does not create a text
# summarizer; the UDF it defines is SpeechRecognizer.
text_summarizer_udf_creation = """
        CREATE UDF SpeechRecognizer 
        TYPE HuggingFace 
        'task' 'automatic-speech-recognition' 
        'model' 'openai/whisper-base';
        """
cursor.query(text_summarizer_udf_creation).df()
0
0 UDF SpeechRecognizer successfully added to the...


# Drop the table if needed
cursor.query("DROP TABLE IF EXISTS TEXT_SUMMARY;").df()


# Create a materialized view of the SpeechRecognizer output computed over the
# audio of VIDEOS, stored under a single column named `text`.
# NOTE: the view is called TEXT_SUMMARY, but what it holds is the output of
# SpeechRecognizer(audio), not a summary produced by a summarization model.
text_summarization_query = """
    CREATE MATERIALIZED VIEW 
    TEXT_SUMMARY(text) AS 
    SELECT SpeechRecognizer(audio) FROM VIDEOS; 
    """
cursor.query(text_summarization_query).df()


# Run ChatGPT over the text stored in TEXT_SUMMARY (extracted by Whisper).
# The first argument is the question; the second is the `text` column of the
# view. The answer is returned in the `chatgpt.response` column.
chatgpt_udf = """
      SELECT ChatGPT('Is this video summary related to Ukraine russia war',text) 
      FROM TEXT_SUMMARY;
      """
cursor.query(chatgpt_udf).df()
chatgpt.response
0 Yes, the video summary is related to the Ukrai...


Check if it works on an SNL Video

# Download Entertainment Video
# (-nc: skip the download when snl.mp4 is already present)
!wget -nc "https://www.dropbox.com/s/u66im8jw2s1dmuw/snl.mp4?dl=0" -O snl.mp4

# Drop the SNL_VIDEO table if it exists before loading into it
cursor.query("DROP TABLE IF EXISTS SNL_VIDEO;").df()

# Load the downloaded clip into the SNL_VIDEO table
cursor.query("LOAD VIDEO 'snl.mp4' INTO SNL_VIDEO;").df()
File 'snl.mp4' already there; not retrieving.
0
0 Number of loaded VIDEO: 1


# Drop the table if needed
cursor.query("DROP TABLE IF EXISTS SNL_TEXT_SUMMARY;").df()


# Create a materialized view of the SpeechRecognizer output computed over the
# audio of SNL_VIDEO, stored under a single column named `text`.
# NOTE: as with TEXT_SUMMARY, the view holds SpeechRecognizer(audio) output,
# not the result of a summarization model. This rebinds the
# `text_summarization_query` name used for the earlier view.
text_summarization_query = """
    CREATE MATERIALIZED VIEW 
    SNL_TEXT_SUMMARY(text) AS 
    SELECT SpeechRecognizer(audio) FROM SNL_VIDEO;
    """
cursor.query(text_summarization_query).df()