ChatGPT Tutorial#

Run on Google Colab View source on GitHub Download notebook

Start EVA server#

We reuse the start-server notebook to launch the EVA server and obtain a connection to it.

# Fetch the shared server-startup notebook. -nc (no-clobber) skips the download
# when the file is already present locally.
!wget -nc "https://raw.githubusercontent.com/georgia-tech-db/eva/master/tutorials/00-start-eva-server.ipynb"
# Execute the downloaded notebook from within this session.
%run 00-start-eva-server.ipynb

# connect_to_server() is not defined in this file -- presumably it is provided by
# the notebook executed above (TODO confirm). The cursor it returns is the handle
# every later cell uses to send queries and fetch results.
cursor = connect_to_server()
File ‘00-start-eva-server.ipynb’ already there; not retrieving.
[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.
nohup eva_server > eva.log 2>&1 &
[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

Download News Video and ChatGPT UDF#

# Download the news video and save it as russia_ukraine.mp4
# (-nc skips the fetch when the file already exists)
!wget -nc "https://www.dropbox.com/s/rfm1kds2mv77pca/russia_ukraine.mp4?dl=0" -O russia_ukraine.mp4

# Download the ChatGPT UDF implementation as chatgpt.py if it is not already present;
# this is the file referenced by the CREATE UDF ... IMPL statement later on
!wget -nc https://raw.githubusercontent.com/georgia-tech-db/eva/master/eva/udfs/chatgpt.py -O chatgpt.py
File ‘russia_ukraine.mp4’ already there; not retrieving.
File ‘chatgpt.py’ already there; not retrieving.

Visualize Video#

from IPython.display import Video

# Build an embedded player for the downloaded news clip; leaving the object as
# the final expression lets the notebook render it.
news_clip = Video("russia_ukraine.mp4", embed=True, width=800, height=450)
news_clip

Set your OpenAI API key here#

import os

from eva.configuration.configuration_manager import ConfigurationManager

# The key is read from the OPENAI_KEY environment variable so it never has to be
# written into the notebook itself.
open_ai_key = os.environ.get('OPENAI_KEY')

if open_ai_key:
    ConfigurationManager().update_value("third_party", "openai_api_key", open_ai_key)
else:
    # os.environ.get() returns None for an unset variable. Passing that on would
    # store a missing key in the configuration, so skip the update and say so.
    print(
        "OPENAI_KEY is not set; the EVA 'openai_api_key' setting was left unchanged. "
        "Export OPENAI_KEY and re-run this cell before using the ChatGPT UDF."
    )
# Drop the UDF if it already exists so the CREATE below starts from a clean state
drop_udf_query = "DROP UDF IF EXISTS ChatGPT;"
cursor.execute(drop_udf_query)
response = cursor.fetch_all()
response.as_df()

# Register the ChatGPT UDF in EVA; IMPL names the chatgpt.py file that holds
# the UDF implementation.
create_udf_query = """CREATE UDF ChatGPT
                       IMPL 'chatgpt.py' """
cursor.execute(create_udf_query)
response = cursor.fetch_all()
# Left as the last expression so the notebook shows the result as a DataFrame.
response.as_df()
0
0 UDF ChatGPT successfully added to the database.

Run the ChatGPT UDF#

OPENAI UDF

# Drop the VIDEOS table if it is left over from an earlier run, so the load
# below starts from a fresh table (same DROP-then-LOAD pattern this tutorial
# uses for SNL_VIDEO).
cursor.execute("DROP TABLE IF EXISTS VIDEOS;")
response = cursor.fetch_all()
response.as_df()

# Load the news video into the VIDEOS table
cursor.execute("LOAD VIDEO 'russia_ukraine.mp4' INTO VIDEOS;")
response = cursor.fetch_all()
response.as_df()
0
0 Number of loaded VIDEO: 1
# Drop the speech-recognition UDF if it is already registered
cursor.execute("DROP UDF IF EXISTS SpeechRecognizer;")
response = cursor.fetch_all()
response.as_df()

# Create a speech-recognition UDF using Hugging Face: the
# 'automatic-speech-recognition' task with the openai/whisper-base model.
speech_recognizer_udf_creation = """
        CREATE UDF SpeechRecognizer 
        TYPE HuggingFace 
        'task' 'automatic-speech-recognition' 
        'model' 'openai/whisper-base';
        """
cursor.execute(speech_recognizer_udf_creation)
response = cursor.fetch_all()
response.as_df()
0
0 UDF SpeechRecognizer successfully added to the...
# Drop the table if needed
cursor.execute("DROP TABLE IF EXISTS TEXT_SUMMARY;")
response = cursor.fetch_all()
response.as_df()


# Create a materialized view holding the SpeechRecognizer output for the audio
# of VIDEOS. Despite its name, TEXT_SUMMARY stores the speech-recognition text
# in its `text` column, not a summary.
transcript_view_query = """
    CREATE MATERIALIZED VIEW 
    TEXT_SUMMARY(text) AS 
    SELECT SpeechRecognizer(audio) FROM VIDEOS; 
    """
cursor.execute(transcript_view_query)
response = cursor.fetch_all()
response.as_df()
# Pass the question below, together with the Whisper-generated text of each
# TEXT_SUMMARY row, to the ChatGPT UDF.
relevance_query = """
      SELECT ChatGPT('Is this video summary related to Ukraine russia war',text) 
      FROM TEXT_SUMMARY;
      """
cursor.execute(relevance_query)
response = cursor.fetch_all()
response.as_df()
chatgpt.response
0 No, this video summary is not related to the U...
1 Yes, the video summary is related to the Ukrai...

Check if it works on an SNL Video#

# Download the entertainment (SNL) video and save it as snl.mp4
# (-nc skips the fetch when the file already exists)
!wget -nc "https://www.dropbox.com/s/u66im8jw2s1dmuw/snl.mp4?dl=0" -O snl.mp4

# Drop SNL_VIDEO if it already exists so the load below starts from a fresh table
cursor.execute("DROP TABLE IF EXISTS SNL_VIDEO;")
response = cursor.fetch_all()
response.as_df()

# Load the downloaded clip into the SNL_VIDEO table
cursor.execute("LOAD VIDEO 'snl.mp4' INTO SNL_VIDEO;")
response = cursor.fetch_all()
response.as_df()
File ‘snl.mp4’ already there; not retrieving.
0
0 Number of loaded VIDEO: 1
from IPython.display import Video

# Build an embedded player for the SNL clip; leaving the object as the final
# expression lets the notebook render it.
snl_clip = Video("snl.mp4", embed=True, width=800, height=450)
snl_clip
# Drop the table if needed
cursor.execute("DROP TABLE IF EXISTS SNL_TEXT_SUMMARY;")
response = cursor.fetch_all()
response.as_df()


# Create a materialized view holding the SpeechRecognizer output for the audio
# of SNL_VIDEO. Despite its name, SNL_TEXT_SUMMARY stores the speech-recognition
# text in its `text` column, not a summary.
snl_transcript_view_query = """
    CREATE MATERIALIZED VIEW 
    SNL_TEXT_SUMMARY(text) AS 
    SELECT SpeechRecognizer(audio) FROM SNL_VIDEO;
    """
cursor.execute(snl_transcript_view_query)
response = cursor.fetch_all()
response.as_df()