Skip to content

Youtube doc

Shabbir Governor edited this page Nov 29, 2021 · 2 revisions

Goal : Add audience retention score to the overall consolidation score for creative production

Problem Statement:

The consolidation score used to predict the next scene is a combination of global performance, cosine similarity, tag_presence, and color_presence. Our goal was to add audience_retention as a part of this consolidated score, so that the selected clips are the ones most likely to retain the audience, resulting in increased watch time for the creative.

Final Approach:

The video clip name is passed to a function that runs an SQL query and returns the audience score, which ranges from 0 to 100.

Inputs Required:

  • clip_name

Pseudo Code

  • Step 1: Pass the clip names from files_mid, files_end to audience_retention function
  • Step 2: The function calculates the audience retention score based on percent difference
  • Step 3: A score is returned
  • Step 4: A final score is calculated based on the new formula cons_score = cosine_similarity * global_performance * tag_res * color_res * audience_score
  • Step 5: The scene with the highest score is chosen as the best scene


Code which returns the audience score

def audience(clip_name):
    """Return the mean audience-retention value for assets matching *clip_name*.

    Runs a query joining ``fb_adaccount_video_insights`` to ``fb_app_assets``
    for every asset whose name contains ``clip_name``, takes element 0 of each
    row's ``video_play_curve_actions`` metadata and reads its ``value`` field
    as that row's retention curve.  The curves are averaged point by point
    across rows, points whose average is exactly zero are discarded, and the
    mean of the remaining points is returned.

    Args:
        clip_name: Substring matched against ``fb_app_assets.asset_name``
            (wrapped in ``%`` wildcards for a SQL ``LIKE``).

    Returns:
        The mean of the non-zero averaged curve points, or NaN when no
        matching row carries a curve or every averaged point is zero.
    """
    # NOTE(review): credentials are hard-coded in source; they should be moved
    # to environment variables or a secret store and rotated.
    conn = psycopg2.connect(
        user="ds_readonly",
        password="blkbox2020!",
        host="",
        port="5432",
        database="blkbox",
    )

    # The clip name is bound as a query parameter instead of being formatted
    # into the SQL text, so quotes in the name cannot break or alter the query.
    # The unused `dates` column of the sub-select (`cast( as text)`, which had
    # lost its expression and was not valid SQL) has been dropped; the outer
    # select never read it, so the returned rows are unaffected.
    sql = """select asset_name, value1::json->'value' as audience_curve from
    (SELECT a.video_id as asset_id, b.asset_name,
    cast(a.metadata ->> 'video_play_curve_actions' as jsonb) ->> 0 as value1 from
    fb_adaccount_video_insights a inner join fb_app_assets b on a.video_id = b.asset_id
        where b.asset_name like %(pattern)s) s;"""
    try:
        data = pd.read_sql_query(
            sql, conn, params={"pattern": "%{}%".format(clip_name)}
        )
    finally:
        # Rebinding `conn` to None does not release the connection; close it.
        conn.close()

    # Rows with a missing asset name or missing curve are ignored.
    curves = list(data.dropna().audience_curve)
    if not curves:
        return np.nan

    # zip(*curves) walks the curves in lockstep and stops at the shortest one.
    n_curves = len(curves)
    point_means = [sum(points) / n_curves for points in zip(*curves)]
    non_zero_means = [value for value in point_means if value != 0]
    if not non_zero_means:
        # np.mean([]) would also yield NaN, but with a RuntimeWarning.
        return np.nan
    return np.mean(non_zero_means)

Function which recommends scenes

def retrieve_most_similar_videos(arr,files,given_video,position,tag_list,color_list,nb_closest_videos):
    # Print, for each of the `nb_closest_videos` clips most similar to
    # `given_video`, its individual scores and a consolidated score:
    #   cosine similarity * performance * tag score * color score * audience retention
    #
    # arr, files        -- forwarded unchanged to cos(); their meaning is not
    #                      visible in this excerpt.
    # given_video       -- column label looked up in the result of cos().
    # position          -- 'mid' or 'end'; selects df_mid_performance or
    #                      df_end_performance (names defined outside this
    #                      function) and is forwarded to tag_func/color_func.
    # tag_list          -- items checked against tag_func(...)[0].
    # color_list        -- items checked against color_func(...)[0].
    # nb_closest_videos -- number of candidates taken after the first entry.
    #
    # NOTE(review): as shown here the function is not valid Python -- the
    # `if position == 'mid':` inside the loop is indented deeper than the
    # statement before it, and the final `if`/`elif` have no bodies. The
    # excerpt appears to be truncated; no return statement is visible.

    # presumably a DataFrame of pairwise cosine similarities -- verify against cos()
    cos_similarities_df = cos(arr,files)
    print("Original Add Video:")
    print("Most Similar Video Recommended:")

    # Sort descending and skip position 0 (presumably the given video itself,
    # which would be its own best match -- TODO confirm), then keep the next
    # nb_closest_videos labels and their similarity scores.
    closest_videos = cos_similarities_df[given_video].sort_values(ascending=False)[1:nb_closest_videos+1].index
    closest_videos_scores = cos_similarities_df[given_video].sort_values(ascending=False)[1:nb_closest_videos+1]

    for i in range(0,len(closest_videos)):
        result = closest_videos[i]

        # Get performance data from the performance dataframe.
        # The 'mid' and 'end' branches below are the same except for the
        # dataframe they query and the wording of one print message.
          if position == 'mid':
            perf = float(df_mid_performance.query("files_path == '{a}'".format(a=closest_videos[i])).performance)
            print('The global performance of video :',perf)

            # Audience retention score for the individual clip
            audience_score = float(df_mid_performance.query("files_path == '{a}'".format(a=closest_videos[i])).audience_retention)
            print("The audience_retention score for the clip is :",audience_score)

            # Function call to check whether the tag & color are present or not
            tag_check = tag_func(result,position)
            #tag_check = tag_list in tag_check[0]
            # True when at least one requested tag is in the clip's first tag entry
            tag_check = any(item in tag_check[0] for item in tag_list)
            # A missing tag scales the score by 0.8 instead of zeroing it
            tag_res = 1 if tag_check == True else 0.8
            print('Tag Score is :',tag_res)

            color_check = color_func(result,position)
            #color_check = color_list in color_check[0]
            # True when at least one requested color is in the clip's first color entry
            color_check = any(item in color_check[0] for item in color_list)
            # A missing color scales the score by 0.8 instead of zeroing it
            color_res = 1 if color_check == True else 0.8
            print('Color Score is :',color_res)

            print("Cosine Similarity Score: ",closest_videos_scores[i])
            # New formula: the consolidated score now also multiplies in audience_score
            final_score = closest_videos_scores[i] * perf * tag_res * color_res * audience_score
            print("Consolidated Score :", final_score)
          elif position == 'end':
            perf = float(df_end_performance.query("files_path == '{a}'".format(a=closest_videos[i])).performance)
            print('The global performance of video :',perf)

            # Audience retention score for the scene
            audience_score = float(df_end_performance.query("files_path == '{a}'".format(a=closest_videos[i])).audience_retention)
            print("The audience_retention score for the clip is :",audience_score)

            # Function call to check whether the tag & color are present or not
            tag_check = tag_func(result,position)
            #tag_check = tag_list in tag_check[0]
            # True when at least one requested tag is in the clip's first tag entry
            tag_check = any(item in tag_check[0] for item in tag_list)
            # A missing tag scales the score by 0.8 instead of zeroing it
            tag_res = 1 if tag_check == True else 0.8
            print('Tag score is',tag_res)

            color_check = color_func(result,position)
            #color_check = color_list in color_check[0]
            # True when at least one requested color is in the clip's first color entry
            color_check = any(item in color_check[0] for item in color_list)
            # A missing color scales the score by 0.8 instead of zeroing it
            color_res = 1 if color_check == True else 0.8
            print('Color Score is :',color_res)

            print("Cosine Similarity Score: ",closest_videos_scores[i])
            # New formula: the consolidated score now also multiplies in audience_score
            final_score = closest_videos_scores[i] * perf * tag_res * color_res * audience_score
            print("Consolidated Score :", final_score)


       # print("Cosine Similarity Score: ",closest_videos_scores[i])
       # print('The global performance of video :',perf)
       # print('The tag score is :',tag_res)
       # print('The primary color score:',color_res)
       # print("Consolidated Score :", final_score)

        # NOTE(review): the bodies of the two branches below are missing from
        # this excerpt, so what happens with final_score afterwards is not shown.
        if position == 'mid':
        elif position == 'end':
console.log('Hi this is javascript');

Final complete code Link

Click to view the code

Example Results:
