Skip to content

Youtube doc

Shabbir Governor edited this page Nov 29, 2021 · 2 revisions

Goal : Add audience retention score to the overall consolidation score for creative production

Problem Statement:

The consolidation score, which is used to predict the next scene, is a mixture of global performance, cosine similarity, tag_presence, and color_presence. Our goal was to add audience_retention as part of this consolidated score, so that the clips most likely to retain the audience are chosen, resulting in increased watch time for the creative.

Final Approach:

The video clip name is passed to a function that runs an SQL query and returns the audience score, which ranges from 0 to 100.

Inputs Required:

  • clip_name

Pseudo Code

  • Step 1: Pass the clip names from files_mid, files_end to audience_retention function
  • Step 2: The function calculates the audience retention score based on percent difference
  • Step 3: A score is returned
  • Step 4: A final score is calculated based on the new formula cons_score = cosine_similarity * global_performance * tag_res * color_res * audience_score
  • Step 5: Best scene with highest score is chosen

Code_reference

Code which returns the audience score

def audience(clip_name):
    """Return the mean audience-retention value for assets matching *clip_name*.

    Joins ``fb_adaccount_video_insights`` to ``fb_app_assets`` and selects every
    row whose ``asset_name`` contains ``clip_name``. For each row, the ``value``
    field of the first ``video_play_curve_actions`` entry is used as that row's
    retention curve. The curves are averaged position by position, zero
    averages are dropped, a single 0 is appended, and the mean of the resulting
    list is returned.

    Args:
        clip_name: Substring to search for in ``fb_app_assets.asset_name``.

    Returns:
        The mean described above; ``0.0`` when no matching row has a curve.
    """
    # NOTE(review): the credentials are hard-coded; they should be moved to
    # configuration or environment variables instead of living in source.
    conn = psycopg2.connect(user = "ds_readonly", password = "blkbox2020!",host = "db.blkbox.ai",port = "5432",database = "blkbox")

    # The LIKE pattern is passed as a bound parameter so that quotes or other
    # SQL syntax inside clip_name cannot alter the statement.
    sql = """select asset_name, value1::json->'value' as audience_curve from 
    (SELECT a.video_id as asset_id, b.asset_name, cast(a.date as text) as dates,
    cast(a.metadata ->> 'video_play_curve_actions' as jsonb) ->> 0 as value1 from
    fb_adaccount_video_insights a inner join fb_app_assets b on a.video_id = b.asset_id
        where b.asset_name like %(pattern)s) s;"""
    try:
        data = pd.read_sql_query(sql, conn, params={"pattern": "%{}%".format(clip_name)})
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    # Rows with a missing name or curve are ignored.
    # Assumes each remaining curve was decoded into a list of numbers -- TODO confirm.
    curves = list(data.dropna().audience_curve)

    # zip() stops at the shortest curve, so only positions present in every
    # curve contribute to the average.
    position_means = [sum(values) / len(curves) for values in zip(*curves)]

    kept = [value for value in position_means if value != 0]
    # Kept from the original implementation: one trailing 0 is always part of
    # the mean (it also makes the empty case return 0.0 instead of NaN).
    kept.append(0)
    return np.mean(kept)

Function which recommends scenes

def _only_value(column):
    """Return the single element of *column* as a float; raise if it is not unique."""
    if len(column) != 1:
        raise ValueError("expected exactly one matching row, found {}".format(len(column)))
    return float(column.iloc[0])


def _consolidated_score(clip, similarity, position, tag_list, color_list):
    """Print every component score of *clip* and return their product."""
    perf_table = df_mid_performance if position == 'mid' else df_end_performance
    # Boolean mask instead of a formatted query string, so clip names that
    # contain quotes are matched correctly.
    clip_rows = perf_table[perf_table['files_path'] == clip]

    # Get performance data from the performance dataframe
    perf = _only_value(clip_rows.performance)
    print('The global performance of video :',perf)

    # Audience retention score for the individual clip
    audience_score = _only_value(clip_rows.audience_retention)
    print("The audience_retention score for the clip is :",audience_score)

    # A clip without any of the requested tags is scaled down to 0.8.
    tags = tag_func(clip,position)
    tag_res = 1 if any(item in tags[0] for item in tag_list) else 0.8
    # The label text differs between the two positions in the existing
    # output; both spellings are kept verbatim.
    if position == 'mid':
        print('Tag Score is :',tag_res)
    else:
        print('Tag score is',tag_res)

    # Same rule for the requested colors.
    colors = color_func(clip,position)
    color_res = 1 if any(item in colors[0] for item in color_list) else 0.8
    print('Color Score is :',color_res)

    print("Cosine Similarity Score: ",similarity)

    # New formula: audience retention is one more factor of the product.
    return similarity * perf * tag_res * color_res * audience_score


def retrieve_most_similar_videos(arr,files,given_video,position,tag_list,color_list,nb_closest_videos):
    """Score the clips most similar to *given_video* and record them.

    Builds the similarity table with ``cos(arr, files)``, takes the
    ``nb_closest_videos`` entries ranked directly after the top one in the
    ``given_video`` column, and for each of them computes

        cosine_similarity * performance * tag_res * color_res * audience_score

    The score is appended to the module-level ``cons_score`` list (0 when any
    component cannot be computed) and the clip name is appended to ``M``
    (``position == 'mid'``) or ``E`` (``position == 'end'``). For any other
    ``position`` the clip is only printed.

    Args:
        arr: Forwarded to ``cos``.
        files: Forwarded to ``cos``.
        given_video: Column label of the reference video in the similarity table.
        position: ``'mid'`` or ``'end'``; selects the performance table and
            the list that receives the clip names.
        tag_list: Tags of which at least one must be present for a tag score of 1.
        color_list: Colors of which at least one must be present for a color score of 1.
        nb_closest_videos: Number of candidates to evaluate.

    Returns:
        None. Results are delivered through ``cons_score``, ``M`` and ``E``.
    """
    cos_similarities_df = cos(arr,files)
    print("*********************************")
    print("Original Add Video:")
    print("*********************************")
    print(given_video)
    print("-----------------------------------------------------------------------")
    print("Most Similar Video Recommended:")

    # Sort once. The top-ranked entry is skipped -- presumably it is the given
    # video itself; confirm against cos().
    ranked = cos_similarities_df[given_video].sort_values(ascending=False).iloc[1:nb_closest_videos+1]

    for result, similarity in zip(ranked.index, ranked):
        print(result)

        if position in ('mid', 'end'):
            try:
                final_score = _consolidated_score(result, similarity, position, tag_list, color_list)
            except Exception as exc:
                # Best effort: a clip that cannot be scored still gets an
                # entry so cons_score stays aligned with M / E.
                print("Could not score", result, ":", exc)
                cons_score.append(0)
            else:
                cons_score.append(final_score)
                print("Consolidated Score :", final_score)
                if position == 'mid':
                    print()

        print('---------------------------------------------------------------------')

        if position == 'mid':
            M.append(result)
        elif position == 'end':
            E.append(result)
console.log('Hi this is javascript');

Final complete code Link

Click to view the code

Example Results:

Result_Link