Skip to content

Commit

Permalink
add report tables in output folder
Browse files Browse the repository at this point in the history
  • Loading branch information
semio committed Feb 3, 2024
1 parent ca383a0 commit 67b7d1d
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
model_configuration_id,prompt_variation_id,number_of_answers,model_name
mc019,prompt3,1.5,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc024,prompt2,1.0607142857142857,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
mc021,prompt1,1.167857142857143,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc023,prompt2,1.6535714285714285,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc015,prompt2,1.0613026819923372,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc019,prompt2,1.0,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc018,prompt1,1.7633587786259541,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc016,prompt2,1.0190839694656488,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc015,prompt1,1.0766283524904214,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc021,prompt3,1.042857142857143,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc024,prompt1,1.0571428571428572,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
mc023,prompt3,1.4392857142857143,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc015,prompt3,1.0,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc020,prompt1,1.0214285714285714,"Google Gemini Pro {""temperature"": 0.01}"
mc018,prompt2,1.6641221374045803,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc014,prompt2,1.0240384615384615,"Alibaba Qianwen Plus {""top_p"": 0.1, ""top_k"": 100}"
mc014,prompt1,1.0096153846153846,"Alibaba Qianwen Plus {""top_p"": 0.1, ""top_k"": 100}"
mc021,prompt2,1.1,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc009,prompt2,1.7644787644787645,"Google PaLM (Text Bison) {""temperature"": 0.01}"
mc022,prompt3,1.1321428571428571,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
mc016,prompt3,1.0,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc009,prompt1,1.9034749034749034,"Google PaLM (Text Bison) {""temperature"": 0.01}"
mc023,prompt1,1.4214285714285715,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc024,prompt3,1.05,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
mc017,prompt1,1.1603053435114503,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc020,prompt3,1.0178571428571428,"Google Gemini Pro {""temperature"": 0.01}"
mc020,prompt2,1.0,"Google Gemini Pro {""temperature"": 0.01}"
mc019,prompt1,1.0,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc017,prompt2,1.1488549618320612,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc016,prompt1,1.0458015267175573,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc018,prompt3,1.5,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc017,prompt3,1.0,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc022,prompt1,1.0857142857142856,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
mc022,prompt2,1.1178571428571429,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
model_configuration_id,prompt_variation_id,total_questions_asked,Correct Rate %,Wrong Rate %,Very Wrong Rate %,Indecisive Rate %,model_name
mc019,prompt1,2,50.0,0.0,50.0,0.0,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc021,prompt2,280,82.5,12.5,2.5,2.5,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc022,prompt2,280,65.0,25.0,8.928571428571429,1.0714285714285714,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
mc024,prompt1,280,79.64285714285714,16.785714285714285,3.571428571428571,0.0,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
mc015,prompt2,261,50.191570881226056,36.7816091954023,9.961685823754788,3.065134099616858,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc019,prompt3,2,50.0,50.0,0.0,0.0,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc020,prompt2,280,63.92857142857142,26.071428571428573,9.642857142857144,0.35714285714285715,"Google Gemini Pro {""temperature"": 0.01}"
mc023,prompt1,280,64.64285714285715,15.714285714285714,8.928571428571429,10.714285714285714,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc024,prompt3,280,74.28571428571429,23.57142857142857,2.142857142857143,0.0,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
mc014,prompt1,208,36.53846153846153,32.21153846153847,12.01923076923077,19.230769230769234,"Alibaba Qianwen Plus {""top_p"": 0.1, ""top_k"": 100}"
mc023,prompt3,280,62.142857142857146,21.428571428571427,8.928571428571429,7.5,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc016,prompt2,262,67.93893129770993,14.50381679389313,11.068702290076336,6.488549618320611,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc016,prompt1,262,38.93129770992366,45.80152671755725,15.267175572519085,0.0,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc017,prompt1,262,59.16030534351145,27.099236641221374,2.2900763358778624,11.450381679389313,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc018,prompt2,262,61.06870229007634,13.740458015267176,9.923664122137405,15.267175572519085,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc018,prompt1,262,34.73282442748092,41.603053435114504,17.17557251908397,6.488549618320611,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc016,prompt3,2,50.0,50.0,0.0,0.0,"OpenAI GPT3.5 June 2023 {""temperature"": 0.01}"
mc017,prompt2,262,63.358778625954194,15.267175572519085,0.7633587786259541,20.610687022900763,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc017,prompt3,2,50.0,0.0,0.0,50.0,"OpenAI GPT4 June 2023 {""temperature"": 0.01}"
mc018,prompt3,2,0.0,100.0,0.0,0.0,"OpenAI GPT3.5 June 2023 {""temperature"": 1}"
mc014,prompt2,208,40.38461538461539,21.153846153846153,13.461538461538462,25.0,"Alibaba Qianwen Plus {""top_p"": 0.1, ""top_k"": 100}"
mc020,prompt1,280,53.57142857142857,37.857142857142854,8.571428571428571,0.0,"Google Gemini Pro {""temperature"": 0.01}"
mc023,prompt2,280,65.0,17.857142857142858,3.571428571428571,13.571428571428571,"Alibaba Qianwen Max {""temperature"": 0.01}"
mc015,prompt1,261,31.800766283524908,50.191570881226056,16.091954022988507,1.9157088122605364,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc019,prompt2,2,100.0,0.0,0.0,0.0,"Google PaLM (Chat Bison) {""temperature"": 0.01}"
mc020,prompt3,280,50.357142857142854,39.64285714285714,10.0,0.0,"Google Gemini Pro {""temperature"": 0.01}"
mc021,prompt1,280,78.57142857142857,18.21428571428571,2.5,0.7142857142857143,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc022,prompt1,280,37.142857142857146,48.214285714285715,14.285714285714285,0.35714285714285715,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
mc009,prompt1,259,38.996138996138995,27.7992277992278,22.393822393822393,10.81081081081081,"Google PaLM (Text Bison) {""temperature"": 0.01}"
mc009,prompt2,259,36.293436293436294,42.084942084942085,14.671814671814673,6.94980694980695,"Google PaLM (Text Bison) {""temperature"": 0.01}"
mc015,prompt3,2,50.0,50.0,0.0,0.0,"Meta llama2 (hosted on replicate) {""temperature"": 0.01}"
mc021,prompt3,280,76.42857142857142,21.071428571428573,2.5,0.0,"OpenAI GPT4 Turbo Nov 2023 {""temperature"": 0.01}"
mc022,prompt3,280,35.0,50.0,15.0,0.0,"OpenAI GPT3.5 Nov 2023' {""temperature"": 0.01}"
mc024,prompt2,280,84.28571428571429,13.214285714285715,2.142857142857143,0.35714285714285715,"OpenAI GPT4 Turbo Jan 2024 {""temperature"": 0.01}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
prompt_variation_id,total_questions_asked,Correct Rate %,Wrong Rate %,Very Wrong Rate %,Indecisive Rate %
prompt1,1410,62.5531914893617,27.4468085106383,7.588652482269503,2.4113475177304964
prompt2,1410,72.12765957446808,18.79432624113475,5.319148936170213,3.7588652482269502
prompt3,1410,59.50354609929078,31.27659574468085,7.659574468085106,1.5602836879432624

0 comments on commit 67b7d1d

Please sign in to comment.