From 5ccf26b39153d6bb7cc8a465f6a7f3e66554351d Mon Sep 17 00:00:00 2001
From: woodrock
Date: Mon, 30 Sep 2024 01:35:51 +1300
Subject: [PATCH] Run clf on new instance recognition dataset

---
 code/clf/__pycache__/data.cpython-310.pyc | Bin 3312 -> 4016 bytes
 code/clf/data.py                          |  23 ++-
 code/clf/logs/out.log                     | 231 +++-------------------
 code/clf/main.py                          |   3 +-
 code/gp/data.py                           |  15 +-
 code/gp/logs/results_0.log                | 126 ++----------
 code/transformer/util.py                  |   4 -
 7 files changed, 77 insertions(+), 325 deletions(-)

diff --git a/code/clf/__pycache__/data.cpython-310.pyc b/code/clf/__pycache__/data.cpython-310.pyc
index 6d1db674563d30236c0acf9bea4d1064a3a47edc..cec28d7903bc105e8d30265efb4a3115770e592f 100644
GIT binary patch
diff --git a/code/clf/data.py b/code/clf/data.py
index dfa3a267..98da038b 100644
--- a/code/clf/data.py
+++ b/code/clf/data.py
@@ -22,7 +22,10 @@ def load_dataset(
     """
     logger = logging.getLogger(__name__)
 
-    path = ["/", "vol", "ecrg-solar", "woodj4", "fishy-business", "data", "REIMS_data.xlsx"]
+    # Path for university computers
+    # path = ["/", "vol", "ecrg-solar", "woodj4", "fishy-business", "data", "REIMS_data.xlsx"]
+    # Path for home computer
+    path = ["~/", "Desktop", "fishy-business", "data", "REIMS_data.xlsx"]
     path = os.path.join(*path)
 
     # Load the dataset
@@ -86,6 +89,24 @@ def load_dataset(
         else (1 if 'H' in x
         else (2 if 'M' in x
         else None)))
+    elif dataset == "instance-recognition":
+        data = data[~data.iloc[:, 0].astype(str).str.contains('QC|HM|MO|fillet|frames|gonads|livers|skins|guts|frame|heads', case=False, na=False)]
+        X = data.iloc[:, 1:].to_numpy()
+        # Take only the class label column.
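+        # Instance recognition is posed here as binary pair classification:
+        # every pair of samples is concatenated into one feature vector and
+        # labelled 1 when both samples share the same class label, 0 otherwise.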
+        y = data.iloc[:, 0].to_numpy()
+        features = list()
+        labels = list()
+
+        all_possible_pairs = [((a, a_idx), (b, b_idx)) for a_idx, a in enumerate(X) for b_idx, b in enumerate(X[a_idx + 1:], start=a_idx + 1)]
+        for (a, a_idx), (b, b_idx) in all_possible_pairs:
+            concatenated = np.concatenate((a, b))
+            label = int(y[a_idx] == y[b_idx])
+            features.append(concatenated)
+            labels.append(label)
+        X,y = np.array(features), np.array(labels)
+        # Keep integer labels here; one-hot encoding is not needed.
+        # y = np.eye(2)[y]
+        return X,y
     else:
         # Raise an exception if the dataset is not valid.
         raise ValueError(f"No valid dataset was specified: {dataset}")
diff --git a/code/clf/logs/out.log b/code/clf/logs/out.log
index e9e2a5dd..075efd33 100644
--- a/code/clf/logs/out.log
+++ b/code/clf/logs/out.log
@@ -1,217 +1,50 @@
-INFO:data:Reading dataset fish: species
-INFO:data:Class Counts: [48 60], Class Ratios: [0.44444444 0.55555556]
-INFO:data:Number of features: 1023
-INFO:data:Number of instances: 108
-INFO:data:Number of classes 2.
+INFO:data:Reading dataset fish: instance-recognition
 INFO:__main__:Running 30 experiments
 INFO:__main__:Classifier: knn-n2
-INFO:__main__:training: 0.9527777777777777 +\- 0.011002069109551964
-INFO:__main__:test: 0.8758333333333336 +\- 0.05628753891437283
+INFO:__main__:training: 0.5286821705426356 +\- 0.013081036457896262
+INFO:__main__:test: 0.49884353741496595 +\- 0.0009024829361511283
 INFO:__main__:Classifier: knn-n3
-INFO:__main__:training: 0.9534173976608188 +\- 0.01723183051719675
-INFO:__main__:test: 0.8766666666666667 +\- 0.055377492419453826
+INFO:__main__:training: 0.5575187463873118 +\- 0.0161101153403825
+INFO:__main__:test: 0.49665120593692014 +\- 0.006210058412903558
 INFO:__main__:Classifier: knn-n5
-INFO:__main__:training: 0.9324926900584796 +\- 0.024361990937911384
-INFO:__main__:test: 0.8369444444444445 +\- 0.06914435159884495
+INFO:__main__:training: 0.5028879730147041 +\- 0.0028623156569091267
+INFO:__main__:test: 0.4995918367346938 +\- 0.0005650764532597398
 INFO:__main__:Classifier: knn-n10
-INFO:__main__:training: 0.8787097953216372 +\- 0.030790937763697183
-INFO:__main__:test: 0.7755555555555556 +\- 0.07468865417001698
+INFO:__main__:training: 0.5 +\- 0.0
+INFO:__main__:test: 0.5 +\- 0.0
 INFO:__main__:Classifier: knn-n20
-INFO:__main__:training: 0.8241959064327486 +\- 0.033395414033700305
-INFO:__main__:test: 0.7627777777777779 +\- 0.09957626272744641
+INFO:__main__:training: 0.5 +\- 0.0
+INFO:__main__:test: 0.5 +\- 0.0
 INFO:__main__:Classifier: dt
 INFO:__main__:training: 1.0 +\- 0.0
-INFO:__main__:test: 0.9913888888888888 +\- 0.017278135044537787
+INFO:__main__:test: 0.5228324056895485 +\- 0.03321271992135387
 INFO:__main__:Classifier: lda-lsqr
-INFO:__main__:training: 0.5 +\- 0.22360679774997896
-INFO:__main__:test: 0.49666666666666665 +\- 0.0842614977317636
+INFO:__main__:training: 0.5758153787680832 +\- 0.010202436867029334
+INFO:__main__:test: 0.5414100185528757 +\- 0.02779486220462678
 INFO:__main__:Classifier: lda-svd
-INFO:__main__:training: 0.9867690058479531 +\- 0.007708956334230479
-INFO:__main__:test: 0.9647222222222224 +\- 0.03674759587332395
+INFO:__main__:training: 0.5753616647267026 +\- 0.009965074512577594
+INFO:__main__:test: 0.5407884972170687 +\- 0.026418540943669976
 INFO:__main__:Classifier: nb
-INFO:__main__:training: 0.93406432748538 +\- 0.06991194784028316
-INFO:__main__:test: 0.8797222222222222 +\- 0.09571087183624606
+INFO:__main__:training: 0.6300918117680595 +\- 0.012486962333534867
+INFO:__main__:test: 0.5394124922696351 +\- 0.03679158695133945
INFO:__main__:Classifier: rf -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.9569444444444443 +\- 0.043678292204212175 -INFO:__main__:Classifier: svm-linear -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.9597222222222223 +\- 0.0506478096000602 -INFO:__main__:Classifier: svm-rbf -INFO:__main__:training: 0.6612024853801169 +\- 0.10198463707186878 -INFO:__main__:test: 0.605 +\- 0.1043187778274238 -INFO:__main__:Classifier: svm-poly -INFO:__main__:training: 0.5811403508771928 +\- 0.012749510398025113 -INFO:__main__:test: 0.5719444444444445 +\- 0.05237345124260695 -INFO:__main__:Classifier: svm-sigmoid -INFO:__main__:training: 0.507986111111111 +\- 0.0269201747117072 -INFO:__main__:test: 0.5016666666666667 +\- 0.008975274678557512 -INFO:__main__:Classifier: lor -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.9716666666666665 +\- 0.03906310184721544 -INFO:__main__:Classifier: ensemble -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.9752777777777778 +\- 0.03425688179938864 -INFO:data:Reading dataset fish: part -INFO:data:Class Counts: [6 6 3 6 6 3], Class Ratios: [0.2 0.2 0.1 0.2 0.2 0.1] -INFO:data:Number of features: 1023 -INFO:data:Number of instances: 30 -INFO:data:Number of classes 6. -INFO:__main__:Running 30 experiments -INFO:__main__:Classifier: knn-n2 -INFO:__main__:training: 0.7288888888888889 +\- 0.09802997777827903 -INFO:__main__:test: 0.3777777777777778 +\- 0.21052550357218244 -INFO:__main__:Classifier: knn-n3 -INFO:__main__:training: 0.6155555555555555 +\- 0.09218874894529575 -INFO:__main__:test: 0.23888888888888885 +\- 0.13391078659104388 -INFO:__main__:Classifier: knn-n5 -INFO:__main__:training: 0.428888888888889 +\- 0.05373689906375 -INFO:__main__:test: 0.3166666666666666 +\- 0.14497764834110988 -INFO:__main__:Classifier: knn-n10 -INFO:__main__:training: 0.3100000000000001 +\- 0.036666666666666674 -INFO:__main__:test: 0.21111111111111114 +\- 0.14865653511399612 -INFO:__main__:Classifier: knn-n20 -INFO:__main__:training: 0.23111111111111113 +\- 0.04121608220220313 -INFO:__main__:test: 0.21111111111111114 +\- 0.07370277311900889 -INFO:__main__:Classifier: dt -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.26111111111111107 +\- 0.13391078659104388 -INFO:__main__:Classifier: lda-lsqr -INFO:__main__:training: 0.1611111111111111 +\- 0.19945914523351377 -INFO:__main__:test: 0.17777777777777776 +\- 0.17177360926378119 -INFO:__main__:Classifier: lda-svd -INFO:__main__:training: 0.756111111111111 +\- 0.032054159415003676 -INFO:__main__:test: 0.4555555555555555 +\- 0.16063146994223287 -INFO:__main__:Classifier: nb -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.45 +\- 0.15605079894653484 -INFO:__main__:Classifier: rf -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.39444444444444443 +\- 0.16377114414426308 +INFO:__main__:training: 0.9994186046511627 +\- 0.0017441860465116311 +INFO:__main__:test: 0.5148453927025355 +\- 0.015902287334217096 INFO:__main__:Classifier: svm-linear -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.5611111111111111 +\- 0.14582671942674097 +INFO:__main__:training: 0.5284319547711238 +\- 0.012751572179624037 +INFO:__main__:test: 0.5182776747062462 +\- 0.02048959807121518 INFO:__main__:Classifier: svm-rbf -INFO:__main__:training: 0.45333333333333337 +\- 0.0678232998312527 -INFO:__main__:test: 0.29999999999999993 +\- 0.1319371343004213 +INFO:__main__:training: 0.5 +\- 0.0 +INFO:__main__:test: 0.5 +\- 0.0 INFO:__main__:Classifier: svm-poly -INFO:__main__:training: 
0.4099999999999999 +\- 0.0636250591209796 -INFO:__main__:test: 0.26111111111111107 +\- 0.12680791345014805 +INFO:__main__:training: 0.5 +\- 0.0 +INFO:__main__:test: 0.5 +\- 0.0 INFO:__main__:Classifier: svm-sigmoid -INFO:__main__:training: 0.23777777777777778 +\- 0.038232556742411675 -INFO:__main__:test: 0.2 +\- 0.13877773329774218 +INFO:__main__:training: 0.49999386337902146 +\- 0.0025073340422464506 +INFO:__main__:test: 0.5010420531849104 +\- 0.00947277334452803 INFO:__main__:Classifier: lor -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.5166666666666667 +\- 0.13158576980363346 +INFO:__main__:training: 0.5345137420718816 +\- 0.011747930016377086 +INFO:__main__:test: 0.5158256029684601 +\- 0.015391314796416273 INFO:__main__:Classifier: ensemble -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.48888888888888893 +\- 0.12120791238484127 -INFO:data:Reading dataset fish: oil -INFO:data:Class Counts: [18 18 18 18 18 18 18], Class Ratios: [0.14285714 0.14285714 0.14285714 0.14285714 0.14285714 0.14285714 - 0.14285714] -INFO:data:Number of features: 1023 -INFO:data:Number of instances: 126 -INFO:data:Number of classes 7. -INFO:__main__:Running 30 experiments -INFO:__main__:Classifier: knn-n2 -INFO:__main__:training: 0.6419501133786848 +\- 0.030262722860613895 -INFO:__main__:test: 0.3027777777777778 +\- 0.07336549318875274 -INFO:__main__:Classifier: knn-n3 -INFO:__main__:training: 0.5841950113378686 +\- 0.03290561971494403 -INFO:__main__:test: 0.2944444444444444 +\- 0.06425141158098126 -INFO:__main__:Classifier: knn-n5 -INFO:__main__:training: 0.5170068027210885 +\- 0.030877660177486103 -INFO:__main__:test: 0.3079365079365079 +\- 0.08741334837670278 -INFO:__main__:Classifier: knn-n10 -INFO:__main__:training: 0.4426984126984127 +\- 0.03441248354836296 -INFO:__main__:test: 0.3194444444444444 +\- 0.09343953168834142 -INFO:__main__:Classifier: knn-n20 -INFO:__main__:training: 0.3720861678004536 +\- 0.028860690645296887 -INFO:__main__:test: 0.30952380952380953 +\- 0.07056354517585789 -INFO:__main__:Classifier: dt -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.2849206349206349 +\- 0.0718199156024838 -INFO:__main__:Classifier: lda-lsqr -INFO:__main__:training: 0.13809523809523808 +\- 0.04492371967646001 -INFO:__main__:test: 0.14603174603174604 +\- 0.05152010275275391 -INFO:__main__:Classifier: lda-svd -INFO:__main__:training: 0.7148979591836737 +\- 0.03246608884501496 -INFO:__main__:test: 0.3186507936507937 +\- 0.06654490960021799 -INFO:__main__:Classifier: nb -INFO:__main__:training: 0.6189342403628119 +\- 0.028449195034018385 -INFO:__main__:test: 0.32579365079365075 +\- 0.0684210303179565 -INFO:__main__:Classifier: rf -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.3845238095238096 +\- 0.07592650203494354 -INFO:__main__:Classifier: svm-linear -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.35634920634920625 +\- 0.07806070987896381 -INFO:__main__:Classifier: svm-rbf -INFO:__main__:training: 0.35482993197278917 +\- 0.01953743391506102 -INFO:__main__:test: 0.30476190476190473 +\- 0.07160032261124785 -INFO:__main__:Classifier: svm-poly -INFO:__main__:training: 0.3606802721088435 +\- 0.035413113026182726 -INFO:__main__:test: 0.26626984126984127 +\- 0.0616664623804097 -INFO:__main__:Classifier: svm-sigmoid -INFO:__main__:training: 0.18571428571428578 +\- 0.03759593422960956 -INFO:__main__:test: 0.1841269841269841 +\- 0.057318952651643766 -INFO:__main__:Classifier: lor -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.31071428571428567 +\- 
0.08209683550907845 -INFO:__main__:Classifier: ensemble -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.3873015873015873 +\- 0.0827492760136064 -INFO:data:Reading dataset fish: cross-species -INFO:data:Class Counts: [45 60 48], Class Ratios: [0.29411765 0.39215686 0.31372549] -INFO:data:Number of features: 1023 -INFO:data:Number of instances: 153 -INFO:data:Number of classes 3. -INFO:__main__:Running 30 experiments -INFO:__main__:Classifier: knn-n2 -INFO:__main__:training: 0.8351730019493178 +\- 0.024011699076368492 -INFO:__main__:test: 0.6511728395061727 +\- 0.06056901978402761 -INFO:__main__:Classifier: knn-n3 -INFO:__main__:training: 0.8680636777128005 +\- 0.01949741398622453 -INFO:__main__:test: 0.6868518518518518 +\- 0.06897305027603483 -INFO:__main__:Classifier: knn-n5 -INFO:__main__:training: 0.7919022092267707 +\- 0.03072416164416948 -INFO:__main__:test: 0.6419135802469135 +\- 0.07088906196899114 -INFO:__main__:Classifier: knn-n10 -INFO:__main__:training: 0.6974577647823262 +\- 0.03379281965459206 -INFO:__main__:test: 0.6105555555555555 +\- 0.07624016573639837 -INFO:__main__:Classifier: knn-n20 -INFO:__main__:training: 0.6808723196881094 +\- 0.025882377795062247 -INFO:__main__:test: 0.5770987654320988 +\- 0.09217248315752967 -INFO:__main__:Classifier: dt -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.6985185185185185 +\- 0.05968289929470005 -INFO:__main__:Classifier: lda-lsqr -INFO:__main__:training: 0.3222222222222223 +\- 0.10482201257840669 -INFO:__main__:test: 0.3219753086419753 +\- 0.0745382575367192 -INFO:__main__:Classifier: lda-svd -INFO:__main__:training: 0.9090358999350227 +\- 0.012567308814802999 -INFO:__main__:test: 0.8137037037037037 +\- 0.06605099801878506 -INFO:__main__:Classifier: nb -INFO:__main__:training: 0.678646848602989 +\- 0.054130418439003306 -INFO:__main__:test: 0.5570987654320987 +\- 0.08341160019138115 -INFO:__main__:Classifier: rf -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.8078395061728398 +\- 0.0547381916355756 -INFO:__main__:Classifier: svm-linear -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.8553703703703703 +\- 0.058444143234135916 -INFO:__main__:Classifier: svm-rbf -INFO:__main__:training: 0.5070622157244965 +\- 0.08520565587822512 -INFO:__main__:test: 0.44271604938271597 +\- 0.1126984692793106 -INFO:__main__:Classifier: svm-poly -INFO:__main__:training: 0.4406595191682911 +\- 0.023277377156510872 -INFO:__main__:test: 0.395925925925926 +\- 0.0492685408502482 -INFO:__main__:Classifier: svm-sigmoid -INFO:__main__:training: 0.34616228070175453 +\- 0.02149617147348475 -INFO:__main__:test: 0.3472222222222223 +\- 0.02703428871538132 -INFO:__main__:Classifier: lor -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.865185185185185 +\- 0.05623128149657574 -INFO:__main__:Classifier: ensemble -INFO:__main__:training: 1.0 +\- 0.0 -INFO:__main__:test: 0.8593209876543211 +\- 0.05621315199801734 +INFO:__main__:training: 0.5856824714350192 +\- 0.009988811576600533 +INFO:__main__:test: 0.5148206555349413 +\- 0.01599609897689686 diff --git a/code/clf/main.py b/code/clf/main.py index 91ad1182..3ac1a0db 100644 --- a/code/clf/main.py +++ b/code/clf/main.py @@ -14,7 +14,8 @@ from data import load_dataset if __name__ == "__main__": - datasets = ["species", "part", "oil", "cross-species"] + # datasets = ["species", "part", "oil", "cross-species"] + datasets = ["instance-recognition"] logger = logging.getLogger(__name__) # Run argument for numbered log files. 
     output = f"logs/out.log"
 
diff --git a/code/gp/data.py b/code/gp/data.py
index 120d69ab..92a55cd7 100644
--- a/code/gp/data.py
+++ b/code/gp/data.py
@@ -22,7 +22,10 @@ def load_dataset(
     """
     logger = logging.getLogger(__name__)
 
-    path = ["/", "vol", "ecrg-solar", "woodj4", "fishy-business", "data", "REIMS_data.xlsx"]
+    # Path for university computers
+    # path = ["/", "vol", "ecrg-solar", "woodj4", "fishy-business", "data", "REIMS_data.xlsx"]
+    # Path for home computer
+    path = ["~/", "Desktop", "fishy-business", "data", "REIMS_data.xlsx"]
     path = os.path.join(*path)
 
 
@@ -83,13 +86,15 @@ def load_dataset(
         features = list()
         labels = list()
 
-        for i, (x_1, x_2) in enumerate(zip(X, X[1:])):
-            concatenated = np.concatenate((x_1, x_2))
+        all_possible_pairs = [((a, a_idx), (b, b_idx)) for a_idx, a in enumerate(X) for b_idx, b in enumerate(X[a_idx + 1:], start=a_idx + 1)]
+        for (a, a_idx), (b, b_idx) in all_possible_pairs:
+            concatenated = np.concatenate((a, b))
+            label = int(y[a_idx] == y[b_idx])
             features.append(concatenated)
-            label = int(y[i] == y[i+1])
             labels.append(label)
-
         X,y = np.array(features), np.array(labels)
+        # We don't want one-hot encoding for multi-tree GP.
+        # y = np.eye(2)[y]
         return X,y
     else:
         # Raise an exception if the dataset is not valid.
diff --git a/code/gp/logs/results_0.log b/code/gp/logs/results_0.log
index acd2aa2f..43171587 100644
--- a/code/gp/logs/results_0.log
+++ b/code/gp/logs/results_0.log
@@ -1,117 +1,13 @@
 INFO:data:Reading dataset fish: instance-recognition
 INFO:__main__:No model found. Train from scratch.
-INFO:gp: fitness size
- ------------------------------------------------------------------------ -----------------------------------------------
-gen nevals avg gen max min nevals std avg gen max min nevals std
-0 1023 0.501865 0 0.736077 0.279776 1023 0.0637286 3.53275 0 7 2 1023 1.35153
-INFO:gp:1 766 0.557746 1 0.762395 0.320005 766 0.0648651 3.64321 1 10 2 766 1.4337
-INFO:gp:2 789 0.586646 2 0.810396 0.311427 789 0.0801738 3.6999 2 12 2 789 1.4446
-INFO:gp:3 773 0.623747 3 0.810396 0.336792 773 0.0843636 3.93451 3 10 2 773 1.48456
-INFO:gp:4 777 0.657198 4 0.845924 0.299175 777 0.0925178 3.9306 4 12 2 777 1.49357
-INFO:gp:5 781 0.686689 5 0.845924 0.330069 781 0.0950191 3.65689 5 14 1 781 1.31929
-INFO:gp:6 786 0.698472 6 0.845924 0.33412 786 0.0980579 3.51711 6 11 1 786 1.24334
-INFO:gp:7 784 0.711305 7 0.867492 0.368732 784 0.105549 3.42717 7 11 2 784 1.04087
-INFO:gp:8 758 0.75403 8 0.867492 0.430717 758 0.0978027 3.64418 8 11 2 758 1.13096
-INFO:gp:9 768 0.78853 9 0.867492 0.471543 768 0.0866798 4.19844 9 12 2 768 1.35011
-INFO:gp:10 781 0.788459 10 0.867492 0.468219 781 0.0966201 4.66667 10 12 2 781 1.17623
-INFO:gp:11 774 0.790583 11 0.867492 0.373275 774 0.100658 4.67155 11 12 2 774 1.29577
-INFO:gp:12 770 0.79208 12 0.867492 0.368358 770 0.0989054 4.67644 12 14 2 770 1.60763
-INFO:gp:13 752 0.802522 13 0.867492 0.372834 752 0.10012 4.65591 13 12 2 752 1.60792
-INFO:gp:14 774 0.809461 14 0.867492 0.449962 774 0.0963298 4.41544 14 12 2 774 1.33618
-INFO:gp:15 785 0.805227 15 0.867538 0.414169 785 0.10107 4.37146 15 12 2 785 1.25484
-INFO:gp:16 772 0.807623 16 0.867538 0.410934 772 0.101041 4.33138 16 12 2 772 1.22879
-INFO:gp:17 756 0.810934 17 0.867538 0.402035 756 0.0939553 4.41153 17 12 2 756 1.25519
-INFO:gp:18 786 0.806602 18 0.869455 0.35615 786 0.0976673 4.48583 18 12 2 786 1.32114
-INFO:gp:19 766 0.812057 19 0.869455 0.404119 766 0.0924966 4.75367 19 12 2 766 1.46977
-INFO:gp:20 793 0.809269 20 0.869455 0.467838 793 0.0928612 5.37146 20 12 2 793 1.54656
-INFO:gp:21 769 0.811888 21 0.869455 0.433064 769 0.0895481 5.63832 21 12 2 769 1.57764 -INFO:gp:22 789 0.807402 22 0.869455 0.410249 789 0.0980531 5.20528 22 14 2 789 1.56729 -INFO:gp:23 763 0.800998 23 0.869455 0.385224 763 0.104133 4.55327 23 14 2 763 1.35666 -INFO:gp:24 781 0.810686 24 0.869455 0.414845 781 0.0994444 4.34409 24 14 2 781 1.21045 -INFO:gp:25 762 0.80059 25 0.869455 0.385503 762 0.109398 4.32258 25 14 2 762 1.23865 -INFO:gp:26 782 0.801258 26 0.869455 0.439678 782 0.107063 4.34018 26 14 2 782 1.2737 -INFO:gp:27 768 0.806303 27 0.869455 0.446962 768 0.103012 4.30401 27 14 2 768 1.17918 -INFO:gp:28 755 0.802747 28 0.869455 0.407663 755 0.107816 4.30987 28 14 2 755 1.18428 -INFO:gp:29 760 0.806014 29 0.869455 0.373552 760 0.104221 4.31672 29 14 2 760 1.2378 -INFO:gp:30 793 0.803594 30 0.869455 0.403604 793 0.103339 4.30792 30 11 2 793 1.20686 -INFO:gp:31 766 0.803579 31 0.869455 0.3295 766 0.105355 4.31867 31 11 2 766 1.15475 -INFO:gp:32 761 0.808152 32 0.869455 0.463949 761 0.102964 4.31574 32 11 2 761 1.15682 -INFO:gp:33 740 0.802555 33 0.869455 0.407806 740 0.107038 4.30792 33 11 2 740 1.16146 -INFO:gp:34 771 0.803168 34 0.869455 0.359145 771 0.10551 4.29912 34 11 2 771 1.16249 -INFO:gp:35 789 0.798732 35 0.869455 0.426531 789 0.109702 4.34995 35 11 2 789 1.18673 -INFO:gp:36 764 0.810387 36 0.869455 0.392511 764 0.0972804 4.30596 36 11 2 764 1.19678 -INFO:gp:37 780 0.80235 37 0.869455 0.345097 780 0.107269 4.3304 37 11 2 780 1.22467 -INFO:gp:38 770 0.811491 38 0.869455 0.449084 770 0.0981664 4.34213 38 11 2 770 1.2616 -INFO:gp:39 789 0.80783 39 0.869455 0.443975 789 0.101757 4.30694 39 11 2 789 1.15792 -INFO:gp:40 785 0.80277 40 0.869455 0.342758 785 0.104347 4.30694 40 11 2 785 1.17635 -INFO:gp:41 763 0.805446 41 0.869455 0.466835 763 0.103197 4.28837 41 11 2 763 1.16058 -INFO:gp:42 797 0.804897 42 0.869455 0.452513 797 0.102768 4.32454 42 11 2 797 1.20534 -INFO:gp:43 787 0.806813 43 0.869455 0.424232 787 0.102053 4.28739 43 11 2 787 1.14429 -INFO:gp:44 753 0.808327 44 0.869455 0.333887 753 0.103054 4.34115 44 11 2 753 1.16981 -INFO:gp:45 761 0.806137 45 0.869455 0.415001 761 0.105252 4.30987 45 11 2 761 1.17849 -INFO:gp:46 782 0.805522 46 0.869455 0.386334 782 0.103385 4.30694 46 11 2 782 1.14177 -INFO:gp:47 764 0.804044 47 0.889873 0.404539 764 0.105987 4.29912 47 11 2 764 1.12315 -INFO:gp:48 753 0.810456 48 0.889873 0.3726 753 0.101331 4.27468 48 11 2 753 1.12634 -INFO:gp:49 773 0.806837 49 0.889873 0.479256 773 0.100893 4.30499 49 11 2 773 1.18925 -INFO:gp:50 778 0.805549 50 0.889873 0.403292 778 0.10353 4.33333 50 11 2 778 1.20982 -INFO:gp:51 767 0.802045 51 0.889873 0.439696 767 0.108673 4.30694 51 11 2 767 1.19121 -INFO:gp:52 748 0.805231 52 0.889873 0.443649 748 0.11465 4.32356 52 11 2 748 1.1698 -INFO:gp:53 766 0.804349 53 0.889873 0.45728 766 0.122701 4.30205 53 11 2 766 1.14691 -INFO:gp:54 751 0.794958 54 0.889873 0.448979 751 0.131436 4.33138 54 11 2 751 1.2055 -INFO:gp:55 775 0.803678 55 0.889873 0.459016 775 0.124598 4.33138 55 11 2 775 1.20469 -INFO:gp:56 787 0.795699 56 0.889873 0.465565 787 0.131824 4.33431 56 11 2 787 1.1375 -INFO:gp:57 770 0.798089 57 0.889873 0.469345 770 0.129428 4.32356 57 11 2 770 1.19869 -INFO:gp:58 784 0.787373 58 0.889873 0.428091 784 0.134831 4.32942 58 11 2 784 1.20685 -INFO:gp:59 768 0.803946 59 0.889873 0.419398 768 0.127704 4.31281 59 11 2 768 1.15551 -INFO:gp:60 787 0.795104 60 0.889873 0.454183 787 0.131731 4.31867 60 11 2 787 1.19059 -INFO:gp:61 772 0.79597 61 0.889873 0.47084 772 0.131764 4.31769 61 11 2 772 1.16973 
-INFO:gp:62 775 0.79329 62 0.889873 0.428632 775 0.130584 4.31476 62 11 2 775 1.19899 -INFO:gp:63 788 0.792645 63 0.889873 0.421921 788 0.130523 4.31281 63 11 2 788 1.18722 -INFO:gp:64 762 0.795894 64 0.889873 0.451727 762 0.131821 4.30108 64 11 2 762 1.14333 -INFO:gp:65 768 0.799057 65 0.889873 0.454902 768 0.129439 4.33431 65 11 2 768 1.1747 -INFO:gp:66 788 0.802954 66 0.889873 0.46034 788 0.127476 4.31867 66 11 2 788 1.1454 -INFO:gp:67 748 0.804383 67 0.889873 0.477406 748 0.126883 4.35777 67 11 2 748 1.23213 -INFO:gp:68 780 0.793975 68 0.889873 0.447952 780 0.132133 4.35386 68 11 2 780 1.22131 -INFO:gp:69 794 0.79447 69 0.889873 0.473449 794 0.130965 4.33529 69 11 2 794 1.17982 -INFO:gp:70 753 0.800463 70 0.889873 0.464626 753 0.127932 4.31476 70 11 2 753 1.20793 -INFO:gp:71 804 0.795542 71 0.889873 0.401947 804 0.130554 4.35582 71 11 2 804 1.203 -INFO:gp:72 778 0.804389 72 0.889873 0.46051 778 0.123986 4.34506 72 11 2 778 1.21862 -INFO:gp:73 795 0.798499 73 0.889873 0.450908 795 0.130707 4.35875 73 11 2 795 1.24722 -INFO:gp:74 734 0.80068 74 0.889873 0.428069 734 0.130476 4.33236 74 11 2 734 1.18849 -INFO:gp:75 769 0.804122 75 0.889873 0.462358 769 0.12591 4.31281 75 11 2 769 1.17563 -INFO:gp:76 758 0.797969 76 0.889873 0.449392 758 0.129243 4.31672 76 11 2 758 1.17625 -INFO:gp:77 776 0.799807 77 0.889873 0.433596 776 0.130008 4.30108 77 11 2 776 1.13819 -INFO:gp:78 766 0.801384 78 0.889873 0.447564 766 0.129799 4.34702 78 11 2 766 1.21001 -INFO:gp:79 764 0.799078 79 0.889873 0.44742 764 0.126424 4.31965 79 11 2 764 1.13871 -INFO:gp:80 773 0.795323 80 0.889873 0.466794 773 0.129665 4.30401 80 11 2 773 1.19483 -INFO:gp:81 756 0.795613 81 0.889873 0.460847 756 0.130983 4.34311 81 11 2 756 1.1916 -INFO:gp:82 784 0.801331 82 0.889873 0.460884 784 0.127892 4.33333 82 11 2 784 1.21627 -INFO:gp:83 778 0.794375 83 0.889873 0.476683 778 0.13003 4.32063 83 11 2 778 1.19253 -INFO:gp:84 760 0.807523 84 0.889873 0.44281 760 0.125488 4.35582 84 11 2 760 1.21994 -INFO:gp:85 779 0.796581 85 0.889873 0.465778 779 0.129484 4.3089 85 11 2 779 1.19317 -INFO:gp:86 770 0.792887 86 0.889873 0.447327 770 0.132521 4.32942 86 11 2 770 1.21894 -INFO:gp:87 745 0.80765 87 0.889873 0.436411 745 0.126061 4.34213 87 11 2 745 1.22784 -INFO:gp:88 775 0.796157 88 0.889873 0.45036 775 0.130186 4.32942 88 11 2 775 1.23329 -INFO:gp:89 788 0.794953 89 0.889873 0.460788 788 0.13054 4.32258 89 11 2 788 1.20099 -INFO:gp:90 770 0.792923 90 0.889873 0.421043 770 0.132837 4.33627 90 11 2 770 1.22308 -INFO:gp:91 783 0.790215 91 0.889873 0.45218 783 0.132892 4.34115 91 11 2 783 1.24353 -INFO:gp:92 762 0.783876 92 0.889873 0.473892 762 0.138048 4.32454 92 11 2 762 1.21744 -INFO:gp:93 794 0.798174 93 0.889873 0.45131 794 0.128862 4.31378 93 11 2 794 1.19884 -INFO:gp:94 764 0.800137 94 0.903624 0.425581 764 0.129707 4.33138 94 11 2 764 1.14989 -INFO:gp:95 791 0.794952 95 0.903624 0.467236 791 0.130984 4.34409 95 11 2 791 1.21045 -INFO:gp:96 792 0.795872 96 0.903624 0.39439 792 0.129799 4.34115 96 11 2 792 1.24902 -INFO:gp:97 764 0.799758 97 0.903624 0.472564 764 0.130615 4.33236 97 11 2 764 1.18272 -INFO:gp:98 776 0.79192 98 0.903624 0.460086 776 0.131029 4.3089 98 11 2 776 1.19726 -INFO:gp:99 776 0.788471 99 0.903624 0.455255 776 0.135591 4.31672 99 11 2 776 1.18371 -INFO:gp:100 767 0.796592 100 0.903624 0.459739 767 0.129333 4.32747 100 11 2 767 1.22745 -INFO:__main__:Saving model to file: checkpoints/embedded-gp.pth -INFO:util:Train accuracy: 0.9868421052631579 +- 0.013157894736842146 -INFO:util:Val accuracy: 0.9868421052631579 +- 
0.013157894736842146 -INFO:util:Test accuracy: 0.95 +- 0.0 -INFO:util:Train intra-class: 0.12105066702668055, Train Inter-class: 0.26254886532460786 -INFO:util:Val intra-class: 0.140120537624651, Val Inter-class: 0.28263808140300345 -INFO:util:Test intra-class: 0.1658756121532309, Test inter-class: 0.3389416824058929 -INFO:plot:Saving t-SNE to file: figures/tsne.png -INFO:plot:Saving t-SNE to file: figures/tsne.png -INFO:plot:Saving tree to file: figures/tree-0.pdf -INFO:plot:Saving tree to file: figures/tree-1.pdf +INFO:gp: fitness size + ----------------------------------------------------------------- ----------------------------------------------- +gen nevals avg gen max min nevals std avg gen max min nevals std +0 1023 0.499647 0 0.582622 0.41918 1023 0.0163417 3.53275 0 7 2 1023 1.35153 +INFO:gp:1 765 0.512642 1 0.582622 0.434432 765 0.0191528 3.73216 1 13 2 765 1.47067 +INFO:gp:2 772 0.522667 2 0.589708 0.438966 772 0.0234648 3.82698 2 15 1 772 1.72168 +INFO:gp:3 745 0.533985 3 0.589708 0.437525 745 0.0267792 3.88954 3 14 2 745 1.9864 +INFO:gp:4 764 0.54139 4 0.589708 0.422879 764 0.030371 3.74194 4 14 2 764 2.15705 +INFO:gp:5 766 0.54872 5 0.590745 0.452665 766 0.0317252 3.38612 5 13 2 766 1.71032 +INFO:gp:6 789 0.550586 6 0.595958 0.430755 789 0.0339392 3.36168 6 13 2 789 1.69761 +INFO:gp:7 794 0.554186 7 0.595958 0.461672 794 0.0335533 3.52981 7 13 2 794 1.67345 diff --git a/code/transformer/util.py b/code/transformer/util.py index ba7ea57d..fa9b576b 100644 --- a/code/transformer/util.py +++ b/code/transformer/util.py @@ -145,7 +145,6 @@ def filter_dataset( if dataset == "instance-recognition": data = data[~data.iloc[:, 0].astype(str).str.contains('QC|HM|MO|fillet|frames|gonads|livers|skins|guts|frame|heads', case=False, na=False)] - print(f"Third: len(data): {len(data)}") return data def one_hot_encoded_labels(dataset, data): @@ -211,7 +210,6 @@ def one_hot_encoded_labels(dataset, data): else None))) elif dataset == "instance-recognition": X = data.iloc[:, 1:].to_numpy() - print(f"First: len(X): {len(X)}") # Take only the class label column. y = data.iloc[:, 0].to_numpy() features = list() @@ -223,7 +221,6 @@ def one_hot_encoded_labels(dataset, data): label = int(y[a_idx] == y[b_idx]) features.append(concatenated) labels.append(label) - print(f"Second: len(features): {len(features)}") X,y = np.array(features), np.array(labels) y = np.eye(2)[y] return X,y @@ -342,7 +339,6 @@ def preprocess_dataset( y = one_hot_encoded_labels(dataset=dataset, data=data) X = data.drop('m/z', axis=1) X,y = remove_instances_with_none_labels(X,y) - print(f"batch_size: {batch_size}") train_loader, val_loader, train_steps, val_steps = train_test_split_to_data_loader( X, y,