-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path3-zScoreOf-TFxTissue.py
40 lines (36 loc) · 1.58 KB
/
3-zScoreOf-TFxTissue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Input table (tab-separated) that the script section of this file reads.
tfFPKMInTissuesPath = "../results/tfFPKMinTissues.tsv"
# Output table (tab-separated) that the script section of this file writes.
zScoreOfTFxTissuePath = "../results/zScoreOfTFxTissue.tsv"

# Tissue labels; getZScoreRows uses each one as a key into the input row, so
# the spelling must match the input exactly (including "thyriod") and must
# not be "corrected" here without also changing the input file.
tissueNames = [
    "adipose_tissue",
    "adrenal_gland",
    "brain",
    "breast",
    "colon",
    "heart",
    "kidney",
    "leukocyte",
    "liver",
    "lung",
    "lymph_node",
    "ovary",
    "prostate",
    "skeletal_muscle",
    "testis",
    "thyriod",
]
def getZScoreRows(row, newDFRows):
    """Append one z-score record per tissue for a single input row.

    Args:
        row: Mapping-like row (e.g. a pandas Series) providing the keys
            "tfName", "bindingSites", "mean", "stDeviation" and one key
            per entry of the module-level ``tissueNames`` list.
        newDFRows: List that is extended in place; one dict is appended
            for every tissue, in ``tissueNames`` order.

    Returns:
        None. The results are delivered through ``newDFRows``.
    """
    average = row["mean"]
    deviation = row["stDeviation"]
    for tissue in tissueNames:
        record = {
            "tfName": row["tfName"],
            "tissue": tissue,
            "bindingSites": row["bindingSites"],
            "fpkmSum": row[tissue],
            "mean": average,
            "stDeviation": deviation,
        }
        if deviation == 0.0:
            # A zero standard deviation would divide by zero; such rows are
            # reported with a z-score of 0.0 instead.
            record["zScore"] = 0.0
        else:
            record["zScore"] = (record["fpkmSum"] - average) / deviation
        newDFRows.append(record)
def calcExpressionForEachTissue(tFactorDF):
    """Expand a per-TF table into one z-score row per (TF, tissue) pair.

    Every row of ``tFactorDF`` is handed to ``getZScoreRows``, which appends
    one record per tissue. The collected records are assembled into a new
    DataFrame and ordered by descending z-score.

    Args:
        tFactorDF: pandas DataFrame whose rows provide the keys that
            ``getZScoreRows`` reads.

    Returns:
        A pandas DataFrame with the columns tfName, tissue, bindingSites,
        fpkmSum, mean, stDeviation and zScore, sorted by zScore from highest
        to lowest. An empty input yields an empty frame with those columns.
    """
    newDFRows = []
    # Iterate explicitly rather than calling DataFrame.apply(axis=1) for its
    # side effects: apply is meant to compute a result, and on an empty frame
    # pandas may still invoke the callable once on a placeholder row to infer
    # the result shape, which would append bogus all-NaN records here.
    for _, row in tFactorDF.iterrows():
        getZScoreRows(row, newDFRows)
    zScoreDf = pd.DataFrame(
        newDFRows,
        columns=['tfName', 'tissue', 'bindingSites',
                 'fpkmSum', 'mean', 'stDeviation',
                 'zScore'],
    )
    return zScoreDf.sort_values(['zScore'], ascending=False)
# Script section: runs at module level, in this order.
# 1. Load the tab-separated input table from tfFPKMInTissuesPath.
fpkmDf = pd.read_csv(tfFPKMInTissuesPath, sep='\t')
# 2. Expand it into one z-score row per tissue for every input row.
zScoreDf = calcExpressionForEachTissue(fpkmDf)
# 3. Write the result as a tab-separated file, omitting the DataFrame index.
zScoreDf.to_csv(zScoreOfTFxTissuePath, sep='\t', index=False)