| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- # normalisation and results for wilcoxon test
- import numpy as np
- import pandas as pd
- from numpy import load
- from numpy import save
- import os
- from scipy.stats import wilcoxon
# Folder where the per-frame expression activations of each pair are stored
# (one .npy file per recording).
expressions_folder = '/home/valapil/Project/ForkCausal_Adithya/test/'

# Header row of the results table: pair/role/case identifiers followed by one
# column per (expression, face-half) combination. Numeric rows are stacked
# under this header in the next step.
EXP = ["pair", "role", "case", "Happiness upper", "Happiness lower",
       "Surprise upper", "Surprise lower", "Disgust lower", "Fear upper",
       "Fear lower", "Sadness upper", "Sadness lower", "Anger upper",
       "Anger lower"]

# os.listdir already returns a list; no comprehension needed.
np_files = os.listdir(expressions_folder)

# Maps recording name (extensions stripped) -> number of frames, so that the
# activations can later be normalised by the shorter recording of each pair.
frames = {}
for file in np_files:
    data = np.load(os.path.join(expressions_folder, file))
    base_name, _ = os.path.splitext(file)
    # Strip a possible secondary extension (e.g. 'name.npy.npy' -> 'name').
    new_name = base_name.split('.')[0]
    # First axis of the stored array is the frame axis.
    frames[new_name] = data.shape[0]
# Normalisation 1: express each expression's summed activation as a
# percentage of the frame count of the *shorter* recording of the pair.
# NOTE(review): the name layout is assumed to be <pair><S|E><x><case>, where
# the third-from-last character is the role ('S'/'E') and the last character
# is the case — confirm against the actual file naming scheme.
rows = []
for file in np_files:
    data = np.load(os.path.join(expressions_folder, file))
    # Drop the first frame and work in float for the normalisation below.
    data = data[1:, :].astype(np.float64)
    base_name, _ = os.path.splitext(file)
    new_name = base_name.split('.')[0]
    # Frame count of the shorter of the two recordings in this pair.
    frame = min(frames[new_name[:-3] + 'S' + new_name[-2:]],
                frames[new_name[:-3] + 'E' + new_name[-2:]])
    norm_frames = np.divide(100 * np.sum(data, axis=0), frame)
    # Prepend pair id, role character and case character to the numeric row;
    # hstack promotes the whole row to strings, matching the header dtype.
    rows.append(np.hstack((new_name[:-3], new_name[-3], new_name[-1],
                           norm_frames)))
# Stack everything once after the loop: growing the array with np.row_stack
# per iteration is O(n^2), and row_stack is a deprecated alias of vstack
# (removed in NumPy 2.0).
EXP = np.vstack([EXP] + rows)
# Normalisation 2: scale each expression column by the total activation of
# its (pair, role) group, so rows become fractions of a participant's total.
df = pd.DataFrame(EXP[1:, :])

# Single source of truth for the column names — the original repeated this
# list three times, which is error-prone if a column is ever added/renamed.
df.columns = ["pair", "role", "case", "Happiness upper", "Happiness lower",
              "Surprise upper", "Surprise lower", "Disgust lower",
              "Fear upper", "Fear lower", "Sadness upper", "Sadness lower",
              "Anger upper", "Anger lower"]
num_col = list(df.columns[3:])  # the eleven numeric expression columns
df[num_col] = df[num_col].apply(pd.to_numeric, errors='coerce')

# Per-(pair, role) totals, broadcast back to row shape by transform('sum').
# NOTE(review): a group with total 0 yields NaN/inf here — confirm that
# cannot occur in the data.
aumax2 = df.groupby(['pair', 'role'])[num_col].transform('sum')
result = EXP[1:, :].copy()
result[:, 3:] = df[num_col] / aumax2

# Overall percentage share of each expression across all rows (pre-scaling).
s1 = np.sum(EXP[1:, 3:].astype(np.float64), axis=0)
s2 = np.multiply(np.divide(s1, sum(s1)), 100)
output = list(num_col)  # expression labels in column order
print(output, s2)
benjhoch = []    # rows of [expression, p-value, cond_a, cond_b] for BH step
testresult = {}  # expression -> list of [condition pair, wilcoxon result]

# Wilcoxon signed-rank test for every expression column against every
# pairing of the three conditions 'g', 'a', 'o'; zero_method='pratt' keeps
# zero-differences in the ranking.
for expr, value in enumerate(output):
    for cond in [['g', 'a'], ['a', 'o'], ['g', 'o']]:
        # Rows belonging to each condition; column 2 of `result` is the case.
        x = result[np.where(result[:, 2] == cond[0])[0],
                   3 + expr].astype(np.float64)
        y = result[np.where(result[:, 2] == cond[1])[0],
                   3 + expr].astype(np.float64)
        res = wilcoxon(x=x, y=y, zero_method='pratt')
        benjhoch.append([value, res[1], cond[0], cond[1]])
        # Accumulate all three condition pairs per expression — the original
        # assignment overwrote the entry on every pair, keeping only
        # ['g', 'o'].
        testresult.setdefault(value, []).append([cond, res])
Q = 0.3  # target false discovery rate for the Benjamini-Hochberg correction

benjhoch = np.array(benjhoch, dtype=object)
# Sort the table by p-value and append the 1-based rank as a fifth column.
benjhoch = np.column_stack((benjhoch[benjhoch[:, 1].argsort()],
                            np.arange(1, len(benjhoch) + 1)))
print('Benjamini Hochberg Table: \n', benjhoch)

m = len(output) * 3  # total number of tests (expressions x condition pairs)

# Find the largest rank whose actual p-value is below its Benjamini-Hochberg
# threshold (rank / m * Q). maxi = -1 means no test survives the correction;
# the original initialised this to 0, which wrongly reported the rank-1 test
# as significant even when nothing passed.
maxi = -1
for i, val in enumerate(benjhoch):
    if float(val[1]) < (int(val[4]) / m) * Q:
        maxi = i

# Everything up to and including rank maxi+1 is significant under BH.
print(maxi + 1, 'significantly changing expression(s): \n')
# Report the expressions that survive the Benjamini-Hochberg correction,
# together with their mean activation in each of the two compared conditions.
sigexpr = []  # names of the significantly changing expressions
for val in benjhoch[:(maxi + 1)]:
    au = val[0]
    sigexpr += [au]
    col = 3 + output.index(au)
    # Re-select the two condition groups for this expression.
    x = result[np.where(result[:, 2] == [val[2]])[0], col].astype(np.float64)
    y = result[np.where(result[:, 2] == [val[3]])[0], col].astype(np.float64)
    # (The original re-ran wilcoxon() here and discarded the result — the
    # p-values were already computed and corrected above, so that call was
    # dead code and has been removed.)
    print('Expression: ', val[0], 'significantly changes between '
          'conditions', val[2], 'and', val[3])
    print('Average expression activation \n',
          val[2], ':', np.nanmean(x), val[3], ':', np.nanmean(y), '\n')
# df.to_csv('/home/valapil/Project/ForkCausal_Adithya/analysis.csv', index=False)
|