"""Normalise per-frame facial-expression activations and test for condition
effects with Wilcoxon signed-rank tests under a Benjamini-Hochberg correction.

Reads one .npy file per recording from ``expressions_folder``.  File base
names are assumed to encode ``<pair><role><?><case>`` in their last three
characters, with role 'S'/'E' at position -3 and case 'g'/'a'/'o' at the end
-- TODO confirm the naming scheme against the data producer.
"""

import os

import numpy as np
import pandas as pd
from scipy.stats import wilcoxon

# Folder where expression activation per frame of each pair is stored.
expressions_folder = '/home/valapil/Project/ForkCausal_Adithya/test/'

# Metadata columns followed by the action-unit / expression columns.
# (Defined once; the original repeated this list three times verbatim.)
META_COLS = ["pair", "role", "case"]
AU_COLS = ["Happiness upper", "Happiness lower", "Surprise upper",
           "Surprise lower", "Disgust lower", "Fear upper", "Fear lower",
           "Sadness upper", "Sadness lower", "Anger upper", "Anger lower"]

# Sort for a deterministic row order: the Wilcoxon tests below pair x and y
# element-wise by row position, so arbitrary os.listdir order would make the
# pairing (and hence the p-values) nondeterministic across runs.
np_files = sorted(os.listdir(expressions_folder))

# --- frame counts per recording -----------------------------------------
frames = {}
for fname in np_files:
    data = np.load(os.path.join(expressions_folder, fname))
    # Strip the extension (and anything after an inner '.') to get the key.
    key = os.path.splitext(fname)[0].split('.')[0]
    frames[key] = data.shape[0]

# --- normalisation 1: percent activation per frame ----------------------
# Collect rows in a list and stack once: np.row_stack is deprecated (removed
# in NumPy 2.0) and growing an array inside the loop is quadratic.
rows = []
for fname in np_files:
    data = np.load(os.path.join(expressions_folder, fname))
    data = data[1:, :].astype(np.float64)  # drop the first (header) row
    key = os.path.splitext(fname)[0].split('.')[0]
    # Normalise by the shorter of the two recordings of this pair
    # (roles 'S' and 'E') so both roles share a common frame count.
    n_frames = min(frames[key[:-3] + 'S' + key[-2:]],
                   frames[key[:-3] + 'E' + key[-2:]])
    norm_frames = 100.0 * data.sum(axis=0) / n_frames
    # Row layout: pair id, role letter, case letter, then the AU values.
    rows.append(np.hstack((key[:-3], key[-3], key[-1], norm_frames)))

header = META_COLS + AU_COLS
EXP = np.vstack([header] + rows) if rows else np.array([header])

# --- normalisation 2: fraction of each (pair, role)'s total activation ---
df = pd.DataFrame(EXP[1:, :], columns=header)
df[AU_COLS] = df[AU_COLS].apply(pd.to_numeric, errors='coerce')
aumax2 = df.groupby(['pair', 'role'])[AU_COLS].transform('sum')
result = EXP[1:, :].copy()
result[:, 3:] = df[AU_COLS] / aumax2

# Overall percentage contribution of each expression (needs grouping by case
# eventually -- kept as in the original analysis).
s1 = np.sum(EXP[1:, 3:].astype(np.float64), axis=0)
s2 = 100.0 * s1 / s1.sum()
output = list(AU_COLS)
print(output, s2)

# --- Wilcoxon signed-rank tests per expression and condition pair -------
benjhoch = []
testresult = {}
for expr_idx, expr_name in enumerate(output):
    for cond in (('g', 'a'), ('a', 'o'), ('g', 'o')):
        x = result[result[:, 2] == cond[0], 3 + expr_idx].astype(np.float64)
        y = result[result[:, 2] == cond[1], 3 + expr_idx].astype(np.float64)
        res = wilcoxon(x=x, y=y, zero_method='pratt')
        benjhoch.append([expr_name, res[1], cond[0], cond[1]])
        # Key by (expression, condition pair): the original keyed by the
        # expression alone, so each condition pair overwrote the previous.
        testresult[(expr_name, cond)] = res

# --- Benjamini-Hochberg correction --------------------------------------
Q = 0.3  # false-discovery-rate threshold
benjhoch = np.array(benjhoch, dtype=object)
# Sort by p-value and append the 1-based rank as a fifth column.
benjhoch = np.column_stack((benjhoch[benjhoch[:, 1].argsort()],
                            np.arange(1, len(benjhoch) + 1)))
print('Benjamini Hochberg Table: \n', benjhoch)

m = len(output) * 3  # total number of tests performed

# Find the maximum rank whose p-value lies below its BH threshold.
# maxi = -1 means no test survives; the original initialised with 0 and
# therefore always reported at least one "significant" expression.
maxi = -1
for i, row in enumerate(benjhoch):
    if float(row[1]) < (int(row[4]) / m) * Q:
        maxi = i

print(maxi + 1, 'significantly changing expression(s): \n')
sigexpr = []
for row in benjhoch[:maxi + 1]:
    au = row[0]
    sigexpr.append(au)
    col = 3 + output.index(au)
    x = result[result[:, 2] == row[2], col].astype(np.float64)
    y = result[result[:, 2] == row[3], col].astype(np.float64)
    # (The original re-ran wilcoxon here and discarded the result.)
    print('Expression: ', row[0], 'significantly changes between '
          'conditions', row[2], 'and', row[3])
    print('Average expression activation \n', row[2], ':', np.nanmean(x),
          row[3], ':', np.nanmean(y), '\n')

# df.to_csv('/home/valapil/Project/ForkCausal_Adithya/analysis.csv', index=False)