# normalisation and results for wilcoxon test
#
# Pipeline:
#   1. load the per-frame expression-activation arrays (.npy) for every
#      pair/role/condition recording found in `expressions_folder`,
#   2. normalisation 1: express each recording's per-expression activation
#      sum as a percentage of the shorter recording of its pair,
#   3. normalisation 2: divide each row by its (pair, role) group total,
#   4. run Wilcoxon signed-rank tests for every expression between the
#      three conditions and apply a Benjamini-Hochberg correction (Q = 0.3).
import numpy as np
import pandas as pd
from numpy import load
from numpy import save
import os
from scipy.stats import wilcoxon

# folder where expression activation per frame of each pair is stored
expressions_folder = '/home/valapil/Project/ForkCausal_Adithya/test/'

# Header row of the result table: three metadata columns followed by the
# eleven expression-activation columns.
# EXP = ["CASE", "Happiness", "Surprise", "Disgust", "Fear", "Sadness", "Anger"]
EXP = ["pair", "role", "case", "Happiness upper", "Happiness lower",
       "Surprise upper", "Surprise lower", "Disgust lower", "Fear upper",
       "Fear lower", "Sadness upper", "Sadness lower", "Anger upper",
       "Anger lower"]

np_files = [file for file in os.listdir(expressions_folder)]
frames = {}

# get frames: number of frames of every recording, keyed by its base name
for file in np_files:
    file_path = os.path.join(expressions_folder, file)
    data = np.load(file_path)
    base_name, _ = os.path.splitext(file)
    new_name = base_name.split('.')[0]
    frames[new_name] = data.shape[0]

# normalisation 1: activation sums as a percentage of the shorter
# recording of the pair (the 'S' vs the 'E' role of the same pair/case)
for file in np_files:
    file_path = os.path.join(expressions_folder, file)
    data = np.load(file_path)
    data = data[1:, :].astype(np.float64)  # drop the first (header) row
    base_name, _ = os.path.splitext(file)
    new_name = base_name.split('.')[0]
    # NOTE(review): assumes file names encode role at index -3 and case at
    # index -1 (e.g. "<pair><S|E>_<case>") -- confirm against actual files.
    frame = min(frames[new_name[:-3] + 'S' + new_name[-2:]],
                frames[new_name[:-3] + 'E' + new_name[-2:]])
    expr_sum = np.sum(data, axis=0)
    norm_frames = np.divide(100 * expr_sum, frame)
    total = np.hstack((new_name[:-3], new_name[-3], new_name[-1], norm_frames))
    # np.row_stack was removed in NumPy 2.0; np.vstack is the same operation
    EXP = np.vstack((EXP, total))

# normalisation 2: divide every row by its (pair, role) group total
df = pd.DataFrame(EXP[1:, :])
df.columns = ["pair", "role", "case", "Happiness upper", "Happiness lower",
              "Surprise upper", "Surprise lower", "Disgust lower",
              "Fear upper", "Fear lower", "Sadness upper", "Sadness lower",
              "Anger upper", "Anger lower"]
num_col = ["Happiness upper", "Happiness lower", "Surprise upper",
           "Surprise lower", "Disgust lower", "Fear upper", "Fear lower",
           "Sadness upper", "Sadness lower", "Anger upper", "Anger lower"]
df[num_col] = df[num_col].apply(pd.to_numeric, errors='coerce')
# aumax1 = df.groupby(['pair', 'role']).sum()
# per-row denominator: total activation of the row's (pair, role) group
aumax2 = df.groupby(['pair', 'role'])[num_col].transform('sum')
result = EXP[1:, :].copy()
result[:, 3:] = df[num_col] / aumax2

# need to group by case
# diagnostic print: overall percentage contribution of each expression
s1 = np.sum(EXP[1:, 3:].astype(np.float64), axis=0)
s2 = np.multiply((np.divide(s1, sum(s1))), 100)
output = ["Happiness upper", "Happiness lower", "Surprise upper",
          "Surprise lower", "Disgust lower", "Fear upper", "Fear lower",
          "Sadness upper", "Sadness lower", "Anger upper", "Anger lower"]
# output = np.row_stack((output , s2))
print(output, s2)

benjhoch = []    # rows of [expression, p-value, condition a, condition b]
testresult = {}
intersect = []

# Wilcoxon test: every expression against every pair of conditions
# for expr in range(0, output.shape[1]):
for expr, value in enumerate(output):
    for cond in [['g', 'a'], ['a', 'o'], ['g', 'o']]:
        index = np.where(result[:, 2] == cond[0])[0]
        x = result[index, 3 + expr].astype(np.float64)
        index = np.where(result[:, 2] == cond[1])[0]
        y = result[index, 3 + expr].astype(np.float64)
        # 'pratt' keeps zero-differences in the ranking
        res = wilcoxon(x=x, y=y, zero_method='pratt')
        benjhoch += [[value, res[1], cond[0], cond[1]]]  # res[1] == p-value
        testresult[value] = [cond, res]

# Benjamini-Hochberg correction with false-discovery rate Q
Q = 0.3
benjhoch = np.array(benjhoch, dtype=object)
# sort rows by p-value and append the 1-based rank as column 4
benjhoch = np.column_stack((benjhoch[benjhoch[:, 1].argsort()],
                            np.arange(1, len(benjhoch) + 1)))
print('Benjamini Hochberg Table: \n', benjhoch)
m = len(output) * 3  # total number of tests performed

# Find maximum rank where actual p-value is smaller
# than hochberg-benjamini p-value
# BUG FIX: start from -1, not 0 -- with 0 the lowest-p-value row was always
# reported as significant even when no test beat its BH threshold; with -1
# the slice below is empty and a count of 0 is printed in that case.
maxi = -1
for i, val in enumerate(benjhoch):
    if float(val[1]) < (int(val[4]) / m) * Q:
        maxi = i

# Check if test results is significant when using a Benjamini Hochberg
# p-value correction
print(maxi + 1, 'significantly changing expression(s): \n')
sigexpr = []
for val in benjhoch[:(maxi + 1)]:
    au = val[0]
    sigexpr += [au]
    # re-run the test for the significant pair to report the raw samples
    index = np.where(result[:, 2] == [val[2]])[0]
    x = result[index, 3 + output.index(au)].astype(np.float64)
    index = np.where(result[:, 2] == [val[3]])[0]
    y = result[index, 3 + output.index(au)].astype(np.float64)
    res = wilcoxon(x=x, y=y, zero_method='pratt')
    print('Expression: ', val[0], 'significantly changes between '
          'conditions', val[2], 'and', val[3])
    print('Average expression activation \n',
          val[2], ':', np.nanmean(x), val[3], ':', np.nanmean(y), '\n')
# df.to_csv('/home/valapil/Project/ForkCausal_Adithya/analysis.csv', index=False)