P2DFlow / analysis /Ramachandran_plot.py
Holmes
test
ca7299e
import MDAnalysis as mda
import numpy as np
from MDAnalysis.analysis.dihedrals import Ramachandran
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
def ramachandran_eval(all_paths, pdb_file, output_dir):
angle_results_all = []
for dirpath in all_paths:
pdb_path = os.path.join(dirpath,pdb_file)
u = mda.Universe(pdb_path)
protein = u.select_atoms('protein')
# print('There are {} residues in the protein'.format(len(protein.residues)))
ramachandran = Ramachandran(protein)
ramachandran.run()
angle_results = ramachandran.results.angles
# print(angle_results.shape)
# ramachandran.plot(color='black', marker='.')
angle_results_all.append(angle_results.reshape([-1,2]))
# df = pd.DataFrame(angle_results.reshape([-1,2]))
# df.to_csv(os.path.join(output_dir, os.path.basename(dirpath)+'_'+pdb_file.split('.')[0]+'.csv'), index=False)
points1 = angle_results_all[0]
grid_size = 360 # 网格的大小
x_bins = np.linspace(-180, 180, grid_size)
y_bins = np.linspace(-180, 180, grid_size)
result_tmp={}
for idx in range(len(angle_results_all[1:])):
idx = idx + 1
points2 = angle_results_all[idx]
# 使用2D直方图统计每组点在网格上的分布
hist1, _, _ = np.histogram2d(points1[:, 0], points1[:, 1], bins=[x_bins, y_bins])
hist2, _, _ = np.histogram2d(points2[:, 0], points2[:, 1], bins=[x_bins, y_bins])
# 将直方图转换为布尔值,表示某个网格是否有点落入
mask1 = hist1 > 0
mask2 = hist2 > 0
intersection = np.logical_and(mask1, mask2).sum()
all_mask2 = mask2.sum()
val_ratio = intersection / all_mask2
print(os.path.basename(all_paths[idx]), "val_ratio:", val_ratio)
result_tmp[os.path.basename(all_paths[idx])] = val_ratio
result_tmp['file'] = pdb_file
return result_tmp
if __name__ == "__main__":
key1 = 'P2DFlow_epoch19'
all_paths = [
"/cluster/home/shiqian/frame-flow-test1/valid/evaluate/ATLAS_valid",
# "/cluster/home/shiqian/frame-flow-test1/valid/evaluate/esm_n_pred",
"/cluster/home/shiqian/frame-flow-test1/valid/evaluate/alphaflow_pred",
"/cluster/home/shiqian/frame-flow-test1/valid/evaluate/Str2Str_pred",
f'/cluster/home/shiqian/frame-flow-test1/valid/evaluate/{key1}',
]
output_dir = '/cluster/home/shiqian/frame-flow-test1/valid/evaluate/Ramachandran'
os.makedirs(output_dir, exist_ok=True)
results={
'file':[],
# 'esm_n_pred':[],
'alphaflow_pred':[],
'Str2Str_pred':[],
key1:[],
}
for file in os.listdir(all_paths[0]):
if re.search('\.pdb',file):
pdb_file = file
print(file)
result_tmp = ramachandran_eval(
all_paths=all_paths,
pdb_file=pdb_file,
output_dir=output_dir
)
for key in results.keys():
results[key].append(result_tmp[key])
out_total_df = pd.DataFrame(results)
out_total_df.to_csv(os.path.join(output_dir,f'Ramachandran_plot_validity_{key1}.csv'),index=False)