Tutorials: Chromosome Nuclear Landmarks Optimization¶
Import the necessary packages¶
[1]:
import sys
import numpy as np
import sklearn.cluster as sk
import matplotlib.pyplot as plt
import copy
import subprocess
import scipy.stats
import warnings
import MDAnalysis as mda
from openNucleome.utils import DamID_TSASeq_calculation
Compute the simulated DamID and TSASeq¶
In the next step, we calculated the DamID and TSASeq with the function “openNucleome.utils.DamID_TSASeq_calculation” and the trajectory “OpenNucleome/tutorials/HFF_100KB/SphericalNucleus /LangevinDynamics/reduced_traj.dcd”, which contains 50 frames totally
[2]:
### DamID_TSASeq_calculation(traj_data, gLength, maternalIdx, paternalIdx, start_frame, end_frame)
# gLengthFile.txt: The difference between each two neighboring values in the file represents the length of the chromosome
# maternalIdxFile.txt: Index of each maternal chromosome
# paternalIdxFile.txt: Index of each paternal chromosome
damid_simulated, tsaseq_simulated, n_clusters = DamID_TSASeq_calculation("reduced_traj.dcd", "mol_info/gLengthFile.txt", "mol_info/maternalIdxFile.txt",
"mol_info/paternalIdxFile.txt", 0, 50)
[3]:
# Defined some important variables
run_number = 1
N_chr_beads = 60642 #Number of chromosome particles
N_nucleolus_particles = 300 #The number of nucleolus particles
N_speckles_particles = 1600 #The number of speckle particles
N_lamina_particles = 8000 #The number of lamina particles
radius_nucleus = 13.0 #The radius of cell nucleus, 13.0 (LJ unit) = 5.0 µm
"""Info files"""
gLength = np.loadtxt("mol_info/gLengthFile.txt",dtype=int) #The difference between each two neighboring values
#in the file represents the length of the chromosome
maternalIdx = np.loadtxt("mol_info/maternalIdxFile.txt",dtype=int) #Index of each maternal chromosome
paternalIdx = np.loadtxt("mol_info/paternalIdxFile.txt",dtype=int) #Index of each paternal chromosome
Load the experimental DamID and TSASeq¶
[4]:
gw_lamina = np.mean(damid_simulated)
gw_speckles = np.mean(tsaseq_simulated)
damid_data_low_res = np.loadtxt("DamID-OE.txt", usecols=[1])
tsa_data_low_res = np.loadtxt("TSA-Seq-OE.txt", usecols=[1])
damid_data_low_res_haploid = np.zeros(30321) #Haploid results
tsa_data_low_res_haploid = np.zeros(30321) #Haploid results
for i in range(23):
damid_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(damid_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +
damid_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]
)
tsa_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(tsa_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +
tsa_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]
)
expt_dam = damid_data_low_res_haploid*gw_lamina
expt_tsa = tsa_data_low_res_haploid*gw_speckles
update_chr_list = np.array([i for i in range(1,23)])
update_chr_list -= 1
Adam optimization¶
[5]:
##DamID part
m_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number-1))
v_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number-1))
m_db_dam = np.loadtxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number-1))
v_db_dam = np.loadtxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number-1))
beta1_dam = 0.9
beta2_dam = 0.999
epsilon_dam = 1e-8
eta_dam = 0.01
t_dam = int(np.loadtxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number-1)))
grad_dam = -damid_simulated + expt_dam
[6]:
# START TO DO THE ADAM OPTIMIZATION
# momentum beta 1
# *** weights *** #
m_dw_dam = beta1_dam*m_dw_dam + (1-beta1_dam)*grad_dam
# *** biases *** #
m_db_dam = beta1_dam*m_db_dam + (1-beta1_dam)*grad_dam
# rms beta 2
# *** weights *** #
v_dw_dam = beta2_dam*v_dw_dam + (1-beta2_dam)*(grad_dam**2)
# *** biases *** #
v_db_dam = beta2_dam*v_db_dam + (1-beta2_dam)*grad_dam
# We save the updated parameters in Adam for the next iteration
subprocess.call(["mkdir -p adam_chr_NL_param/%02d"%(run_number)],shell=True,stdout=subprocess.PIPE)
np.savetxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number), m_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number), v_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number), m_db_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number), v_db_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number), np.array([t_dam+1]).reshape((-1,1)), fmt='%d')
## bias correction
m_dw_corr_dam = m_dw_dam/(1-beta1_dam**t_dam)
m_db_corr_dam = m_db_dam/(1-beta1_dam**t_dam)
v_dw_corr_dam = v_dw_dam/(1-beta2_dam**t_dam)
v_db_corr_dam = v_db_dam/(1-beta2_dam**t_dam)
dalpha_dam = m_dw_corr_dam/(np.sqrt(v_dw_corr_dam)+epsilon_dam)
[7]:
# Load the old parameters and update them
damid = np.loadtxt("potential/%02d/chr_lam_param.txt"%(run_number-1))[:60642]
for i in update_chr_list:
damid[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]
damid[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]
[8]:
# TSASeq part, very similar to the above
m_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number-1))
v_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number-1))
m_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number-1))
v_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number-1))
beta1_tsa = 0.9
beta2_tsa = 0.999
epsilon_tsa = 1e-8
eta_tsa = 0.01
t_tsa = int(np.loadtxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number-1)))
grad_tsa = -tsaseq_simulated + expt_tsa
[9]:
# START TO DO THE ADAM TRAINING
# momentum beta 1
# *** weights *** #
m_dw_tsa = beta1_tsa*m_dw_tsa + (1-beta1_tsa)*grad_tsa
# *** biases *** #
m_db_tsa = beta1_tsa*m_db_tsa + (1-beta1_tsa)*grad_tsa
# rms beta 2
# *** weights *** #
v_dw_tsa = beta2_tsa*v_dw_tsa + (1-beta2_tsa)*(grad_tsa**2)
# *** biases *** #
v_db_tsa = beta2_tsa*v_db_tsa + (1-beta2_tsa)*grad_tsa
np.savetxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number), m_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number), v_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number), m_db_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number), v_db_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number), np.array([t_tsa+1]).reshape((-1,1)), fmt='%d')
## bias correction
m_dw_corr_tsa = m_dw_tsa/(1-beta1_tsa**t_tsa)
m_db_corr_tsa = m_db_tsa/(1-beta1_tsa**t_tsa)
v_dw_corr_tsa = v_dw_tsa/(1-beta2_tsa**t_tsa)
v_db_corr_tsa = v_db_tsa/(1-beta2_tsa**t_tsa)
dalpha_tsa = m_dw_corr_tsa/(np.sqrt(v_dw_corr_tsa)+epsilon_tsa)
[10]:
# Load the old parameters and update them
tsaseq = np.loadtxt("potential/%02d/chr_spec_param.txt"%(run_number-1))[:60642]
for i in update_chr_list:
tsaseq[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]
tsaseq[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]
[11]:
#Added portion to overide the parameters to 0.0 if no expt signal on segment
zero_signal_damid = (damid_data_low_res[:] == 0.0)
zero_signal_tsa = (tsa_data_low_res[:] == 0.0)
damid[zero_signal_damid] = 0.0
tsaseq[zero_signal_tsa] = 0.0
Save the new parameters¶
[12]:
subprocess.call(["mkdir -p potential/%02d/"%(run_number)],shell=True,stdout=subprocess.PIPE)
np.savetxt("potential/%02d/chr_spec_param.txt"%(run_number), np.append(tsaseq,[0]*9900).reshape((-1,1)), fmt='%.6f')
np.savetxt("potential/%02d/chr_lam_param.txt"%(run_number), np.append(damid,[0]*9900).reshape((-1,1)), fmt='%.6f')