Tutorials: Chromosome Nuclear Landmarks Optimization

Import the necessary packages

[1]:
import sys
import numpy as np
import sklearn.cluster as sk
import matplotlib.pyplot as plt
import copy
import subprocess
import scipy.stats
import warnings
import MDAnalysis as mda
from openNucleome.utils import DamID_TSASeq_calculation

Compute the simulated DamID and TSASeq

In the next step, we calculated the DamID and TSASeq with the function “openNucleome.utils.DamID_TSASeq_calculation” and the trajectory “OpenNucleome/tutorials/HFF_100KB/SphericalNucleus /LangevinDynamics/reduced_traj.dcd”, which contains 50 frames totally

[2]:
### DamID_TSASeq_calculation(traj_data, gLength, maternalIdx, paternalIdx, start_frame, end_frame)

# gLengthFile.txt: The difference between each two neighboring values in the file represents the length of the chromosome
# maternalIdxFile.txt: Index of each maternal chromosome
# paternalIdxFile.txt: Index of each paternal chromosome

damid_simulated, tsaseq_simulated, n_clusters = DamID_TSASeq_calculation("reduced_traj.dcd", "mol_info/gLengthFile.txt", "mol_info/maternalIdxFile.txt",
                                                                         "mol_info/paternalIdxFile.txt", 0, 50)
[3]:
# Defined some important variables
run_number                  = 1

N_chr_beads                 = 60642    #Number of chromosome particles
N_nucleolus_particles       = 300      #The number of nucleolus particles
N_speckles_particles        = 1600     #The number of speckle particles
N_lamina_particles          = 8000     #The number of lamina particles
radius_nucleus              = 13.0     #The radius of cell nucleus, 13.0 (LJ unit) = 5.0 µm

"""Info files"""
gLength             = np.loadtxt("mol_info/gLengthFile.txt",dtype=int)      #The difference between each two neighboring values
                                                                            #in the file represents the length of the chromosome
maternalIdx         = np.loadtxt("mol_info/maternalIdxFile.txt",dtype=int)  #Index of each maternal chromosome
paternalIdx         = np.loadtxt("mol_info/paternalIdxFile.txt",dtype=int)  #Index of each paternal chromosome

Load the experimental DamID and TSASeq

[4]:
gw_lamina           = np.mean(damid_simulated)
gw_speckles         = np.mean(tsaseq_simulated)

damid_data_low_res  = np.loadtxt("DamID-OE.txt", usecols=[1])
tsa_data_low_res    = np.loadtxt("TSA-Seq-OE.txt", usecols=[1])

damid_data_low_res_haploid  = np.zeros(30321) #Haploid results
tsa_data_low_res_haploid    = np.zeros(30321) #Haploid results
for i in range(23):
    damid_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(damid_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +
                                                       damid_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]
                                                       )
    tsa_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(tsa_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +
                                                       tsa_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]
                                                       )

expt_dam            = damid_data_low_res_haploid*gw_lamina
expt_tsa            = tsa_data_low_res_haploid*gw_speckles

update_chr_list     = np.array([i for i in range(1,23)])
update_chr_list    -= 1

Adam optimization

[5]:
##DamID part

m_dw_dam                    = np.loadtxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number-1))
v_dw_dam                    = np.loadtxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number-1))
m_db_dam                    = np.loadtxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number-1))
v_db_dam                    = np.loadtxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number-1))
beta1_dam                   = 0.9
beta2_dam                   = 0.999
epsilon_dam                 = 1e-8
eta_dam                     = 0.01
t_dam                       = int(np.loadtxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number-1)))

grad_dam        = -damid_simulated + expt_dam
[6]:
# START TO DO THE ADAM OPTIMIZATION

# momentum beta 1
# *** weights *** #
m_dw_dam        = beta1_dam*m_dw_dam + (1-beta1_dam)*grad_dam
# *** biases *** #
m_db_dam        = beta1_dam*m_db_dam + (1-beta1_dam)*grad_dam
# rms beta 2
# *** weights *** #
v_dw_dam        = beta2_dam*v_dw_dam + (1-beta2_dam)*(grad_dam**2)
# *** biases *** #
v_db_dam        = beta2_dam*v_db_dam + (1-beta2_dam)*grad_dam

# We save the updated parameters in Adam for the next iteration

subprocess.call(["mkdir -p adam_chr_NL_param/%02d"%(run_number)],shell=True,stdout=subprocess.PIPE)
np.savetxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number), m_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number), v_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number), m_db_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number), v_db_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number), np.array([t_dam+1]).reshape((-1,1)), fmt='%d')

## bias correction
m_dw_corr_dam   = m_dw_dam/(1-beta1_dam**t_dam)
m_db_corr_dam   = m_db_dam/(1-beta1_dam**t_dam)
v_dw_corr_dam   = v_dw_dam/(1-beta2_dam**t_dam)
v_db_corr_dam   = v_db_dam/(1-beta2_dam**t_dam)

dalpha_dam      = m_dw_corr_dam/(np.sqrt(v_dw_corr_dam)+epsilon_dam)
[7]:
# Load the old parameters and update them

damid = np.loadtxt("potential/%02d/chr_lam_param.txt"%(run_number-1))[:60642]

for i in update_chr_list:
    damid[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]
    damid[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]
[8]:
# TSASeq part, very similar to the above

m_dw_tsa                    = np.loadtxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number-1))
v_dw_tsa                    = np.loadtxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number-1))
m_db_tsa                    = np.loadtxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number-1))
v_db_tsa                    = np.loadtxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number-1))
beta1_tsa                   = 0.9
beta2_tsa                   = 0.999
epsilon_tsa                 = 1e-8
eta_tsa                     = 0.01
t_tsa                       = int(np.loadtxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number-1)))

grad_tsa        = -tsaseq_simulated + expt_tsa
[9]:
# START TO DO THE ADAM TRAINING
# momentum beta 1
# *** weights *** #
m_dw_tsa        = beta1_tsa*m_dw_tsa + (1-beta1_tsa)*grad_tsa
# *** biases *** #
m_db_tsa        = beta1_tsa*m_db_tsa + (1-beta1_tsa)*grad_tsa
# rms beta 2
# *** weights *** #
v_dw_tsa        = beta2_tsa*v_dw_tsa + (1-beta2_tsa)*(grad_tsa**2)
# *** biases *** #
v_db_tsa        = beta2_tsa*v_db_tsa + (1-beta2_tsa)*grad_tsa

np.savetxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number), m_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number), v_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number), m_db_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number), v_db_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number), np.array([t_tsa+1]).reshape((-1,1)), fmt='%d')

## bias correction
m_dw_corr_tsa   = m_dw_tsa/(1-beta1_tsa**t_tsa)
m_db_corr_tsa   = m_db_tsa/(1-beta1_tsa**t_tsa)
v_dw_corr_tsa   = v_dw_tsa/(1-beta2_tsa**t_tsa)
v_db_corr_tsa   = v_db_tsa/(1-beta2_tsa**t_tsa)

dalpha_tsa     = m_dw_corr_tsa/(np.sqrt(v_dw_corr_tsa)+epsilon_tsa)
[10]:
# Load the old parameters and update them

tsaseq = np.loadtxt("potential/%02d/chr_spec_param.txt"%(run_number-1))[:60642]

for i in update_chr_list:
    tsaseq[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]
    tsaseq[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]
[11]:
#Added portion to overide the parameters to 0.0 if no expt signal on segment

zero_signal_damid   = (damid_data_low_res[:]    == 0.0)
zero_signal_tsa     = (tsa_data_low_res[:]      == 0.0)
damid[zero_signal_damid]  = 0.0
tsaseq[zero_signal_tsa]    = 0.0

Save the new parameters

[12]:
subprocess.call(["mkdir -p potential/%02d/"%(run_number)],shell=True,stdout=subprocess.PIPE)

np.savetxt("potential/%02d/chr_spec_param.txt"%(run_number), np.append(tsaseq,[0]*9900).reshape((-1,1)), fmt='%.6f')
np.savetxt("potential/%02d/chr_lam_param.txt"%(run_number), np.append(damid,[0]*9900).reshape((-1,1)), fmt='%.6f')