{ "cells": [ { "cell_type": "markdown", "id": "8f70aab6-4bd7-4037-802b-58f060d35ec9", "metadata": {}, "source": [ "## Tutorials: Chromosome Nuclear Landmarks Optimization" ] }, { "cell_type": "markdown", "id": "3dada086-dc56-4af1-bc5a-b2c83bc82a9a", "metadata": {}, "source": [ "### Import the necessary packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "7fbf57dc-ce2f-4720-bc4e-75de146108d8", "metadata": {}, "outputs": [], "source": [ "import sys\n", "import numpy as np\n", "import sklearn.cluster as sk\n", "import matplotlib.pyplot as plt\n", "import copy\n", "import subprocess\n", "import scipy.stats\n", "import warnings\n", "import MDAnalysis as mda\n", "from openNucleome.utils import DamID_TSASeq_calculation" ] }, { "cell_type": "markdown", "id": "bc6ff132-cb12-4b4e-b2f7-e2ca9b421cc8", "metadata": {}, "source": [ "### Compute the simulated DamID and TSASeq\n", "\n", "In the next step, we calculated the DamID and TSASeq with the function \"openNucleome.utils.DamID_TSASeq_calculation\" and the trajectory \"OpenNucleome/tutorials/HFF_100KB/SphericalNucleus\n", "/LangevinDynamics/reduced_traj.dcd\", which contains 50 frames totally" ] }, { "cell_type": "code", "execution_count": 2, "id": "ed5cfd82-e410-435b-a60c-c13734a4133a", "metadata": {}, "outputs": [], "source": [ "### DamID_TSASeq_calculation(traj_data, gLength, maternalIdx, paternalIdx, start_frame, end_frame)\n", "\n", "# gLengthFile.txt: The difference between each two neighboring values in the file represents the length of the chromosome\n", "# maternalIdxFile.txt: Index of each maternal chromosome\n", "# paternalIdxFile.txt: Index of each paternal chromosome\n", "\n", "damid_simulated, tsaseq_simulated, n_clusters = DamID_TSASeq_calculation(\"reduced_traj.dcd\", \"mol_info/gLengthFile.txt\", \"mol_info/maternalIdxFile.txt\",\n", " \"mol_info/paternalIdxFile.txt\", 0, 50)" ] }, { "cell_type": "code", "execution_count": 3, "id": "dfbe437b-c19c-43bc-b8a3-8dc6ee31c486", "metadata": {}, "outputs": [], "source": [ "# Defined some important variables\n", "run_number = 1\n", "\n", "N_chr_beads = 60642 #Number of chromosome particles \n", "N_nucleolus_particles = 300 #The number of nucleolus particles\n", "N_speckles_particles = 1600 #The number of speckle particles\n", "N_lamina_particles = 8000 #The number of lamina particles\n", "radius_nucleus = 13.0 #The radius of cell nucleus, 13.0 (LJ unit) = 5.0 µm\n", "\n", "\"\"\"Info files\"\"\"\n", "gLength = np.loadtxt(\"mol_info/gLengthFile.txt\",dtype=int) #The difference between each two neighboring values\n", " #in the file represents the length of the chromosome\n", "maternalIdx = np.loadtxt(\"mol_info/maternalIdxFile.txt\",dtype=int) #Index of each maternal chromosome\n", "paternalIdx = np.loadtxt(\"mol_info/paternalIdxFile.txt\",dtype=int) #Index of each paternal chromosome" ] }, { "cell_type": "markdown", "id": "3f081d14-cd83-4550-8017-537ab2b0e727", "metadata": {}, "source": [ "### Load the experimental DamID and TSASeq" ] }, { "cell_type": "code", "execution_count": 4, "id": "8eef636c-19e5-4662-91b0-fcd804cf8db0", "metadata": {}, "outputs": [], "source": [ "gw_lamina = np.mean(damid_simulated)\n", "gw_speckles = np.mean(tsaseq_simulated)\n", "\n", "damid_data_low_res = np.loadtxt(\"DamID-OE.txt\", usecols=[1])\n", "tsa_data_low_res = np.loadtxt(\"TSA-Seq-OE.txt\", usecols=[1])\n", "\n", "damid_data_low_res_haploid = np.zeros(30321) #Haploid results\n", "tsa_data_low_res_haploid = np.zeros(30321) #Haploid results\n", "for i in range(23):\n", " damid_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(damid_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +\n", " damid_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]\n", " )\n", " tsa_data_low_res_haploid[gLength[i]:gLength[i+1]] = 0.5*(tsa_data_low_res[maternalIdx[i][0]-1:maternalIdx[i][1]] +\n", " tsa_data_low_res[paternalIdx[i][0]-1:paternalIdx[i][1]]\n", " )\n", " \n", "expt_dam = damid_data_low_res_haploid*gw_lamina\n", "expt_tsa = tsa_data_low_res_haploid*gw_speckles\n", "\n", "update_chr_list = np.array([i for i in range(1,23)])\n", "update_chr_list -= 1" ] }, { "cell_type": "markdown", "id": "0a2e1fdc-ebb9-44a1-8cdc-27f9df78face", "metadata": {}, "source": [ "### Adam optimization" ] }, { "cell_type": "code", "execution_count": 5, "id": "2b4ff4fb-83ef-46c5-90cf-4166876bb5ca", "metadata": {}, "outputs": [], "source": [ "##DamID part\n", "\n", "m_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number-1))\n", "v_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number-1))\n", "m_db_dam = np.loadtxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number-1))\n", "v_db_dam = np.loadtxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number-1))\n", "beta1_dam = 0.9\n", "beta2_dam = 0.999\n", "epsilon_dam = 1e-8\n", "eta_dam = 0.01\n", "t_dam = int(np.loadtxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number-1)))\n", "\n", "grad_dam = -damid_simulated + expt_dam" ] }, { "cell_type": "code", "execution_count": 6, "id": "ac4c5326-d70c-4f4b-9f46-036263f5c363", "metadata": {}, "outputs": [], "source": [ "# START TO DO THE ADAM OPTIMIZATION\n", "\n", "# momentum beta 1\n", "# *** weights *** #\n", "m_dw_dam = beta1_dam*m_dw_dam + (1-beta1_dam)*grad_dam\n", "# *** biases *** #\n", "m_db_dam = beta1_dam*m_db_dam + (1-beta1_dam)*grad_dam\n", "# rms beta 2\n", "# *** weights *** #\n", "v_dw_dam = beta2_dam*v_dw_dam + (1-beta2_dam)*(grad_dam**2)\n", "# *** biases *** #\n", "v_db_dam = beta2_dam*v_db_dam + (1-beta2_dam)*grad_dam\n", "\n", "# We save the updated parameters in Adam for the next iteration\n", "\n", "subprocess.call([\"mkdir -p adam_chr_NL_param/%02d\"%(run_number)],shell=True,stdout=subprocess.PIPE)\n", "np.savetxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number), m_dw_dam.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number), v_dw_dam.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number), m_db_dam.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number), v_db_dam.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number), np.array([t_dam+1]).reshape((-1,1)), fmt='%d')\n", "\n", "## bias correction\n", "m_dw_corr_dam = m_dw_dam/(1-beta1_dam**t_dam)\n", "m_db_corr_dam = m_db_dam/(1-beta1_dam**t_dam)\n", "v_dw_corr_dam = v_dw_dam/(1-beta2_dam**t_dam)\n", "v_db_corr_dam = v_db_dam/(1-beta2_dam**t_dam)\n", "\n", "dalpha_dam = m_dw_corr_dam/(np.sqrt(v_dw_corr_dam)+epsilon_dam)" ] }, { "cell_type": "code", "execution_count": 7, "id": "663e0f3d-1133-42b8-ad54-547f74f7532e", "metadata": {}, "outputs": [], "source": [ "# Load the old parameters and update them\n", "\n", "damid = np.loadtxt(\"potential/%02d/chr_lam_param.txt\"%(run_number-1))[:60642]\n", "\n", "for i in update_chr_list:\n", " damid[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]\n", " damid[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]" ] }, { "cell_type": "code", "execution_count": 8, "id": "0822f848-e744-4a17-b718-95db239146bd", "metadata": {}, "outputs": [], "source": [ "# TSASeq part, very similar to the above\n", "\n", "m_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number-1))\n", "v_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number-1))\n", "m_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number-1))\n", "v_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number-1))\n", "beta1_tsa = 0.9\n", "beta2_tsa = 0.999\n", "epsilon_tsa = 1e-8\n", "eta_tsa = 0.01\n", "t_tsa = int(np.loadtxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number-1)))\n", "\n", "grad_tsa = -tsaseq_simulated + expt_tsa" ] }, { "cell_type": "code", "execution_count": 9, "id": "95718cb1-5a40-4690-ae5d-9fa3d25c560d", "metadata": {}, "outputs": [], "source": [ "# START TO DO THE ADAM TRAINING\n", "# momentum beta 1\n", "# *** weights *** #\n", "m_dw_tsa = beta1_tsa*m_dw_tsa + (1-beta1_tsa)*grad_tsa\n", "# *** biases *** #\n", "m_db_tsa = beta1_tsa*m_db_tsa + (1-beta1_tsa)*grad_tsa\n", "# rms beta 2\n", "# *** weights *** #\n", "v_dw_tsa = beta2_tsa*v_dw_tsa + (1-beta2_tsa)*(grad_tsa**2)\n", "# *** biases *** #\n", "v_db_tsa = beta2_tsa*v_db_tsa + (1-beta2_tsa)*grad_tsa\n", "\n", "np.savetxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number), m_dw_tsa.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number), v_dw_tsa.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number), m_db_tsa.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number), v_db_tsa.reshape((-1,1)), fmt='%15.12e')\n", "np.savetxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number), np.array([t_tsa+1]).reshape((-1,1)), fmt='%d')\n", "\n", "## bias correction\n", "m_dw_corr_tsa = m_dw_tsa/(1-beta1_tsa**t_tsa)\n", "m_db_corr_tsa = m_db_tsa/(1-beta1_tsa**t_tsa)\n", "v_dw_corr_tsa = v_dw_tsa/(1-beta2_tsa**t_tsa)\n", "v_db_corr_tsa = v_db_tsa/(1-beta2_tsa**t_tsa)\n", "\n", "dalpha_tsa = m_dw_corr_tsa/(np.sqrt(v_dw_corr_tsa)+epsilon_tsa)" ] }, { "cell_type": "code", "execution_count": 10, "id": "ddddfdea-2a69-4a92-ab42-d6bf44752797", "metadata": {}, "outputs": [], "source": [ "# Load the old parameters and update them\n", "\n", "tsaseq = np.loadtxt(\"potential/%02d/chr_spec_param.txt\"%(run_number-1))[:60642]\n", "\n", "for i in update_chr_list:\n", " tsaseq[maternalIdx[i][0]-1:maternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]\n", " tsaseq[paternalIdx[i][0]-1:paternalIdx[i][1]] -= eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]" ] }, { "cell_type": "code", "execution_count": 11, "id": "6629e894-6432-4240-aef0-bc867434ca77", "metadata": {}, "outputs": [], "source": [ "#Added portion to overide the parameters to 0.0 if no expt signal on segment\n", "\n", "zero_signal_damid = (damid_data_low_res[:] == 0.0)\n", "zero_signal_tsa = (tsa_data_low_res[:] == 0.0)\n", "damid[zero_signal_damid] = 0.0\n", "tsaseq[zero_signal_tsa] = 0.0" ] }, { "cell_type": "markdown", "id": "93b3f223-77b8-4e01-8f90-5b93fe5dd4c5", "metadata": {}, "source": [ "### Save the new parameters" ] }, { "cell_type": "code", "execution_count": 12, "id": "2dc60a27-f677-47a6-8b33-edbe4e61e25b", "metadata": {}, "outputs": [], "source": [ "subprocess.call([\"mkdir -p potential/%02d/\"%(run_number)],shell=True,stdout=subprocess.PIPE)\n", "\n", "np.savetxt(\"potential/%02d/chr_spec_param.txt\"%(run_number), np.append(tsaseq,[0]*9900).reshape((-1,1)), fmt='%.6f')\n", "np.savetxt(\"potential/%02d/chr_lam_param.txt\"%(run_number), np.append(damid,[0]*9900).reshape((-1,1)), fmt='%.6f')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }