Tutorials: Configuration Generation

Import the necessary packages

[1]:
import numpy as np
import subprocess
import math
import random

N_configs         = 10      #The number of configs that the users hope to generate
bond_length       = 0.5     #Bond length
lamina_prox_tol   = 1.0     #Extra proximity tolerance for lamina
R_small_sphere    = 4.0     #Radius of the small sphere in which each individual chromosome is collapsed
R_low_radial      = 0.5     #The lowest values for chrcom uniform position
N_chr_beads       = 60642   #The chromatin beads we have for the 100KB model
R_nucleus         = 13.0    #Radius of the nucleus (reduced unit)

Import the necessary reference files

[2]:
damid_average = np.loadtxt('averaged_damid.txt') #averaged DamID signals of homologous chromosomes
### chromatin_info.txt: This file contains the information of each chromatin bead, and is necessary when creating the final configuration.
### 1st column: the index of each bead; 2nd column: the index of belonging chromosome; 3rd column: the type of each chromatin bead.
mol_info      = np.loadtxt("chromatin_info.txt", dtype=int)

Generate the raw configs of chromosomes

As an example, we first created a total of 10 configurations for the genome by sequentially generating the conformation of each one of the 46 chromosomes as follows. For a given chromosome, we start by placing the first bead at the center (origin) of the nucleus. The positions of the following beads, \(i\), were determined from the \((i-1)\)-th bead as \(r_i= r_{i-1} + 0.5 v\). \(v\) is a normalized random vector, and 0.5 was selected as the bond length between neighboring beads. To produce globular chromosome conformations, we rejected vectors, \(v\), that led to bead positions with distance from the center larger than \(4\sigma\). Upon creating the conformation of a chromosome \(i\), we shift its center of mass to a value \(r^i_\text{com}\) determined as follows. We first compute a mean radial distance, \(r^i_\text{o}\) with the following equation

\[\frac{6\sigma - r^i_\text{o}}{r^i_\text{o}-2\sigma} = \frac{D_\text{hi}-D}{D-D_\text{lo}},\]

where \(D_i\) is the average value of Lamin B DamID profile for chromosome \(i\). \(D_\text{hi}\) and \(D_\text{lo}\) represent the highest and lowest average DamID values of all chromosomes, and \(6\sigma\) and \(2\sigma\) represent the upper and lower bound in radial positions for chromosomes. As shown in Fig. S6, the average Lamin B DamID profiles are highly correlated with normalized chromosome radial positions as reported by DNA MERFISH, supporting their use as a proxy for estimating normalized chromosome radial positions. We then select \(r^i_\text{com}\) as a uniformly distributed random variable within the range \([r^i_\text{o}-2\sigma,r^i_\text{o}+2\sigma]\). Without loss of generality, we randomly chose the directions for shifting all 46 chromosomes.

[3]:
ave_largest = np.amax(damid_average)
ave_smallest = np.amin(damid_average)

rp_pos = []

for k in damid_average:
    p1 = k-ave_smallest
    p2 = ave_largest-k
    rp_pos.append((6*p1+2*p2)/(p1+p2))  # Artificial estimated radial positions from the DamID
[4]:
subprocess.call(["mkdir -p raw_configs_txt"],shell=True,stdout=subprocess.PIPE)

for k in range(N_configs):
    file_write      =   open("raw_configs_txt/human_init_%d"%(k+1),"w")

    xyz_chr      = np.zeros((N_chr_beads,3))
    xyz_chr[:,:] = 100.0
    atom_index   = 0
    for chr in range(1,47):
        #Go chromosome by chromosome and initialize them randomly and save in xyz_chr
        for beads_chr in range(np.count_nonzero(mol_info[:,1] == chr)):
            if beads_chr!= 0:
                while(np.sum(xyz_chr[atom_index]**2) > (R_small_sphere)**2):
                    #Brute force but only needs to be done once
                    draw_norm           = np.random.normal(loc=0.0,scale=1.0,size=3)
                    draw_norm          /= np.sum(draw_norm**2,axis=None)**0.5
                    xyz_chr[atom_index] = xyz_chr[atom_index-1] + bond_length*draw_norm
            else:
                #First bead of the chr is at the origin
                xyz_chr[atom_index,:] = 0.
            atom_index += 1

        #Push the COM of the chr to some random point in the nucleus by choosing the radial coordinate uniformly
        draw_norm           = np.random.normal(loc=0.0,scale=1.0,size=3)
        draw_norm          /= np.sum(draw_norm**2,axis=None)**0.5
        if chr < 45: ## 1 - 44 chromosomes
            ## uniformly distributed random variable within the range [r-2sigma,r+2sigma]
            com_chr             = draw_norm*(np.random.uniform(low=rp_pos[(chr-1)//2]-2,high=rp_pos[(chr-1)//2]+2))
        else: # sex chromosome
            com_chr             = draw_norm*(np.random.uniform(low=0.5,high=8.0))

        chr_only =  (mol_info[:,1] == chr)
        xyz_chr[chr_only,:] += com_chr

    ###
    atom_index      = 0
    random_counter  = 0

    for j in range(len(mol_info)):
        file_write.write("%d %d %d 0.0   %.6f  %.6f  %.6f\n"  %(mol_info[atom_index][0],mol_info[atom_index][1],mol_info[atom_index][2],
        xyz_chr[atom_index][0],xyz_chr[atom_index][1],xyz_chr[atom_index][2]))
        atom_index += 1

    file_write.close()

Generate Speckle and Nucleolus beads

All the beads are uniformly distributed within the cell nucleus in the initial configurations

[5]:
n_nuc = 300
n_spec = 1600

for i in range(N_configs):
    beads = np.loadtxt("raw_configs_txt/human_init_%d"%(i+1), usecols=[4,5,6])
    count = 0

    while (count < n_nuc+n_spec): # 300 Nucleolus beads and 1600 Speckle beads
        theta = np.pi*random.random()
        phi = 2*np.pi*random.random()
        radial = np.sqrt(12.25**2*random.random())
        ran1 = radial*np.sin(theta)*np.cos(phi)
        ran2 = radial*np.sin(theta)*np.sin(phi)
        ran3 = radial*np.cos(theta)
        pos = np.array([ran1,ran2,ran3])

        m = beads-pos
        # if there are beads within this cutoff, it will lead to a unstable system, so reject this sampling
        if len(np.where(np.sqrt(np.sum(m**2,axis=1))<0.4)[0]) == 0: #0.4 is the empirical cutoff
            beads = np.vstack((beads, pos.reshape((-1,3))))
            count += 1

    np.savetxt('raw_configs_txt/with_Sp_No_%d.txt'%(i+1), beads, fmt='%.6f')

Generate Lamina beads

The nuclear envelope provides an enclosure to confine DNA and a repressive environment to organize chromatin with specific interactions. To account for the role of the nuclear lamina while keeping our model simple, we approximate it with discrete particles uniformly placed on a sphere.

We used the Fibonacci grid to initialize the lamina particles, which form a uniform and almost equidistant network of lamina particles on the surface of the nucleus. The Cartesian coordinates associated with the \(i^{th}\) lamina particles are defined as

\[x_i = 2R_\text{N}\times(1 - \frac{i}{N_\text{La}-1}), y_i = \sqrt{R_\text{N}^2-x^2}\times \cos[i\Phi], z_i = \sqrt{R_\text{N}^2-x^2}\times \sin[i\Phi]\]

where \(N_\text{La} =8000\) represents the number of lamina particles, \(i\in \{0,1,\dots,N_\text{La}-2,N_\text{La}-1\}\), and \(\Phi = \pi\times (3-\sqrt{5})\) is the golden angle. We set \(R_\text{N} = 5\mu m\) as the radius of the human foreskin fibroblasts (HFF) cell nucleus.

[6]:
def fibonacci_sphere(samples=8000):

    points = []
    phi = math.pi * (3. - math.sqrt(5.))  # golden angle in radians

    for i in range(samples):
        y = 1 - (i / float(samples - 1)) * 2  # y goes from 1 to -1
        radius = math.sqrt(1 - y * y)  # radius at y

        theta = phi * i  # golden angle increment

        x = math.cos(theta) * radius
        z = math.sin(theta) * radius

        points.append([x, y, z])

    return points

Lamina = R_nucleus*np.array(fibonacci_sphere())

Generate the initial configuration files used in the simulation

[7]:
X, zero = 'X', 0

n_chrom       = 46
nuc_type      = 5
spe_type1     = 6
spe_type2     = 7
lam_type      = 8

for i in range(N_configs):
    beads = np.loadtxt('raw_configs_txt/with_Sp_No_%d.txt'%(i+1))
    coor = np.vstack((beads, Lamina))

    file_write = open('init_config_pool/human_%d.pdb'%(i+1), 'w')
    for i in range(N_chr_beads):
        if i == 0:
            file_write.write('CRYST1   26.400   26.400   26.400  90.00  90.00  90.00 P 1           1\n')
        file_write.write('ATOM%7s%3s%8s%4s%12.3f%8.3f%8.3f%6.2f%6.2f            \n'
                         %(mol_info[i,0],mol_info[i,2],X,mol_info[i,1],coor[i,0],coor[i,1],coor[i,2],zero,zero))
    for k in range(1,len(coor)-N_chr_beads+1):
        idx_curr = N_chr_beads+k
        mol_curr = n_chrom+k
        if k<=n_nuc:
            file_write.write('ATOM%7s%3s%8s%4s%12.3f%8.3f%8.3f%6.2f%6.2f            \n'
                             %(idx_curr,nuc_type,X,mol_curr,coor[idx_curr-1,0],coor[idx_curr-1,1],coor[idx_curr-1,2],zero,zero))
        elif k<=n_nuc+n_spec:
            file_write.write('ATOM%7s%3s%8s%4s%12.3f%8.3f%8.3f%6.2f%6.2f            \n'
                             %(idx_curr,spe_type1,X,mol_curr,coor[idx_curr-1,0],coor[idx_curr-1,1],coor[idx_curr-1,2],zero,zero))
        else:
            file_write.write('ATOM%7s%3s%8s%4s%12.3f%8.3f%8.3f%6.2f%6.2f            \n'
                             %(idx_curr,lam_type,X,mol_curr,coor[idx_curr-1,0],coor[idx_curr-1,1],coor[idx_curr-1,2],zero,zero))
    file_write.write("END")
    file_write.close()

Delete the unnecessary raw configs

[8]:
subprocess.call(["rm -r raw_configs_txt"],shell=True,stdout=subprocess.PIPE)
[8]:
0