| [f683fe] | 1 | #!/usr/bin/python
 | 
|---|
 | 2 | #
 | 
|---|
 | 3 | # Takes two pdb file and a dbond file, matches the coordinates and thus creates a mapping from old ids to new ids.
 | 
|---|
 | 4 | 
 | 
|---|
 | 5 | import sys, random, math, re
 | 
|---|
 | 6 | wrerr=sys.stderr.write
 | 
|---|
 | 7 | wrout=sys.stdout.write
 | 
|---|
 | 8 | 
 | 
|---|
 | 9 | # check arguments
 | 
|---|
 | 10 | if len(sys.argv) < 5:
 | 
|---|
 | 11 |         print "Usage: "+sys.argv[0]+" <srcPDBfile> <destXYZfile> <offsetID> <offsetXYZ> <srcDBONDfile> [destDBONDfile]" 
 | 
|---|
 | 12 |         sys.exit(1)
 | 
|---|
 | 13 | 
 | 
|---|
 | 14 | EPSILON=1e-3
 | 
|---|
 | 15 | CUTOFF=2.
 | 
|---|
 | 16 | inputsrcPDB = open(sys.argv[1], "r")
 | 
|---|
 | 17 | inputdestPDB = open(sys.argv[2], "r")
 | 
|---|
 | 18 | inputsrcDBOND = open(sys.argv[5], "r")
 | 
|---|
 | 19 | offsetID=int(sys.argv[3])
 | 
|---|
 | 20 | offsetXYZ=int(sys.argv[4])
 | 
|---|
 | 21 | if len(sys.argv) > 6:
 | 
|---|
 | 22 |         output = open(sys.argv[6],"w")
 | 
|---|
 | 23 | else:
 | 
|---|
 | 24 |         output = open(sys.argv[5]+".new", "w")
 | 
|---|
 | 25 | 
 | 
|---|
 | 26 | # 1. first parse both PDB files into arrays (id, element, xyz) , therewhile scan BoundaryBoxes
 | 
|---|
 | 27 | max = [ 0., 0., 0. ]
 | 
|---|
 | 28 | min = [ 0., 0., 0. ]
 | 
|---|
 | 29 | x = [ 0., 0., 0. ]
 | 
|---|
 | 30 | print "Scanning source PDB file"+sys.argv[1]+"."
 | 
|---|
 | 31 | srcAtoms = []
 | 
|---|
 | 32 | for line in inputsrcPDB:
 | 
|---|
 | 33 |         if "#" in line:
 | 
|---|
 | 34 |                 continue
 | 
|---|
 | 35 |         if "END" in line:
 | 
|---|
 | 36 |                 break
 | 
|---|
 | 37 |         if "ATOM" in line:
 | 
|---|
 | 38 |                 entries = line.split()
 | 
|---|
 | 39 |                 for n in range(3):
 | 
|---|
 | 40 |                         x[n] = float(entries[offsetXYZ+n])
 | 
|---|
 | 41 |                         if x[n] > max[n]:
 | 
|---|
 | 42 |                                 max[n] = x[n]
 | 
|---|
 | 43 |                         if x[n] < min[n]:
 | 
|---|
 | 44 |                                 min[n] = x[n]
 | 
|---|
 | 45 |                 srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
 | 
|---|
 | 46 | inputsrcPDB.close()
 | 
|---|
 | 47 | print "Scanning destination XYZ file"+sys.argv[2]+"."
 | 
|---|
 | 48 | destAtoms = []
 | 
|---|
 | 49 | index = 0
 | 
|---|
 | 50 | for line in inputdestPDB:
 | 
|---|
 | 51 |         entries = line.split()
 | 
|---|
 | 52 |         if (len(entries)<1 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
 | 
|---|
 | 53 |                 continue
 | 
|---|
 | 54 |         for n in range(3):
 | 
|---|
 | 55 |                 x[n] = float(entries[1+n])
 | 
|---|
 | 56 |                 if x[n] > max[n]:
 | 
|---|
 | 57 |                         x[n]-=max[n]
 | 
|---|
 | 58 |                 if x[n] < min[n]:
 | 
|---|
 | 59 |                         x[n]+=max[n]
 | 
|---|
 | 60 |         destAtoms.append([index, x[0], x[1], x[2]])
 | 
|---|
 | 61 |         index+=1
 | 
|---|
 | 62 | inputdestPDB.close()
 | 
|---|
 | 63 | 
 | 
|---|
 | 64 | # 2. create Linked Cell with minimum distance box length
 | 
|---|
 | 65 | print "Found Box bounds [%f,%f]x[%f,%f]x[%f,%f]." % (min[0],max[0],min[1],max[1],min[2],max[2])
 | 
|---|
 | 66 | for i in range(3):      # shift by minimum if below zero
 | 
|---|
 | 67 |   if min[i] < 0:
 | 
|---|
 | 68 |     max[i]-=min[i]
 | 
|---|
 | 69 |   else:
 | 
|---|
 | 70 |     min[i]=0
 | 
|---|
 | 71 | cells_x=int(math.ceil(float(max[0])/CUTOFF))
 | 
|---|
 | 72 | cells_y=int(math.ceil(float(max[1])/CUTOFF))
 | 
|---|
 | 73 | cells_z=int(math.ceil(float(max[2])/CUTOFF))
 | 
|---|
 | 74 | print "Number of cells in each axis direction (%f,%f,%f)." % (cells_x, cells_y, cells_z)
 | 
|---|
 | 75 | 
 | 
|---|
 | 76 | # 3. put each atom into its cell, lists may contain multiple atoms, mark src(0) or dest (1)
 | 
|---|
 | 77 | cell=[]
 | 
|---|
 | 78 | for i in range(cells_x):
 | 
|---|
 | 79 |   cell.append([])
 | 
|---|
 | 80 |   for j in range(cells_y):
 | 
|---|
 | 81 |     cell[i].append([])
 | 
|---|
 | 82 |     for k in range(cells_z):
 | 
|---|
 | 83 |       cell[i][j].append([0])
 | 
|---|
 | 84 | for i in range(len(srcAtoms)):
 | 
|---|
 | 85 |         atom = srcAtoms[i]
 | 
|---|
 | 86 |         print atom
 | 
|---|
 | 87 |         for n in range(3):
 | 
|---|
 | 88 |                 x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
 | 
|---|
 | 89 |         if cells_x ==x[0]:
 | 
|---|
 | 90 |                 x[0]-=1
 | 
|---|
 | 91 |         if cells_y ==x[1]:
 | 
|---|
 | 92 |                 x[1]-=1
 | 
|---|
 | 93 |         if cells_z ==x[2]:
 | 
|---|
 | 94 |                 x[2]-=1
 | 
|---|
 | 95 |         print x
 | 
|---|
 | 96 |         cell[x[0]][x[1]][x[2]][0]+=1
 | 
|---|
 | 97 |         cell[x[0]][x[1]][x[2]].append([0,i])    # 0 means src
 | 
|---|
 | 98 |         print "Source atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
 | 
|---|
 | 99 | for i in range(len(destAtoms)):
 | 
|---|
 | 100 |         atom = destAtoms[i]
 | 
|---|
 | 101 |         for n in range(3):
 | 
|---|
 | 102 |                 x[n] = int(math.floor(float(atom[1+n])/CUTOFF)) 
 | 
|---|
 | 103 |         cell[x[0]][x[1]][x[2]][0]+=1
 | 
|---|
 | 104 |         cell[x[0]][x[1]][x[2]].append([1,i])    # 1 means dest
 | 
|---|
 | 105 |         print "Destination atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
 | 
|---|
 | 106 | 
 | 
|---|
 | 107 | # 4. go through each cell and match (src, dest)-pairs by closest distance, warn if greater than EPSILON
 | 
|---|
 | 108 | srcMatches=0
 | 
|---|
 | 109 | destMatches=0
 | 
|---|
 | 110 | Map = {}
 | 
|---|
 | 111 | i=-1
 | 
|---|
 | 112 | j=-1
 | 
|---|
 | 113 | k=-1
 | 
|---|
 | 114 | l=-1
 | 
|---|
 | 115 | e=-1
 | 
|---|
 | 116 | r=-1
 | 
|---|
 | 117 | t=-1
 | 
|---|
 | 118 | m=-1
 | 
|---|
 | 119 | for i in range(cells_x):
 | 
|---|
 | 120 |         for j in range(cells_y):
 | 
|---|
 | 121 |                 for k in range(cells_z):
 | 
|---|
 | 122 |                         
 | 
|---|
 | 123 |                         #go through every atom in cell
 | 
|---|
 | 124 |                         try:
 | 
|---|
 | 125 |                                 for l in range(1, cell[i][j][k][0]+1):
 | 
|---|
 | 126 |                                         if cell[i][j][k][l][0] != 0:    # skip if it's not a src atom
 | 
|---|
 | 127 |                                                 continue
 | 
|---|
 | 128 |                                         atom1=cell[i][j][k][l][1]
 | 
|---|
 | 129 |                                         print "Current source atom is "+str(srcAtoms[atom1][0])+"."
 | 
|---|
 | 130 |                                         currentPair=[atom1,-1]
 | 
|---|
 | 131 |                                         oldDist=0.
 | 
|---|
 | 132 |                                         # go through cell and all lower neighbours
 | 
|---|
 | 133 |                                         for e in range(i-1,i+2):
 | 
|---|
 | 134 |                                                 #if on boarder continue periodic
 | 
|---|
 | 135 |                                                 #if e>cells_x-1:
 | 
|---|
 | 136 |                                                 #       e=e-cells_x
 | 
|---|
 | 137 |                                                 if (e < 0) or (e >= cells_x):
 | 
|---|
 | 138 |                                                         continue
 | 
|---|
 | 139 |                                                 for r in range(j-1,j+2):
 | 
|---|
 | 140 |                                                         #if on boarder continue periodic
 | 
|---|
 | 141 |                                                         #if r>cells_y-1:
 | 
|---|
 | 142 |                                                         #       r=r-cells_y
 | 
|---|
 | 143 |                                                         if (r < 0) or (r >= cells_y):
 | 
|---|
 | 144 |                                                                 continue
 | 
|---|
 | 145 |                                                         for t in range(k-1,k+2):
 | 
|---|
 | 146 |                                                                 #if on boarder continue periodic
 | 
|---|
 | 147 |                                                                 #if t>cells_z-1:
 | 
|---|
 | 148 |                                                                 #       t=t-cells_z
 | 
|---|
 | 149 |                                                                 if (t < 0) or (t >= cells_z):
 | 
|---|
 | 150 |                                                                         continue
 | 
|---|
 | 151 |                                                                 #go through all atoms in cell                           
 | 
|---|
 | 152 |                                                                 for m in range(1, cell[e][r][t][0]+1):
 | 
|---|
 | 153 |                                                                         if cell[e][r][t][m][0] != 1:            # skip if it's not a dest atom
 | 
|---|
 | 154 |                                                                                 continue
 | 
|---|
 | 155 |                                                                         atom2=cell[e][r][t][m][1]
 | 
|---|
 | 156 |                                                                         print "Current destination atom is "+str(destAtoms[atom2][0])+"."
 | 
|---|
 | 157 |                                                                         dist=0
 | 
|---|
 | 158 |                                                                         tmp=0
 | 
|---|
 | 159 |                                                                         for n in range(3):
 | 
|---|
 | 160 |                                                                                 tmp = srcAtoms[atom1][1+n] - destAtoms[atom2][1+n]
 | 
|---|
 | 161 |                                                                                 dist += tmp*tmp
 | 
|---|
 | 162 |                                                                         print "Squared distance between the two is "+str(dist)+"."
 | 
|---|
 | 163 |                                                                         if ((oldDist > dist) or ((currentPair[1] == -1) and (dist<EPSILON))):
 | 
|---|
 | 164 |                                                                                 currentPair[1] = atom2
 | 
|---|
 | 165 |                                                                                 oldDist = dist
 | 
|---|
 | 166 |                                         if currentPair[1] == -1:
 | 
|---|
 | 167 |                                                 print"Could not find a suitable partner for srcAtom (%d,%d)!\n" % (srcAtoms[currentPair[0]][0],currentPair[1])
 | 
|---|
 | 168 |                                                 Map[ srcAtoms[currentPair[0]][0] ] = currentPair[1]
 | 
|---|
 | 169 |                                         else:
 | 
|---|
 | 170 |                                                 print "Found a suitable partner for srcAtom "+str(srcAtoms[currentPair[0]][0])+","+str(destAtoms[currentPair[1]][0])+"."
 | 
|---|
 | 171 |                                                 srcMatches+=1
 | 
|---|
 | 172 |                                                 destMatches+=1
 | 
|---|
 | 173 |                                                 Map[ srcAtoms[currentPair[0]][0] ] = destAtoms[currentPair[1]][0]
 | 
|---|
 | 174 |                         except IndexError:
 | 
|---|
 | 175 |                                 wrerr("Index Error: (%d,%d,%d)[%d] and (%d,%d,%d)[%d]\n" % (i,j,k,l,e,r,t,m))
 | 
|---|
 | 176 |                                 break
 | 
|---|
 | 177 | 
 | 
|---|
 | 178 | # 5. print the listing
 | 
|---|
 | 179 | print "We have "+str(srcMatches)+" matching atoms."
 | 
|---|
 | 180 | print "Mapping is:"
 | 
|---|
 | 181 | for key in Map:
 | 
|---|
 | 182 |         print str(key)+" -> "+str(Map[key])
 | 
|---|
 | 183 | #print Map              # work also
 | 
|---|
 | 184 | 
 | 
|---|
 | 185 | # 6. use the listing to rewrite the dbond file, store under given filename
 | 
|---|
 | 186 | for line in inputsrcDBOND:
 | 
|---|
 | 187 |         if "#" in line:
 | 
|---|
 | 188 |                 continue
 | 
|---|
 | 189 |         entries=line.split()
 | 
|---|
 | 190 |         flag=0
 | 
|---|
 | 191 |         for n in range(len(entries)):
 | 
|---|
 | 192 |                 if int(entries[n]) == 0 or Map[ int(entries[n]) ] == -1:
 | 
|---|
 | 193 |                         flag=1
 | 
|---|
 | 194 |         if flag==1:
 | 
|---|
 | 195 |                 continue
 | 
|---|
 | 196 |         for n in range(len(entries)):
 | 
|---|
 | 197 |                 output.write("%d\t" % (Map[ int(entries[n]) ]))
 | 
|---|
 | 198 |         output.write("\n")
 | 
|---|
 | 199 | output.close()
 | 
|---|
 | 200 | print "We have "+str(srcMatches)+" matching atoms."
 | 
|---|
 | 201 | # exit
 | 
|---|