00001
00002 #include "../headers/rpdb.h"
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
/**
 * Residue names (compared on their first three characters) of HETATM records
 * that the readers in this file keep. Several names are listed more than
 * once; this is harmless because every lookup stops at the first match.
 */
static const char *ST_keep_hetatm[] = {

    "HEA", "HBI", "BIO", "CFM", "CLP", "FES", "F3S", "FS3", "FS4", "BPH",
    "BPB", "BCL", "BCB", "COB", "ZN", "FEA", "FEO", "H4B", "BH4", "BHS",
    "HBL", "THB", "DDH", "DHE", "HAS", "HDD", "HDM", "HEB", "HEC", "HEO",
    "HES", "HEV", "MHM", "SRM", "VER", "1FH", "2FH", "HC0", "HC1", "HF3",
    "HF5", "NFS", "OMO", "PHF", "SF3", "SF4", "CFM", "CFN", "CLF", "CLP",
    "CN1", "CNB", "CNF", "CUB", "CUM", "CUN", "CUO", "F3S", "FES", "FS2",
    "FS3", "FS4", "FSO", "FSX", "PHO", "BH1", "CHL", "CL1", "CL2", "CLA",
    "CCH", "CFO", "FE2", "FCI", "FCO", "FDC", "FEA", "FEO", "FNE", "HIF",
    "OFO", "PFC", "HE5", "BAZ", "BOZ", "FE", "HEM", "HCO", "1CP", "CLN",
    "COH", "CP3", "DEU", "FDD", "FDE", "FEC", "FMI", "HE5", "HEG", "HIF",
    "HNI", "MMP", "MNH", "MNR", "MP1", "PC3", "PCU", "PNI", "POR", "PP9",
    "MSE", "HIE", "HID", "HIP","ACE","FAD"
} ;

/* Number of entries in ST_keep_hetatm. Derived from the array itself so the
   count cannot drift out of sync when names are added or removed. */
static const int ST_nb_keep_hetatm =
    (int) (sizeof(ST_keep_hetatm) / sizeof(ST_keep_hetatm[0])) ;
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
/**
 * Convert the two-character charge field of an ATOM/HETATM record to an int.
 *
 * The PDB convention writes the magnitude first and the sign second ("2+",
 * "1-"). atoi() alone stops at the sign, so a trailing '-' would be lost;
 * that case is handled explicitly. Any other content is handed to atoi(),
 * which keeps the previous behaviour (e.g. "-1", "+2", " 1").
 *
 * @param c0  First character of the field (line index 78).
 * @param c1  Second character of the field (line index 79).
 * @return    Signed charge.
 */
static int rpdb_charge_from_field(char c0, char c1)
{
    char buf[3] ;

    if(isdigit((unsigned char) c0) && (c1 == '+' || c1 == '-')) {
        int magnitude = c0 - '0' ;
        return (c1 == '-') ? -magnitude : magnitude ;
    }

    buf[0] = c0 ;
    buf[1] = c1 ;
    buf[2] = '\0' ;

    return atoi(buf) ;
}

/**
 * Split one ATOM/HETATM line into its individual fields.
 *
 * The line is modified temporarily (a NUL is written at field boundaries so
 * that atoi() can be applied in place) and restored before returning.
 *
 * NOTE(review): indices up to 26 here, and up to 66 in
 * rpdb_extract_atom_values(), are accessed without looking at the line
 * length; callers presumably pass a buffer at least that large.
 *
 * @param pdb_line    The line to parse (NUL-terminated, writable).
 * @param type        Receives the first 6 characters of the line. This
 *                    function does not add a terminator itself.
 * @param atm_id      Receives the integer found at indices 6..10.
 * @param name        Receives the trimmed 4-character field at indices
 *                    12..15 (needs room for 5 bytes).
 * @param alt_loc     Receives the character at index 16.
 * @param res_name    Receives the residue name, as extracted by
 *                    rpdb_extract_atm_resname().
 * @param chain       Receives the character at index 21 as a 1-char string.
 * @param res_id      Receives the integer found at indices 22..25.
 * @param insert      Receives the character at index 26.
 * @param x,y,z       Receive the coordinates.
 * @param occ         Receives the occupancy.
 * @param bfactor     Receives the B-factor.
 * @param symbol      Receives the element symbol read at indices 76..77, or
 *                    a guess from guess_element() when that field is missing
 *                    or blank (needs room for 3 bytes).
 * @param charge      Receives the charge read at indices 78..79, 0 if absent.
 * @param guess_flag  Incremented each time the element had to be guessed.
 */
void rpdb_extract_pdb_atom( char *pdb_line, char *type, int *atm_id, char *name,
                            char *alt_loc, char *res_name, char *chain,
                            int *res_id, char *insert,
                            float *x, float *y, float *z, float *occ,
                            float *bfactor, char *symbol, int *charge, int *guess_flag)
{
    int rlen = strlen(pdb_line) ;

    char *prt,
         ctmp ;

    /* Record type */
    strncpy(type, pdb_line, 6) ;

    /* Atom serial number: cut the line at the end of the field, convert,
       then put the saved character back */
    prt = pdb_line + 6 ;
    ctmp = pdb_line[11] ; pdb_line[11] = '\0' ;
    *atm_id = atoi(prt) ; pdb_line[11] = ctmp ;

    /* Atom name */
    strncpy(name, pdb_line + 12, 4);
    name[4] = '\0';
    str_trim(name) ;

    /* Alternate location indicator */
    *alt_loc = pdb_line[16] ;

    /* Residue name */
    rpdb_extract_atm_resname(pdb_line, res_name) ;

    /* Chain identifier */
    chain[0] = pdb_line[21];
    chain[1] = '\0';

    /* Residue sequence number (same cut/convert/restore scheme) */
    prt = pdb_line + 22 ;
    ctmp = pdb_line[26] ; pdb_line[26] = '\0' ;
    *res_id = atoi(prt) ; pdb_line[26] = ctmp ;

    /* Insertion code */
    *insert = pdb_line[26];

    /* Coordinates, occupancy and B-factor */
    rpdb_extract_atom_values(pdb_line, x, y, z, occ, bfactor);

    /* Element symbol: read it when the line is long enough and the field is
       not blank, otherwise guess it from the atom and residue names */
    if (rlen >= 77) {
        strncpy(symbol, pdb_line + 76, 2);
        symbol[2] = '\0';
        str_trim(symbol);
        if(strlen(symbol) < 1) {
            guess_element(name, symbol,res_name) ;
            *guess_flag+=1;
        }
    }
    else {
        guess_element(name, symbol,res_name) ;
        *guess_flag+=1;
    }
    str_trim(symbol);

    /* Charge: a blank field, a line ending right there, or a line that is
       too short all mean 0. With rlen == 79, index 79 is the terminator. */
    if(rlen >= 79) {
        if((pdb_line[78] == ' ' && pdb_line[79] == ' ') || pdb_line[78] == '\n'){
            *charge = 0 ;
        }
        else {
            *charge = rpdb_charge_from_field(pdb_line[78], pdb_line[79]) ;
        }
    }
    else *charge = 0 ;

}
00187
00188
00189 int element_in_kept_res(char *res_name){
00190 int i;
00191 for(i=0;i<ST_nb_keep_hetatm;i++){
00192 if(!strncmp(res_name, ST_keep_hetatm[i],3)) return 1;
00193 }
00194 return 0;
00195 }
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
/**
 * Guess the element symbol of an atom from its name and residue name.
 *
 * The atom name is copied and trimmed, and a leading digit is skipped. The
 * residue name then selects which validity helper is consulted:
 *  - residue accepted by element_in_std_res(): is_valid_prot_element();
 *  - residue accepted by element_in_nucl_acid(): is_valid_nucl_acid_element();
 *  - anything else: is_valid_element().
 * The helpers are treated as returning -1 for "not valid", as the checks
 * below imply. In the first two cases a valid name yields its first
 * character as the symbol; in the third the whole remaining name is copied.
 * When nothing validates, the first two characters of the name are used.
 *
 * @param aname     Atom name (NUL-terminated).
 * @param element   Output buffer for the symbol. NOTE(review): the third
 *                  case copies the whole name with strcpy(); presumably
 *                  is_valid_element() only accepts names that fit the
 *                  caller's buffer -- confirm.
 * @param res_name  Residue name the atom belongs to.
 */
void guess_element(char *aname, char *element, char *res_name)
{
    /* Work on a private copy so the caller's name is left untouched */
    char tmp[strlen(aname)+1] ;
    strcpy(tmp, aname) ;

    str_trim(tmp) ;
    char *ptmp = tmp ;

    /* Skip a leading digit. The cast keeps isdigit() defined for bytes
       outside the 7-bit range when char is signed. */
    if(isdigit((unsigned char) tmp[0])) ptmp = ptmp+1 ;

    if(element_in_std_res(res_name)){
        if(is_valid_prot_element(ptmp, 1) != -1) {
            element[0] = ptmp[0];
            element[1] = '\0';

            return ;
        }
    }
    else if(element_in_nucl_acid(res_name)) {
        if(is_valid_nucl_acid_element(ptmp, 1) != -1) {
            element[0] = ptmp[0];
            element[1] = '\0';

            return ;
        }
    }
    else {
        if(is_valid_element(ptmp, 1) != -1) {
            strcpy(element,ptmp);
            return ;
        }
    }

    /* Fallback: first two characters of the name. When the name is empty
       (or was a single digit) ptmp[0] is already the terminator and ptmp[1]
       lies outside the array, so it must not be read. */
    element[0] = ptmp[0];
    element[1] = (ptmp[0] != '\0') ? ptmp[1] : '\0';
    element[2] = '\0';
}
00272
/**
 * Heuristic test: does this atom name denote a nitrogen?
 *
 * Accepted patterns:
 *  - 'N' immediately followed by a digit;
 *  - any first character other than 'H' or 'M', followed by 'N', provided
 *    str_is_number(aname, 0) is non-zero.
 *
 * @param aname  Atom name (NUL-terminated).
 * @return       1 if one of the patterns matches, 0 otherwise.
 */
int is_N(char *aname)
{
    /* An empty name cannot match, and looking at aname[1] would read past
       the terminator */
    if(aname[0] == '\0') return 0 ;

    /* The cast keeps isdigit() defined when char is signed */
    if(aname[0] == 'N' && isdigit((unsigned char) aname[1])) return 1 ;

    if( aname[0] != 'H' && aname[0] != 'M' && aname[1] == 'N'
        && str_is_number(aname, 0)) return 1 ;

    return 0 ;
}
00282
/**
 * Heuristic test: does this atom name denote an oxygen?
 *
 * Accepted patterns:
 *  - "OP" + one of 'A'/'B'/'C' + a digit;
 *  - "OE" + two digits;
 *  - any first character other than 'O', 'C', 'H' or 'M', followed by 'O';
 *  - "CO" followed by two characters of which neither is a blank.
 * Note that a bare "O" matches none of these and yields 0.
 *
 * @param aname  Atom name (NUL-terminated).
 * @return       1 if one of the patterns matches, 0 otherwise.
 */
int is_O(char *aname)
{
    /* An empty name cannot match, and looking at aname[1] would read past
       the terminator */
    if(aname[0] == '\0') return 0 ;

    if(aname[0] == 'O') {
        /* The casts keep isdigit() defined when char is signed. Each test
           short-circuits on the terminator, so no index beyond it is read. */
        if( aname[1] == 'P' && (aname[2] == 'A' || aname[2] == 'B' || aname[2] == 'C')
            && isdigit((unsigned char) aname[3])) {
            return 1 ;
        }

        if( aname[1] == 'E' && isdigit((unsigned char) aname[2])
            && isdigit((unsigned char) aname[3])) {
            return 1 ;
        }

        return 0 ;
    }

    if( aname[0] != 'C' && aname[0] != 'H' && aname[0] != 'M' && aname[1] == 'O')
        return 1 ;

    /* The check on the terminator at index 2 stops a two-letter "CO" from
       reading index 3, which is outside the string */
    if( aname[0] == 'C' && aname[1] == 'O' && aname[2] != '\0' && aname[2] != ' '
        && aname[3] != ' ')
        return 1 ;

    return 0 ;
}
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
/**
 * Extract the residue name of an ATOM/HETATM line.
 *
 * Up to four characters starting at index 17 are copied (stopping early at
 * the end of the line), the rest of the 5-byte output is zero-filled, and
 * the result is passed through str_trim().
 *
 * @param pdb_line  The line to read.
 * @param res_name  Output buffer, at least 5 bytes.
 */
void rpdb_extract_atm_resname(char *pdb_line, char *res_name)
{
    const char *field = pdb_line + 17 ;
    int pos = 0 ;

    /* Copy at most 4 characters, stopping at the end of the string */
    while(pos < 4 && field[pos] != '\0') {
        res_name[pos] = field[pos] ;
        pos++ ;
    }

    /* Zero-fill the remainder, including the terminator at index 4 */
    while(pos <= 4) {
        res_name[pos] = '\0' ;
        pos++ ;
    }

    str_trim(res_name);
}
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
/**
 * Parse one fixed-width numeric field of an ATOM/HETATM line.
 *
 * The field is copied into a local buffer and converted there, so the line
 * itself is never written to and nothing beyond its terminator is read.
 *
 * @param line      The line.
 * @param line_len  strlen(line).
 * @param start     Index of the first character of the field.
 * @param width     Width of the field in characters.
 * @return          The converted value; 0 if the line ends before the field
 *                  starts or the field holds no number.
 */
static float rpdb_atom_field_to_float(const char *line, size_t line_len,
                                      size_t start, size_t width)
{
    char field[16] ;

    if(start >= line_len) return 0.0f ;

    /* Clip the field to what the line and the local buffer can hold */
    if(width > line_len - start) width = line_len - start ;
    if(width > sizeof(field) - 1) width = sizeof(field) - 1 ;

    memcpy(field, line + start, width) ;
    field[width] = '\0' ;

    return (float) atof(field) ;
}

/**
 * Extract coordinates, occupancy and B-factor from an ATOM/HETATM line.
 *
 * Field layout (0-based indices): x 30..37, y 38..45, z 46..53,
 * occupancy 54..59, B-factor 60..65. Fields missing from a truncated line
 * are returned as 0. The line is not modified.
 *
 * @param pdb_line  The line to parse (NUL-terminated).
 * @param x,y,z     Receive the coordinates.
 * @param occ       Receives the occupancy.
 * @param bfactor   Receives the B-factor.
 */
void rpdb_extract_atom_values(char *pdb_line, float *x, float *y, float *z,
                              float *occ, float *bfactor)
{
    const size_t len = strlen(pdb_line) ;

    *x       = rpdb_atom_field_to_float(pdb_line, len, 30, 8) ;
    *y       = rpdb_atom_field_to_float(pdb_line, len, 38, 8) ;
    *z       = rpdb_atom_field_to_float(pdb_line, len, 46, 8) ;
    *occ     = rpdb_atom_field_to_float(pdb_line, len, 54, 6) ;
    *bfactor = rpdb_atom_field_to_float(pdb_line, len, 60, 6) ;
}
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
/**
 * Parse one fixed-width numeric field of a CRYST1 line.
 *
 * The field is copied into a local buffer and converted there, so the line
 * itself is never written to and nothing beyond its terminator is read.
 *
 * @param line      The line.
 * @param line_len  strlen(line).
 * @param start     Index of the first character of the field.
 * @param width     Width of the field in characters.
 * @return          The converted value; 0 if the line ends before the field
 *                  starts or the field holds no number.
 */
static float rpdb_cryst1_field_to_float(const char *line, size_t line_len,
                                        size_t start, size_t width)
{
    char field[16] ;

    if(start >= line_len) return 0.0f ;

    /* Clip the field to what the line and the local buffer can hold */
    if(width > line_len - start) width = line_len - start ;
    if(width > sizeof(field) - 1) width = sizeof(field) - 1 ;

    memcpy(field, line + start, width) ;
    field[width] = '\0' ;

    return (float) atof(field) ;
}

/**
 * Extract the unit-cell parameters from a CRYST1 line.
 *
 * Field layout (0-based indices): a 6..14, b 15..23, c 24..32,
 * alpha 33..39, beta 40..46, gamma 47..53. Fields missing from a truncated
 * line are returned as 0.
 *
 * The line is left untouched. The earlier in-place implementation wrote a
 * NUL at index 54 to delimit the last field and never restored it, leaving
 * the caller's line truncated.
 *
 * @param pdb_line          The line to parse (NUL-terminated).
 * @param alpha,beta,gamma  Receive the three angles.
 * @param a,b,c             Receive the three lengths.
 */
void rpdb_extract_cryst1(char *pdb_line, float *alpha, float *beta, float *gamma,
                         float *a, float *b, float *c)
{
    const size_t len = strlen(pdb_line) ;

    *a = rpdb_cryst1_field_to_float(pdb_line, len, 6, 9) ;
    *b = rpdb_cryst1_field_to_float(pdb_line, len, 15, 9) ;
    *c = rpdb_cryst1_field_to_float(pdb_line, len, 24, 9) ;

    *alpha = rpdb_cryst1_field_to_float(pdb_line, len, 33, 7) ;
    *beta  = rpdb_cryst1_field_to_float(pdb_line, len, 40, 7) ;
    *gamma = rpdb_cryst1_field_to_float(pdb_line, len, 47, 7) ;
}
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478 s_pdb* rpdb_open(char *fpath, const char *ligan, const int keep_lig)
00479 {
00480 s_pdb *pdb = NULL ;
00481
00482 char buf[M_PDB_BUF_LEN],
00483 resb[5] ;
00484
00485 int nhetatm = 0,
00486 natoms = 0,
00487 natm_lig = 0 ;
00488 int i ;
00489
00490 pdb = (s_pdb *) my_malloc(sizeof(s_pdb)) ; ;
00491
00492
00493 pdb->fpdb = fopen_pdb_check_case(fpath, "r");
00494 if (!pdb->fpdb) {
00495 my_free(pdb) ;
00496 fprintf(stderr, "! File %s does not exist\n", fpath) ;
00497 return NULL ;
00498 }
00499
00500 while(fgets(buf, M_PDB_LINE_LEN + 2, pdb->fpdb)) {
00501 if (!strncmp(buf, "ATOM ", 5)) {
00502
00503 if(buf[16]==' ' || buf[16]=='A'){
00504
00505 rpdb_extract_atm_resname(buf, resb) ;
00506 if( ligan && ligan[0] == resb[0] && ligan[1] == resb[1]
00507 && ligan[2] == resb[2]){
00508
00509 if(keep_lig) {
00510 natm_lig ++ ;
00511 natoms++ ;
00512 }
00513 }
00514 else {
00515 natoms++ ;
00516 }
00517 }
00518 }
00519 else if(!strncmp(buf, "HETATM", 6)) {
00520
00521 if(buf[16]==' ' || buf[16]=='A'){
00522
00523 rpdb_extract_atm_resname(buf, resb) ;
00524 if( keep_lig && ligan && ligan[0] == resb[0] && ligan[1] == resb[1]
00525 && ligan[2] == resb[2]){
00526 natm_lig ++ ; natoms++ ;
00527 }
00528 else {
00529
00530 if (keep_lig && ! ligan && strncmp(resb,"HOH",3) && strncmp(resb,"WAT",3)){
00531 natoms++ ; nhetatm++ ;
00532 }
00533 else {
00534 for(i = 0 ; i < ST_nb_keep_hetatm ; i++) {
00535 if(ST_keep_hetatm[i][0] == resb[0] && ST_keep_hetatm[i][1]
00536 == resb[1] && ST_keep_hetatm[i][2] == resb[2]) {
00537 nhetatm++ ; natoms++ ;
00538 break ;
00539 }
00540 }
00541 }
00542 }
00543 }
00544 }
00545
00546
00547
00548
00549
00550 else if (!strncmp(buf, "END", 3)) break ;
00551 }
00552
00553 if (natoms == 0) {
00554 fprintf(stderr, "! File '%s' contains no atoms...\n", fpath) ;
00555 my_free(pdb) ;
00556
00557 return NULL ;
00558 }
00559
00560
00561 pdb->latoms = (s_atm*) my_calloc(natoms, sizeof(s_atm)) ;
00562 pdb->latoms_p = (s_atm**) my_calloc(natoms, sizeof(s_atm*)) ;
00563
00564 if(nhetatm > 0) pdb->lhetatm = (s_atm**) my_calloc(nhetatm, sizeof(s_atm*)) ;
00565 else pdb->lhetatm = NULL ;
00566
00567 if(natm_lig > 0) pdb->latm_lig = (s_atm**) my_calloc(natm_lig, sizeof(s_atm*)) ;
00568 else pdb->latm_lig = NULL ;
00569
00570 pdb->natoms = natoms ;
00571 pdb->nhetatm = nhetatm ;
00572 pdb->natm_lig = natm_lig ;
00573 rewind(pdb->fpdb) ;
00574
00575 return pdb ;
00576 }
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599 void rpdb_read(s_pdb *pdb, const char *ligan, const int keep_lig)
00600 {
00601 int i,
00602 iatoms,
00603 ihetatm,
00604 iatm_lig,
00605 ligfound ;
00606
00607 char pdb_line[M_PDB_BUF_LEN],
00608 resb[5] ;
00609
00610 s_atm *atom = NULL ;
00611 s_atm *atoms = pdb->latoms ;
00612 s_atm **atoms_p = pdb->latoms_p ;
00613 s_atm **atm_lig = pdb->latm_lig ;
00614 int guess_flag=0;
00615 iatoms = 0 ;
00616 ihetatm = 0 ;
00617 iatm_lig = 0 ;
00618 ligfound = 0 ;
00619
00620
00621 while(fgets(pdb_line, M_PDB_LINE_LEN + 2, pdb->fpdb)) {
00622 if (strncmp(pdb_line, "ATOM ", 5) == 0) {
00623 if(pdb_line[16]==' ' || pdb_line[16]=='A'){
00624
00625 rpdb_extract_atm_resname(pdb_line, resb) ;
00626
00627 if( ligan && ligan[0] == resb[0] && ligan[1] == resb[1]
00628 && ligan[2] == resb[2]){
00629 if(keep_lig) {
00630 atom = atoms + iatoms ;
00631
00632
00633 rpdb_extract_pdb_atom(pdb_line, atom->type, &(atom->id),
00634 atom->name, &(atom->pdb_aloc), atom->res_name,
00635 atom->chain, &(atom->res_id), &(atom->pdb_insert),
00636 &(atom->x), &(atom->y), &(atom->z),
00637 &(atom->occupancy), &(atom->bfactor), atom->symbol,
00638 &(atom->charge), &guess_flag);
00639
00640
00641 atom->mass = pte_get_mass(atom->symbol) ;
00642 atom->radius = pte_get_vdw_ray(atom->symbol) ;
00643 atom->electroneg = pte_get_enegativity(atom->symbol) ;
00644 atom->sort_x = -1 ;
00645
00646 atoms_p[iatoms] = atom ;
00647 iatoms++ ;
00648
00649 atm_lig[iatm_lig] = atom ;
00650 iatm_lig ++ ;
00651 ligfound = 1 ;
00652 }
00653 }
00654 else {
00655
00656 atom = atoms + iatoms ;
00657 rpdb_extract_pdb_atom(pdb_line, atom->type, &(atom->id),
00658 atom->name, &(atom->pdb_aloc), atom->res_name,
00659 atom->chain, &(atom->res_id), &(atom->pdb_insert),
00660 &(atom->x), &(atom->y), &(atom->z), &(atom->occupancy),
00661 &(atom->bfactor), atom->symbol, &(atom->charge), &guess_flag);
00662
00663
00664 atom->mass = pte_get_mass(atom->symbol) ;
00665 atom->radius = pte_get_vdw_ray(atom->symbol) ;
00666 atom->electroneg = pte_get_enegativity(atom->symbol) ;
00667 atom->sort_x = -1 ;
00668
00669 atoms_p[iatoms] = atom ;
00670 iatoms++ ;
00671 }
00672 }
00673 }
00674 else if(strncmp(pdb_line, "HETATM", 6) == 0) {
00675 if(pdb_line[16]==' ' || pdb_line[16]=='A'){
00676
00677 rpdb_extract_atm_resname(pdb_line, resb) ;
00678
00679 if( ligan && keep_lig && ligan[0] == resb[0] && ligan[1] == resb[1]
00680 && ligan[2] == resb[2]){
00681
00682 atom = atoms + iatoms ;
00683 rpdb_extract_pdb_atom(pdb_line, atom->type, &(atom->id),
00684 atom->name, &(atom->pdb_aloc), atom->res_name,
00685 atom->chain, &(atom->res_id), &(atom->pdb_insert),
00686 &(atom->x), &(atom->y), &(atom->z), &(atom->occupancy),
00687 &(atom->bfactor), atom->symbol, &(atom->charge), &guess_flag);
00688
00689
00690 atom->mass = pte_get_mass(atom->symbol) ;
00691 atom->radius = pte_get_vdw_ray(atom->symbol) ;
00692 atom->electroneg = pte_get_enegativity(atom->symbol) ;
00693 atom->sort_x = -1 ;
00694
00695 atoms_p[iatoms] = atom ;
00696 atm_lig[iatm_lig] = atom ;
00697
00698 iatm_lig ++ ; iatoms++ ;
00699 ligfound = 1 ;
00700 }
00701 else if(pdb->lhetatm) {
00702
00703
00704 if (keep_lig && ! ligan && strncmp(resb,"HOH",3) && strncmp(resb,"WAT",3)){
00705 atom = atoms + iatoms ;
00706 rpdb_extract_pdb_atom(pdb_line, atom->type, &(atom->id),
00707 atom->name, &(atom->pdb_aloc), atom->res_name,
00708 atom->chain, &(atom->res_id), &(atom->pdb_insert),
00709 &(atom->x), &(atom->y), &(atom->z),
00710 &(atom->occupancy), &(atom->bfactor),
00711 atom->symbol, &(atom->charge), &guess_flag);
00712
00713
00714 atom->mass = pte_get_mass(atom->symbol) ;
00715 atom->radius = pte_get_vdw_ray(atom->symbol) ;
00716 atom->electroneg = pte_get_enegativity(atom->symbol) ;
00717 atom->sort_x = -1 ;
00718
00719 atoms_p[iatoms] = atom ;
00720 pdb->lhetatm[ihetatm] = atom ;
00721 ihetatm ++ ; iatoms++ ;
00722 }
00723 else {
00724 for(i = 0 ; i < ST_nb_keep_hetatm ; i++) {
00725 if( ST_keep_hetatm[i][0] == resb[0] && ST_keep_hetatm[i][1]
00726 == resb[1] && ST_keep_hetatm[i][2] == resb[2]) {
00727 atom = atoms + iatoms ;
00728 rpdb_extract_pdb_atom(pdb_line, atom->type, &(atom->id),
00729 atom->name, &(atom->pdb_aloc), atom->res_name,
00730 atom->chain, &(atom->res_id), &(atom->pdb_insert),
00731 &(atom->x), &(atom->y), &(atom->z),
00732 &(atom->occupancy), &(atom->bfactor),
00733 atom->symbol, &(atom->charge), &guess_flag);
00734
00735
00736 atom->mass = pte_get_mass(atom->symbol) ;
00737 atom->radius = pte_get_vdw_ray(atom->symbol) ;
00738 atom->electroneg = pte_get_enegativity(atom->symbol) ;
00739 atom->sort_x = -1 ;
00740
00741 atoms_p[iatoms] = atom ;
00742 pdb->lhetatm[ihetatm] = atom ;
00743 ihetatm ++ ; iatoms++ ;
00744 break ;
00745 }
00746 }
00747 }
00748 }
00749 }
00750 }
00751 else if (strncmp(pdb_line, "CRYST1", 6) == 0) {
00752 rpdb_extract_cryst1(pdb_line, &(pdb->alpha), &(pdb->beta), &(pdb->gamma),
00753 &(pdb->A), &(pdb->B), &(pdb->C));
00754 }
00755 else if (!strncmp(pdb_line, "END", 3)) break ;
00756 }
00757
00758
00759
00760
00761
00762
00763 if(ligan && keep_lig && (ligfound == 0 || pdb->natm_lig <= 0)) {
00764 fprintf(stderr, ">! Warning: ligand '%s' not found in the pdb...\n", ligan) ;
00765 if(pdb->latm_lig) fprintf(stderr, "! Ligand list is not NULL however...\n") ;
00766 if(ligfound == 1) fprintf(stderr, "! And ligfound == 1!! :-/\n") ;
00767 }
00768 else if(ligfound == 1 && iatm_lig <= 0) {
00769 fprintf(stderr, ">! Warning: ligand '%s' has been detected but no atoms \
00770 has been stored!\n", ligan) ;
00771 }
00772 else if((ligfound == 1 && pdb->natm_lig <= 0) || (pdb->natm_lig <=0
00773 && iatm_lig > 0)) {
00774 fprintf(stderr, ">! Warning: ligand '%s' has been detected in rpdb_read \
00775 but not in rpdb_open!\n", ligan) ;
00776 }
00777
00778 }
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794 void free_pdb_atoms(s_pdb *pdb)
00795 {
00796 if(pdb) {
00797 if(pdb->lhetatm) {
00798 my_free(pdb->lhetatm) ;
00799 pdb->lhetatm = NULL ;
00800 }
00801
00802 if(pdb->latoms) {
00803 my_free(pdb->latoms) ;
00804 pdb->latoms = NULL ;
00805 }
00806 if(pdb->latm_lig) {
00807 my_free(pdb->latm_lig) ;
00808 pdb->latm_lig = NULL ;
00809 }
00810 if(pdb->fpdb) {
00811 fclose(pdb->fpdb) ;
00812 pdb->fpdb = NULL ;
00813 }
00814
00815 if(pdb->latoms_p) {
00816 my_free(pdb->latoms_p) ;
00817 pdb->latoms_p = NULL ;
00818 }
00819
00820 my_free(pdb) ;
00821 }
00822 }