Path: blob/devel/elmergrid/src/metis-5.1.0/GKlib/seq.c
3206 views
/*1*2* Sequence handler library by Huzefa Rangwala3* Date : 03.01.20074*5*6*7*/8910#include <GKlib.h>1112131415/*********************************************************/16/* ! \brief Initializes the <tt>gk_seq_t</tt> variable1718192021\param A pointer to gk_seq_t itself22\returns null23*/24/***********************************************************************/2526void gk_seq_init(gk_seq_t *seq)27{2829seq->len = 0;30seq->sequence = NULL;3132seq->pssm = NULL;33seq->psfm = NULL;3435seq->name = NULL;3637}3839/***********************************************************************/40/*! \brief This function creates the localizations for the various sequences4142\param string i.e amino acids, nucleotides, sequences43\returns gk_i2cc2i_t variable44*/45/*********************************************************************/4647gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)48{495051int nsymbols;52gk_idx_t i;53gk_i2cc2i_t *t;5455nsymbols = strlen(alphabet);56t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");57t->n = nsymbols;58t->i2c = gk_cmalloc(256, "gk_i2c_create_common");59t->c2i = gk_imalloc(256, "gk_i2c_create_common");606162gk_cset(256, -1, t->i2c);63gk_iset(256, -1, t->c2i);6465for(i=0;i<nsymbols;i++){66t->i2c[i] = alphabet[i];67t->c2i[(int)alphabet[i]] = i;68}6970return t;7172}737475/*********************************************************************/76/*! \brief This function reads a pssm in the format of gkmod pssm7778\param file_name is the name of the pssm file79\returns gk_seq_t80*/81/********************************************************************/82gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)83{84gk_seq_t *seq;85gk_idx_t i, j, ii;86size_t ntokens, nbytes, len;87FILE *fpin;888990gk_Tokens_t tokens;91static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";92static int PSSMWIDTH = 20;93char *header, line[MAXLINELEN];94gk_i2cc2i_t *converter;9596header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");9798converter = gk_i2cc2i_create_common(AAORDER);99100gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);101len --;102103seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");104gk_seq_init(seq);105106seq->len = len;107seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");108seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");109seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");110111seq->nsymbols = PSSMWIDTH;112seq->name = gk_getbasename(filename);113114fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");115116117/* Read the header line */118if (fgets(line, MAXLINELEN-1, fpin) == NULL)119errexit("Unexpected end of file: %s\n", filename);120gk_strtoupper(line);121gk_strtokenize(line, " \t\n", &tokens);122123for (i=0; i<PSSMWIDTH; i++)124header[i] = tokens.list[i][0];125126gk_freetokenslist(&tokens);127128129/* Read the rest of the lines */130for (i=0, ii=0; ii<len; ii++) {131if (fgets(line, MAXLINELEN-1, fpin) == NULL)132errexit("Unexpected end of file: %s\n", filename);133gk_strtoupper(line);134gk_strtokenize(line, " \t\n", &tokens);135136seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];137138for (j=0; j<PSSMWIDTH; j++) {139seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);140seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);141}142143144145gk_freetokenslist(&tokens);146i++;147}148149seq->len = i; /* Reset the length if certain characters were skipped */150151gk_free((void **)&header, LTERM);152gk_fclose(fpin);153154return seq;155}156157158/**************************************************************************/159/*! \brief This function frees the memory allocated to the seq structure.160161\param gk_seq_t162\returns nothing163*/164/**************************************************************************/165void gk_seq_free(gk_seq_t *seq)166{167gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);168gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);169gk_free((void **)&seq->name, &seq->sequence, LTERM);170//gk_free((void **)&seq, LTERM);171gk_free((void **) &seq, LTERM);172173}174175176