Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/elmergrid/src/metis-5.1.0/GKlib/seq.c
3206 views
1
/*
 *
 * Sequence handler library by Huzefa Rangwala
 * Date : 03.01.2007
 *
 *
 *
 */
9
10
11
#include <GKlib.h>
12
13
14
15
16
/*********************************************************/
17
/* ! \brief Initializes the <tt>gk_seq_t</tt> variable
18
19
20
21
22
\param A pointer to gk_seq_t itself
23
\returns null
24
*/
25
/***********************************************************************/
26
27
void gk_seq_init(gk_seq_t *seq)
28
{
29
30
seq->len = 0;
31
seq->sequence = NULL;
32
33
seq->pssm = NULL;
34
seq->psfm = NULL;
35
36
seq->name = NULL;
37
38
}
39
40
/***********************************************************************/
41
/*! \brief This function creates the localizations for the various sequences
42
43
\param string i.e amino acids, nucleotides, sequences
44
\returns gk_i2cc2i_t variable
45
*/
46
/*********************************************************************/
47
48
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
49
{
50
51
52
int nsymbols;
53
gk_idx_t i;
54
gk_i2cc2i_t *t;
55
56
nsymbols = strlen(alphabet);
57
t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
58
t->n = nsymbols;
59
t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
60
t->c2i = gk_imalloc(256, "gk_i2c_create_common");
61
62
63
gk_cset(256, -1, t->i2c);
64
gk_iset(256, -1, t->c2i);
65
66
for(i=0;i<nsymbols;i++){
67
t->i2c[i] = alphabet[i];
68
t->c2i[(int)alphabet[i]] = i;
69
}
70
71
return t;
72
73
}
74
75
76
/*********************************************************************/
77
/*! \brief This function reads a pssm in the format of gkmod pssm
78
79
\param file_name is the name of the pssm file
80
\returns gk_seq_t
81
*/
82
/********************************************************************/
83
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
84
{
85
gk_seq_t *seq;
86
gk_idx_t i, j, ii;
87
size_t ntokens, nbytes, len;
88
FILE *fpin;
89
90
91
gk_Tokens_t tokens;
92
static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
93
static int PSSMWIDTH = 20;
94
char *header, line[MAXLINELEN];
95
gk_i2cc2i_t *converter;
96
97
header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
98
99
converter = gk_i2cc2i_create_common(AAORDER);
100
101
gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
102
len --;
103
104
seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
105
gk_seq_init(seq);
106
107
seq->len = len;
108
seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
109
seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
110
seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
111
112
seq->nsymbols = PSSMWIDTH;
113
seq->name = gk_getbasename(filename);
114
115
fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
116
117
118
/* Read the header line */
119
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
120
errexit("Unexpected end of file: %s\n", filename);
121
gk_strtoupper(line);
122
gk_strtokenize(line, " \t\n", &tokens);
123
124
for (i=0; i<PSSMWIDTH; i++)
125
header[i] = tokens.list[i][0];
126
127
gk_freetokenslist(&tokens);
128
129
130
/* Read the rest of the lines */
131
for (i=0, ii=0; ii<len; ii++) {
132
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
133
errexit("Unexpected end of file: %s\n", filename);
134
gk_strtoupper(line);
135
gk_strtokenize(line, " \t\n", &tokens);
136
137
seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
138
139
for (j=0; j<PSSMWIDTH; j++) {
140
seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
141
seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
142
}
143
144
145
146
gk_freetokenslist(&tokens);
147
i++;
148
}
149
150
seq->len = i; /* Reset the length if certain characters were skipped */
151
152
gk_free((void **)&header, LTERM);
153
gk_fclose(fpin);
154
155
return seq;
156
}
157
158
159
/**************************************************************************/
160
/*! \brief This function frees the memory allocated to the seq structure.
161
162
\param gk_seq_t
163
\returns nothing
164
*/
165
/**************************************************************************/
166
void gk_seq_free(gk_seq_t *seq)
167
{
168
gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
169
gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
170
gk_free((void **)&seq->name, &seq->sequence, LTERM);
171
//gk_free((void **)&seq, LTERM);
172
gk_free((void **) &seq, LTERM);
173
174
}
175
176