CoCalc -- help-index.c

GitHub Repository: srohatgi01/cups
Path: blob/master/cgi-bin/help-index.c
¹⁰⁹⁰ views
1
/*
2
 * Online help index routines for CUPS.
3
 *
4
 * Copyright © 2007-2019 by Apple Inc.
5
 * Copyright © 1997-2007 by Easy Software Products.
6
 *
7
 * Licensed under Apache License v2.0.  See the file "LICENSE" for more
8
 * information.
9
 */
10

11
/*
12
 * Include necessary headers...
13
 */
14

15
#include "cgi-private.h"
16
#include <cups/dir.h>
17

18

19
/*
20
 * List of common English words that should not be indexed...
21
 */
22

23
/*
 * Each entry is at most 5 characters plus the terminating nul.  The table is
 * only ever read, and it MUST stay sorted (case-insensitively) because it is
 * searched with bsearch().
 */

static const char	help_common_words[][6] =
			{
			  "about", "all",   "an",    "and",   "are",   "as",
			  "at",    "be",    "been",  "but",   "by",    "call",
			  "can",   "come",  "could", "day",   "did",   "do",
			  "down",  "each",  "find",  "first", "for",   "from",
			  "go",    "had",   "has",   "have",  "he",    "her",
			  "him",   "his",   "hot",   "how",   "if",    "in",
			  "is",    "it",    "know",  "like",  "long",  "look",
			  "make",  "many",  "may",   "more",  "most",  "my",
			  "no",    "now",   "of",    "on",    "one",   "or",
			  "other", "out",   "over",  "said",  "see",   "she",
			  "side",  "so",    "some",  "sound", "than",  "that",
			  "the",   "their", "them",  "then",  "there", "these",
			  "they",  "thing", "this",  "time",  "to",    "two",
			  "up",    "use",   "was",   "water", "way",   "we",
			  "were",  "what",  "when",  "which", "who",   "will",
			  "with",  "word",  "would", "write", "you",   "your"
			};
122

123

124
/*
125
 * Local functions...
126
 */
127

128
static help_word_t	*help_add_word(help_node_t *n, const char *text);
129
static void		help_delete_node(help_node_t *n);
130
static void		help_delete_word(help_word_t *w);
131
static int		help_load_directory(help_index_t *hi,
132
			                    const char *directory,
133
					    const char *relative);
134
static int		help_load_file(help_index_t *hi,
135
			               const char *filename,
136
				       const char *relative,
137
				       time_t     mtime);
138
static help_node_t	*help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
139
static int		help_sort_by_name(help_node_t *p1, help_node_t *p2);
140
static int		help_sort_by_score(help_node_t *p1, help_node_t *p2);
141
static int		help_sort_words(help_word_t *w1, help_word_t *w2);
142

143

144
/*
145
 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
146
 */
147

148
void
149
helpDeleteIndex(help_index_t *hi)	/* I - Help index */
150
{
151
  help_node_t	*node;			/* Current node */
152

153

154
  if (!hi)
155
    return;
156

157
  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
158
       node;
159
       node = (help_node_t *)cupsArrayNext(hi->nodes))
160
  {
161
    if (!hi->search)
162
      help_delete_node(node);
163
  }
164

165
  cupsArrayDelete(hi->nodes);
166
  cupsArrayDelete(hi->sorted);
167

168
  free(hi);
169
}
170

171

172
/*
173
 * 'helpFindNode()' - Find a node in an index.
174
 */
175

176
help_node_t *				/* O - Node pointer or NULL */
177
helpFindNode(help_index_t *hi,		/* I - Index */
178
             const char   *filename,	/* I - Filename */
179
             const char   *anchor)	/* I - Anchor */
180
{
181
  help_node_t	key;			/* Search key */
182

183

184
 /*
185
  * Range check input...
186
  */
187

188
  if (!hi || !filename)
189
    return (NULL);
190

191
 /*
192
  * Initialize the search key...
193
  */
194

195
  key.filename = (char *)filename;
196
  key.anchor   = (char *)anchor;
197

198
 /*
199
  * Return any match...
200
  */
201

202
  return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
203
}
204

205

206
/*
207
 * 'helpLoadIndex()' - Load a help index from disk.
208
 */
209

210
help_index_t *				/* O - Index pointer or NULL */
211
helpLoadIndex(const char *hifile,	/* I - Index filename */
212
              const char *directory)	/* I - Directory that is indexed */
213
{
214
  help_index_t	*hi;			/* Help index */
215
  cups_file_t	*fp;			/* Current file */
216
  char		line[2048],		/* Line from file */
217
		*ptr,			/* Pointer into line */
218
		*filename,		/* Filename in line */
219
		*anchor,		/* Anchor in line */
220
		*sectptr,		/* Section pointer in line */
221
		section[1024],		/* Section name */
222
		*text;			/* Text in line */
223
  time_t	mtime;			/* Modification time */
224
  off_t		offset;			/* Offset into file */
225
  size_t	length;			/* Length in bytes */
226
  int		update;			/* Update? */
227
  help_node_t	*node;			/* Current node */
228
  help_word_t	*word;			/* Current word */
229

230

231
 /*
232
  * Create a new, empty index.
233
  */
234

235
  if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
236
    return (NULL);
237

238
  hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
239
  hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
240

241
  if (!hi->nodes || !hi->sorted)
242
  {
243
    cupsArrayDelete(hi->nodes);
244
    cupsArrayDelete(hi->sorted);
245
    free(hi);
246
    return (NULL);
247
  }
248

249
 /*
250
  * Try loading the existing index file...
251
  */
252

253
  if ((fp = cupsFileOpen(hifile, "r")) != NULL)
254
  {
255
   /*
256
    * Lock the file and then read the first line...
257
    */
258

259
    cupsFileLock(fp, 1);
260

261
    if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
262
    {
263
     /*
264
      * Got a valid header line, now read the data lines...
265
      */
266

267
      node = NULL;
268

269
      while (cupsFileGets(fp, line, sizeof(line)))
270
      {
271
       /*
272
	* Each line looks like one of the following:
273
	*
274
	*     filename mtime offset length "section" "text"
275
	*     filename#anchor offset length "text"
276
	*     SP count word
277
	*/
278

279
        if (line[0] == ' ')
280
	{
281
	 /*
282
	  * Read a word in the current node...
283
	  */
284

285
          if (!node || (ptr = strrchr(line, ' ')) == NULL)
286
	    continue;
287

288
          if ((word = help_add_word(node, ptr + 1)) != NULL)
289
	    word->count = atoi(line + 1);
290
        }
291
	else
292
	{
293
	 /*
294
	  * Add a node...
295
	  */
296

297
	  filename = line;
298

299
	  if ((ptr = strchr(line, ' ')) == NULL)
300
            break;
301

302
	  while (isspace(*ptr & 255))
303
            *ptr++ = '\0';
304

305
	  if ((anchor = strrchr(filename, '#')) != NULL)
306
	  {
307
            *anchor++ = '\0';
308
	    mtime = 0;
309
	  }
310
	  else
311
	    mtime = strtol(ptr, &ptr, 10);
312

313
	  offset = strtoll(ptr, &ptr, 10);
314
	  length = (size_t)strtoll(ptr, &ptr, 10);
315

316
	  while (isspace(*ptr & 255))
317
            ptr ++;
318

319
          if (!anchor)
320
	  {
321
	   /*
322
	    * Get section...
323
	    */
324

325
            if (*ptr != '\"')
326
	      break;
327

328
            ptr ++;
329
	    sectptr = ptr;
330

331
            while (*ptr && *ptr != '\"')
332
	      ptr ++;
333

334
            if (*ptr != '\"')
335
	      break;
336

337
            *ptr++ = '\0';
338

339
            strlcpy(section, sectptr, sizeof(section));
340

341
	    while (isspace(*ptr & 255))
342
              ptr ++;
343
          }
344
          else
345
            section[0] = '\0';
346

347
          if (*ptr != '\"')
348
	    break;
349

350
          ptr ++;
351
	  text = ptr;
352

353
          while (*ptr && *ptr != '\"')
354
	    ptr ++;
355

356
          if (*ptr != '\"')
357
	    break;
358

359
          *ptr++ = '\0';
360

361
	  if ((node = help_new_node(filename, anchor, section, text,
362
				    mtime, offset, length)) == NULL)
363
            break;
364

365
	  node->score = -1;
366

367
	  cupsArrayAdd(hi->nodes, node);
368
        }
369
      }
370
    }
371

372
    cupsFileClose(fp);
373
  }
374

375
 /*
376
  * Scan for new/updated files...
377
  */
378

379
  update = help_load_directory(hi, directory, NULL);
380

381
 /*
382
  * Remove any files that are no longer installed...
383
  */
384

385
  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
386
       node;
387
       node = (help_node_t *)cupsArrayNext(hi->nodes))
388
    if (node->score < 0)
389
    {
390
     /*
391
      * Delete this node...
392
      */
393

394
      cupsArrayRemove(hi->nodes, node);
395
      help_delete_node(node);
396
    }
397

398
 /*
399
  * Add nodes to the sorted array...
400
  */
401

402
  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
403
       node;
404
       node = (help_node_t *)cupsArrayNext(hi->nodes))
405
    cupsArrayAdd(hi->sorted, node);
406

407
 /*
408
  * Save the index if we updated it...
409
  */
410

411
  if (update)
412
    helpSaveIndex(hi, hifile);
413

414
 /*
415
  * Return the index...
416
  */
417

418
  return (hi);
419
}
420

421

422
/*
423
 * 'helpSaveIndex()' - Save a help index to disk.
424
 */
425

426
int					/* O - 0 on success, -1 on error */
427
helpSaveIndex(help_index_t *hi,		/* I - Index */
428
              const char   *hifile)	/* I - Index filename */
429
{
430
  cups_file_t	*fp;			/* Index file */
431
  help_node_t	*node;			/* Current node */
432
  help_word_t	*word;			/* Current word */
433

434

435
 /*
436
  * Try creating a new index file...
437
  */
438

439
  if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
440
    return (-1);
441

442
 /*
443
  * Lock the file while we write it...
444
  */
445

446
  cupsFileLock(fp, 1);
447

448
  cupsFilePuts(fp, "HELPV2\n");
449

450
  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
451
       node;
452
       node = (help_node_t *)cupsArrayNext(hi->nodes))
453
  {
454
   /*
455
    * Write the current node with/without the anchor...
456
    */
457

458
    if (node->anchor)
459
    {
460
      if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
461
                         node->filename, node->anchor,
462
                         CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
463
			 node->text) < 0)
464
        break;
465
    }
466
    else
467
    {
468
      if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
469
                         node->filename, (int)node->mtime,
470
                         CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
471
			 node->section ? node->section : "", node->text) < 0)
472
        break;
473
    }
474

475
   /*
476
    * Then write the words associated with the node...
477
    */
478

479
    for (word = (help_word_t *)cupsArrayFirst(node->words);
480
         word;
481
	 word = (help_word_t *)cupsArrayNext(node->words))
482
      if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
483
        break;
484
  }
485

486
  cupsFileFlush(fp);
487

488
  if (cupsFileClose(fp) < 0)
489
    return (-1);
490
  else if (node)
491
    return (-1);
492
  else
493
    return (0);
494
}
495

496

497
/*
498
 * 'helpSearchIndex()' - Search an index.
499
 */
500

501
help_index_t *				/* O - Search index or NULL */
helpSearchIndex(help_index_t *hi,	/* I - Index */
                const char   *query,	/* I - Query string */
                const char   *section,	/* I - Limit search to this section or NULL */
                const char   *filename)	/* I - Limit search to this file or NULL */
{
  help_index_t	*search;		/* Search index */
  help_node_t	*node;			/* Current node */
  help_word_t	*word;			/* Current word */
  void		*sc;			/* Search context */
  int		matches;		/* Number of matches */


 /*
  * Range check...
  */

  if (!hi || !query)
    return (NULL);

 /*
  * Reset the scores of all nodes to 0...
  */

  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
       node;
       node = (help_node_t *)cupsArrayNext(hi->nodes))
    node->score = 0;

 /*
  * Find the first node to search in.  This must stay the last traversal of
  * hi->nodes before the search loop, which continues from this position
  * with cupsArrayNext().
  */

  if (filename)
  {
    node = helpFindNode(hi, filename, NULL);
    if (!node)
      return (NULL);
  }
  else
    node = (help_node_t *)cupsArrayFirst(hi->nodes);

 /*
  * Convert the query into a regular expression...
  */

  sc = cgiCompileSearch(query);
  if (!sc)
    return (NULL);

 /*
  * Allocate a search index...
  */

  search = calloc(1, sizeof(help_index_t));
  if (!search)
  {
    cgiFreeSearch(sc);
    return (NULL);
  }

  search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
  search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);

  if (!search->nodes || !search->sorted)
  {
    cupsArrayDelete(search->nodes);
    cupsArrayDelete(search->sorted);
    free(search);
    cgiFreeSearch(sc);
    return (NULL);
  }

 /*
  * Mark the result as a search index so helpDeleteIndex() leaves the nodes,
  * which still belong to "hi", alone...
  */

  search->search = 1;

 /*
  * Check each node in the index, adding matching nodes to the
  * search index...
  */

  for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
  {
   /*
    * hi->nodes is ordered by filename first, so once a different file is
    * reached no later node can belong to the requested file...
    */

    if (filename && strcmp(node->filename, filename))
      break;

    if (node->section && section && strcmp(node->section, section))
      continue;

   /*
    * Score = matches in the node text plus the stored count of every
    * indexed word that matches...
    */

    matches = cgiDoSearch(sc, node->text);

    for (word = (help_word_t *)cupsArrayFirst(node->words);
         word;
         word = (help_word_t *)cupsArrayNext(node->words))
      if (cgiDoSearch(sc, word->text) > 0)
        matches += word->count;

    if (matches > 0)
    {
     /*
      * Found a match, add the node to the search index.  The score has to
      * be set first because the "sorted" array orders by it.
      */

      node->score = matches;

      cupsArrayAdd(search->nodes, node);
      cupsArrayAdd(search->sorted, node);
    }
  }

 /*
  * Free the search context...
  */

  cgiFreeSearch(sc);

 /*
  * Return the results...
  */

  return (search);
}
621

622

623
/*
624
 * 'help_add_word()' - Add a word to a node.
625
 */
626

627
static help_word_t *			/* O - New word */
628
help_add_word(help_node_t *n,		/* I - Node */
629
              const char  *text)	/* I - Word text */
630
{
631
  help_word_t	*w,			/* New word */
632
		key;			/* Search key */
633

634

635
 /*
636
  * Create the words array as needed...
637
  */
638

639
  if (!n->words)
640
    n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
641

642
 /*
643
  * See if the word is already added...
644
  */
645

646
  key.text = (char *)text;
647

648
  if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
649
  {
650
   /*
651
    * Create a new word...
652
    */
653

654
    if ((w = calloc(1, sizeof(help_word_t))) == NULL)
655
      return (NULL);
656

657
    if ((w->text = strdup(text)) == NULL)
658
    {
659
      free(w);
660
      return (NULL);
661
    }
662

663
    cupsArrayAdd(n->words, w);
664
  }
665

666
 /*
667
  * Bump the counter for this word and return it...
668
  */
669

670
  w->count ++;
671

672
  return (w);
673
}
674

675

676
/*
677
 * 'help_delete_node()' - Free all memory used by a node.
678
 */
679

680
static void
681
help_delete_node(help_node_t *n)	/* I - Node */
682
{
683
  help_word_t	*w;			/* Current word */
684

685

686
  if (!n)
687
    return;
688

689
  if (n->filename)
690
    free(n->filename);
691

692
  if (n->anchor)
693
    free(n->anchor);
694

695
  if (n->section)
696
    free(n->section);
697

698
  if (n->text)
699
    free(n->text);
700

701
  for (w = (help_word_t *)cupsArrayFirst(n->words);
702
       w;
703
       w = (help_word_t *)cupsArrayNext(n->words))
704
    help_delete_word(w);
705

706
  cupsArrayDelete(n->words);
707

708
  free(n);
709
}
710

711

712
/*
713
 * 'help_delete_word()' - Free all memory used by a word.
714
 */
715

716
static void
717
help_delete_word(help_word_t *w)	/* I - Word */
718
{
719
  if (!w)
720
    return;
721

722
  if (w->text)
723
    free(w->text);
724

725
  free(w);
726
}
727

728

729
/*
730
 * 'help_load_directory()' - Load a directory of files into an index.
731
 */
732

733
static int				/* O - 0 = success, -1 = error, 1 = updated */
734
help_load_directory(
735
    help_index_t *hi,			/* I - Index */
736
    const char   *directory,		/* I - Directory */
737
    const char   *relative)		/* I - Relative path */
738
{
739
  cups_dir_t	*dir;			/* Directory file */
740
  cups_dentry_t	*dent;			/* Directory entry */
741
  char		*ext,			/* Pointer to extension */
742
		filename[1024],		/* Full filename */
743
		relname[1024];		/* Relative filename */
744
  int		update;			/* Updated? */
745
  help_node_t	*node;			/* Current node */
746

747

748
 /*
749
  * Open the directory and scan it...
750
  */
751

752
  if ((dir = cupsDirOpen(directory)) == NULL)
753
    return (0);
754

755
  update = 0;
756

757
  while ((dent = cupsDirRead(dir)) != NULL)
758
  {
759
   /*
760
    * Skip "." files...
761
    */
762

763
    if (dent->filename[0] == '.')
764
      continue;
765

766
   /*
767
    * Get absolute and relative filenames...
768
    */
769

770
    snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
771
    if (relative)
772
      snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
773
    else
774
      strlcpy(relname, dent->filename, sizeof(relname));
775

776
   /*
777
    * Check if we have a HTML file...
778
    */
779

780
    if ((ext = strstr(dent->filename, ".html")) != NULL &&
781
        (!ext[5] || !strcmp(ext + 5, ".gz")))
782
    {
783
     /*
784
      * HTML file, see if we have already indexed the file...
785
      */
786

787
      if ((node = helpFindNode(hi, relname, NULL)) != NULL)
788
      {
789
       /*
790
        * File already indexed - check dates to confirm that the
791
	* index is up-to-date...
792
	*/
793

794
        if (node->mtime == dent->fileinfo.st_mtime)
795
	{
796
	 /*
797
	  * Same modification time, so mark all of the nodes
798
	  * for this file as up-to-date...
799
	  */
800

801
          for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
802
	    if (!strcmp(node->filename, relname))
803
	      node->score = 0;
804
	    else
805
	      break;
806

807
          continue;
808
	}
809
      }
810

811
      update = 1;
812

813
      help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
814
    }
815
    else if (S_ISDIR(dent->fileinfo.st_mode))
816
    {
817
     /*
818
      * Process sub-directory...
819
      */
820

821
      if (help_load_directory(hi, filename, relname) == 1)
822
        update = 1;
823
    }
824
  }
825

826
  cupsDirClose(dir);
827

828
  return (update);
829
}
830

831

832
/*
833
 * 'help_load_file()' - Load an HTML file into an index.
834
 */
835

836
static int				/* O - 0 = success, -1 = error */
837
help_load_file(
838
    help_index_t *hi,			/* I - Index */
839
    const char   *filename,		/* I - Filename */
840
    const char   *relative,		/* I - Relative path */
841
    time_t       mtime)			/* I - Modification time */
842
{
843
  cups_file_t	*fp;			/* HTML file */
844
  help_node_t	*node;			/* Current node */
845
  char		line[1024],		/* Line from file */
846
		temp[1024],		/* Temporary word */
847
                section[1024],		/* Section */
848
		*ptr,			/* Pointer into line */
849
		*anchor,		/* Anchor name */
850
		*text;			/* Text for anchor */
851
  off_t		offset;			/* File offset */
852
  char		quote;			/* Quote character */
853
  help_word_t	*word;			/* Current word */
854
  int		wordlen;		/* Length of word */
855

856

857
  if ((fp = cupsFileOpen(filename, "r")) == NULL)
858
    return (-1);
859

860
  node   = NULL;
861
  offset = 0;
862

863
  strlcpy(section, "Other", sizeof(section));
864

865
  while (cupsFileGets(fp, line, sizeof(line)))
866
  {
867
   /*
868
    * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
869
    */
870

871
    if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
872
    {
873
     /*
874
      * Got section line, copy it!
875
      */
876

877
      for (ptr += 13; isspace(*ptr & 255); ptr ++);
878

879
      strlcpy(section, ptr, sizeof(section));
880
      if ((ptr = strstr(section, "-->")) != NULL)
881
      {
882
       /*
883
        * Strip comment stuff from end of line...
884
	*/
885

886
        for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
887

888
	if (isspace(*ptr & 255))
889
	  *ptr = '\0';
890
      }
891
      continue;
892
    }
893

894
    for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
895
    {
896
      ptr ++;
897

898
      if (!_cups_strncasecmp(ptr, "TITLE>", 6))
899
      {
900
       /*
901
        * Found the title...
902
	*/
903

904
	anchor = NULL;
905
	ptr += 6;
906
      }
907
      else
908
      {
909
        char *idptr;			/* Pointer to ID */
910

911
	if (!_cups_strncasecmp(ptr, "A NAME=", 7))
912
	  ptr += 7;
913
	else if ((idptr = strstr(ptr, " ID=")) != NULL)
914
	  ptr = idptr + 4;
915
	else if ((idptr = strstr(ptr, " id=")) != NULL)
916
	  ptr = idptr + 4;
917
	else
918
	  continue;
919

920
       /*
921
        * Found an anchor...
922
	*/
923

924
	if (*ptr == '\"' || *ptr == '\'')
925
	{
926
	 /*
927
	  * Get quoted anchor...
928
	  */
929

930
	  quote  = *ptr;
931
          anchor = ptr + 1;
932
	  if ((ptr = strchr(anchor, quote)) != NULL)
933
	    *ptr++ = '\0';
934
	  else
935
	    break;
936
	}
937
	else
938
	{
939
	 /*
940
	  * Get unquoted anchor...
941
	  */
942

943
          anchor = ptr + 1;
944

945
	  for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
946

947
	  if (*ptr != '>')
948
	    *ptr++ = '\0';
949
	  else
950
	    break;
951
	}
952

953
       /*
954
        * Got the anchor, now lets find the end...
955
	*/
956

957
        while (*ptr && *ptr != '>')
958
	  ptr ++;
959

960
        if (*ptr != '>')
961
	  break;
962

963
        *ptr++ = '\0';
964
      }
965

966
     /*
967
      * Now collect text for the link...
968
      */
969

970
      text = ptr;
971
      while ((ptr = strchr(text, '<')) == NULL)
972
      {
973
	ptr = text + strlen(text);
974
	if (ptr >= (line + sizeof(line) - 2))
975
	  break;
976

977
        *ptr++ = ' ';
978

979
        if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
980
	  break;
981
      }
982

983
      *ptr = '\0';
984

985
      if (node)
986
	node->length = (size_t)(offset - node->offset);
987

988
      if (!*text)
989
      {
990
        node = NULL;
991
        break;
992
      }
993

994
      if ((node = helpFindNode(hi, relative, anchor)) != NULL)
995
      {
996
       /*
997
	* Node already in the index, so replace the text and other
998
	* data...
999
	*/
1000

1001
        cupsArrayRemove(hi->nodes, node);
1002

1003
        if (node->section)
1004
	  free(node->section);
1005

1006
	if (node->text)
1007
	  free(node->text);
1008

1009
        if (node->words)
1010
	{
1011
	  for (word = (help_word_t *)cupsArrayFirst(node->words);
1012
	       word;
1013
	       word = (help_word_t *)cupsArrayNext(node->words))
1014
	    help_delete_word(word);
1015

1016
	  cupsArrayDelete(node->words);
1017
	  node->words = NULL;
1018
	}
1019

1020
	node->section = section[0] ? strdup(section) : NULL;
1021
	node->text    = strdup(text);
1022
	node->mtime   = mtime;
1023
	node->offset  = offset;
1024
	node->score   = 0;
1025
      }
1026
      else
1027
      {
1028
       /*
1029
	* New node...
1030
	*/
1031

1032
        node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1033
      }
1034

1035
     /*
1036
      * Go through the text value and replace tabs and newlines with
1037
      * whitespace and eliminate extra whitespace...
1038
      */
1039

1040
      for (ptr = node->text, text = node->text; *ptr;)
1041
	if (isspace(*ptr & 255))
1042
	{
1043
	  while (isspace(*ptr & 255))
1044
	    ptr ++;
1045

1046
	  *text++ = ' ';
1047
        }
1048
	else if (text != ptr)
1049
	  *text++ = *ptr++;
1050
	else
1051
	{
1052
	  text ++;
1053
	  ptr ++;
1054
	}
1055

1056
      *text = '\0';
1057

1058
     /*
1059
      * (Re)add the node to the array...
1060
      */
1061

1062
      cupsArrayAdd(hi->nodes, node);
1063

1064
      if (!anchor)
1065
        node = NULL;
1066
      break;
1067
    }
1068

1069
    if (node)
1070
    {
1071
     /*
1072
      * Scan this line for words...
1073
      */
1074

1075
      for (ptr = line; *ptr; ptr ++)
1076
      {
1077
       /*
1078
	* Skip HTML stuff...
1079
	*/
1080

1081
	if (*ptr == '<')
1082
	{
1083
          if (!strncmp(ptr, "<!--", 4))
1084
	  {
1085
	   /*
1086
	    * Skip HTML comment...
1087
	    */
1088

1089
            if ((text = strstr(ptr + 4, "-->")) == NULL)
1090
	      ptr += strlen(ptr) - 1;
1091
	    else
1092
	      ptr = text + 2;
1093
	  }
1094
	  else
1095
	  {
1096
	   /*
1097
            * Skip HTML element...
1098
	    */
1099

1100
            for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1101
	    {
1102
	      if (*ptr == '\"' || *ptr == '\'')
1103
	      {
1104
		for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1105

1106
		if (!*ptr)
1107
		  ptr --;
1108
	      }
1109
	    }
1110

1111
	    if (!*ptr)
1112
	      ptr --;
1113
          }
1114

1115
          continue;
1116
	}
1117
	else if (*ptr == '&')
1118
	{
1119
	 /*
1120
	  * Skip HTML entity...
1121
	  */
1122

1123
	  for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1124

1125
	  if (!*ptr)
1126
	    ptr --;
1127

1128
	  continue;
1129
	}
1130
	else if (!isalnum(*ptr & 255))
1131
          continue;
1132

1133
       /*
1134
	* Found the start of a word, search until we find the end...
1135
	*/
1136

1137
	for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1138

1139
	wordlen = (int)(ptr - text);
1140

1141
        memcpy(temp, text, (size_t)wordlen);
1142
	temp[wordlen] = '\0';
1143

1144
        ptr --;
1145

1146
	if (wordlen > 1 && !bsearch(temp, help_common_words,
1147
	                            (sizeof(help_common_words) /
1148
				     sizeof(help_common_words[0])),
1149
				    sizeof(help_common_words[0]),
1150
				    (int (*)(const void *, const void *))
1151
				        _cups_strcasecmp))
1152
          help_add_word(node, temp);
1153
      }
1154
    }
1155

1156
   /*
1157
    * Get the offset of the next line...
1158
    */
1159

1160
    offset = cupsFileTell(fp);
1161
  }
1162

1163
  cupsFileClose(fp);
1164

1165
  if (node)
1166
    node->length = (size_t)(offset - node->offset);
1167

1168
  return (0);
1169
}
1170

1171

1172
/*
1173
 * 'help_new_node()' - Create a new node and add it to an index.
1174
 */
1175

1176
static help_node_t *			/* O - Node pointer or NULL on error */
1177
help_new_node(const char   *filename,	/* I - Filename */
1178
              const char   *anchor,	/* I - Anchor */
1179
	      const char   *section,	/* I - Section */
1180
	      const char   *text,	/* I - Text */
1181
	      time_t       mtime,	/* I - Modification time */
1182
              off_t        offset,	/* I - Offset in file */
1183
	      size_t       length)	/* I - Length in bytes */
1184
{
1185
  help_node_t	*n;			/* Node */
1186

1187

1188
  n = (help_node_t *)calloc(1, sizeof(help_node_t));
1189
  if (!n)
1190
    return (NULL);
1191

1192
  n->filename = strdup(filename);
1193
  n->anchor   = anchor ? strdup(anchor) : NULL;
1194
  n->section  = (section && *section) ? strdup(section) : NULL;
1195
  n->text     = strdup(text);
1196
  n->mtime    = mtime;
1197
  n->offset   = offset;
1198
  n->length   = length;
1199

1200
  return (n);
1201
}
1202

1203

1204
/*
1205
 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1206
 */
1207

1208
static int				/* O - Difference */
1209
help_sort_by_name(help_node_t *n1,	/* I - First node */
1210
                  help_node_t *n2)	/* I - Second node */
1211
{
1212
  int		diff;			/* Difference */
1213

1214

1215
  if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1216
    return (diff);
1217

1218
  if (!n1->anchor && !n2->anchor)
1219
    return (0);
1220
  else if (!n1->anchor)
1221
    return (-1);
1222
  else if (!n2->anchor)
1223
    return (1);
1224
  else
1225
    return (strcmp(n1->anchor, n2->anchor));
1226
}
1227

1228

1229
/*
1230
 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
1231
 */
1232

1233
static int				/* O - Difference */
1234
help_sort_by_score(help_node_t *n1,	/* I - First node */
1235
                   help_node_t *n2)	/* I - Second node */
1236
{
1237
  int		diff;			/* Difference */
1238

1239

1240
  if (n1->score != n2->score)
1241
    return (n2->score - n1->score);
1242

1243
  if (n1->section && !n2->section)
1244
    return (1);
1245
  else if (!n1->section && n2->section)
1246
    return (-1);
1247
  else if (n1->section && n2->section &&
1248
           (diff = strcmp(n1->section, n2->section)) != 0)
1249
    return (diff);
1250

1251
  return (_cups_strcasecmp(n1->text, n2->text));
1252
}
1253

1254

1255
/*
1256
 * 'help_sort_words()' - Sort words alphabetically.
1257
 */
1258

1259
static int				/* O - Difference */
1260
help_sort_words(help_word_t *w1,	/* I - Second word */
1261
                help_word_t *w2)	/* I - Second word */
1262
{
1263
  return (_cups_strcasecmp(w1->text, w2->text));
1264
}
1265

1266
Product

Resources

Company