parser.h File Reference

Header for XML-like parser. More...

#include <stdio.h>
#include <errno.h>
#include <pcre.h>
#include "template.h"

Include dependency graph for parser.h:

Include dependency graph

This graph shows which files directly or indirectly include this file:

Included by dependency graph

Go to the source code of this file.

Enumerations

enum  eparse {
  PARSE_BEGIN = 0, PARSE_END = 1, PARSE_CLASS = 1, PARSE_PAIRS = 2,
  PARSE_CONTENT = 3, PARSE_ARGS = 4, PARSE_VECTOR = 3, PARSE_ARG = 2,
  PARSE_ELEMENT = 1, PARSE_OPT = PCRE_CASELESS | PCRE_DOTALL, PARSE_MATCH = 0, PARSE_ERR = EINVAL
}

Functions

int parse (char *)
 Parse source as a collection of specific demi-XML legmata, ferreting out the adherent contents.
void parsetag (char *)
 Parses lectate and funnels adherent content to an appropriate translator.
char * parsepair (char *)

Variables

template_t te
 template node enabling parser's callback


Detailed Description

Header for XML-like parser.

Definition in file parser.h.


Enumeration Type Documentation

enum eparse
 

PCRE constants

Enumeration values:
PARSE_BEGIN  match begin
PARSE_END  match end
PARSE_CLASS  class subexpression
PARSE_PAIRS  pair subexpression
PARSE_CONTENT  content subexpressions
PARSE_ARGS  total subexpressions including whole
PARSE_VECTOR  PCRE vector
PARSE_ARG  argument subexpression PARSE_PAIRS
PARSE_ELEMENT  element subexpression PARSE_CLASS
PARSE_OPT  default PCRE options
PARSE_MATCH  PCRE match
PARSE_ERR  default errno

Definition at line 14 of file parser.h.

00014             {
00015   PARSE_BEGIN = 0,              
00016   PARSE_END = 1,                
00017   PARSE_CLASS = 1,              
00018   PARSE_PAIRS = 2,              
00019   PARSE_CONTENT = 3,            
00020   PARSE_ARGS = 4,               
00021   PARSE_VECTOR = 3,             
00022   PARSE_ARG = 2,                
00023   PARSE_ELEMENT = 1,            
00024   PARSE_OPT = PCRE_CASELESS | PCRE_DOTALL, 
00025   PARSE_MATCH = 0,              
00026   PARSE_ERR = EINVAL            
00027 };


Function Documentation

int parse (char * pcin)
 

Parse source as a collection of specific demi-XML legmata, ferreting out the adherent contents.

Parameters:
pcin partand [sic]
Returns:
EXIT_SUCCESS, EXIT_FAILURE.

Definition at line 38 of file parser.c.

References ALLOC, EVAL, template_t::i, lex(), NONE, PARSE_ELEMENT, PARSE_ERR, parsetag(), template_t::pten, STRCAT, te, and templatefree().

Referenced by main().

00039 {
00040   int i, iret = EXIT_SUCCESS;
00041   const char *pcmaskbase = "<[[:space:]]*?(%s)+?(.*?)>(.*?)</\\%d>";
00042   const char *pcdiv = "|";
00043   const char *pcerr = "parser.c parse()";
00044   char NONE(*pcmaskelement), NONE(*pcmask);
00045 
00046   /* Initialize template list. */
00047   EVAL(template(&te) != EXIT_SUCCESS, PARSE_ERR);
00048 
00049   /* Goal: element mask of the form `math|chem|music|...'. */
00050   for (i = 0; i < te.i; i++) {
00051     STRCAT(pcmaskelement, (te.pten + i)->pcclass);
00052     STRCAT(pcmaskelement, pcdiv);
00053   }
00054 
00055   /* Elide the terminal divider. */
00056   *(pcmaskelement + strlen(pcmaskelement) - strlen(pcdiv)) = '\0';
00057 
00058   /* Slight superfluity when %d recombined as integer; and on single-character classes. */
00059   ALLOC(pcmask, strlen(pcmaskelement) + strlen(pcmaskbase));
00060   sprintf(pcmask, pcmaskbase, pcmaskelement, PARSE_ELEMENT);
00061 
00062   /* Perform tokenization with callback.
00063      Important implication: class name associated with content is always the first pair! */
00064   EVAL(lex(pcin, pcmask, parsetag) != EXIT_SUCCESS, EINVAL);
00065 
00066  cleanup:
00067   templatefree(&te);
00068   free(pcmaskelement);
00069   free(pcmask);
00070   return iret;
00071 }

Here is the call graph for this function:

char* parsepair (char *)
 

void parsetag (char * pc)
 

Parses lectate and funnels adherent content to an appropriate translator.

Parameters:
pc lectand

Definition at line 75 of file parser.c.

References addpair(), EVAL, template_t::i, NONE, PAIRNEW, PARSE_ARG, PARSE_ARGS, PARSE_BEGIN, PARSE_CLASS, PARSE_CONTENT, PARSE_END, PARSE_MATCH, PARSE_OPT, PARSE_PAIRS, template_t::pten, STRSUB, te, and translate().

Referenced by parse().

00076 {
00077   int i, ire, aisub[PARSE_ARGS * PARSE_VECTOR], iret; /* iret: macro placeholder */
00078   pcre *pre;
00079   const char *pcre;
00080   const char *pcmask = "^<[[:space:]]*([^[:space:]]+)(.*?)>(.*)<";
00081   const char *pcerr = "parser.c parsetag()";
00082   char NONE(*pcclass), NONE(*pcpairs), NONE(*pccontent);
00083   pair_t pa;
00084 
00085   EVAL((pre = pcre_compile(pcmask, PARSE_OPT, &pcre, &ire, NULL)) == NULL, EINVAL);
00086   EVAL(pcre_exec(pre, NULL, pc, strlen(pc), 0, 0, aisub, 12) < PARSE_MATCH, EINVAL);
00087   STRSUB(pcclass, pc + aisub[PARSE_CLASS * PARSE_ARG + PARSE_BEGIN], aisub[PARSE_CLASS * PARSE_ARG + PARSE_END] - aisub[PARSE_CLASS * PARSE_ARG + PARSE_BEGIN]);
00088   STRSUB(pcpairs, pc + aisub[PARSE_PAIRS * PARSE_ARG + PARSE_BEGIN], aisub[PARSE_PAIRS * PARSE_ARG + PARSE_END] - aisub[PARSE_PAIRS * PARSE_ARG + PARSE_BEGIN]);
00089   STRSUB(pccontent, pc + aisub[PARSE_CONTENT * PARSE_ARG + PARSE_BEGIN], aisub[PARSE_CONTENT * PARSE_ARG + PARSE_END] - aisub[PARSE_CONTENT * PARSE_ARG + PARSE_BEGIN]);
00090   
00091   /* Shine constructor. */
00092   PAIRNEW(pa);
00093   /* Construct pair devised of class and content. */
00094   addpair(pcclass, pccontent, &pa);
00095   
00096   for (i = 0; i < te.i; i++) {
00097     if (strcmp(pcclass, (te.pten + i)->pcclass) == 0) {
00098       translate((te.pten + i)->pcdigest, &pa);
00099     }
00100   }
00101 
00102  cleanup:
00103   free(pcclass);
00104   free(pcpairs);
00105   free(pccontent);
00106 }

Here is the call graph for this function:


Variable Documentation

template_t te
 

template node enabling parser's callback

Definition at line 12 of file parser.h.

Referenced by parse(), and parsetag().


Generated on Tue Dec 7 06:38:25 2004 for CSCI101:ProjectLatex by  doxygen 1.3.9.1