XMLParserCore.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2005 Palmsource, Inc.
00003  * 
00004  * This software is licensed as described in the file LICENSE, which
00005  * you should have received as part of this distribution. The terms
00006  * are also available at http://www.openbinder.org/license.html.
00007  * 
00008  * This software consists of voluntary contributions made by many
00009  * individuals. For the exact contribution history, see the revision
00010  * history and logs, available at http://www.openbinder.org
00011  */
00012 
00013 #ifndef _XMLParserCore_P_H
00014 #define _XMLParserCore_P_H
00015 
00016 
00017 #include <xml/Parser.h>
00018 
00019 #if _SUPPORTS_NAMESPACE
00020 namespace palmos {
00021 namespace xml { 
00022 #endif
00023 
00024 /*
00025 **
00026 ** This class is the core class for two different flavors of parsers.
00027 ** This core class parses data that is fed to it via ProcessInputBuffer.
00028 ** It forms the base class for parsers that either act as a data recipient
00029 ** or use a data source.
00030 */
00031 class XMLParserCore
00032 {
00033 public:
          // Constructs a parser core from a parse context, a DTD-only switch and a
          // set of flags.
          // NOTE(review): the arguments presumably initialize m_context, m_dtdOnly
          // and m_flags; the constructor body is not in this header -- confirm.
00034     XMLParserCore(BXMLParseContext * context, bool dtdOnly, uint32_t flags);
          // NOTE(review): this destructor is not virtual, yet the class is described
          // as a base class for other parsers. Deleting a derived parser through an
          // XMLParserCore pointer would be undefined behavior -- confirm that no
          // caller does this.
00035     ~XMLParserCore(void);
00036 
          // Driving interface. The names suggest the call order
          // ProcessBegin -> ProcessInputBuffer (repeated) -> ProcessEnd; that order
          // is not enforced by anything visible in this header -- TODO confirm.
          // All three return a status_t; the possible values are defined elsewhere.
00037     status_t ProcessBegin(void);
          // Hands a buffer of input to the parser.
          //   dataSize   - presumably the number of bytes available at dataBuffer.
          //   dataBuffer - the input data; it is const, so it is not modified
          //                through this pointer.
          //   errorExit  - non-const reference, so the callee can write to it; the
          //                exact meaning is defined by the implementation.
00038     status_t ProcessInputBuffer(size_t dataSize, const uint8_t * dataBuffer, bool& errorExit);
          // isComplete presumably tells the parser whether all of the input was
          // delivered -- verify against the implementation.
00039     status_t ProcessEnd(bool isComplete);
00040 
00041 private:
00042     // =====================================================================
          // Kind of entity declaration; this is the type of m_declType.
          // PE / GE presumably stand for "parameter entity" / "general entity".
00043     typedef enum
00044     {
00045         PARSER_PE_DECL,
00046         PARSER_GE_DECL
00047     }decl_type;
00048 
00049     // =====================================================================
          // Top-level parser state; this is the type of m_state. The trailing
          // comments on some enumerators show the input associated with that state.
          // Enumerator values are implicit (0, 1, 2, ...), so inserting or
          // reordering entries changes their numeric values.
00050     typedef enum
00051     {
00052         PARSER_IN_UNKNOWN,
00053         PARSER_IN_UNKNOWN_MARKUP,                   // <        encountered
00054         PARSER_IN_UNKNOWN_MARKUP_GT_E,              // <!       encountered
00055         PARSER_IN_PROCESSING_INSTRUCTION_TARGET,    // <?       encountered
00056         PARSER_IN_PROCESSING_INSTRUCTION,           // <?...S   encountered
00057         PARSER_IN_ELEMENT_START_NAME,               // <...     encountered
00058         PARSER_IN_ELEMENT_START_TAG,
00059         PARSER_IN_ELEMENT_END_TAG,
00060         PARSER_IN_CDATA,
00061         PARSER_IN_COMMENT,
00062         PARSER_IN_DOCTYPE,
00063         PARSER_IN_ELEMENT_DECL,
00064         PARSER_IN_ATTLIST_DECL,
00065         PARSER_IN_ENTITY_DECL,
00066         PARSER_IN_NOTATION_DECL,
00067         PARSER_IN_GE_REF,
00068         PARSER_IN_PE_REF
00069     }parser_state;
00070 
00071 
00072     // =====================================================================
          // Type of m_upcomming. Judging by the names it records whether the parser
          // is close to the end of the current construct; the exact use is in the
          // implementation -- TODO confirm.
00073     typedef enum
00074     {
00075         PARSER_NORMAL,
00076         PARSER_NEARING_END_1
00077     }forward_looking_state;
00078 
00079 
00080     // =====================================================================
          // Secondary state; this is the type of m_subState. It is presumably
          // interpreted relative to the current parser_state -- TODO confirm.
          // Two oddities are kept on purpose, because code outside this header may
          // depend on them:
          //   - the WHITESPACE_n sequence has no PARSER_SUB_IN_WHITESPACE_6;
          //   - PARSER_SUB_IN_READIING_NDATA is spelled with a double 'I'.
00081     typedef enum
00082     {
00083         PARSER_SUB_IN_UNKNOWN,
00084         PARSER_SUB_IN_WHITESPACE,
00085         PARSER_SUB_IN_NAME,
00086         PARSER_SUB_READY_FOR_VALUE,
00087         PARSER_SUB_IN_VALUE,
00088         PARSER_SUB_IN_ELEMENT,
00089         PARSER_SUB_IN_READY_FOR_WS_1,
00090         PARSER_SUB_IN_WHITESPACE_1,
00091         PARSER_SUB_IN_WHITESPACE_2,
00092         PARSER_SUB_IN_WHITESPACE_3,
00093         PARSER_SUB_IN_WHITESPACE_4,
00094         PARSER_SUB_IN_WHITESPACE_5,
00095         PARSER_SUB_IN_WHITESPACE_7,
00096         PARSER_SUB_IN_WHITESPACE_8,
00097         PARSER_SUB_IN_WHITESPACE_9,
00098         PARSER_SUB_IN_WHITESPACE_10,
00099         PARSER_SUB_IN_DOCTYPE_NAME,
00100         PARSER_SUB_IN_EXTERNAL_ID,
00101         PARSER_SUB_IN_INTERNAL_PARSED_VALUE,
00102         PARSER_SUB_IN_READIING_NDATA,
00103         PARSER_SUB_IN_READING_PUBLIC_ID,
00104         PARSER_SUB_IN_READING_SYSTEM_ID,
00105         PARSER_SUB_IN_SPEC,
00106         PARSER_SUB_IN_READING_NOTATION,
00107         PARSER_SUB_IN_ENTITY_REF
00108     }parser_sub_state;
00109 
00110 
00111     // Private methods.
          // State setup / teardown helpers; presumably called around a parse run
          // (see ProcessBegin / ProcessEnd) -- verify in the implementation.
00112     void initializeState();
00113     void finalizeState();
00114 
          // Handlers for completed constructs. Each returns a status_t. They
          // presumably forward the parsed data to m_context -- TODO confirm.
          // The SString / SValue arguments are non-const references, so a handler
          // may modify them.
00115     status_t handle_attribute_decl(SString & element, SString & data);
00116     status_t handle_entity_decl(bool parameter, SString & name, SString & value, uint32_t flags, bool doctypeBeginOnly);
00117     status_t handle_element_start(SString & name, SValue & attributes, uint32_t flags);
          // Reference expansion helpers. expand_char_ref takes the reference text as
          // a const input and writes its result to entityVal; expand_char_refs and
          // expand_entities take the string by non-const reference and presumably
          // rewrite it in place -- verify in the implementation.
00118     status_t expand_char_ref(const SString & entity, SString & entityVal);
00119     status_t    expand_char_refs(SString & str);
00120     status_t expand_entities(SString & str, char delimiter);
00121 
00122 
00123 
00124     // Members
          // NOTE(review): the class holds raw pointers (m_remainingChars,
          // m_parseText, m_longStringData) and declares neither a copy constructor
          // nor an assignment operator, so the compiler-generated copies would be
          // shallow. Ownership of these buffers is not visible here -- confirm that
          // instances are never copied.
00125 protected:
          // Parse context; protected, so derived parser classes can access it.
00126     BXMLParseContext * m_context;
00127 private:
00128     bool m_dtdOnly;
00129     uint32_t m_flags;
00130     // Characters remaining from the last iteration
00131     uint8_t * m_remainingChars;
00132     int32_t m_remainingCharsSize;
00133     
          // Text currently being parsed and its length (presumably in bytes).
00134     uint8_t * m_parseText;
00135     int32_t m_parseTextLength;
00136     
          // NOTE(review): presumably the size of one character in the input
          // encoding -- confirm against the implementation.
00137     int32_t m_characterSize;
00138     
          // Current position in the state machine (see the enums above).
00139     parser_state            m_state;
00140     forward_looking_state   m_upcomming;
00141     parser_sub_state        m_subState;
00142     
00143     // The current token, until it has been completed, and we're ready to
00144     // move on to the next one.
00145     SString     m_currentToken;
00146     
00147     // Current name, meaning element, target, notation, or any of those other things
00148     SString     m_currentName;
00149     SString     m_savedName;
00150     SString     m_currentSubName;
00151     SString     m_entityValue;
00152     uint8_t     m_delimiter;
00153     
00154     // Mapping of names to values.  Used for attributes and everything else.
00155     SValue      m_stringMap;
00156 
          // Scratch storage: one pointer plus two small fixed-size byte arrays
          // (4 and 3 bytes). Their exact role is defined by the implementation.
00157     uint8_t     * m_longStringData ;
00158     uint8_t     m_carryoverLongData[4];
00159     uint8_t     m_someChars[3];
00160     
00161     bool        m_inDTD;
00162     decl_type   m_declType;
00163     
00164     SString     m_emptyString;
00165 
00166 
00167 };
00168 
00169 #if _SUPPORTS_NAMESPACE
00170 }; // namespace xml
00171 }; // namespace palmos
00172 #endif
00173 
00174 #endif // _XMLParserCore_P_H