--- src/xercesc/validators/DTD/DTDScanner.cpp +++ src/xercesc/validators/DTD/DTDScanner.cpp @@ -27,7 +27,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -39,7 +41,6 @@ #include #include #include -#include XERCES_CPP_NAMESPACE_BEGIN @@ -1041,338 +1042,354 @@ // Check for a PE ref here, but don't require spaces checkForPERef(false, true); - // We have to check entity nesting here - XMLSize_t curReader; - + ValueStackOf* arrNestedDecl=NULL; // // We know that the caller just saw an opening parenthesis, so we need - // to parse until we hit the end of it, recursing for other nested - // parentheses we see. + // to parse until we hit the end of it; if we find several parenthesis, + // store them in an array to be processed later. // // We have to check for one up front, since it could be something like // (((a)*)) etc... // ContentSpecNode* curNode = 0; - if (fReaderMgr->skippedChar(chOpenParen)) + while(fReaderMgr->skippedChar(chOpenParen)) { - curReader = fReaderMgr->getCurrentReaderNum(); + // to check entity nesting + const XMLSize_t curReader = fReaderMgr->getCurrentReaderNum(); + if(arrNestedDecl==NULL) + arrNestedDecl=new (fMemoryManager) ValueStackOf(5, fMemoryManager); + arrNestedDecl->push(curReader); - // Lets call ourself and get back the resulting node - curNode = scanChildren(elemDecl, bufToUse); + // Check for a PE ref here, but don't require spaces + checkForPERef(false, true); + } - // If that failed, no need to go further, return failure - if (!curNode) - return 0; + // We must find a leaf node here, either standalone or nested in the parenthesis + if (!fReaderMgr->getName(bufToUse)) + { + fScanner->emitError(XMLErrs::ExpectedElementName); + return 0; + } + + // + // Create a leaf node for it. If we can find the element id for + // this element, then use it. Else, we have to fault in an element + // decl, marked as created because of being in a content model. 
+ // + XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); + if (!decl) + { + decl = new (fGrammarPoolMemoryManager) DTDElementDecl + ( + bufToUse.getRawBuffer() + , fEmptyNamespaceId + , DTDElementDecl::Any + , fGrammarPoolMemoryManager + ); + decl->setCreateReason(XMLElementDecl::InContentModel); + decl->setExternalElemDeclaration(isReadingExternalEntity()); + fDTDGrammar->putElemDecl(decl); + } + curNode = new (fGrammarPoolMemoryManager) ContentSpecNode + ( + decl->getElementName() + , fGrammarPoolMemoryManager + ); + + // Check for a PE ref here, but don't require spaces + const bool gotSpaces = checkForPERef(false, true); - if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getValidationScheme() == XMLScanner::Val_Always) - fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); + // Check for a repetition character after the leaf + XMLCh repCh = fReaderMgr->peekNextChar(); + ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager); + if (tmpNode != curNode) + { + if (gotSpaces) + { + if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace)) + { + delete tmpNode; + } + fScanner->emitError(XMLErrs::UnexpectedWhitespace); + } + fReaderMgr->getNextChar(); + curNode = tmpNode; } - else + + while(arrNestedDecl==NULL || !arrNestedDecl->empty()) { - // Not a nested paren, so it must be a leaf node - if (!fReaderMgr->getName(bufToUse)) + // Check for a PE ref here, but don't require spaces + checkForPERef(false, true); + + // + // Ok, the next character tells us what kind of content model this + // particular parenthesized section is. It's either a sequence if + // we see ',', a choice if we see '|', or a single leaf node if we see + // a closing paren. 
+ // + const XMLCh opCh = fReaderMgr->peekNextChar(); + + if ((opCh != chComma) + && (opCh != chPipe) + && (opCh != chCloseParen)) { - fScanner->emitError(XMLErrs::ExpectedElementName); + // Not a legal char, so delete our node and return failure + delete curNode; + fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf); return 0; } // - // Create a leaf node for it. If we can find the element id for - // this element, then use it. Else, we have to fault in an element - // decl, marked as created because of being in a content model. + // Create the head node of the correct type. We need this to remember + // the top of the local tree. If it was a single subexpr, then just + // set the head node to the current node. For the others, we'll build + // the tree off the second child as we move across. // - XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); - if (!decl) + ContentSpecNode* headNode = 0; + ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType; + if (opCh == chComma) { - decl = new (fGrammarPoolMemoryManager) DTDElementDecl + curType = ContentSpecNode::Sequence; + headNode = new (fGrammarPoolMemoryManager) ContentSpecNode ( - bufToUse.getRawBuffer() - , fEmptyNamespaceId - , DTDElementDecl::Any + curType + , curNode + , 0 + , true + , true , fGrammarPoolMemoryManager ); - decl->setCreateReason(XMLElementDecl::InContentModel); - decl->setExternalElemDeclaration(isReadingExternalEntity()); - fDTDGrammar->putElemDecl(decl); + curNode = headNode; } - curNode = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - decl->getElementName() - , fGrammarPoolMemoryManager - ); - - // Check for a PE ref here, but don't require spaces - const bool gotSpaces = checkForPERef(false, true); - - // Check for a repetition character after the leaf - const XMLCh repCh = fReaderMgr->peekNextChar(); - ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager); - if (tmpNode != 
curNode) + else if (opCh == chPipe) { - if (gotSpaces) - { - if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace)) - { - delete tmpNode; - } - fScanner->emitError(XMLErrs::UnexpectedWhitespace); - } + curType = ContentSpecNode::Choice; + headNode = new (fGrammarPoolMemoryManager) ContentSpecNode + ( + curType + , curNode + , 0 + , true + , true + , fGrammarPoolMemoryManager + ); + curNode = headNode; + } + else + { + headNode = curNode; fReaderMgr->getNextChar(); - curNode = tmpNode; } - } - - // Check for a PE ref here, but don't require spaces - checkForPERef(false, true); - - // - // Ok, the next character tells us what kind of content this particular - // model this particular parentesized section is. Its either a choice if - // we see ',', a sequence if we see '|', or a single leaf node if we see - // a closing paren. - // - const XMLCh opCh = fReaderMgr->peekNextChar(); - - if ((opCh != chComma) - && (opCh != chPipe) - && (opCh != chCloseParen)) - { - // Not a legal char, so delete our node and return failure - delete curNode; - fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf); - return 0; - } - - // - // Create the head node of the correct type. We need this to remember - // the top of the local tree. If it was a single subexpr, then just - // set the head node to the current node. For the others, we'll build - // the tree off the second child as we move across. 
- // - ContentSpecNode* headNode = 0; - ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType; - if (opCh == chComma) - { - curType = ContentSpecNode::Sequence; - headNode = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - curType - , curNode - , 0 - , true - , true - , fGrammarPoolMemoryManager - ); - curNode = headNode; - } - else if (opCh == chPipe) - { - curType = ContentSpecNode::Choice; - headNode = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - curType - , curNode - , 0 - , true - , true - , fGrammarPoolMemoryManager - ); - curNode = headNode; - } - else - { - headNode = curNode; - fReaderMgr->getNextChar(); - } - // - // If it was a sequence or choice, we just loop until we get to the - // end of our section, adding each new leaf or sub expression to the - // right child of the current node, and making that new node the current - // node. - // - if ((opCh == chComma) || (opCh == chPipe)) - { - ContentSpecNode* lastNode = 0; - while (true) + // + // If it was a sequence or choice, we just loop until we get to the + // end of our section, adding each new leaf or sub expression to the + // right child of the current node, and making that new node the current + // node. + // + if ((opCh == chComma) || (opCh == chPipe)) { - // - // The next thing must either be another | or , character followed - // by another leaf or subexpression, or a closing parenthesis, or a - // PE ref. - // - if (fReaderMgr->lookingAtChar(chPercent)) - { - checkForPERef(false, true); - } - else if (fReaderMgr->skippedSpace()) - { - // Just skip whitespace - fReaderMgr->skipPastSpaces(); - } - else if (fReaderMgr->skippedChar(chCloseParen)) + ContentSpecNode* lastNode = 0; + while (true) { // - // We've hit the end of this section, so break out. But, we - // need to see if we left a partial sequence of choice node - // without a second node. If so, we have to undo that and - // put its left child into the right node of the previous - // node. 
+ // The next thing must either be another | or , character followed + // by another leaf or subexpression, or a closing parenthesis, or a + // PE ref. // - if ((curNode->getType() == ContentSpecNode::Choice) - || (curNode->getType() == ContentSpecNode::Sequence)) + if (fReaderMgr->lookingAtChar(chPercent)) + { + checkForPERef(false, true); + } + else if (fReaderMgr->skippedSpace()) + { + // Just skip whitespace + fReaderMgr->skipPastSpaces(); + } + else if (fReaderMgr->skippedChar(chCloseParen)) { - if (!curNode->getSecond()) + // + // We've hit the end of this section, so break out. But, we + // need to see if we left a partial sequence of choice node + // without a second node. If so, we have to undo that and + // put its left child into the right node of the previous + // node. + // + if ((curNode->getType() == ContentSpecNode::Choice) + || (curNode->getType() == ContentSpecNode::Sequence)) { - ContentSpecNode* saveFirst = curNode->orphanFirst(); - lastNode->setSecond(saveFirst); - curNode = lastNode; + if (!curNode->getSecond()) + { + ContentSpecNode* saveFirst = curNode->orphanFirst(); + lastNode->setSecond(saveFirst); + curNode = lastNode; + } } + break; } - break; - } - else if (fReaderMgr->skippedChar(opCh)) - { - // Check for a PE ref here, but don't require spaces - checkForPERef(false, true); - - if (fReaderMgr->skippedChar(chOpenParen)) + else if (fReaderMgr->skippedChar(opCh)) { - curReader = fReaderMgr->getCurrentReaderNum(); + // Check for a PE ref here, but don't require spaces + checkForPERef(false, true); - // Recurse to handle this new guy - ContentSpecNode* subNode; - try { - subNode = scanChildren(elemDecl, bufToUse); - } - catch (const XMLErrs::Codes) + if (fReaderMgr->skippedChar(chOpenParen)) { - delete headNode; - throw; - } + const XMLSize_t curReader = fReaderMgr->getCurrentReaderNum(); - // If it failed, we are done, clean up here and return failure - if (!subNode) - { - delete headNode; - return 0; + // Recurse to handle this new guy + 
ContentSpecNode* subNode; + try { + subNode = scanChildren(elemDecl, bufToUse); + } + catch (const XMLErrs::Codes) + { + delete headNode; + throw; + } + + // If it failed, we are done, clean up here and return failure + if (!subNode) + { + delete headNode; + return 0; + } + + if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getValidationScheme() == XMLScanner::Val_Always) + fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); + + // Else patch it in and make it the new current + ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode + ( + curType + , subNode + , 0 + , true + , true + , fGrammarPoolMemoryManager + ); + curNode->setSecond(newCur); + lastNode = curNode; + curNode = newCur; } + else + { + // + // Got to be a leaf node, so get a name. If we cannot get + // one, then clean up and get outa here. + // + if (!fReaderMgr->getName(bufToUse)) + { + delete headNode; + fScanner->emitError(XMLErrs::ExpectedElementName); + return 0; + } + + // + // Create a leaf node for it. If we can find the element + // id for this element, then use it. Else, we have to + // fault in an element decl, marked as created because + // of being in a content model. 
+ // + XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); + if (!decl) + { + decl = new (fGrammarPoolMemoryManager) DTDElementDecl + ( + bufToUse.getRawBuffer() + , fEmptyNamespaceId + , DTDElementDecl::Any + , fGrammarPoolMemoryManager + ); + decl->setCreateReason(XMLElementDecl::InContentModel); + decl->setExternalElemDeclaration(isReadingExternalEntity()); + fDTDGrammar->putElemDecl(decl); + } - if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getValidationScheme() == XMLScanner::Val_Always) - fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); + ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode + ( + decl->getElementName() + , fGrammarPoolMemoryManager + ); - // Else patch it in and make it the new current - ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - curType - , subNode - , 0 - , true - , true - , fGrammarPoolMemoryManager - ); - curNode->setSecond(newCur); - lastNode = curNode; - curNode = newCur; + // Check for a repetition character after the leaf + const XMLCh repCh = fReaderMgr->peekNextChar(); + ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager); + if (tmpLeaf != tmpLeaf2) + fReaderMgr->getNextChar(); + + // + // Create a new sequence or choice node, with the leaf + // (or rep surrounding it) we just got as its first node. + // Make the new node the second node of the current node, + // and then make it the current node. + // + ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode + ( + curType + , tmpLeaf2 + , 0 + , true + , true + , fGrammarPoolMemoryManager + ); + curNode->setSecond(newCur); + lastNode = curNode; + curNode = newCur; + } } else { - // - // Got to be a leaf node, so get a name. If we cannot get - // one, then clean up and get outa here. 
- // - if (!fReaderMgr->getName(bufToUse)) + // Cannot be valid + delete headNode; // emitError may do a throw so need to clean-up first + if (opCh == chComma) { - delete headNode; - fScanner->emitError(XMLErrs::ExpectedElementName); - return 0; + fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen); } - - // - // Create a leaf node for it. If we can find the element - // id for this element, then use it. Else, we have to - // fault in an element decl, marked as created because - // of being in a content model. - // - XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); - if (!decl) + else { - decl = new (fGrammarPoolMemoryManager) DTDElementDecl + fScanner->emitError ( - bufToUse.getRawBuffer() - , fEmptyNamespaceId - , DTDElementDecl::Any - , fGrammarPoolMemoryManager + XMLErrs::ExpectedSeqOrCloseParen + , elemDecl.getFullName() ); - decl->setCreateReason(XMLElementDecl::InContentModel); - decl->setExternalElemDeclaration(isReadingExternalEntity()); - fDTDGrammar->putElemDecl(decl); - } + } + return 0; + } + } + } - ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - decl->getElementName() - , fGrammarPoolMemoryManager - ); + // + // We saw the terminating parenthesis so lets check for any repetition + // character, and create a node for that, making the head node the child + // of it. 
+ // + const XMLCh repCh = fReaderMgr->peekNextChar(); + curNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager); + if (curNode != headNode) + fReaderMgr->getNextChar(); + + // prepare for recursion + if(arrNestedDecl==NULL) + break; + else + { + // If that failed, no need to go further, return failure + if (!curNode) + return 0; - // Check for a repetition character after the leaf - const XMLCh repCh = fReaderMgr->peekNextChar(); - ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager); - if (tmpLeaf != tmpLeaf2) - fReaderMgr->getNextChar(); + const XMLSize_t curReader = arrNestedDecl->pop(); + if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getValidationScheme() == XMLScanner::Val_Always) + fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); - // - // Create a new sequence or choice node, with the leaf - // (or rep surrounding it) we just got as its first node. - // Make the new node the second node of the current node, - // and then make it the current node. - // - ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode - ( - curType - , tmpLeaf2 - , 0 - , true - , true - , fGrammarPoolMemoryManager - ); - curNode->setSecond(newCur); - lastNode = curNode; - curNode = newCur; - } - } - else + if(arrNestedDecl->empty()) { - // Cannot be valid - delete headNode; // emitError may do a throw so need to clean-up first - if (opCh == chComma) - { - fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen); - } - else - { - fScanner->emitError - ( - XMLErrs::ExpectedSeqOrCloseParen - , elemDecl.getFullName() - ); - } - return 0; + delete arrNestedDecl; + arrNestedDecl=NULL; } } } - // - // We saw the terminating parenthesis so lets check for any repetition - // character, and create a node for that, making the head node the child - // of it. 
- // - XMLCh repCh = fReaderMgr->peekNextChar(); - ContentSpecNode* retNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager); - if (retNode != headNode) - fReaderMgr->getNextChar(); - - return retNode; + return curNode; }