Skip to content
Snippets Groups Projects
Commit b67a0990 authored by Odilitime
Browse files

single tag fixes and implementation into tree, style clean up

parent de6beed5
No related branches found
No related tags found
No related merge requests found
......@@ -36,11 +36,11 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
std::vector<unsigned int> starts;
unsigned int cursor;
int state = 0;
int prependWhiteSpace=false;
int prependWhiteSpace = false;
for (cursor = 0; cursor < html.length(); cursor++) { // TODO handle trying to look ahead past string
if (state == 0) { // Outside tag
if (html[cursor] == ' ' || html[cursor] == '\t' || html[cursor] == '\r' || html[cursor] == '\n') {
prependWhiteSpace=true;
prependWhiteSpace = true;
continue;
}
else if (html[cursor] == '<') {
......@@ -48,15 +48,16 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
state = 4;
}
else if (html[cursor + 1] == '/') {
// start closing tag
if (currentNode && currentNode->parent) {
currentNode = currentNode->parent;
} else {
std::cout << "HTMLParser::Parse - currentNode/parent is null - close tag" << std::endl;
}
state = 1; // ignore closing tags
//starts.push_back(cursor);
}
// don't need to ignore these tags
/*
// these don't have closing tags
else if (
(html[cursor + 1] == 'h' && html[cursor + 2] == 'r') ||
(html[cursor + 1] == 'b' && html[cursor + 2] == 'r') ||
......@@ -66,9 +67,36 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
(html[cursor + 1] == 'm' && html[cursor + 2] == 'e' && html[cursor + 3] == 't' && html[cursor + 4] == 'a') ||
(html[cursor + 1] == 'i' && html[cursor + 2] == 'n' && html[cursor + 3] == 'p' && html[cursor + 4] == 'u' && html[cursor + 5] == 't')
) {
state = 1;
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
currentNode->children.push_back(tagNode);
tagNode->parent = currentNode;
} else {
std::cout << "HTMLParser::Parse - currentNode is null - tagNode" << std::endl;
}
currentNode = tagNode;
size_t closeTagPos = html.substr(cursor + 1).find(">");
//std::cout << "found closeTagPos at " << closeTagPos << std::endl;
if (closeTagPos == std::string::npos) {
std::cout << "HTMLParser::Parse - can't find closing tag for single tag" << std::endl;
cursor ++;
} else {
std::string element = html.substr(cursor, closeTagPos + 2);
//std::cout << "creating element, tag: " << element << std::endl;
parseTag(element, *dynamic_cast<TagNode*>(currentNode.get()));
cursor += 2 + closeTagPos;
}
// drop back
if (currentNode && currentNode->parent) {
currentNode = currentNode->parent;
} else {
std::cout << "HTMLParser::Parse - currentNode/parent is null - textNode state3" << std::endl;
}
prependWhiteSpace = false;
state = 0;
}
*/
else {
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
......@@ -99,15 +127,18 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
}
else if (state == 1) { // Skip Over Element
if (html[cursor] == '>') {
//std::string element = html.substr(starts.back(), cursor - starts.back() + 1);
//starts.pop_back();
//std::cout << "HTMLParser::parse - close tag: " << element << std::endl;
state = 0;
prependWhiteSpace=false;
prependWhiteSpace = false;
}
}
else if (state == 4) { // HTML Comment
if (html[cursor] == '-' && html[cursor + 1] == '-' && html[cursor + 2] == '>') {
state = 0;
cursor += 2; // advance cursor to end of comment
prependWhiteSpace=false;
prependWhiteSpace = false;
}
}
else if (state == 2) { // Tag
......@@ -115,8 +146,9 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
std::string element = html.substr(starts.back(), cursor - starts.back() + 1);
starts.pop_back();
parseTag(element, *dynamic_cast<TagNode*>(currentNode.get()));
//std::cout << "HTMLParser::parse - close tag: " << element << std::endl;
state = 0;
prependWhiteSpace=false;
prependWhiteSpace = false;
}
}
else if (state == 3) { // Text
......@@ -129,7 +161,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
std::cout << "HTMLParser::Parse - currentNode/parent is null - textNode state3" << std::endl;
}
state = 0;
prependWhiteSpace=false;
prependWhiteSpace = false;
}
}
}
......@@ -139,6 +171,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
}
void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
//std::cout << "HTMLParser::parseTag - element [" << element << "]" << std::endl;
unsigned int cursor;
unsigned int start = 1; // skip first <
int state = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment