252 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			252 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| 
								 | 
							
								#include "scanscalar.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <algorithm>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "exp.h"
							 | 
						||
| 
								 | 
							
								#include "regeximpl.h"
							 | 
						||
| 
								 | 
							
								#include "stream.h"
							 | 
						||
| 
								 | 
							
								#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								namespace YAML {
							 | 
						||
| 
								 | 
							
								// ScanScalar
							 | 
						||
| 
								 | 
							
								// . This is where the scalar magic happens.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// . We do the scanning in three phases:
							 | 
						||
| 
								 | 
							
								//   1. Scan until newline
							 | 
						||
| 
								 | 
							
								//   2. Eat newline
							 | 
						||
| 
								 | 
							
								//   3. Scan leading blanks.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// . Depending on the parameters given, we store or stop
							 | 
						||
| 
								 | 
							
								//   at different places in the above flow.
							 | 
						||
| 
								 | 
							
								// Scans a single scalar from INPUT and returns its text.
								//
								// params supplies the scanning configuration and is also written to:
								//   . params.end is pointed at Exp::Empty() if it is null on entry.
								//   . params.indent may be raised while params.detectIndent is set and
								//     no non-empty line has been seen yet.
								//   . params.leadingSpaces is reset to false on entry and set to true
								//     when scanning stops because a non-empty line starts at a column
								//     below params.indent.
								//
								// Throws ParserException with:
								//   . ErrorMsg::DOC_IN_SCALAR when a document indicator is matched at
								//     column 0 and params.onDocIndicator == THROW.
								//   . ErrorMsg::EOF_IN_SCALAR when the input ends and params.eatEnd is
								//     set.
								//   . ErrorMsg::TAB_IN_INDENTATION when a tab is found at a column below
								//     params.indent and params.onTabInIndentation == THROW.
								std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
								  bool foundNonEmptyLine = false;
								  // flow scalars have no opening line break to skip
								  bool pastOpeningBreak = (params.fold == FOLD_FLOW);
								  bool emptyLine = false, moreIndented = false;
								  // number of consecutive empty lines still pending in FOLD_BLOCK mode
								  int foldedNewlineCount = 0;
								  bool foldedNewlineStartedMoreIndented = false;
								  // size of `scalar` right after the most recent escape (npos if none);
								  // the post-processing below never trims back past this point
								  std::size_t lastEscapedChar = std::string::npos;
								  std::string scalar;
								  params.leadingSpaces = false;

								  if (!params.end) {
								    params.end = &Exp::Empty();
								  }

								  while (INPUT) {
								    // ********************************
								    // Phase #1: scan until line ending

								    std::size_t lastNonWhitespaceChar = scalar.size();
								    bool escapedNewline = false;
								    while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
								      if (!INPUT) {
								        break;
								      }

								      // document indicator?
								      if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
								        if (params.onDocIndicator == BREAK) {
								          break;
								        }
								        if (params.onDocIndicator == THROW) {
								          throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
								        }
								      }

								      foundNonEmptyLine = true;
								      pastOpeningBreak = true;

								      // escaped newline? (only if we're escaping on slash)
								      if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
								        // eat escape character and get out (but preserve trailing whitespace!)
								        INPUT.get();
								        lastNonWhitespaceChar = scalar.size();
								        lastEscapedChar = scalar.size();
								        escapedNewline = true;
								        break;
								      }

								      // escape this?
								      if (INPUT.peek() == params.escape) {
								        scalar += Exp::Escape(INPUT);
								        lastNonWhitespaceChar = scalar.size();
								        lastEscapedChar = scalar.size();
								        continue;
								      }

								      // otherwise, just add the character
								      char ch = INPUT.get();
								      scalar += ch;
								      if (ch != ' ' && ch != '\t') {
								        lastNonWhitespaceChar = scalar.size();
								      }
								    }

								    // eof? if we're looking to eat something, then we throw
								    if (!INPUT) {
								      if (params.eatEnd) {
								        throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
								      }
								      break;
								    }

								    // doc indicator?
								    if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
								        Exp::DocIndicator().Matches(INPUT)) {
								      break;
								    }

								    // are we done via character match?
								    int n = params.end->Match(INPUT);
								    if (n >= 0) {
								      if (params.eatEnd) {
								        INPUT.eat(n);
								      }
								      break;
								    }

								    // do we remove trailing whitespace?
								    if (params.fold == FOLD_FLOW)
								      scalar.erase(lastNonWhitespaceChar);

								    // ********************************
								    // Phase #2: eat line ending
								    n = Exp::Break().Match(INPUT);
								    INPUT.eat(n);

								    // ********************************
								    // Phase #3: scan initial spaces

								    // first the required indentation
								    while (INPUT.peek() == ' ' &&
								           (INPUT.column() < params.indent ||
								            (params.detectIndent && !foundNonEmptyLine)) &&
								           !params.end->Matches(INPUT)) {
								      INPUT.eat(1);
								    }

								    // update indent if we're auto-detecting
								    if (params.detectIndent && !foundNonEmptyLine) {
								      params.indent = std::max(params.indent, INPUT.column());
								    }

								    // and then the rest of the whitespace
								    while (Exp::Blank().Matches(INPUT)) {
								      // we check for tabs that masquerade as indentation
								      if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
								          params.onTabInIndentation == THROW) {
								        throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
								      }

								      if (!params.eatLeadingWhitespace) {
								        break;
								      }

								      if (params.end->Matches(INPUT)) {
								        break;
								      }

								      INPUT.eat(1);
								    }

								    // was this an empty line?
								    bool nextEmptyLine = Exp::Break().Matches(INPUT);
								    bool nextMoreIndented = Exp::Blank().Matches(INPUT);
								    // remember how the line before a run of empty lines was indented
								    if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
								      foldedNewlineStartedMoreIndented = moreIndented;

								    // for block scalars, we always start with a newline, so we should ignore it
								    // (not fold or keep)
								    if (pastOpeningBreak) {
								      switch (params.fold) {
								        case DONT_FOLD:
								          scalar += "\n";
								          break;
								        case FOLD_BLOCK:
								          if (!emptyLine && !nextEmptyLine && !moreIndented &&
								              !nextMoreIndented && INPUT.column() >= params.indent) {
								            scalar += " ";
								          } else if (nextEmptyLine) {
								            // defer: emitted once the run of empty lines ends
								            foldedNewlineCount++;
								          } else {
								            scalar += "\n";
								          }

								          // flush the deferred newlines once a non-empty line follows
								          if (!nextEmptyLine && foldedNewlineCount > 0) {
								            scalar += std::string(foldedNewlineCount - 1, '\n');
								            if (foldedNewlineStartedMoreIndented || nextMoreIndented ||
								                !foundNonEmptyLine) {
								              scalar += "\n";
								            }
								            foldedNewlineCount = 0;
								          }
								          break;
								        case FOLD_FLOW:
								          if (nextEmptyLine) {
								            scalar += "\n";
								          } else if (!emptyLine && !escapedNewline) {
								            scalar += " ";
								          }
								          break;
								      }
								    }

								    emptyLine = nextEmptyLine;
								    moreIndented = nextMoreIndented;
								    pastOpeningBreak = true;

								    // are we done via indentation?
								    if (!emptyLine && INPUT.column() < params.indent) {
								      params.leadingSpaces = true;
								      break;
								    }
								  }

								  // post-processing
								  if (params.trimTrailingSpaces) {
								    std::size_t pos = scalar.find_last_not_of(" \t");
								    // clamp so the cut never lands before the last escape position
								    if (lastEscapedChar != std::string::npos) {
								      if (pos < lastEscapedChar || pos == std::string::npos) {
								        pos = lastEscapedChar;
								      }
								    }
								    if (pos < scalar.size()) {
								      scalar.erase(pos + 1);
								    }
								  }

								  switch (params.chomp) {
								    case CLIP: {
								      std::size_t pos = scalar.find_last_not_of('\n');
								      // clamp so the cut never lands before the last escape position
								      if (lastEscapedChar != std::string::npos) {
								        if (pos < lastEscapedChar || pos == std::string::npos) {
								          pos = lastEscapedChar;
								        }
								      }
								      if (pos == std::string::npos) {
								        scalar.erase();
								      } else if (pos + 1 < scalar.size()) {
								        // keep exactly one character after pos
								        scalar.erase(pos + 2);
								      }
								    } break;
								    case STRIP: {
								      std::size_t pos = scalar.find_last_not_of('\n');
								      // clamp so the cut never lands before the last escape position
								      if (lastEscapedChar != std::string::npos) {
								        if (pos < lastEscapedChar || pos == std::string::npos) {
								          pos = lastEscapedChar;
								        }
								      }
								      if (pos == std::string::npos) {
								        scalar.erase();
								      } else if (pos < scalar.size()) {
								        scalar.erase(pos + 1);
								      }
								    } break;
								    default:
								      break;
								  }

								  return scalar;
								}
							 | 
						||
| 
								 | 
							
								}  // namespace YAML
							 |