////////////////////////////////////////////////////////////////////////// // // pgAdmin III - PostgreSQL Tools // RCS-ID: $Id: $ // Copyright (C) 2002 - 2009, The pgAdmin Development Team // This software is released under the BSD Licence // // csvfiles.cpp - CSV file parsing // ////////////////////////////////////////////////////////////////////////// #include "pgAdmin3.h" #include "utils/sysLogger.h" #include "utils/csvfiles.h" // PostgreSQL and GPDB now support CSV format logs. // So, we need a way to parse the CSV files into lines, and lines into tokens (fields). bool CSVTokenizer::HasMoreTokens() const { if ( m_string.length() > 0) { if ( m_pos >= m_string.length()) return false; if ( m_string.find_first_not_of(wxT(','), m_pos) != wxString::npos ) // there are non delimiter characters left, so we do have more tokens return true; if (m_string[m_pos] == wxT('\n')) return false; } return m_pos == 0 && !m_string.empty(); } wxString CSVTokenizer::GetNextToken() { wxString token; if ( !HasMoreTokens() ) return token; // skip leading blanks if not quoted. while (m_pos < m_string.length() && m_string[m_pos] == wxT(' ')) m_pos ++; // Are we a quoted field? Must handle this special. bool quoted_string = (m_string[m_pos] == wxT('\"')); bool inquote = false; size_t pos = m_pos; // find the end of this token. for (; pos < m_string.length(); pos++) { if (quoted_string && m_string[pos] == wxT('\"')) inquote = !inquote; if (!inquote) { // Check to see if we have found the end of this token. // Tokens normally end with a ',' delimiter. if (m_string[pos] == wxT(',')) break; // Last token is delimited by '\n' or by end of string. if (m_string[pos] == wxT('\n') && pos == m_string.length()-1) break; } } if (quoted_string && !inquote) { token.assign(m_string, m_pos + 1, pos - m_pos - 2); // Remove leading and trailing quotes // Remove double doublequote chars, replace with single doublequote chars token.Replace(wxT("\"\""),wxT("\""),true); } else token.assign(m_string, m_pos, pos - m_pos); if (quoted_string && inquote) wxLogNotice(wxT("unterminated double quoted string: %s\n"),token); m_pos = pos + 1; // Skip token and delimiter if (m_pos > m_string.length()) // Perhaps no delimiter if at end of string if orig string didn't have '\n'. m_pos = m_string.length(); return token; } bool CSVLineTokenizer::HasMoreLines() const { if ( m_string.find_first_not_of(wxT('\n'), m_pos) != wxString::npos ) // there are non line-end characters left, so we do have more lines return true; return false; } wxString CSVLineTokenizer::GetNextLine(bool & partial) { wxString token; partial = true; if ( !HasMoreLines() ) return token; // find the end of this line. CSV lines end in "\n", but // CSV lines may have "\n" chars inside double-quoted strings, so we need to find that out. bool inquote = false; for (size_t pos = m_pos; pos < m_string.length(); pos++) { if (m_string[pos] == wxT('\"')) inquote = !inquote; if (m_string[pos] == wxT('\n') && !inquote) { // Good, we found a complete log line terminated // by "\n", and the "\n" wasn't in a quoted string. size_t len = pos - m_pos + 1; // return the line, including the trailing "\n" token.assign(m_string, m_pos, len); m_pos = pos + 1; // point to next line. partial = false; return token; } } // no more delimiters, so the line is everything till the end of // string, but we don't have all of the CSV the line... Some must still be coming. token.assign(m_string, m_pos, wxString::npos); partial = true; m_pos = m_string.length(); return token; }