//////////////////////////////////////////////////////////////////////////
//
// pgAdmin III - PostgreSQL Tools
// RCS-ID:      $Id:  $
// Copyright (C) 2002 - 2009, The pgAdmin Development Team
// This software is released under the BSD Licence
//
// csvfiles.cpp - CSV file parsing
//
//////////////////////////////////////////////////////////////////////////

#include "pgAdmin3.h"
#include "utils/sysLogger.h"
#include "utils/csvfiles.h"

// PostgreSQL and GPDB now support CSV format logs.
// So, we need a way to parse the CSV files into lines, and lines into tokens (fields).

bool CSVTokenizer::HasMoreTokens() const
{
    if ( m_string.length() > 0)
    {
        if ( m_pos >= m_string.length())
            return false;

        if ( m_string.find_first_not_of(wxT(','), m_pos) != wxString::npos )
            // there are non delimiter characters left, so we do have more tokens
            return true;

        if (m_string[m_pos] == wxT('\n'))
            return false;
    }
    return m_pos == 0 && !m_string.empty();
}

wxString CSVTokenizer::GetNextToken()
{
    wxString token;

    if ( !HasMoreTokens() )
        return token;

    // skip leading blanks if not quoted.
    while (m_pos < m_string.length() && m_string[m_pos] == wxT(' '))
        m_pos ++;

    // Are we a quoted field?  Must handle this special.
    bool quoted_string = (m_string[m_pos] == wxT('\"'));
    bool inquote = false;

    size_t pos = m_pos;

    // find the end of this token.
    for (; pos < m_string.length(); pos++)
    {
        if (quoted_string && m_string[pos] == wxT('\"'))
            inquote = !inquote;

        if (!inquote)
        {
            // Check to see if we have found the end of this token.
            // Tokens normally end with a ',' delimiter.
            if (m_string[pos] == wxT(','))
                break;

            // Last token is delimited by '\n' or by end of string.
            if (m_string[pos] == wxT('\n') && pos == m_string.length()-1)
                break;
        }
    }

    if (quoted_string && !inquote)
    {
        token.assign(m_string, m_pos + 1, pos - m_pos - 2);  // Remove leading and trailing quotes

        // Remove double doublequote chars, replace with single doublequote chars
        token.Replace(wxT("\"\""),wxT("\""),true);
    }
    else
        token.assign(m_string, m_pos, pos - m_pos);

    if (quoted_string && inquote)
            wxLogNotice(wxT("unterminated double quoted string: %s\n"),token);

    m_pos = pos + 1;    // Skip token and delimiter

    if (m_pos > m_string.length())  // Perhaps no delimiter if at end of string if orig string didn't have '\n'.
        m_pos = m_string.length();

    return token;
}

bool CSVLineTokenizer::HasMoreLines() const
{
    if ( m_string.find_first_not_of(wxT('\n'), m_pos) != wxString::npos )
        // there are non line-end characters left, so we do have more lines
        return true;
    return false;
}

wxString CSVLineTokenizer::GetNextLine(bool & partial)
{
    wxString token;
    partial = true;

    if ( !HasMoreLines() )
        return token;

    // find the end of this line.  CSV lines end in "\n", but 
    // CSV lines may have "\n" chars inside double-quoted strings, so we need to find that out.

    bool inquote = false;
    for (size_t pos = m_pos; pos < m_string.length(); pos++)
    {
        if (m_string[pos] == wxT('\"'))
            inquote = !inquote;

        if (m_string[pos] == wxT('\n') && !inquote)
        {
            // Good, we found a complete log line terminated 
            // by "\n", and the "\n" wasn't in a quoted string.

            size_t len = pos - m_pos + 1;   // return the line, including the trailing "\n"
            token.assign(m_string, m_pos, len);
            m_pos = pos + 1;                // point to next line.
            partial = false;
            return token;
        }
    }

    // no more delimiters, so the line is everything till the end of
    // string, but we don't have all of the CSV the line... Some must still be coming.
     
    token.assign(m_string, m_pos, wxString::npos);
    partial = true;

    m_pos = m_string.length();
  
    return token;
}