/* parsecsv.c
Copyright (c) 2003-2014 HandBrake Team
This file is part of the HandBrake source code
Homepage: <http://handbrake.fr/>.
It may be used under the terms of the GNU General Public License v2.
For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
*/
#include
#include "hb.h"
#include "parsecsv.h"
/* Internal declarations */

/* Character classes recognised by the parser (arguments may be evaluated
 * more than once, so only pass side-effect-free expressions). */

/* Line feed, vertical tab or carriage return */
#define is_newline(_x) ( (_x) == '\n' || (_x) == '\v' || (_x) == '\r' )

/* Any newline character, a space or a horizontal tab */
#define is_white(_x)   ( is_newline(_x) || (_x) == ' ' || (_x) == '\t' )

/* Column separator */
#define is_sep(_x)     ( ',' == (_x) )

/* Escape character: the byte that follows it is taken literally */
#define is_esc(_x)     ( '\\' == (_x) )

/* Out-of-band values returned by hb_parse_character(); all of them lie
 * above the 0..255 range of an ordinary byte. */
enum
{
    CSV_CHAR_COLSEP = 0x1000,
    CSV_CHAR_ROWSEP = 0x2000,
    CSV_CHAR_EOF    = 0x4000,
    CSV_CHAR_ERROR  = 0x8000
};

/* Values stored in hb_csv_file_t.parse_state */
enum
{
    CSV_PARSE_NORMAL = 0x0000,
    CSV_PARSE_SEEK   = 0x0001,
    CSV_PARSE_ESC    = 0x0002
};
/* Reads from the file until it can return either one cell byte or one of
 * the CSV_CHAR_* marker values. */
static uint16_t hb_parse_character( hb_csv_file_t * file );
/* Removes trailing whitespace from text in place; NULL is ignored. */
static void hb_trim_end( char *text );
/*
 * Open a CSV File
 *
 * filepath: path handed to hb_fopen() in "r" mode; NULL is rejected.
 *
 * Returns a newly allocated reader positioned at row 0, column 0, or NULL
 * when filepath is NULL, hb_fopen() fails, or the reader cannot be
 * allocated. Release the result with hb_close_csv_file().
 */
hb_csv_file_t *hb_open_csv_file( const char *filepath )
{
    hb_csv_file_t *file;
    FILE          *fileref;

    if( filepath == NULL )
    {
        return NULL;
    }

    fileref = hb_fopen( filepath, "r" );
    if( fileref == NULL )
    {
        return NULL;
    }

    file = malloc( sizeof( *file ) );
    if( file == NULL )
    {
        /* The stream is already open: close it so it is not leaked */
        fclose( fileref );
        return NULL;
    }

    file->fileref     = fileref;
    file->eof         = 0;
    /* Start in seek mode so that leading whitespace is skipped */
    file->parse_state = CSV_PARSE_SEEK;
    file->curr_col    = 0;
    file->curr_row    = 0;

    return file;
}
/*
 * Close the stream held by the reader and free the reader itself.
 * Calling this with NULL does nothing.
 */
void hb_close_csv_file( hb_csv_file_t *file )
{
    if( file != NULL )
    {
        fclose( file->fileref );
        free( file );
    }
}
/*
 * Parse CSV Cells
 *
 * Reads the next cell from file and returns it in a newly allocated
 * hb_csv_cell_t, to be released with hb_dispose_cell().
 *
 * The cell records the row and column the reader was on when the call
 * started. Its text is cut off after 1023 characters (the rest of the cell
 * is consumed and dropped) and trailing whitespace is removed.
 *
 * Returns NULL when file is NULL, when the reader has already reached the
 * end of its input, or when the cell cannot be allocated.
 *
 * A read error is treated like end of file: the characters collected so far
 * are returned and every later call returns NULL.
 */
hb_csv_cell_t *hb_read_next_cell( hb_csv_file_t *file )
{
    hb_csv_cell_t *cell;
    uint16_t       c;
    int            index = 0;

    if( file == NULL || file->eof )
    {
        return NULL;
    }

    cell = malloc( sizeof( *cell ) );
    if( cell == NULL )
    {
        return NULL;
    }

    cell->cell_row = file->curr_row;
    cell->cell_col = file->curr_col;

    for( ;; )
    {
        c = hb_parse_character( file );

        if( c == CSV_CHAR_EOF || c == CSV_CHAR_ERROR )
        {
            /* Stop on errors as well: the stream's error indicator stays
             * set, so retrying would return CSV_CHAR_ERROR forever. */
            file->eof = 1;
            break;
        }
        if( c == CSV_CHAR_ROWSEP )
        {
            file->curr_row++;
            file->curr_col = 0;
            break;
        }
        if( c == CSV_CHAR_COLSEP )
        {
            file->curr_col++;
            break;
        }

        /* Ordinary character: keep it while there is room, leaving one
         * byte for the terminator; overflow is silently discarded. */
        if( index < 1023 )
        {
            cell->cell_text[index] = (char)c;
            index++;
        }
    }

    /* Terminate the cell text */
    cell->cell_text[index] = '\0';
    hb_trim_end( cell->cell_text );

    return cell;
}
/*
 * Free a cell returned by hb_read_next_cell().
 * NULL is accepted and ignored.
 */
void hb_dispose_cell( hb_csv_cell_t *cell )
{
    /* free() already does nothing for a NULL pointer */
    free( cell );
}
/*
 * Raw parsing
 *
 * Pulls bytes from file->fileref until one of them yields a result, and
 * updates file->parse_state along the way.
 *
 * Returns the byte value for ordinary and escaped characters, or:
 *   CSV_CHAR_COLSEP  for an unescaped comma,
 *   CSV_CHAR_ROWSEP  for an unescaped newline outside seek mode,
 *   CSV_CHAR_EOF     when the stream's end-of-file indicator is set,
 *   CSV_CHAR_ERROR   when file is NULL or the stream's error indicator is set.
 */
static uint16_t hb_parse_character( hb_csv_file_t * file )
{
    if( file == NULL )
    {
        return CSV_CHAR_ERROR;
    }

    for( ;; )
    {
        const int ch = fgetc( file->fileref );

        if( feof( file->fileref ) )
        {
            return CSV_CHAR_EOF;
        }
        if( ferror( file->fileref ) )
        {
            return CSV_CHAR_ERROR;
        }

        /* The byte after an escape is always taken literally */
        if( file->parse_state == CSV_PARSE_ESC )
        {
            file->parse_state = CSV_PARSE_NORMAL;
            return (uint16_t)ch;
        }

        /* In seek mode, whitespace (newlines included) is skipped */
        if( file->parse_state == CSV_PARSE_SEEK && is_white(ch) )
        {
            continue;
        }

        /* The escape character itself is consumed, not returned */
        if( is_esc(ch) )
        {
            file->parse_state = CSV_PARSE_ESC;
            continue;
        }

        if( is_sep(ch) )
        {
            file->parse_state = CSV_PARSE_SEEK;
            return CSV_CHAR_COLSEP;
        }

        if( is_newline(ch) )
        {
            file->parse_state = CSV_PARSE_SEEK;
            return CSV_CHAR_ROWSEP;
        }

        file->parse_state = CSV_PARSE_NORMAL;
        return (uint16_t)ch;
    }
}
/*
 * Strip trailing whitespace (as defined by is_white) from text in place by
 * overwriting it with terminators. A NULL text is ignored.
 */
static void hb_trim_end( char *text )
{
    size_t len;

    if( text == NULL )
    {
        return;
    }

    /* Measure once and keep the length as size_t rather than narrowing to int */
    len = strlen( text );
    while( len > 0 && is_white( text[len - 1] ) )
    {
        len--;
        text[len] = '\0';
    }
}