diff options
Diffstat (limited to 'test/parsecsv.c')
-rw-r--r-- | test/parsecsv.c | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/test/parsecsv.c b/test/parsecsv.c new file mode 100644 index 000000000..f0a8a1a18 --- /dev/null +++ b/test/parsecsv.c @@ -0,0 +1,214 @@ +/* $Id: parsecsv.c $ + + This file is part of the HandBrake source code. + Homepage: <http://handbrake.m0k.org/>. + It may be used under the terms of the GNU General Public License. */ + +#include <fcntl.h> +#include "hb.h" +#include "parsecsv.h" + +/* Internal declarations */ +#define is_newline(_x) ( (_x) == 13 || \ + (_x) == 11 || \ + (_x) == 10 ) + +#define is_white(_x) ( (_x) == '\t' || \ + (_x) == ' ' || \ + is_newline(_x) ) + +#define is_sep(_x) ( (_x) == ',' ) + +#define is_esc(_x) ( (_x) == '\\' ) + +#define CSV_CHAR_ERROR 0x8000 +#define CSV_CHAR_EOF 0x4000 +#define CSV_CHAR_ROWSEP 0x2000 +#define CSV_CHAR_COLSEP 0x1000 + +#define CSV_PARSE_NORMAL 0x0000 +#define CSV_PARSE_SEEK 0x0001 +#define CSV_PARSE_ESC 0x0002 + +static uint16_t hb_parse_character( hb_csv_file_t * file ); +static void hb_trim_end( char *text ); + +/* Open a CSV File */ +hb_csv_file_t *hb_open_csv_file( const char *filepath ) +{ + hb_csv_file_t *file = NULL; + FILE * fileref; + + if( filepath == NULL ) + { + return file; + } + + fileref = fopen( filepath, "r" ); + if( fileref == NULL ) + { + return file; + } + + file = malloc( sizeof( hb_csv_file_t ) ); + file->fileref = fileref; + file->eof = 0; + file->parse_state = CSV_PARSE_SEEK; + file->curr_col = 0; + file->curr_row = 0; + return file; +} + +void hb_close_csv_file( hb_csv_file_t *file ) +{ + if( file == NULL ) + { + return; + } + + fclose( file->fileref ); + free( file ); +} + +/* Parse CSV Cells */ +hb_csv_cell_t *hb_read_next_cell( hb_csv_file_t *file ) +{ + hb_csv_cell_t *cell = NULL; + uint16_t c; + int index; + + if( file == NULL ) + { + return cell; + } + + if( file->eof ) + { + return cell; + } + + cell = malloc( sizeof( hb_csv_cell_t ) ); + cell->cell_row = file->curr_row; + cell->cell_col = file->curr_col; + index = 0; + while( CSV_CHAR_EOF != (c = hb_parse_character( file ) ) ) + { + if( c == CSV_CHAR_ROWSEP ) + { + file->curr_row++; + file->curr_col = 0; + break; + } + else if( c == CSV_CHAR_COLSEP ) + { + file->curr_col++; + break; + } + else + { + if( index < 1023 ) + { + cell->cell_text[index] = (char)c; + index++; + } + } + } + + if( c == CSV_CHAR_EOF ) + { + file->eof = 1; + } + + /* Terminate the cell text */ + cell->cell_text[index] = '\0'; + hb_trim_end( cell->cell_text ); + return cell; +} + +void hb_dispose_cell( hb_csv_cell_t *cell ) +{ + if( cell == NULL ) + { + return; + } + + free( cell ); +} + +/* Raw parsing */ +static uint16_t hb_parse_character( hb_csv_file_t * file ) +{ + int byte; + uint16_t c; + int read_result; + int need_char = 1; + + if( file == NULL ) + { + return CSV_CHAR_ERROR; + } + + while( need_char ) + { + byte = fgetc( file->fileref ); + if( feof( file->fileref ) ) + { + return CSV_CHAR_EOF; + } + if( ferror( file->fileref ) ) + { + return CSV_CHAR_ERROR; + } + + if( file->parse_state == CSV_PARSE_SEEK && is_white(byte) ) + { + continue; + } + else if( file->parse_state != CSV_PARSE_ESC && is_esc(byte) ) + { + file->parse_state = CSV_PARSE_ESC; + continue; + } + else if( file->parse_state != CSV_PARSE_ESC && is_sep(byte) ) + { + file->parse_state = CSV_PARSE_SEEK; + need_char = 0; + c = CSV_CHAR_COLSEP; + } + else if( file->parse_state == CSV_PARSE_ESC ) + { + file->parse_state = CSV_PARSE_NORMAL; + need_char = 0; + c = (uint16_t)byte; + } + else if( is_newline(byte) ) + { + file->parse_state = CSV_PARSE_SEEK; + need_char = 0; + c = CSV_CHAR_ROWSEP; + } + else + { + file->parse_state = CSV_PARSE_NORMAL; + need_char = 0; + c = (uint16_t)byte; + } + } + + return c; +} + +static void hb_trim_end( char *text ) +{ + if( text == NULL ) + { + return; + } + + int i = strlen(text) - 1; + + for( i = strlen(text) - 1; i >= 0 && is_white(text[i]) ; i-- ) + { + text[i] = '\0'; + } +}
\ No newline at end of file |