diff options
-rw-r--r-- | src/mesa/main/arbparse.c | 461 |
1 files changed, 248 insertions, 213 deletions
diff --git a/src/mesa/main/arbparse.c b/src/mesa/main/arbparse.c index 2071835049e..e8a87aeba10 100644 --- a/src/mesa/main/arbparse.c +++ b/src/mesa/main/arbparse.c @@ -46,7 +46,8 @@ /* TODO: * Fragment Program Stuff: * ----------------------------------------------------- - * - How does negating on SWZ work?? If any of the components have a -, negate? + * - How does negating on SWZ work?? If any of the components have a -, + * negate? * - how does thing like 'foo[N]' work in src registers? * * - things from Michal's email @@ -56,7 +57,8 @@ * + fix multiple cases in switches, that might change * (these are things that are #defined to the same value, but occur * only on fp or vp's, which funkifies the switch statements) - * - STATE_TEX_* STATE_CLIP_PLANE, etc and PRECISION_HINT_FASTEST/PositionInvariant + * - STATE_TEX_* STATE_CLIP_PLANE, etc and PRECISION_HINT_FASTEST/ + * PositionInvariant * * - check all limits of number of various variables * + parameters @@ -93,8 +95,9 @@ * Outstanding Questions: * ----------------------------------------------------- * - palette matrix? do we support this extension? what is the extention? - * - When can we fetch env/local params from their own register files, and when - * to we have to fetch them into the main state register file? (think arrays) + * - When can we fetch env/local params from their own register files, and + * when to we have to fetch them into the main state register file? + * (think arrays) * * Grammar Changes: * ----------------------------------------------------- @@ -116,83 +119,104 @@ typedef byte *production; /* VERSION: 0.3 */ /* - INTRODUCTION - ------------ - - The task is to check the syntax of an input string. Input string is a stream of ASCII - characters terminated with null-character ('\0'). Checking it using C language is - difficult and hard to implement without bugs. It is hard to maintain and change prior - to further syntax changes. - - This is because of high redundancy of the C code. 
Large blocks of code are duplicated with - only small changes. Even using macros does not solve the problem, because macros cannot - erase the complexity of the code. - - The resolution is to create a new language that will be highly oriented to our task. Once - we describe particular syntax, we are done. We can then focus on the code that implements - the language. The size and complexity of it is relatively small than the code that directly - checks the syntax. - - First, we must implement our new language. Here, the language is implemented in C, but it - could also be implemented in any other language. The code is listed below. We must take - a good care that it is bug free. This is simple because the code is simple and clean. - - Next, we must describe the syntax of our new language in itself. Once created and checked - manually that it is correct, we can use it to check another scripts. - - Note that our new language loading code does not have to check the syntax. It is because we - assume that the script describing itself is correct, and other scripts can be syntactically - checked by the former script. The loading code must only do semantic checking which leads us to - simple resolving references. - - THE LANGUAGE - ------------ - - Here I will describe the syntax of the new language (further called "Synek"). It is mainly a - sequence of declarations terminated by a semicolon. The declaration consists of a symbol, - which is an identifier, and its definition. A definition is in turn a sequence of specifiers - connected with ".and" or ".or" operator. These operators cannot be mixed together in a one - definition. Specifier can be a symbol, string, character, character range or a special - keyword ".true" or ".false". - - On the very beginning of the script there is a declaration of a root symbol and is in the form: +INTRODUCTION +------------ + +The task is to check the syntax of an input string. 
Input string is a +stream of ASCII characters terminated with null-character +('\0'). Checking it using C language is difficult and hard to +implement without bugs. It is hard to maintain and change prior to +further syntax changes. + +This is because of high redundancy of the C code. Large blocks of code +are duplicated with only small changes. Even using macros does not +solve the problem, because macros cannot erase the complexity of the +code. + +The resolution is to create a new language that will be highly +oriented to our task. Once we describe particular syntax, we are +done. We can then focus on the code that implements the language. The +size and complexity of it is relatively small compared to the code that +directly checks the syntax. + +First, we must implement our new language. Here, the language is +implemented in C, but it could also be implemented in any other +language. The code is listed below. We must take good care that it +is bug free. This is simple because the code is simple and clean. + +Next, we must describe the syntax of our new language in itself. Once +created and checked manually that it is correct, we can use it to +check other scripts. + +Note that our new language loading code does not have to check the +syntax. It is because we assume that the script describing itself is +correct, and other scripts can be syntactically checked by the former +script. The loading code must only do semantic checking which leads us +to simple resolving references. + +THE LANGUAGE +------------ + +Here I will describe the syntax of the new language (further called +"Synek"). It is mainly a sequence of declarations terminated by a +semicolon. The declaration consists of a symbol, which is an +identifier, and its definition. A definition is in turn a sequence of +specifiers connected with ".and" or ".or" operator. These operators +cannot be mixed together in one definition. 
Specifier can be a +symbol, string, character, character range or a special keyword +".true" or ".false". + +On the very beginning of the script there is a declaration of a root +symbol and is in the form: .syntax <root_symbol>; - The <root_symbol> must be on of the symbols in declaration sequence. The syntax is correct if - the root symbol evaluates to true. A symbol evaluates to true if the definition associated with - the symbol evaluates to true. Definition evaluation depends on the operator used to connect - specifiers in the definition. If ".and" operator is used, definition evaluates to true if and - only if all the specifiers evaluate to true. If ".or" operator is used, definition evalutes to - true if any of the specifiers evaluates to true. If definition contains only one specifier, - it is evaluated as if it was connected with ".true" keyword by ".and" operator. - - If specifier is a ".true" keyword, it always evaluates to true. - - If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false - when it does not evaluate to true. - - Character range specifier is in the form: - '<first_character>' - '<second_character>' - If specifier is a character range, it evaluates to true if character in the stream is greater - or equal to <first_character> and less or equal to <second_character>. In that situation - the stream pointer is advanced to point to next character in the stream. All C-style escape - sequences are supported although trigraph sequences are not. The comparisions are performed - on 8-bit unsigned integers. - - Character specifier is in the form: - '<single_character>' - It evaluates to true if the following character range specifier evaluates to true: - '<single_character>' - '<single_character>' - - String specifier is in the form: + +The <root_symbol> must be on of the symbols in declaration +sequence. The syntax is correct if the root symbol evaluates to +true. 
A symbol evaluates to true if the definition associated with the +symbol evaluates to true. Definition evaluation depends on the +operator used to connect specifiers in the definition. If ".and" +operator is used, definition evaluates to true if and only if all the +specifiers evaluate to true. If ".or" operator is used, definition +evalutes to true if any of the specifiers evaluates to true. If +definition contains only one specifier, it is evaluated as if it was +connected with ".true" keyword by ".and" operator. + +If specifier is a ".true" keyword, it always evaluates to true. + +If specifier is a ".false" keyword, it always evaluates to +false. Specifier evaluates to false when it does not evaluate to true. + +Character range specifier is in the form: + '<first_character>' - '<second_character>' + +If specifier is a character range, it evaluates to true if character +in the stream is greater or equal to <first_character> and less or +equal to <second_character>. In that situation the stream pointer is +advanced to point to next character in the stream. All C-style escape +sequences are supported although trigraph sequences are not. The +comparisions are performed on 8-bit unsigned integers. + +Character specifier is in the form: + '<single_character>' + +It evaluates to true if the following character range specifier evaluates to +true: + '<single_character>' - '<single_character>' + +String specifier is in the form: "<string>" - Let N be the number of characters in <string>. Let <string>[i] designate i-th character in - <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N) - the following character specifier evaluates to true: - '<string>[i]' - If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'. - - Symbol specifier can be optionally preceded by a ".loop" keyword in the form: - .loop <symbol> (1) + +Let N be the number of characters in <string>. 
Let <string>[i] +designate i-th character in <string>. Then the string specifier +evaluates to true if and only if for i in the range [0, N) the +following character specifier evaluates to true: + '<string>[i]' + +If <string>[i] is a quotation mark, '<string>[i]' is replaced with +'\<string>[i]'. + +Symbol specifier can be optionally preceded by a ".loop" keyword in the form: + .loop <symbol> (1) where <symbol> is defined as follows: <symbol> <definition>; (2) Construction (1) is replaced by the following code: @@ -202,134 +226,148 @@ typedef byte *production; <symbol$2> <symbol> .and <symbol$1>; <symbol> <definition>; - ESCAPE SEQUENCES - ---------------- - - Synek supports all escape sequences in character specifiers. The mapping table is listed below. - All occurences of the characters in the first column are replaced with the corresponding - character in the second column. - - Escape sequence Represents - ------------------------------------------------------------------------------------------------ - \a Bell (alert) - \b Backspace - \f Formfeed - \n New line - \r Carriage return - \t Horizontal tab - \v Vertical tab - \' Single quotation mark - \" Double quotation mark - \\ Backslash - \? Literal question mark - \ooo ASCII character in octal notation - \xhhh ASCII character in hexadecimal notation - ------------------------------------------------------------------------------------------------ - - RAISING ERRORS - -------------- - - Any specifier can be followed by a special construction that is executed when the specifier - evaluates to false. The construction is in the form: - .error <ERROR_TEXT> - <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is - in the form: - .errtext <ERROR_TEXT> "<error_desc>" - When specifier evaluates to false and this construction is present, parsing is stopped - immediately and <error_desc> is returned as a result of parsing. 
The error position is also - returned and it is meant as an offset from the beggining of the stream to the character that - was valid so far. Example: - - (**** syntax script ****) - - .syntax program; - .errtext MISSING_SEMICOLON "missing ';'" - program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and - .loop space .and '\0'; - declaration "declare" .and .loop space .and identifier; - space ' '; +ESCAPE SEQUENCES +---------------- + +Synek supports all escape sequences in character specifiers. The +mapping table is listed below. All occurences of the characters in +the first column are replaced with the corresponding character in the +second column. + + Escape sequence Represents + ----------------------------------------------------------------------- + \a Bell (alert) + \b Backspace + \f Formfeed + \n New line + \r Carriage return + \t Horizontal tab + \v Vertical tab + \' Single quotation mark + \" Double quotation mark + \\ Backslash + \? Literal question mark + \ooo ASCII character in octal notation + \xhhh ASCII character in hexadecimal notation + ----------------------------------------------------------------------- + + +RAISING ERRORS +-------------- + +Any specifier can be followed by a special construction that is +executed when the specifier evaluates to false. The construction is in +the form: + .error <ERROR_TEXT> + +<ERROR_TEXT> is an identifier declared earlier by error text +declaration. The declaration is in the form: + + .errtext <ERROR_TEXT> "<error_desc>" + +When specifier evaluates to false and this construction is present, +parsing is stopped immediately and <error_desc> is returned as a +result of parsing. The error position is also returned and it is meant +as an offset from the beggining of the stream to the character that +was valid so far. 
Example: + + (**** syntax script ****) + + .syntax program; + .errtext MISSING_SEMICOLON "missing ';'" + program declaration .and .loop space .and ';' + .error MISSING_SEMICOLON .and + .loop space .and '\0'; + declaration "declare" .and .loop space .and identifier; + space ' '; (**** sample code ****) - - declare foo , - - In the example above checking the sample code will result in error message "missing ';'" and - error position 12. The sample code is not correct. Note the presence of '\0' specifier to - assure that there is no code after semicolon - only spaces. - <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case, - the identifier and dollar signs are replaced by a string retrieved by invoking symbol with - the identifier name. The starting position is the error position. The lenght of the resulting - string is the position after invoking the symbol. - - PRODUCTION - ---------- - - Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers - that evaluate to true. That is, every specifier and optional error construction can be followed - by a number of emit constructions that are in the form: - .emit <parameter> - <paramater> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by - 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration - in the form: + declare foo , + +In the example above checking the sample code will result in error +message "missing ';'" and error position 12. The sample code is not +correct. Note the presence of '\0' specifier to assure that there is +no code after semicolon - only spaces. <error_desc> can optionally +contain identifier surrounded by dollar signs $. In such a case, the +identifier and dollar signs are replaced by a string retrieved by +invoking symbol with the identifier name. The starting position is the +error position. 
The length of the resulting string is the position +after invoking the symbol. + + +PRODUCTION +---------- + +Synek not only checks the syntax but it can also produce (emit) bytes +associated with specifiers that evaluate to true. That is, every +specifier and optional error construction can be followed by a number +of emit constructions that are in the form: + .emit <parameter> + +<parameter> can be a HEX number, identifier, a star * or a dollar +$. HEX number is preceded by 0x or 0X. If <parameter> is an +identifier, it must be earlier declared by emit code declaration in +the form: .emtcode <identifier> <hex_number> - When given specifier evaluates to true, all emits associated with the specifier are output - in order they were declared. A star means that last-read character should be output instead - of constant value. Example: - - (**** syntax script ****) +When given specifier evaluates to true, all emits associated with the +specifier are output in order they were declared. A star means that +last-read character should be output instead of constant +value. Example: - .syntax foobar; - .emtcode WORD_FOO 0x01 - .emtcode WORD_BAR 0x02 - foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00; - FOO "foo" .and SPACE; - BAR "bar" .and SPACE; - SPACE ' ' .or '\0'; + (**** syntax script ****) - (**** sample text 1 ****) + .syntax foobar; + .emtcode WORD_FOO 0x01 + .emtcode WORD_BAR 0x02 + foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00; + FOO "foo" .and SPACE; + BAR "bar" .and SPACE; + SPACE ' ' .or '\0'; - foo + (**** sample text 1 ****) - (**** sample text 2 ****) + foo - foobar + (**** sample text 2 ****) - For both samples the result will be one-element array. For first sample text it will be - value 1, for second - 0. Note that every text will be accepted because of presence of - .true as an alternative. + foobar - Another example: +For both samples the result will be one-element array. 
For first +sample text it will be value 1, for second - 0. Note that every text +will be accepted because of presence of .true as an alternative. - (**** syntax script ****) +Another example: - .syntax declaration; - .emtcode VARIABLE 0x01 - declaration "declare" .and .loop space .and - identifier .emit VARIABLE .and (1) - .true .emit 0x00 .and (2) - .loop space .and ';'; - space ' ' .or '\t'; - identifier .loop id_char .emit *; (3) - id_char 'a'-'z' .or 'A'-'Z' .or '_'; + (**** syntax script ****) + .syntax declaration; + .emtcode VARIABLE 0x01 + declaration "declare" .and .loop space .and + identifier .emit VARIABLE .and (1) + .true .emit 0x00 .and (2) + .loop space .and ';'; + space ' ' .or '\t'; + identifier .loop id_char .emit *; (3) + id_char 'a'-'z' .or 'A'-'Z' .or '_'; (**** sample code ****) - - declare fubar; - - In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to - true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used - to terminate the string with null to signal when the string ends. Specifier (3) outputs - all characters that make declared identifier. The result of sample code will be the - following array: + declare fubar; + +In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If +it evaluates to true, VARIABLE constant and then production of the +symbol is output. Specifier (2) is used to terminate the string with +null to signal when the string ends. Specifier (3) outputs all +characters that make declared identifier. The result of sample code +will be the following array: { 1, 'f', 'u', 'b', 'a', 'r', 0 } - If .emit is followed by dollar $, it means that current position should be output. Current - position is a 32-bit unsigned integer distance from the very beginning of the parsed string to - first character consumed by the specifier associated with the .emit instruction. 
Current - position is stored in the output buffer in Little-Endian convention (the lowest byte comes - first). -*/ +If .emit is followed by dollar $, it means that current position +should be output. Current position is a 32-bit unsigned integer +distance from the very beginning of the parsed string to first +character consumed by the specifier associated with the .emit +instruction. Current position is stored in the output buffer in +Little-Endian convention (the lowest byte comes first). */ + /** * This is the text describing the rules to parse the grammar @@ -707,8 +745,8 @@ set_last_error (const byte * msg, byte * param, GLint pos) } /* - memory management routines -*/ + * memory management routines + */ static GLvoid * mem_alloc (GLsizei size) { @@ -741,8 +779,8 @@ str_duplicate (const byte * str) } /* - emit type typedef -*/ + * emit type typedef + */ typedef enum emit_type_ { et_byte, /* explicit number */ @@ -752,8 +790,8 @@ typedef enum emit_type_ emit_type; /* - emit typedef -*/ + * emit typedef + */ typedef struct emit_ { emit_type m_emit_type; @@ -1067,10 +1105,10 @@ barray_append (barray ** ba, barray ** nb) return 0; } -/* - * adds emit chain pointed by em to the end of array pointed by *ba, - * returns 0 on success, - * returns 1 otherwise + +/** + * Adds emit chain pointed by em to the end of array pointed by *ba. + * \return 0 on success, 1 otherwise. 
*/ static GLint barray_push (barray ** ba, emit * em, byte c, GLuint pos) @@ -1117,7 +1155,7 @@ barray_push (barray ** ba, emit * em, byte c, GLuint pos) return 0; } -/* +/** * string to string map typedef */ typedef struct map_str_ @@ -1159,11 +1197,10 @@ map_str_append (map_str ** ma, map_str ** nm) *ma = *nm; } -/* +/** * searches the map for specified key, * if the key is matched, *data is filled with data associated with the key, - * returns 0 if the key is matched, - * returns 1 otherwise + * \return 0 if the key is matched, 1 otherwise */ static GLint map_str_find (map_str ** ma, const byte * key, byte ** data) @@ -1184,7 +1221,7 @@ map_str_find (map_str ** ma, const byte * key, byte ** data) return 1; } -/* +/** * string to byte map typedef */ typedef struct map_byte_ @@ -1224,11 +1261,10 @@ map_byte_append (map_byte ** ma, map_byte ** nm) *ma = *nm; } -/* - * searches the map for specified key, - * if the key is matched, *data is filled with data associated with the key, - * returns 0 if the is matched, - * returns 1 otherwise +/** + * Searches the map for specified key, + * If the key is matched, *data is filled with data associated with the key, + * \return 0 if the key is matched, 1 otherwise */ static GLint map_byte_find (map_byte ** ma, const byte * key, byte * data) @@ -1286,11 +1322,10 @@ map_def_append (map_def ** ma, map_def ** nm) *ma = *nm; } -/* +/** * searches the map for specified key, * if the key is matched, *data is filled with data associated with the key, - * returns 0 if the is matched, - * returns 1 otherwise + * \return 0 if the key is matched, 1 otherwise */ static GLint map_def_find (map_def ** ma, const byte * key, defntn ** data) |