Mercurial > repos > rhope
view string.rhope @ 54:243d013a49cb
Defer processing of string literals until after simpler ones to avoid a segfault
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Thu, 29 Apr 2010 01:12:43 -0400 |
parents | 079200bc3e75 |
children | 048046186d22 |
line wrap: on
line source
// The empty string: a blueprint with no fields.
Blueprint Null String
{
}

// Constructor: builds a new Null String value.
Null String[:out(Null String)]
{
	out <- Build[Null String()]
}

// The depth of a Null String is always zero.
Depth@Null String[in:out(Int32)]
{
	out <- 0i32
}

// A flat string.
//   Buffer - the underlying array of bytes
//   Length - the value computed by Count UTF8 over Buffer
//            (see the String constructor at the end of this file)
Blueprint Base String
{
	Buffer
	Length(Int32,Naked)
}

// Writes the string's buffer to descriptor 1 and then, once that write has
// produced its result, writes a single byte 10 (line feed) to descriptor 1.
// The size passed to the first write is the Length of the Buffer array
// itself, not the string's Length field.
//   string - the Base String to print
//   out    - the result of the second write
Print@Base String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	write[1i32, [string]Buffer >>, Int64[[[string]Buffer >>]Length >>]]
	{
		out <- write[1i32, [Array[1]]Append[10u8], 1i64]
	}
}

// Consumes the continuation bytes of a multi-byte UTF-8 sequence and then
// resumes normal counting with Count UTF8.
//   num      - continuation bytes still expected (including the one at index)
//   arr      - the byte array being scanned
//   index    - position of the byte to examine
//   count    - characters counted before the current sequence started
//   consumed - bytes of the current sequence consumed so far
//   out      - total character count for the whole array
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multi-byte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//Only continue scanning once this byte has produced an updated count
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

// Counts the characters in a UTF-8 encoded byte array, starting at index.
// The byte at index is classified by its value:
//   below 128 - single-byte character
//   below 192 - stray continuation byte, counted as one character
//   below 224 - lead byte, 1 continuation byte expected
//   below 240 - lead byte, 2 continuation bytes expected
//   below 245 - lead byte, 3 continuation bytes expected
//   otherwise - out of range, counted as one character
//
//   arr   - the byte array being scanned
//   index - position of the byte to examine
//   count - characters counted so far
//   out   - total character count for the whole array
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			ncount <- [1i32]+[count]
		}{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at index (e.g. String called on an empty array): there is
		//nothing left to count, so report what has been counted so far.
		//Without this branch no output was produced for that input.
		//NOTE(review): relies on Index signalling a missing element on its
		//second output, the same way UTF8 Expect above already uses it.
		//The sum with 0i32 forms the result the same way as the other paths.
		out <- [0i32]+[count]
	}
	//Only continue scanning once this byte has produced an updated count
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

// Blueprint for a composite string with two children.
//   Left, Right        - the two child values
//   L Offset, L Length - Int32 fields; not used anywhere in this chunk
//                        (NOTE(review): presumably describe the slice of
//                        Left in use - confirm against the workers that
//                        populate them)
//   Depth              - Int32 depth value (cf. Depth@Null String)
//   Length             - Int32 length value
Blueprint String
{
	Left
	Right
	L Offset(Int32,Naked)
	L Length(Int32,Naked)
	Depth(Int32,Naked)
	Length(Int32,Naked)
}

// Constructor: wraps a byte array in a Base String, storing the array as
// Buffer and the result of Count UTF8 over the whole array as Length.
String[in(Array):out(Base String)]
{
	out <- [[Build[Base String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0]]
}