Mercurial > repos > rhope
diff string.rhope @ 51:7d6a6906b648
Added integer type conversions and started on the implementation of String
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Thu, 22 Apr 2010 02:18:26 -0400 |
parents | |
children | 079200bc3e75 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/string.rhope Thu Apr 22 02:18:26 2010 -0400
@@ -0,0 +1,159 @@
+
+//Blueprint with no fields (presumably stands for an empty string - TODO confirm)
+Blueprint Null String
+{
+}
+
+//Constructor: returns a newly built Null String
+Null String[:out(Null String)]
+{
+    out <- Build[Null String()]
+}
+
+//Depth of a Null String: always 0, the input itself is not inspected
+Depth@Null String[in:out(Int32)]
+{
+    out <- 0i32
+}
+
+//Flat string: the String constructor in this file stores an Array in Buffer
+//and the number of UTF-8 characters found in it in Length
+Blueprint Base String
+{
+    Buffer
+    Length(Int32,Naked)
+}
+
+//Checks the continuation bytes of a multi-byte UTF-8 sequence
+//  num:      continuation bytes still expected
+//  arr:      byte array being scanned
+//  index:    position of the byte to check
+//  count:    characters counted before this sequence started
+//  consumed: bytes of this sequence read so far
+//  out:      character count once the end of arr has been reached
+//A broken sequence is counted as one character for every byte read from it
+UTF8 Expect[num,arr,index,count,consumed:out]
+{
+    byte <- [arr]Index[index]
+    {
+        If[[128u8]>[byte]]
+        {
+            //Error: ASCII byte when we were expecting part of a multibyte sequence
+            //treat each byte as a separate character
+            ncount <- [1i32]+[[count]+[consumed]]
+        }{
+            If[[192u8]>[byte]]
+            {
+                If[[num]=[1]]
+                {
+                    //Sequence is complete count as single character
+                    ncount <- [1i32]+[count]
+                }{
+                    out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
+                }
+            }{
+                //Error: too high to be a continuation byte
+                ncount <- [1i32]+[[count]+[consumed]]
+            }
+        }
+    }{
+        //Error: string ended in the middle of a multi-byte sequence
+        out <- [count]+[consumed]
+    }
+    //Once ncount is known, go back to Count UTF8 for the next index, or finish if there is none
+    Val[ncount]
+    {
+        [arr]Next[index]
+        {
+            out <- Count UTF8[arr, ~, ncount]
+        }{
+            out <- Val[ncount]
+        }
+    }
+}
+
+//Counts the UTF-8 characters in arr from index onward and adds them to count
+//Stray continuation bytes and lead bytes of 245 or more count as one character each
+//If there is no byte at index (e.g. arr is empty) count is returned unchanged
+Count UTF8[arr,index,count:out]
+{
+    byte <- [arr]Index[index]
+    {
+        If[[128u8]>[byte]]
+        { ncount <- [1i32]+[count] }
+        {
+            If[[192u8]>[byte]]
+            {
+                //Error: Encoding for 2nd,3rd or 4th byte of sequence
+                //treat as a single character
+                ncount <- [1i32]+[count]
+            }{
+                If[[224u8]>[byte]]
+                {
+                    out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
+                }{
+                    If[[240u8]>[byte]]
+                    {
+                        out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
+                    }{
+                        If[[245u8]>[byte]]
+                        {
+                            out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
+                        }{
+                            //Error: Out of range of Unicode standard
+                            //treat as a single character
+                            ncount <- [1i32]+[count]
+                        }
+                    }
+                }
+            }
+        }
+    }{
+        //Nothing stored at index: there are no characters left to count
+        out <- Val[count]
+    }
+    //Continue with the byte after index when there is one, otherwise finish with ncount
+    [arr]Next[index]
+    {
+        out <- Count UTF8[arr, ~, ncount]
+    }{
+        out <- Val[ncount]
+    }
+}
+
+//Fields: Left, Right, plus naked Int32 L Offset, L Length, Depth and Length
+//NOTE(review): looks like a tree node joining two sub-strings - confirm; no worker in this changeset builds one
+Blueprint String
+{
+    Left
+    Right
+    L Offset(Int32,Naked)
+    L Length(Int32,Naked)
+    Depth(Int32,Naked)
+    Length(Int32,Naked)
+}
+
+//Builds a Base String around the Array in: Buffer holds in itself and
+//Length holds the character count reported by Count UTF8
+String[in(Array):out(Base String)]
+{
+    chars <- Count UTF8[in, 0, 0]
+    base <- [Build[Base String()]]Buffer <<[in]
+    out <- [base]Length <<[chars]
+}
+
+//Test driver: prints the Count UTF8 result for 13 appended bytes
+//(7 by the rules of Count UTF8/UTF8 Expect, assuming Array[1] starts empty - TODO confirm)
+Main[]
+{
+    //complete sequences of 1, 2, 3 and 4 bytes
+    one <- [Array[1]]Append[36u8]
+    two <- [[one]Append[194u8]]Append[162u8]
+    three <- [[[two]Append[236u8]]Append[130u8]]Append[172u8]
+    four <- [[[[three]Append[240u8]]Append[164u8]]Append[173u8]]Append[162u8]
+    //a 2-byte lead followed by an ASCII byte, then a lone continuation byte
+    broken <- [[four]Append[194u8]]Append[36u8]
+    text <- [broken]Append[162u8]
+    Print[Count UTF8[text, 0, 0]]
+}
+