Mercurial > repos > rhope
view string.rhope @ 75:0083b2f7b3c7
Partially working implementation of List. Modified build scripts to allow use of other compilers. Fixed some bugs involving method implementations on different types returning different numbers of outputs. Added Fold to the 'builtins' in the compiler.
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Tue, 06 Jul 2010 07:52:59 -0400 |
parents | f7bcf3db1342 |
children | a748300a4143 |
line wrap: on
line source
//Examines the byte at index while a multi-byte UTF-8 sequence is in progress
//and then resumes counting the rest of the array.
//	num:      continuation bytes still expected for the current sequence
//	arr:      array of bytes being scanned
//	index:    index of the byte to examine
//	count:    characters counted before the current sequence started
//	consumed: bytes of the current sequence examined so far
//	out:      character count for the whole array
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//ncount is only populated when this call finished (or abandoned) the
	//sequence itself; in that case continue with the next byte, if any
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Counts the characters in an array of UTF-8 encoded bytes.
//	arr:   array of bytes to scan
//	index: index of the byte to examine
//	count: characters counted so far
//	out:   character count for the whole array
//Malformed input is tolerated rather than rejected; see the Error comments
//for how each case is counted.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Single byte (ASCII) character
			ncount <- [1i32]+[count]
		}{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					//Lead byte of a 2 byte sequence
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						//Lead byte of a 3 byte sequence
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							//Lead byte of a 4 byte sequence
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//There is no byte at this index (for instance the array is empty).
		//Without this branch out would never be populated in that case.
		out <- Val[count]
	}
	//Only advance from here when this byte was counted directly; multi-byte
	//sequences are continued by UTF8 Expect
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//A string stored as a buffer of bytes together with its length as
//computed by Count UTF8
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

//Builds a String around the byte array in, computing Length with Count UTF8
String[in(Array):out(String)]
{
	out <- [[Build[String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0]]
}

//Writes the string's buffer to descriptor 1 and then writes a single byte
//with value 10 (line feed). out receives the result of the second write.
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	write[1i32, [string]Buffer >>, Int64[[[string]Buffer >>]Length >>]]
	{
		out <- write[1i32, [Array[]]Append[10u8], 1i64]
	}
}

//Requests 1 byte from descriptor 0 into a one element array and builds a
//String from the value delivered on read's second output.
//NOTE(review): presumably the second output is the filled buffer - confirm
//against the definition of read
Get Char[:out]
{
	read[0, [Array[]]Set[0, 0u8], 1i64]
	{}
	{
		out <- String[~]
	}
}

//Helper for Int32@String. Accumulates decimal digits (byte values 48
//through 57) from array starting at index onto current and stops at the
//first other byte or at the end of the array.
_String to Int32[current,index,array:out]
{
	char <- [array]Index[index]
	{
		If[[char]<[48u8]]
		{
			out <- Val[current]
		}{
			If[[char]>[57u8]]
			{
				out <- Val[current]
			}{
				out <- _String to Int32[[[current]*[10i32]]+[Int32[[char]-[48u8]]], [index]+[1], array]
			}
		}
	}{
		out <- Val[current]
	}
}

//Parses the leading decimal digits of the string as an Int32. A first byte
//of 45 ('-') negates the result. A string with no bytes yields 0.
Int32@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i32]-[_String to Int32[0i32, 1, buf]]
		}{
			out <- _String to Int32[0i32, 0, buf]
		}
	}{
		out <- 0i32
	}
}

//A String is returned unchanged
Flatten@String[string:out]
{
	out <- string
}

//Splits the string at slicepoint into two String Slice values: left covers
//offset 0 with length slicepoint, right covers the remainder
Slice@String[string,slicepoint:left,right]
{
	//TODO: Handle invalid slicepoints
	left <- String Slice[string, 0i32, slicepoint]
	right <- String Slice[string, slicepoint, [[string]Length >>]-[slicepoint]]
}

//Fetches the byte at index from the string's buffer; both outputs come
//straight from Index on the buffer
Byte@String[string,index:out,invalid]
{
	out,invalid <- [[string]Buffer >>]Index[index]
}

//Returns the stored Length field
Length@String[string:out]
{
	out <- [string]Length >>
}

//Helper for =@String. Compares left and right byte by byte starting at
//index and yields Yes once left has no byte at index. On a mismatch out
//receives the second output of If.
_=String[left,right,index:out]
{
	[left]Byte[index]
	{
		,out <- If[[~]=[[right]Byte[index]]]
		{
			out <- _=String[left,right,[index]+[1]]
		}
	}{
		out <- Yes
	}
}

//Equality for strings: the lengths are compared first and the bytes are
//only compared when the lengths match. When the lengths differ out receives
//the second output of If.
=@String[left,right:out]
{
	,out <- If[[[left]Length] = [[right]Length]]
	{
		out <- _=String[left,right,0]
	}
}

//A view onto part of a source string described by an offset and a length
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)
	Length(Int32,Naked)
}

//Builds a String Slice from its three fields
String Slice[source,offset,length:out(String Slice)]
{
	out <- [[[Build[String Slice()]]Source <<[source]]Offset <<[offset]]Length <<[length]
}