Mercurial > repos > rhope
view string.rhope @ 77:a748300a4143
Some untested progress on String
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Thu, 08 Jul 2010 09:57:16 -0400 |
parents | f7bcf3db1342 |
children | 4d5ea487f810 |
line wrap: on
line source
//Continues scanning a multi-byte UTF-8 sequence whose lead byte has already been seen.
//	num:      continuation bytes still expected
//	arr:      byte array being scanned
//	index:    index of the byte to examine
//	count:    characters counted before this sequence started
//	consumed: bytes of this sequence examined so far (lead byte included)
//	out:      character count for the whole array once scanning finishes
//A malformed sequence is counted as one character per byte consumed.
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete; count it as a single character
					ncount <- [1i32]+[count]
				}{
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//Only reached by the branches above that finished a character (or an error run)
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Counts the characters in a UTF-8 byte array.
//	arr:   byte array being scanned
//	index: index of the byte to examine
//	count: characters counted so far
//	out:   total character count
//Bytes that cannot start a sequence are counted as one character each.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Single byte (ASCII) character
			ncount <- [1i32]+[count]
		}{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at this index (e.g. an empty array): nothing more to count
		out <- count
	}
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Flat string: a byte buffer plus its cached length in characters.
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

//Builds a String from a byte array; Length is computed with Count UTF8.
String@Array[in:out(String)]
{
	//Seed the count with 0i32 so the empty-array path of Count UTF8 yields an Int32
	out <- [[Build[String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0i32]]
}

//Writes the string's buffer followed by a single byte 10 (line feed) using
//write on descriptor 1 (presumably standard output - confirm against the runtime).
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	//NOTE(review): this uses "Length >>" on the buffer while Byte Length@String
	//uses "Length" on the same value; confirm which form Array supports
	write[1i32, [string]Buffer >>, Int64[[[string]Buffer >>]Length >>]]
	{
		out <- write[1i32, [Array[]]Append[10u8], 1i64]
	}
}

//Reads one byte with read on descriptor 0 (presumably standard input) into a
//one element array and wraps the second output of read in a String.
Get Char[:out]
{
	read[0, [Array[]]Set[0, 0u8], 1i64] {}
	{
		out <- String[~]
	}
}

//Accumulates decimal digits (bytes 48..57) from array starting at index.
//Stops at the first non-digit byte or at the end of the array.
//	current: value accumulated so far
//	out:     final accumulated value
_String to Int32[current,index,array:out]
{
	char <- [array]Index[index]
	{
		If[[char]<[48u8]]
		{
			out <- Val[current]
		}{
			If[[char]>[57u8]]
			{
				out <- Val[current]
			}{
				out <- _String to Int32[[[current]*[10i32]]+[Int32[[char]-[48u8]]], [index]+[1], array]
			}
		}
	}{
		out <- Val[current]
	}
}

//Parses the leading decimal digits of a string, honouring a leading '-' (byte 45).
//An empty buffer yields 0i32.
Int32@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i32]-[_String to Int32[0i32, 1, buf]]
		}{
			out <- _String to Int32[0i32, 0, buf]
		}
	}{
		out <- 0i32
	}
}

//A String is already flat, so it is returned unchanged.
Flatten@String[string:out]
{
	out <- string
}

//Checks the continuation bytes that follow a multi-byte lead byte.
//	buff:     byte array being scanned
//	cur:      byte index of the next byte to examine
//	expected: continuation bytes still required
//	outcur:   populated only when every expected continuation byte was present;
//	          the byte index just past the sequence
//	outboff:  populated only when the sequence is malformed; the number of bytes
//	          consumed after the lead byte (the offending byte included, or none
//	          if the buffer ended), matching the per-byte counting in UTF8 Expect
_CPOff to BOff[buff,cur,expected:outcur,outboff]
{
	If[[expected]=[0i32]]
	{
		//Sequence complete
		outcur <- cur
	}{
		byte <- [buff]Index[cur]
		{
			If[[byte]<[128u8]]
			{
				//Error: ASCII byte where a continuation byte was expected
				outboff <- 1i32
			}{
				If[[byte]<[192u8]]
				{
					//Valid continuation byte; check the remainder of the sequence
					outcur <- _CPOff to BOff[buff, [cur]+[1i32], [expected]-[1i32]] {}
					{
						//Remainder was malformed: this byte is part of the error run
						outboff <- [~]+[1i32]
					}
				}{
					//Error: too high to be a continuation byte
					outboff <- 1i32
				}
			}
		}{
			//Error: buffer ended in the middle of a multi-byte sequence
			outboff <- 0i32
		}
	}
}

//Converts a character (code point) offset into a byte offset.
//	buff:  byte array being scanned
//	cur:   characters counted so far
//	boff:  byte offset of the next unread byte
//	cpoff: target character offset
//	out:   byte offset at which character cpoff starts
//Characters are counted the same way Count UTF8 counts them. No output is
//produced when cpoff is past the end of the buffer or falls inside a malformed
//run that is consumed in one step (see the TODO in Slice@String).
CPOff to BOff[buff,cur,boff,cpoff:out]
{
	If[[cur]=[cpoff]]
	{
		out <- boff
	}{
		//The buffer is addressed by byte offset, not by character count
		byte <- [buff]Index[boff]
		If[[byte]<[128u8]]
		{
			nboff <- [boff]+[1i32]
			ncur <- [cur]+[1i32]
		}{
			If[[byte]<[192u8]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				nboff <- [boff]+[1i32]
				ncur <- [cur]+[1i32]
			}{
				If[[byte]<[224u8]]
				{
					expect <- 1i32
				}{
					If[[byte]<[240u8]]
					{
						expect <- 2i32
					}{
						If[[byte]<[245u8]]
						{
							expect <- 3i32
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							nboff <- [boff]+[1i32]
							ncur <- [cur]+[1i32]
						}
					}
				}
				Val[expect]
				{
					_CPOff to BOff[buff, [boff]+[1i32], expect]
					{
						//Well formed: the whole sequence is one character
						nboff <- Val[~]
						ncur <- [cur]+[1i32]
					}{
						//Malformed: the lead byte and every byte consumed after it
						//each count as a separate character
						nboff <- [[boff]+[1i32]]+[~]
						ncur <- [[cur]+[1i32]]+[~]
					}
				}
			}
		}
		//Argument order follows the signature: buff, cur, boff, cpoff
		out <- CPOff to BOff[buff, ncur, nboff, cpoff]
	}
}

//Splits a string at a character offset into two String Slice views.
//	left:  the first slicepoint characters
//	right: the remaining characters
Slice@String[string,slicepoint:left,right]
{
	//TODO: Handle invalid slicepoints
	sliceoffset <- CPOff to BOff[[string]Buffer >>, 0i32, 0i32, slicepoint]
	left <- String Slice[string, 0i32, slicepoint, sliceoffset]
	right <- String Slice[string, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Buffer >>]Length]-[sliceoffset]]
}

//Fetches the byte at a byte index; both outputs come straight from Index on the buffer.
Byte@String[string,index:out,invalid]
{
	out,invalid <- [[string]Buffer >>]Index[index]
}

//Length in characters (the cached Length field).
Length@String[string:out]
{
	out <- [string]Length >>
}

//Byte by byte comparison starting at index.
//out is Yes once left has no byte at index; otherwise it is the false output
//of the first failing byte comparison.
_=String[left,right,index:out]
{
	[left]Byte[index]
	{
		,out <- If[[~]=[[right]Byte[index]]]
		{
			out <- _=String[left,right,[index]+[1]]
		}
	}{
		out <- Yes
	}
}

//Equality for any pair of values that provide Byte Length and Byte.
Eq String[left,right:out]
{
	//_=String compares bytes and stops when left runs out, so the pre-check has
	//to be on byte length; equal character counts do not imply equal byte counts
	,out <- If[[[left]Byte Length] = [[right]Byte Length]]
	{
		out <- _=String[left,right,0]
	}
}

=@String[left,right:out]
{
	out <- Eq String[left,right]
}

//Length of the underlying buffer in bytes.
Byte Length@String[string:out]
{
	out <- [[string]Buffer >>]Length
}

//Appending builds a String Cat node rather than copying bytes.
Append@String[left,right:out]
{
	out <- String Cat[left,right]
}

//View onto part of another string value.
//	Source:  the string being viewed
//	Offset:  byte offset of the view within Source
//	Length:  length of the view in characters
//	ByteLen: length of the view in bytes
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)
	Length(Int32,Naked)
	ByteLen(Int32,Naked)
}

//Builds a String Slice from its four field values.
String Slice[source,offset,length,bytelen:out(String Slice)]
{
	out <- [[[[Build[String Slice()]]Source <<[source]]Offset <<[offset]]Length <<[length]]ByteLen <<[bytelen]
}

//Fetches a byte relative to the start of the slice.
//Indices at or beyond ByteLen go to the invalid output.
Byte@String Slice[string,index:out,invalid]
{
	,invalid <- If[[index]<[[string]ByteLen >>]]
	{
		out,invalid <- [[string]Source >>]Byte[[index]+[[string]Offset >>]]
	}
}

Byte Length@String Slice[string:out]
{
	out <- [string]ByteLen >>
}

=@String Slice[left,right:out]
{
	out <- Eq String[left,right]
}

//Appends count bytes of the buffer, starting at byte index offset, to dest.
_Flatten@String[string,dest,offset,count:out]
{
	If[count]
	{
		out <- [string]_Flatten[[dest]Append[ [[string]Buffer >>]Index[offset] ], [offset]+[1i32], [count]-[1i32]]
	}{
		out <- dest
	}
}

//Delegates to the source, shifting offset by the slice's own Offset.
_Flatten@String Slice[string,dest,offset,count:out]
{
	out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count]
}

//Copies the viewed bytes into a fresh array and builds a flat String from it.
Flatten@String Slice[string:out]
{
	out <- String[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ]
}

Append@String Slice[left,right:out]
{
	out <- String Cat[left,right]
}

//Concatenation node holding both operands plus their combined lengths.
Blueprint String Cat
{
	Left
	Right
	Length
	ByteLen
}

//Builds a String Cat; Length and ByteLen are the sums of the operands' values.
String Cat[left,right:out]
{
	out <- [[[[Build[String Cat()]
		]Left <<[left]
		]Right <<[right]
		]Length <<[ [[left]Length]+[[right]Length] ]
		]ByteLen <<[ [[left]Byte Length]+[[right]Byte Length] ]
}

Append@String Cat[left,right:out]
{
	out <- String Cat[left,right]
}

//Fetches a byte from Left when index is inside it, otherwise from Right with
//the index rebased; indices beyond Right go to the invalid output.
Byte@String Cat[string,index:out,invalid]
{
	leftlen <- [[string]Left >>]Byte Length
	If[[index]<[leftlen]]
	{
		out,invalid <- [[string]Left >>]Byte[index]
	}{
		rindex <- [index]-[leftlen]
		,invalid <- If[[rindex]<[[[string]Right >>]Byte Length]]
		{
			out,invalid <- [[string]Right >>]Byte[rindex]
		}
	}
}

Byte Length@String Cat[string:out]
{
	out <- [string]ByteLen >>
}

//NOTE(review): this worker looks unfinished - it reads Left but never assigns
//out. Left as found; confirm the intended implementation before relying on it.
_Flatten@String Cat[string,dest,offset,count:out]
{
	[string]Left >>
}