view string.rhope @ 75:0083b2f7b3c7

Partially working implementation of List. Modified build scripts to allow use of other compilers. Fixed some bugs involving method implementations on different types returning different numbers of outputs. Added Fold to the 'builtins' in the compiler.
author Mike Pavone <pavone@retrodev.com>
date Tue, 06 Jul 2010 07:52:59 -0400
parents f7bcf3db1342
children a748300a4143
line wrap: on
line source


UTF8 Expect[num,arr,index,count,consumed:out]
{
	//Continues counting characters while inside a multi-byte UTF-8 sequence
	//	num:      continuation bytes still expected for the current sequence
	//	arr:      byte array being scanned
	//	index:    position of the byte to examine
	//	count:    characters counted before the current sequence started
	//	consumed: bytes of the current sequence that have already been read
	//	out:      character count for the whole array once scanning finishes
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//count each byte read so far as a separate character and let
			//Count UTF8 process this byte normally (it will count it as one character)
			out <- Count UTF8[arr, index, [count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				//count each byte read so far as a separate character, then hand this
				//byte back to Count UTF8 so that it can start a new sequence if it is
				//a lead byte instead of being swallowed as a stray character
				out <- Count UTF8[arr, index, [count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//Only reached when a sequence was completed: move on to the next byte if there is one
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

Count UTF8[arr,index,count:out]
{
	//Counts the UTF-8 encoded characters in arr from index onwards
	//	arr:   byte array being scanned
	//	index: position of the byte that starts the next character
	//	count: characters counted so far
	//	out:   count plus the number of characters found from index to the end
	//Malformed bytes are counted as one character each
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{ ncount <- [1i32]+[count] }
		{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at index (for example an empty array): nothing left to count
		out <- Val[count]
	}
	//Only advance once this byte has been counted here; when UTF8 Expect took over
	//it is responsible for producing out
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//String object as built by the String[] constructor in this file:
//	Buffer: the Array passed to the constructor
//	Length: the result of Count UTF8 over that array, stored as a naked Int32
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

String[in(Array):out(String)]
{
	//Builds a String around the byte array in
	//Length is the character count reported by Count UTF8, Buffer is in itself
	chars <- Count UTF8[in, 0, 0]
	out <- [[Build[String()]]Buffer <<[in]]Length <<[chars]
}

Print@String[string:out]
{
	//Writes the string's buffer followed by a single byte 10 (line feed) to
	//descriptor 1 (presumably standard output)
	//out is whatever the second write call returns
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	buf <- [string]Buffer >>
	write[1i32, buf, Int64[[buf]Length >>]]
	{ out <- write[1i32, [Array[]]Append[10u8], 1i64] }
}

Get Char[:out]
{
	//Reads one byte from descriptor 0 (presumably standard input) into a
	//one element array and returns it wrapped in a String
	//The first output of read is ignored; the second output is given to String[]
	buf <- [Array[]]Set[0, 0u8]
	read[0, buf, 1i64]
	{}
	{ out <- String[~] }
}

_String to Int32[current,index,array:out]
{
	//Accumulates decimal digits from array starting at index
	//	current: value accumulated so far
	//	index:   position of the next byte to examine
	//	array:   bytes being parsed
	//	out:     current once a byte outside 48-57 ('0'-'9') or the end of array is reached
	digit <- [array]Index[index]
	{
		If[[digit]>[57u8]]
		{
			//Above '9': stop here
			out <- Val[current]
		}{
			If[[digit]<[48u8]]
			{
				//Below '0': stop here
				out <- Val[current]
			}{
				value <- Int32[[digit]-[48u8]]
				out <- _String to Int32[[[current]*[10i32]]+[value], [index]+[1], array]
			}
		}
	}{
		//Ran off the end of the array
		out <- Val[current]
	}
}

Int32@String[string:out]
{
	//Converts the leading decimal digits of string to an Int32
	//A first byte of 45 ('-') negates the result; a string with no bytes gives 0
	bytes <- [string]Buffer >>
	first <- [bytes]Index[0]
	{
		If[[first]=[45u8]]
		{
			//Skip the sign and negate what follows
			magnitude <- _String to Int32[0i32, 1, bytes]
			out <- [0i32]-[magnitude]
		}{
			out <- _String to Int32[0i32, 0, bytes]
		}
	}{
		out <- 0i32
	}
}

Flatten@String[string:out]
{
	//Returns string unchanged
	out <- string
}

Slice@String[string,slicepoint:left,right]
{
	//Splits string at slicepoint into two String Slice objects that refer back to string
	//	left:  offset 0, length slicepoint
	//	right: offset slicepoint, length (Length of string - slicepoint)
	//TODO: Handle invalid slicepoints
	remaining <- [[string]Length >>]-[slicepoint]
	left <- String Slice[string, 0i32, slicepoint]
	right <- String Slice[string, slicepoint, remaining]
}

Byte@String[string,index:out,invalid]
{
	//Looks up the byte at index in the string's buffer
	//Both outputs come straight from Index on the buffer: out is its first
	//output, invalid its second
	bytes <- [string]Buffer >>
	out,invalid <- [bytes]Index[index]
}

Length@String[string:out]
{
	//Returns the value stored in the Length field (set from Count UTF8 by String[])
	out <- [string]Length >>
}

_=String[left,right,index:out]
{
	//Compares left and right byte by byte starting at index
	//	out: Yes when both run out of bytes at the same position with every byte
	//	     equal, otherwise No
	lbyte <- [left]Byte[index]
	{
		[right]Byte[index]
		{
			//The second output of If supplies the result when the bytes differ
			,out <- If[[lbyte]=[~]]
			{
				out <- _=String[left,right,[index]+[1]]
			}
		}{
			//right ended before left
			out <- No
		}
	}{
		//left has no more bytes; the strings only match if right has none either
		[right]Byte[index]
		{
			out <- No
		}{
			out <- Yes
		}
	}
}

=@String[left,right:out]
{
	//Equality test: the Length of both sides must match before any bytes are compared
	//When the lengths differ the second output of If becomes the result
	llen <- [left]Length
	rlen <- [right]Length
	,out <- If[[llen]=[rlen]]
	{
		out <- _=String[left,right,0]
	}
}

//View onto part of another string, as built by the String Slice[] constructor:
//	Source: the object being sliced (Slice@String passes the String itself)
//	Offset: where the slice starts within Source, stored as a naked Int32
//	Length: extent of the slice, stored as a naked Int32
//NOTE(review): whether Offset and Length are measured in bytes or characters
//is not established by this file - confirm before relying on either
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)	
	Length(Int32,Naked)
}

String Slice[source,offset,length:out(String Slice)]
{
	//Builds a String Slice with its Source, Offset and Length fields set from the inputs
	blank <- Build[String Slice()]
	sourced <- [blank]Source <<[source]
	placed <- [sourced]Offset <<[offset]
	out <- [placed]Length <<[length]
}