Thursday, November 27, 2008

Web Stripper

For my Scramble Cheater program, I needed to get a list of all possible words for a dictionary. I found a good website called Word Snipe that listed all the words from the game and ordered them by word length. This resulted in 105,000 words, spread out 50 at a time, on over 2000 pages. I didn't want to have to copy and paste all these words by hand, so I created this program to strip them from the website. It took about 35 minutes for it to download all the words, but it was definitely a lot easier than doing it by hand.

Below is the code. Enjoy!


    public partial class Form1 : Form {
public Form1() {
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e) {
int value = 1;
while (value <= 105351) {
ParseResponse(GetResponse("http://wordsnipe.com/results.php?start=" + value + "&PHPSESSID=1af1f78a57145ebc9c14fd4441fcf077"));
value += 50;

}
}

private void ParseResponse(string htmlResponse) {
int index = 0, endIndex;
string startDiv = "<div class=\"result\">";
int startDivLength = startDiv.Length;

for (int i = 0; i < 50; i++) {
index = htmlResponse.IndexOf(startDiv, index);
index = htmlResponse.IndexOf(">", index + startDivLength) + 1;
endIndex = htmlResponse.IndexOf("</a>", index);
textBox1.AppendText(string.Format("AllWords.Add(\"{0}\");{1}", htmlResponse.Substring(index, endIndex - index), Environment.NewLine));
index = endIndex;
}
}

private string GetResponse(string urlAddress) {
Uri uri = new Uri(urlAddress);
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
request.Method = "GET";
string result = string.Empty;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
using (Stream responseStream = response.GetResponseStream()) {
using (StreamReader readStream = new StreamReader(responseStream, Encoding.UTF8)) {
result = readStream.ReadToEnd();
}
}
}
return result;
}
}



No comments: