Sunday, January 22, 2006

Free Computer Textbooks

Textbook Revolution is a site that keeps track of free books in electronic form and has a section for computer / technology ones.

Electronic books, while they can be useful, are not a substitute for the real thing (which is easier to read and can be read anywhere).


Tags: ,

Friday, January 20, 2006

FireBug (Firefox Extension for Web Developers)

What do you get if you cross the DOM Inspector with the JavaScript console and a JavaScript interpreter? A very useful extension for debugging/analysing web pages in Firefox called FireBug. Features include (more details on each feature can be found on the site):

  • Log DOM Elements With Your Mouse
  • Log Objects From The Command Line
  • Log Objects From Your Web Page Scripts
  • XMLHttpRequest Spy
  • Contextual Error Display
  • Error Status Bar Indicator
  • Error Filtering

With the addition of CSS errors to the JavaScript console (which can't be filtered out), this is a very welcome extension with its error filtering.


Tags: , ,

Sunday, January 15, 2006

Clean Word Html (command line tool)

This command line tool is based on the code from Cleaning Word's Nasty HTML, and has been backported to .NET 1.1. To compile, download Snippet Compiler (the version for .NET 1.1). Then do File > New > Default.cs and clear the contents. Paste in the following code, then click Build > Build Current To File and call it CleanWordHtml. Open the Command Prompt at the location it was saved to and type CleanWordHtml for help.

Edit (18-Jan-06): remove u tags. Not all empty tags were removed. Does not remove empty table cells (as they may be used for column/row layout). Quoted class attributes are removed.

Edit (2-Feb-06): As a side effect of removing u tags, ul tags were also removed. So they are no longer removed. When reading in text from a file, line breaks were not read in, but now they are. You can now drag files onto the application (rather than resorting to the command line).

CleanWordHtml.cs

using System;
using System.Reflection;
using System.Collections.Specialized;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

[assembly: AssemblyTitle("CleanWordHtml")]
[assembly: AssemblyDescription("Cleans up HTML generated by Microsoft Word")]
[assembly: AssemblyVersion("1.0.1.*")]

/// <summary>
/// Command line tool that strips Microsoft Word specific markup from an HTML
/// file and saves the result next to the input as "name.modified.ext".
/// </summary>
public class CleanUp
{
 // Set from the command line switches -mso, -ignorespans and -ignoredivs.
 static bool Mso = false;
 static bool IgnoreSpans = false;
 static bool IgnoreDivs = false;
 
 /// <summary>
 /// Entry point. Accepts either a single argument (the file path) or
 /// "-path file" combined with the optional switches listed in the help text.
 /// Prints the help text when no usable path is supplied.
 /// </summary>
 /// <param name="args">Command line arguments.</param>
 static void Main(string[] args)
 {
  string help = "Cleans up HTML generated by Microsoft Word" + Environment.NewLine + Environment.NewLine
   + "Usage:" + Environment.NewLine
   + "------" + Environment.NewLine
   + "CleanWordHtml \"path to file\"" + Environment.NewLine
   + "CleanWordHtml -path \"path to file\"" + Environment.NewLine + Environment.NewLine
   + "  Other options:" + Environment.NewLine
   + "   -mso (remove only classes generated by word)" + Environment.NewLine
   + "   -ignorespans (don't remove span tags)" + Environment.NewLine
   + "   -ignoredivs (don't remove div tags)";
  
  string filepath = string.Empty;
  if (args.Length == 0 || IsNullOrEmpty(args[0]))
  {
   Console.WriteLine(help);
   return;
  }
  
  if(args.Length == 1)
  {
   // a single argument is always treated as the file path (this is what drag and drop produces)
   filepath = args[0];
  }
  else
  {
   for(int i=0; i<args.Length; i++)
   {
    if(IsOption(args[i], "-path"))
    {
     // -path needs a value after it; if it is the last argument, filepath
     // stays empty and the help text is shown below instead of crashing
     if(i + 1 < args.Length)
     {
      filepath = args[i+1];
     }
    }
    if(IsOption(args[i], "-mso"))
    {
     Mso = true;
    }
    if(IsOption(args[i], "-ignorespans"))
    {
     IgnoreSpans = true;
    }
    if(IsOption(args[i], "-ignoredivs"))
    {
     IgnoreDivs = true;
    }
   }
  }
  if(IsNullOrEmpty(filepath))
  {
   Console.WriteLine(help);
   return;
  }
  // a bare file name is resolved against the current directory
  if (Path.GetFileName(filepath) == filepath)
  {
   filepath = Path.Combine(Environment.CurrentDirectory, filepath);
  }
  if (!File.Exists(filepath))
  {
   Console.WriteLine("File '" + filepath + "' doesn't exist.");
   return;
  }
  string html = ReadAllText(filepath);
  Console.WriteLine("Input html is " + html.Length + " chars");
  html = CleanWordHtml(html);
  html = FixEntities(html);
  // the input file is left untouched; output goes to "<name>.modified<ext>" in the same directory
  filepath = Path.Combine(Path.GetDirectoryName(filepath), Path.GetFileNameWithoutExtension(filepath) + ".modified" + Path.GetExtension(filepath));
  WriteAllText(filepath, html);
  Console.WriteLine("Cleaned html is " + html.Length + " chars. Saved to " + filepath);
 }
 
 /// <summary>
 /// Compares a command line argument with an option name, ignoring case.
 /// The invariant culture is used so the result does not depend on the
 /// user's regional settings.
 /// </summary>
 static bool IsOption(string arg, string option)
 {
  return String.Compare(arg, option, true, System.Globalization.CultureInfo.InvariantCulture) == 0;
 }
 
 /// <summary>
 /// Removes Word specific markup from <paramref name="html"/>: span/div tags
 /// (unless ignored), class and style attributes, comments, the title,
 /// document level and Office tags, empty elements (except table cells) and
 /// v: attributes. Runs of line breaks and of spaces are collapsed to one.
 /// </summary>
 /// <param name="html">The HTML to clean.</param>
 /// <returns>The cleaned HTML.</returns>
 static string CleanWordHtml(string html)
 {
  // patterns whose matches are deleted, applied in the order they are added
  StringCollection sc = new StringCollection();
  if(!IgnoreSpans)
  {
   sc.Add(@"<(/?span|!\[)[^>]*?>");
  }
  if(!IgnoreDivs)
  {
   sc.Add(@"<(/?div|!\[)[^>]*?>");
  }
  if(!Mso)
  {
   // Get rid of classes (the whole attribute value, quoted or not,
   // so that multiple or hyphenated class names leave nothing behind)
   sc.Add(@"\s?class=(""[^""]*""|'[^']*'|[^\s>]+)");
  }
  else
  {
   // Get rid of office classes
   sc.Add(@"\s?class=[""']?Mso\w+[""']?");
  }
  // get rid of unnecessary tag spans (comments and title); *? so that an
  // empty comment/title does not swallow everything up to the next terminator
  sc.Add(@"<!--[\s\S]*?-->");
  sc.Add(@"<title>[\s\S]*?</title>");
  // get rid of inline style (the whole attribute value, e.g. "margin:0cm")
  sc.Add(@"\s?style=(""[^""]*""|'[^']*'|[^\s>]+)");
  // Get rid of unnecessary tags
  sc.Add(@"<(meta|link|/?o:|/?style|/?font|/?st\d|/?head|/?html|body|/?body|!\[)[^>]*?>");
  // Get rid of empty tags (except table cells): the lookahead skips td/th
  // and the backreference requires the closing tag to match the opening one.
  // Note this applies to any empty element, including empty anchors.
  sc.Add(@"<(?!t[dh]\b)([a-z][a-z0-9:]*)\b[^>]*>(&nbsp;)*</\1\s*>");
  // remove bizarre v: element attached to <img> tag
  sc.Add(@"\s+v:\w+=""[^""]+""");
  foreach (string s in sc)
  {
   html = Regex.Replace(html, s, "", RegexOptions.IgnoreCase);
  }
  // collapse extra lines into a single line break (deleting them outright would join adjacent lines)
  html = Regex.Replace(html, @"(\r?\n){2,}", Environment.NewLine);
  // collapse extra spaces into a single space (deleting them outright would join adjacent words)
  html = Regex.Replace(html, @" {2,}", " ");
  // quote unquoted attributes
  //html = Regex.Replace(html, @"(\w+=)(\w+)(?=[ >])", @"$1""$2""", RegexOptions.IgnoreCase);
  return html;
 }
 
 /// <summary>
 /// Replaces curly double quotes and em dashes with their HTML entities.
 /// </summary>
 /// <param name="html">The HTML to process.</param>
 /// <returns>The HTML with the characters replaced.</returns>
 static string FixEntities(string html)
 {
  NameValueCollection nvc = new NameValueCollection();
  nvc.Add("“", "&ldquo;");
  nvc.Add("”", "&rdquo;");
  nvc.Add("—", "&mdash;");
  foreach (string key in nvc.Keys)
  {
   html = html.Replace(key, nvc[key]);
  }
  return html;
 }
 
 /// <summary>
 /// Stand-in for String.IsNullOrEmpty, which .NET 1.1 does not provide.
 /// </summary>
 static bool IsNullOrEmpty(string value)
 {
  return value == null || value.Length == 0;
 }

 /// <summary>
 /// Reads a text file line by line and returns its contents, with every
 /// line (including the last) terminated by Environment.NewLine.
 /// </summary>
 /// <param name="path">Path of the file to read.</param>
 static string ReadAllText(string path)
 {
  StringBuilder sb = new StringBuilder();
  using (StreamReader sr = new StreamReader(path)) 
  {
   String line;
   // ReadLine strips the line terminator, so it is appended again here
   while ((line = sr.ReadLine()) != null) 
   {
    sb.Append(line).Append(Environment.NewLine);
   }
  }
  return sb.ToString();
 }
 
 /// <summary>
 /// Writes <paramref name="contents"/> to <paramref name="path"/> as UTF-8
 /// without a byte order mark, overwriting any existing file.
 /// </summary>
 static void WriteAllText(string path, string contents)
 {
  WriteAllText(path, contents, new UTF8Encoding(false, true));
 }
 
 /// <summary>
 /// Writes <paramref name="contents"/> to <paramref name="path"/> using the
 /// given encoding, overwriting any existing file.
 /// </summary>
 static void WriteAllText(string path, string contents, Encoding encoding)
 {
  using (StreamWriter sw = new StreamWriter(path, false, encoding))
  {
   sw.Write(contents);
  }
 }
}

Friday, January 13, 2006

Free PDF Generators

Adobe Acrobat isn't the only tool able to create PDF documents; there are others, some of them free. Two good ones are:

  • PDFCreator (many features, can be intimidating to less tech-savvy users). Download.
  • PrimoPDF (fewer features than PDFCreator, but easier to use)

Tags: ,

Programmer's Notepad Forums

The developer of Programmer's Notepad has set up some forums where you can discuss the program, ask for help etc. Uses bbPress, a very light-weight forum software by some of the developers of WordPress (which powers pnotepad.org).


Tags: , ,

Wednesday, January 04, 2006

HyperLinks and UserControls (ASP.NET)

If you have a file (/myapp/page.aspx) which references a user control (/myapp/controls/control.ascx) that contains a HyperLink, you find that the link generated is relative to the control, not the page. For example if the following code was in the control:

<asp:HyperLink Text="Foo" NavigateUrl="bar.aspx" runat="server" />

The following output would be generated:

<a href="controls/bar.aspx">Foo</a>

This may not be the desired outcome. You can set the link relative to the server root (by using /myapp/bar.aspx), site root (by using ~/bar.aspx), or even the control itself (../bar.aspx) but if you want to make it relative to the calling page more work is involved. For doing this you can use the Uri class and Request.Url:

<asp:HyperLink id="lnkFoo" Text="Foo" NavigateUrl='<%#(new Uri(Request.Url,"bar.aspx")).AbsolutePath%>' runat="server" />

If the link is in a Repeater (or something else that is bound to data) you do not need the id attribute. However if it is not, you have to add the following to the page load event of the control:

FindControl("lnkFoo").DataBind();

You then end up with a link that works:

<a id="myrepeater_lnkFoo" href="/myapp/bar.aspx">Foo</a>

Tags: ,

Tuesday, January 03, 2006

anchorWrap (JavaScript)

Wraps a node with an anchor (i.e. HyperLink, JavaScript prompt).

// Wraps 'node' in an anchor element, leaving it at the same position in its parent.
// 'node'   - an existing node, or a string holding the id of a node
// 'anchor' - a URL string (a new anchor is created for it), or a precreated anchor
// 'target' - optional name of the target frame for the link
// Returns without doing anything when document.createElement is unavailable,
// the node cannot be found or has no parent, or no anchor is supplied.
function anchorWrap(node,anchor,target)
{
 if(!document.createElement) return;
 // resolve an id to its element
 var element = (typeof(node) == "string") ? document.getElementById(node) : node;
 if(!element || !element.parentNode) return;
 // use the supplied anchor as is, or build one when a URL was given
 var link = anchor;
 if(typeof(anchor) == "string")
 {
  link = document.createElement("a");
  link.href = anchor;
 }
 if(!link) return;
 // an anchor without href (e.g. one that only runs JavaScript) gets "#", so the link is seen
 if(!link.href) link.href = "#";
 if(typeof(target) == "string")
 {
  link.target = target;
 }
 // remember where the element sits before it is moved into the anchor
 var following = element.nextSibling;
 var container = element.parentNode;
 link.appendChild(element);
 // put the anchor where the element used to be
 if(!following)
 {
  container.appendChild(link);
  return;
 }
 container.insertBefore(link,following);
}

Example uses:

var baz, jsanchor;
// wrap the element with id 'foo' in a link to http://webdevel.blogspot.com that opens in a new window
anchorWrap("foo", "http://webdevel.blogspot.com", "_blank");
// wrap the element with id 'bar' in a link to the same URL, opening in the same window
anchorWrap("bar", "http://webdevel.blogspot.com");
// fetch element 'baz' and make its text bold
baz = document.getElementById("baz");
baz.style.fontWeight = "bold";
// build an anchor that only runs JavaScript when clicked
jsanchor = document.createElement("a");
jsanchor.onclick = function()
{
 alert("Hello World");
 return false;
};
// wrap 'baz' with that JavaScript anchor
anchorWrap(baz, jsanchor);

Tags: ,