Monday, October 03, 2005

getInnerText (JavaScript)

This function gets the inner text of the supplied node. It is recursive, so it includes the inner text of any child nodes. <br /> tags will be converted to line breaks. Text nodes that contain only whitespace can optionally be excluded.

Should work in any W3C DOM level 1 compliant browser.

Simple to use - just do:

// declare with var so the example does not create an implicit global
var myNode = document.getElementById("myNode");
// include whitespace
var text = getInnerText(myNode);
// exclude text nodes that contain only whitespace
var textNoWhiteSpace = getInnerText(myNode,true);

And the script itself:

/**
 * Returns the text contained in a DOM node and all of its descendants.
 *
 * Text nodes contribute their nodeValue, BR elements contribute "\n", and
 * every other node contributes the text gathered from its child nodes.
 *
 * @param {Node} node - the node to read text from
 * @param {boolean} [ignorewhitespace] - when truthy, text nodes consisting
 *   solely of whitespace are skipped at every depth of the tree
 * @returns {string} the collected text ("" if nothing was found)
 */
function getInnerText(node,ignorewhitespace)
{
 var name = node.nodeName;
 // a text node contributes its own value
 if(name === "#text")
 {
  // drop whitespace-only text when the caller asked for it
  if(ignorewhitespace && /^\s+$/.test(node.nodeValue))
  {
   return "";
  }
  return node.nodeValue;
 }
 // a line break becomes \n; nodeName is lowercase in XHTML/XML documents,
 // so compare case-insensitively
 if(typeof name === "string" && name.toUpperCase() === "BR")
 {
  return "\n";
 }
 var text = "";
 // any other node: gather the text of its children, passing the
 // whitespace flag down so nested whitespace is filtered as well
 if(node.hasChildNodes())
 {
  var children = node.childNodes;
  for(var i=0; i<children.length; i++)
  {
   text = text.concat(getInnerText(children[i],ignorewhitespace));
  }
 }
 return text;
}

Tags: ,

1 comment:

Anonymous said...

Great job,
It works pretty well in a majority of browsers.

Thanks,
Vinh