Mega Code Archive

 
Categories / XML / XSLT StyleSheet
 

Word count

File: Data.xml

<poem>
  <author>author 1</author>
  <date>1912</date>
  <title>Song</title>
  <stanza>
    <line>line 1</line>
    <line>line 2</line>
    <line>line 3</line>
    <line>line 4</line>
  </stanza>
  <stanza>
    <line>line 5</line>
    <line>line 6</line>
    <line>line 7</line>
    <line>line 8</line>
  </stanza>
  <stanza>
    <line>line 9</line>
    <line>line 10</line>
    <line>line 11</line>
    <line>line 12</line>
  </stanza>
</poem>

File: Transform.xslt

<?xml version="1.0" encoding="iso-8859-1"?>
<!--
  Word-frequency report (XSLT 2.0).

  Takes the string value of the whole input document, splits it into words
  on runs of non-word characters, lower-cases each word, and emits one
  <word> element per distinct word carrying its occurrence count.
  Words are listed most frequent first.
-->
<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:output method="xml" indent="yes" />

  <xsl:template match="/">
    <wordcount>
      <!--
        tokenize() yields a zero-length token whenever the text starts or
        ends with a separator (here: the whitespace around the markup).
        The [. ne ''] predicate drops those tokens so that the empty
        string is not reported as a word.
      -->
      <xsl:for-each-group group-by="."
        select="
          for $w in tokenize(string(.), '\W+')[. ne '']
          return lower-case($w)">
        <!-- Most frequent words first; the sort is stable, so ties stay
             in order of first appearance. -->
        <xsl:sort select="count(current-group())"
          order="descending" />
        <word word="{current-grouping-key()}"
          frequency="{count(current-group())}" />
      </xsl:for-each-group>
    </wordcount>
  </xsl:template>

</xsl:stylesheet>

Output:

<?xml version="1.0" encoding="UTF-8"?>
<wordcount>
   <word word="line" frequency="12"/>
   <word word="1" frequency="2"/>
   <word word="author" frequency="1"/>
   <word word="1912" frequency="1"/>
   <word word="song" frequency="1"/>
   <word word="2" frequency="1"/>
   <word word="3" frequency="1"/>
   <word word="4" frequency="1"/>
   <word word="5" frequency="1"/>
   <word word="6" frequency="1"/>
   <word word="7" frequency="1"/>
   <word word="8" frequency="1"/>
   <word word="9" frequency="1"/>
   <word word="10" frequency="1"/>
   <word word="11" frequency="1"/>
   <word word="12" frequency="1"/>
</wordcount>