I've been reading that Vista adds its tagging data directly to the file (here and here). I tagged some JPEGs and then opened them up in a text editor to see what the XMP data would look like. Then I decided to write some quick and dirty code to read the metadata Windows Vista adds to photos. The code opens the file and reads it line-by-line until it reaches the XMP section. Then it pulls out the Title, Subject, Comments, Rating, and Tags that Vista Photo Gallery adds.
/// <summary>
/// Reads the XMP metadata block that Windows Vista Photo Gallery embeds in an
/// image file and exposes the title, subject, comments, ratings and tags.
/// </summary>
public class VistaMetaExtractor
{
    private const string XmpStartTag = "<xmp:xmpmeta";
    private const string XmpEndTag = "</xmp:xmpmeta>";

    // Matches namespace declarations of the form
    // xmlns:prefix##="http://www.w3.org/2000/xmlns/" so they can be rewritten
    // before parsing. NOTE(review): presumably the XML parser refuses to bind a
    // prefix to the reserved xmlns namespace URI - confirm against a real file.
    private static readonly System.Text.RegularExpressions.Regex ReservedNamespaceDeclaration =
        new System.Text.RegularExpressions.Regex(
            @"xmlns:prefix(?:(\d{1,3}))=""http://www.w3.org/2000/xmlns/""");

    /// <summary>
    /// Extracts the Vista photo metadata stored in the XMP section of a file.
    /// </summary>
    /// <param name="filename">Path of the image file to inspect.</param>
    /// <returns>
    /// The populated <see cref="VistaMetaInfo"/>, or <c>null</c> when the file
    /// contains no complete <c>xmp:xmpmeta</c> element.
    /// </returns>
    /// <exception cref="XmlException">The XMP section is not well-formed XML.</exception>
    public static VistaMetaInfo GetMetaInfo(string filename)
    {
        // Find XMP data in file (it might be faster to read the entire file
        // into memory for files under 10MB).
        string xmpData = FindStringInFile(filename, XmpStartTag, XmpEndTag);
        if (string.IsNullOrEmpty(xmpData))
        {
            return null;
        }

        // Change namespace definitions (i.e. xmlns:prefix##="http://www.w3.org/2000/xmlns/").
        xmpData = ReservedNamespaceDeclaration.Replace(
            xmpData, @"xmlns:prefix$1=""http://randomurl.org""");

        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.LoadXml(@"<?xml version=""1.0""?>" + xmpData);

        // Register the prefixes used by the XPath queries below.
        XmlNamespaceManager nsMan = new XmlNamespaceManager(xmlDocument.NameTable);
        nsMan.AddNamespace("xmp", "http://ns.adobe.com/xap/1.0/");
        nsMan.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        nsMan.AddNamespace("MicrosoftPhoto", "http://ns.microsoft.com/photo/1.0");
        nsMan.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
        nsMan.AddNamespace("tiff", "http://ns.adobe.com/tiff/1.0/");
        nsMan.AddNamespace("exif", "http://ns.adobe.com/exif/1.0/");

        VistaMetaInfo metaInfo = new VistaMetaInfo();
        metaInfo.FileInfo = new FileInfo(filename);

        // TAGS
        XmlNodeList tagNodes = xmlDocument.SelectNodes(
            "//rdf:RDF/rdf:Description/dc:subject/rdf:Bag/rdf:li", nsMan);
        string[] tags = new string[tagNodes.Count];
        for (int i = 0; i < tagNodes.Count; i++)
        {
            tags[i] = tagNodes[i].InnerText;
        }
        metaInfo.Tags = tags;

        // TITLE
        metaInfo.Title = ReadText(
            xmlDocument, "//rdf:RDF/rdf:Description/dc:title/rdf:Alt/rdf:li", nsMan);

        // SUBJECT
        metaInfo.Subject = ReadText(
            xmlDocument, "//rdf:RDF/rdf:Description/dc:description/rdf:Alt/rdf:li", nsMan);

        // COMMENTS
        metaInfo.Comments = ReadText(
            xmlDocument, "//rdf:RDF/rdf:Description/exif:UserComment/rdf:Alt/rdf:li", nsMan);

        // RATING
        metaInfo.VistaRating = ReadInt(
            xmlDocument, "//rdf:RDF/rdf:Description/MicrosoftPhoto:Rating", nsMan);

        // STARS
        metaInfo.Rating = ReadInt(
            xmlDocument, "//rdf:RDF/rdf:Description/xmp:Rating", nsMan);

        return metaInfo;
    }

    /// <summary>
    /// Returns the inner text of the first node matching <paramref name="xpath"/>,
    /// or an empty string when nothing matches.
    /// </summary>
    private static string ReadText(XmlNode root, string xpath, XmlNamespaceManager nsMan)
    {
        XmlNode node = root.SelectSingleNode(xpath, nsMan);
        return (node != null) ? node.InnerText : "";
    }

    /// <summary>
    /// Returns the first node matching <paramref name="xpath"/> parsed as an
    /// integer, or 0 when the node is missing or does not hold an integer.
    /// </summary>
    private static int ReadInt(XmlNode root, string xpath, XmlNamespaceManager nsMan)
    {
        int value;
        string text = ReadText(root, xpath, nsMan);

        // The value is machine-written, so parse it culture-independently and
        // fall back to 0 (the same default as a missing node) instead of throwing.
        bool parsed = int.TryParse(
            text,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
        return parsed ? value : 0;
    }

    /// <summary>
    /// Scans a file line by line and returns the first span of text that begins
    /// with <paramref name="startString"/> and ends with <paramref name="endString"/>.
    /// </summary>
    /// <param name="filename">Path of the file to scan.</param>
    /// <param name="startString">Text marking the beginning of the span (included in the result).</param>
    /// <param name="endString">Text marking the end of the span (included in the result).</param>
    /// <returns>
    /// The delimited text with line breaks normalized to '\n', or
    /// <see cref="string.Empty"/> when no complete span exists.
    /// </returns>
    private static string FindStringInFile(string filename, string startString, string endString)
    {
        System.Text.StringBuilder output = new System.Text.StringBuilder();
        bool inString = false;

        // The using block closes the reader even if reading throws.
        using (StreamReader sr = new StreamReader(filename))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                int searchFrom = 0;

                if (!inString)
                {
                    // Ordinal search: the surrounding data is mostly binary, so
                    // culture-sensitive matching is neither needed nor reliable.
                    int startIndex = line.IndexOf(startString, StringComparison.Ordinal);
                    if (startIndex < 0)
                    {
                        continue;
                    }

                    inString = true;
                    line = line.Substring(startIndex);

                    // The end marker may sit on the same line as the start
                    // marker, so keep scanning this line past the start marker.
                    searchFrom = startString.Length;
                }

                int endIndex = line.IndexOf(endString, searchFrom, StringComparison.Ordinal);
                if (endIndex >= 0)
                {
                    output.Append(line, 0, endIndex + endString.Length);
                    return output.ToString();
                }

                // ReadLine strips the line terminator; restore one so tokens on
                // adjacent lines are not glued together.
                output.Append(line).Append('\n');
            }
        }

        // The start marker was never found, or the span was never terminated.
        return string.Empty;
    }
}

/// <summary>
/// Holds the metadata read from one image file by <see cref="VistaMetaExtractor"/>.
/// </summary>
public class VistaMetaInfo
{
    private FileInfo _fileInfo;
    private int _rating;
    private int _vistaRating;
    private string _title;
    private string _comments;
    private string _subject;
    private string[] _tags;

    /// <summary>The file the metadata was read from.</summary>
    public FileInfo FileInfo
    {
        get { return _fileInfo; }
        set { _fileInfo = value; }
    }

    /// <summary>Value of the <c>xmp:Rating</c> element (0 when absent).</summary>
    public int Rating
    {
        get { return _rating; }
        set { _rating = value; }
    }

    /// <summary>Value of the <c>MicrosoftPhoto:Rating</c> element (0 when absent).</summary>
    public int VistaRating
    {
        get { return _vistaRating; }
        set { _vistaRating = value; }
    }

    /// <summary>First <c>dc:title</c> entry (empty when absent).</summary>
    public string Title
    {
        get { return _title; }
        set { _title = value; }
    }

    /// <summary>First <c>dc:description</c> entry (empty when absent).</summary>
    public string Subject
    {
        get { return _subject; }
        set { _subject = value; }
    }

    /// <summary>First <c>exif:UserComment</c> entry (empty when absent).</summary>
    public string Comments
    {
        get { return _comments; }
        set { _comments = value; }
    }

    /// <summary>All <c>dc:subject</c> entries, in document order.</summary>
    public string[] Tags
    {
        get { return _tags; }
        set { _tags = value; }
    }
}
It works pretty well and seems fast enough to make a simple gallery application.
You should use the Windows Imaging Component APIs to access the metadata. They are much easier to use. Here is a link to more information:
http://windowssdk.msdn.microsoft.com/en-us/library/ms735422.aspx
Adam, thanks for the tip. I was working on a quick web-based photo gallery in ASP.NET. I would like to use the WIC APIs, but it looks like it’d be harder to use them to do basic web tasks. I’ll check it out though. Thanks!
Hello John,
There is a bug in your implementation. This code only works if there is no line feed between the start and end tags.
It’s better to load the whole file into a buffer and walk through it.
Best regards…
@Daniel, thanks for the tip about the line feed. I don’t however agree that you should load the entire file. It’s better to only load what you need. The XMP data is in the first few kilobytes and images these days are many megabytes. Loading all that image data would be a massive memory and resource drain. Still though, reading the file as text and relying on line feeds was probably a bad idea. Using a buffer and converting to text on the fly would be better. Thanks again for the tip!
I have used your code and have a small fix so it works whether the xmp is on one line or formatted:
[code]
/// <summary>
/// Scans a file line by line and returns the first span of text that begins
/// with <paramref name="startString"/> and ends with <paramref name="endString"/>,
/// whether the span sits on a single line or is spread over several lines.
/// </summary>
/// <param name="filename">Path of the file to scan.</param>
/// <param name="startString">Text marking the beginning of the span (included in the result).</param>
/// <param name="endString">Text marking the end of the span (included in the result).</param>
/// <returns>
/// The delimited text with line breaks normalized to '\n', or
/// <see cref="string.Empty"/> when no complete span exists.
/// </returns>
private static string FindStringInFile(string filename, string startString, string endString)
{
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    bool start = false;

    // The using block closes the reader even if reading throws.
    using (StreamReader sr = new StreamReader(filename))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            int searchFrom = 0;

            if (!start)
            {
                // Only look for the start marker until it has been found once;
                // a later occurrence must not truncate the collected text.
                int startIndex = line.IndexOf(startString, StringComparison.Ordinal);
                if (startIndex < 0)
                {
                    continue;
                }

                start = true;
                line = line.Substring(startIndex);

                // Search for the end marker only AFTER the start marker, so an
                // end marker earlier on the same line cannot produce a bogus
                // (zero or negative) length.
                searchFrom = startString.Length;
            }

            int endIndex = line.IndexOf(endString, searchFrom, StringComparison.Ordinal);
            if (endIndex >= 0)
            {
                // We have the whole span; no need to read any further.
                output.Append(line, 0, endIndex + endString.Length);
                return output.ToString();
            }

            // End marker not on this line: keep the line and move on.
            // ReadLine strips the terminator, so restore a line break.
            output.Append(line).Append('\n');
        }
    }

    // The start marker was never found, or the span was never terminated.
    return string.Empty;
}
[/code]
Thanks John and Steve. Great post. John’s code compiled and read the metadata as is, then Steve’s addition worked as well.
Best Regards,
Glenn
Developer of DBGallery: Photo DATAbase System
(http://grrsystems.com/DBGallery)