Thursday, November 04, 2010

How to extract data from MS Word dropdown fields

When converting a Word form to a web form, I was looking for a way to extract the items of the dropdown fields found in the Word document, to avoid typing them manually into my web app. I found that you can access the field definitions from C# by using the Word Interop COM library. The following program extracts each item in the dropdown boxes and prints it to the screen:

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using Microsoft.Office.Interop.Word;

 

namespace WordDataExtractTest

{

    /// <summary>
    /// Console utility that lists the entries of every drop-down form field
    /// found in a Word document, using the Word Interop COM library.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the document through Word automation, prints each drop-down
        /// field's name followed by its non-blank list entries, shuts Word down,
        /// and then waits for a line of console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Word runs as a separate process, so hand it an absolute path instead of
            // relying on it to resolve a relative one against this program's directory.
            var strDocName = System.IO.Path.GetFullPath("doc_file_with_form.doc");

            Application wordapp = new Application();
            Document doc = null;
            try
            {
                doc = wordapp.Documents.Open(strDocName);

                foreach (FormField form_field in doc.FormFields)
                {
                    // Only drop-down fields carry a list of entries.
                    if (form_field.Type != WdFieldType.wdFieldFormDropDown)
                    {
                        continue;
                    }

                    AddLine(form_field.Name);

                    foreach (ListEntry entry in form_field.DropDown.ListEntries)
                    {
                        // Skip blank placeholder entries.
                        if (!string.IsNullOrWhiteSpace(entry.Name))
                        {
                            AddLine(" ," + entry.Name);
                        }
                    }
                }
            }
            finally
            {
                // Always release the document and quit Word; otherwise a background
                // WINWORD.EXE instance keeps running after this program ends.
                // The casts select the Close/Quit methods rather than the
                // identically named events on the Document/Application interfaces.
                if (doc != null)
                {
                    ((_Document)doc).Close(WdSaveOptions.wdDoNotSaveChanges);
                }

                ((_Application)wordapp).Quit(WdSaveOptions.wdDoNotSaveChanges);
            }

            // Keep the console window open until the user presses Enter.
            Console.In.ReadLine();
        }

        /// <summary>
        /// Writes a single line of text to standard output.
        /// </summary>
        /// <param name="line">The text to print.</param>
        public static void AddLine(string line)
        {
            Console.Out.WriteLine(line);
        }
    }

}