diff --git a/src/DocumentLibrary/Document.py b/src/DocumentLibrary/Document.py
index b039905..446c26e 100644
--- a/src/DocumentLibrary/Document.py
+++ b/src/DocumentLibrary/Document.py
@@ -323,7 +323,7 @@
         for p in self.propertyMap():
             if p.get('type', '') in ['string', 'text'] and not p.get('hidden', 0):
                 v = self.getProperty(p['id'])
-                if v: r.append(unicode(v, 'latin-1', 'ignore'))
+                if v: r.append(v)
 
         # Add the titles of the topics assigned to this document
         for t in self.topicMap():
diff --git a/src/DocumentLibrary/DocumentStore.py b/src/DocumentLibrary/DocumentStore.py
index 48a5b65..09858bc 100644
--- a/src/DocumentLibrary/DocumentStore.py
+++ b/src/DocumentLibrary/DocumentStore.py
@@ -390,7 +390,7 @@
         """Deletes a document from the document store"""
 
         document=self.query(id=REQUEST.id)[0]
-        topics=document.topics
+        topics=getattr(document, 'topics', [])
 
         self.manage_delObjects(REQUEST.id)
         self.afterDelete(topics)
diff --git a/src/DocumentLibrary/etools.py b/src/DocumentLibrary/etools.py
index 7d41904..8bb1afc 100755
--- a/src/DocumentLibrary/etools.py
+++ b/src/DocumentLibrary/etools.py
@@ -6,7 +6,7 @@
 import os.path
 import re
 import sha
-from OFS.Image import File
+#from OFS.Image import File
 import sys
 
 search_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
@@ -14,25 +14,37 @@
 tool = "zope-document-library"
 email = "alex@floop.org.uk"
 
-def getMatches(title, authors, date):
+def getMatches(title, authors, date, maxtitlewords=0, filterauthor=0):
     """
       title is the title of the publication,
       authors is a list of surnames (optionally append initials)
       date is the publish date in the form YYYY/MM/DD (MM+DD optional)
+      if maxtitlewords is not 0, only this number of the longest words
+      in the title will be used in the search
      """
     
-    results = []
     terms = []
     if (title != None) and (title != ''):
-        terms.append(title) # should use [ti], but doesn't work as expected
+        if maxtitlewords == 0:
+            terms.append(title) # should use [ti], but doesn't work as expected
+        else:
+            longwords=title.split()
+            longwords.sort(lambda x, y: len(y) - len(x))
+            for word in longwords[:maxtitlewords]:
+                terms.append(word + '[ti]')
     if (authors != None):
         for author in authors:
             if (author != ''):
                 terms.append(author + '[au]')
+    # Always bind first_author (None when authors is None/empty) for getIds().
+    first_author = None
+    if authors:
+        first_author = authors[0]
     if (date != None):
         terms.append(date + '[dp]')
 
     search_term = string.join(terms, ' AND ')
+    print search_term
     params = urllib.urlencode({
         'db': 'pubmed',
         'term': search_term,
@@ -45,13 +57,18 @@
     ids = []
     for id_node in dom.getElementsByTagName('Id'):
         ids.append(id_node.firstChild.data)
-#    webenv = dom.getElementsByTagName('WebEnv')[0].firstChild.data
+    return getIds(ids, filterauthor, first_author)
+
+def getIds(ids, filterauthor=0, first_author=None):
+    results = []
     params = urllib.urlencode({
         'db': 'pubmed',
         'id': string.join(ids,','),
         'report': 'xml',
         'mode': 'text',
-        'rettype': 'abstract'
+        'rettype': 'abstract',
+        'retmax': '100',
+        'dispmax': '100'
         })
     result = urllib.urlopen(fetch_url, params)
 #    result = open('test2.xml')
@@ -67,9 +84,19 @@
         author_nodes = article.getElementsByTagName('Author')
         authors = []
         for author_node in author_nodes:
-            author = author_node.getElementsByTagName('LastName')[0].firstChild.data
-            author = author + " " + author_node.getElementsByTagName('Initials')[0].firstChild.data
-            authors.append(author)
+            try:
+                author = author_node.getElementsByTagName('LastName')[0].firstChild.data
+                author = author + " " + author_node.getElementsByTagName('Initials')[0].firstChild.data
+                authors.append(author)
+            except (IndexError, AttributeError): # no LastName/Initials element, or an empty one
+                pass
+        if (filterauthor != 0) and (first_author != None): # first author in search must be first author in returned list
+            if not authors: # no parsable authors, so the first author cannot match
+                continue
+            lc_author = authors[0].lower()
+            if lc_author.find(first_author.lower()) == -1:
+                continue
+            
         article_info['authors'] = authors
         pubdate_nodes = article.getElementsByTagName('PubDate')
         date = None
@@ -92,15 +119,38 @@
             article_info['abstract'] = article.getElementsByTagName('AbstractText')[0].firstChild.data
         except:
             article_info['abstract'] = ''
+        try:
+            article_info['journal'] = pm_article.getElementsByTagName('MedlineTA')[0].firstChild.data
+        except:
+            article_info['journal'] = ''
+        try:
+            article_info['pages'] = article.getElementsByTagName('MedlinePgn')[0].firstChild.data
+        except:
+            article_info['pages'] = ''
+        try:
+            article_info['volume'] = article.getElementsByTagName('Volume')[0].firstChild.data
+        except:
+            article_info['volume'] = ''
+        try:
+            article_info['issue'] = article.getElementsByTagName('Issue')[0].firstChild.data
+        except:
+            article_info['issue'] = ''
         results.append(article_info)
                 
     return results
 
 #print getMatches('Molecular evolution of CXC chemokines and receptors',
-#                 ['Shields'], None)
+#                 ['Shields'], None, 2)
 
-print getMatches('Regulation of CD27 Expression on Subsets of Mature T-Lymphocytes',
-                 ['Hintzen'], None)
+#print getMatches('Regulation of CD27 Expression on Subsets of Mature T-Lymphocytes',
+#                 ['Hintzen'], None, 2)
+#print getMatches('IL12 therapy and cytokine production by PBMC in chronic Hepatitis C',
+#                 ['Berg'], '2000')
+#print getMatches('cyclic regulation of CD45 isoform expression in a long term human CD4posiCD45RAposi T cell line',
+#                 ['Rothstein'], '1991', 1)
+if __name__ == '__main__': # smoke test: query PubMed only when run as a script, not on import
+    matches = getMatches(None, ['Chang'], '2002', 0)
+    print "%s\n%d" % (matches, len(matches))
 
 file_re = re.compile('([^0-9]*)\s*([0-9]*)\s*(.*)(\.pdf|\.doc)')
 
@@ -126,8 +176,8 @@
         doc_author = mo.group(1)
         doc_year = mo.group(2)
         doc_title = mo.group(3)
-        if doc_title == '':
-            doc_title = os.path.split(filename)[1]
+#        if doc_title == '':
+#            doc_title = os.path.split(filename)[1]
         doc_ext = mo.group(4)
     file = open(filename)
     hash = sha.new(file.read())