3969: [NarWeb] Narrated Web Site: Newlines and white space are not preserved in note text

Patch by Tim Lyons and Benny Malengier


svn: r15923
This commit is contained in:
Benny Malengier 2010-09-23 21:03:15 +00:00
parent f748668c89
commit 0c5b792c76
8 changed files with 340 additions and 116 deletions

View File

@ -3,7 +3,7 @@
#
# Copyright (C) 2000-2006 Donald N. Allingham
# Copyright (C) 2007-2009 Brian G. Matherly
# Copyright (C) 2009 Benny Malengier <benny.malengier@gramps-project.org>
# Copyright (C) 2009-2010 Benny Malengier <benny.malengier@gramps-project.org>
# Copyright (C) 2010 Peter Landgren
#
# This program is free software; you can redistribute it and/or modify
@ -63,28 +63,35 @@ _WIDTH_IN_CHARS = 72
def reformat_para(para='',left=0,right=72,just=LEFT,right_pad=0,first=0):
if not para.strip():
return "\n"
words = para.split()
lines = []
line = ''
word = 0
end_words = 0
real_left = left+first
while not end_words:
if len(words[word]) > right-real_left: # Handle very long words
line = words[word]
word +=1
if word >= len(words):
end_words = 1
else: # Compose line of words
while len(line)+len(words[word]) <= right-real_left:
line += words[word]+' '
word += 1
alllines = para.split('\n')
for realline in alllines:
words = realline.split()
line = ''
word = 0
end_words = 0
while not end_words:
if not words:
lines.append("\n")
break
if len(words[word]) > right-real_left: # Handle very long words
line = words[word]
word +=1
if word >= len(words):
end_words = 1
break
lines.append(line)
real_left = left
line = ''
else: # Compose line of words
while len(line)+len(words[word]) <= right-real_left:
line += words[word]+' '
word += 1
if word >= len(words):
end_words = 1
break
lines.append(line)
#first line finished, discard first
real_left = left
line = ''
if just==CENTER:
if right_pad:
return '\n'.join(
@ -375,6 +382,8 @@ class AsciiDoc(BaseDoc,TextDoc):
some way. Eg, a textdoc could remove all tags, or could make sure
a link is clickable. AsciiDoc prints the html without handling it
"""
if contains_html:
return
text = str(styledtext)
if format:
#Preformatted note, keep all white spaces, tabs, LF's
@ -390,8 +399,8 @@ class AsciiDoc(BaseDoc,TextDoc):
else:
for line in text.split('\n\n'):
self.start_paragraph(style_name)
line = line.replace('\n',' ')
line = ' '.join(line.split())
#line = line.replace('\n',' ')
#line = ' '.join(line.split())
self.write_text(line)
self.end_paragraph()

View File

@ -3,8 +3,10 @@
#
# Copyright (C) 2000-2006 Donald N. Allingham
# Copyright (C) 2007-2009 Brian G. Matherly
# Copyright (C) 2009 Benny Malengier <benny.malengier@gramps-project.org>
# Copyright (C) 2009-2010 Benny Malengier <benny.malengier@gramps-project.org>
# Copyright (C) 2010 Peter Landgren
# Copyright (C) 2010 Tim Lyons
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -47,7 +49,7 @@ from gui.utils import open_file_with_default_application
import ImgManip
import const
from gen.plug.docgen import BaseDoc, TextDoc, FONT_SANS_SERIF
from libhtmlbackend import HtmlBackend
from libhtmlbackend import HtmlBackend, process_spaces
from libhtml import Html
#------------------------------------------------------------------------
@ -482,10 +484,6 @@ class HtmlDoc(BaseDoc, TextDoc):
"""
text = str(styledtext)
s_tags = styledtext.get_tags()
#FIXME: following split should be regex to match \n\s*\n instead?
markuptext = self._backend.add_markup_from_styled(text, s_tags,
split='\n\n')
self.htmllist += [Html('div', id='grampsstylednote')]
if contains_html:
#just dump the note out as it is. Adding markup would be dangerous
@ -493,25 +491,48 @@ class HtmlDoc(BaseDoc, TextDoc):
self.start_paragraph(style_name)
self.__write_text(text, markup=True)
self.end_paragraph()
elif format == 1:
#preformatted, retain whitespace.
#so use \n\n for paragraph detection
#FIXME: following split should be regex to match \n\s*\n instead?
self.htmllist += [Html('pre', indent=None, inline=True)]
for line in markuptext.split('\n\n'):
self.start_paragraph(style_name)
for realline in line.split('\n'):
self.__write_text(realline, markup=True)
self.htmllist[-1] += Html('br')
else:
s_tags = styledtext.get_tags()
markuptext = self._backend.add_markup_from_styled(text, s_tags,
split='\n')
self.start_paragraph(style_name)
inpara = True
self._empty = 1 # para is empty
# we explicitly set _empty because start and end para do not seem
# to do a very good job at setting them
linenb = 1
# The code is tricky here, because we don't want to start a new para
# at the end of the last line if there is no newline there.
# Instead, we want to just end the current para.
for line in markuptext.split('\n'):
[line, sigcount] = process_spaces(line, format)
if sigcount == 0:
if inpara == False:
# needed for runs of three or more newlines
self.start_paragraph(style_name)
inpara = True
self._empty = 1 # para is empty
self.end_paragraph()
inpara = False
linenb = 1
else:
if inpara == False:
self.start_paragraph(style_name)
inpara = True
self._empty = 1 # para is empty
if linenb > 1:
self.htmllist[-1] += Html('br')
self.__write_text(line, markup=True)
self._empty = 0 # para is not empty
linenb += 1
if inpara == True:
self.end_paragraph()
#end pre element
self.__reduce_list()
elif format == 0:
#flowed
#FIXME: following split should be regex to match \n\s*\n instead?
for line in markuptext.split('\n\n'):
if sigcount == 0:
# if the last line was blank, then as well as outputting the previous para,
# which we have just done,
# we also output a new blank para
self.start_paragraph(style_name)
self.__write_text(line, markup=True)
self._empty = 1 # para is empty
self.end_paragraph()
#end div element
self.__reduce_list()

View File

@ -641,9 +641,11 @@ class LaTeXDoc(BaseDoc, TextDoc):
else:
for line in markuptext.split('\n\n'):
self.start_paragraph(style_name)
self._backend.write(line)
for realline in line.split('\n'):
self._backend.write(realline)
self._backend.write("\\newline\n")
self.end_paragraph()
self._backend.write("\n\\vspace*{0.5cm} \n\end{minipage}\n\n")
self._backend.write("\n\\vspace*{0.5cm} \n\\end{minipage}\n\n")
def write_endnotes_ref(self, text, style_name):
"""

View File

@ -1379,48 +1379,52 @@ class ODFDoc(BaseDoc, TextDoc, DrawDoc):
def write_styled_note(self, styledtext, format, style_name,
contains_html=False):
"""
Convenience function to write a styledtext to the latex doc.
Convenience function to write a styledtext to the ODF doc.
styledtext : assumed a StyledText object to write
format : = 0 : Flowed, = 1 : Preformatted
style_name : name of the style to use for default presentation
contains_html: bool, the backend should not check if html is present.
If contains_html=True, then the textdoc is free to handle that in
some way. Eg, a textdoc could remove all tags, or could make sure
a link is clickable. RTFDoc prints the html without handling it
a link is clickable. ODFDoc prints the html without handling it
"""
text = str(styledtext)
s_tags = styledtext.get_tags()
text = text.replace('&', '\1') # must be the first
text = text.replace('<', '\2')
text = text.replace('>', '\3')
markuptext = self._backend.add_markup_from_styled(text, s_tags)
markuptext = self._backend.add_markup_from_styled(text, s_tags, '\n')
# we need to know if we have new styles to add.
# if markuptext contains : FontColor, FontFace, FontSize ...
# we must prepare the new styles for the styles.xml file.
# We are looking for the following format :
# style-name="([a-zA-Z0-9]*)__([a-zA-Z0-9 ])">
# The first element is the StyleType and the second one is the value
start = 0
while True:
while 1:
m = NewStyle.search(markuptext, start)
if not m:
break
self.StyleList.append(
[m.group(1) + m.group(2), m.group(1), m.group(2)]
)
self.StyleList.append([m.group(1)+m.group(2),
m.group(1),
m.group(2)])
start = m.end()
linenb = 1
self.start_paragraph(style_name)
markuptext = markuptext.replace('\1', '&amp;') # must be the first
markuptext = markuptext.replace('\2', '&lt;')
markuptext = markuptext.replace('\3', '&gt;')
for l, line in enumerate(markuptext.split('\n')):
if l:
self.cntnt.write('<text:line-break/>')
self.cntnt.write(line)
for line in markuptext.split('\n'):
[line, sigcount] = process_spaces(line, format)
if sigcount == 0:
self.end_paragraph()
self.start_paragraph(style_name)
linenb = 1
else:
if ( linenb > 1 ):
self.cntnt.write('<text:line-break/>')
self.cntnt.write(line)
linenb += 1
self.end_paragraph()
def write_text(self, text, mark=None):
@ -1704,3 +1708,48 @@ class ODFDoc(BaseDoc, TextDoc, DrawDoc):
'</draw:text-box>'
)
self.cntnt.write('</draw:frame>\n')
def process_spaces(line, format):
    """
    Function to process spaces in text lines for flowed and pre-formatted notes.

    line : text to process
    format : = 0 : Flowed, = 1 : Preformatted

    Everything from a '<' up to and including the next '>' is treated as
    XML markup and copied to the output untouched.  Outside markup:

    * flowed (format==0): spaces and tabs that precede the first
      significant character are removed, later ones are copied as they
      are (embedded multiple spaces are reduced to one by ODF itself);
    * pre-formatted (format==1): every space or tab is replaced by
      "<text:s/>";
    * any other format value: spaces and tabs are copied unchanged.

    Returns a two-item list: the processed text, and the number of
    significant (i.e. non-XML, non-white-space) chars.
    """
    # The line is scanned one character at a time because blanks inside
    # XML tags must not be replaced; an earlier attempt to do this with a
    # single regex substitution did not always work.
    pieces = []     # output fragments, joined once at the end
    in_xml = False  # True while between '<' and the matching '>'
    sigcount = 0
    for char in line:
        if in_xml:
            if char == '>':
                in_xml = False
            pieces.append(char)
        elif char == '<':
            in_xml = True
            pieces.append(char)
        elif char == " " or char == "\t":
            if format == 1:
                # preformatted, see section 5.1.1 "White-space Characters"
                # of the OpenDocument v1.1 specification
                pieces.append("<text:s/>")
            elif format != 0 or sigcount:
                # flowed text past its leading blanks, or unknown format
                pieces.append(char)
            # else: leading blank of a flowed line, dropped
        else:
            sigcount += 1
            pieces.append(char)
    return ["".join(pieces), sigcount]

View File

@ -227,6 +227,12 @@ class RTFDoc(BaseDoc,TextDoc):
#
#--------------------------------------------------------------------
def end_paragraph(self):
# FIXME: I don't understand why no end paragraph marker is output when
# we are inside a table. Since at least version 3.2.2, this seems to mean that
# there is no new paragraph after the first line of a table entry.
# For example in the birth cell, the first paragraph should be the
# description (21 Jan 1900 in London); if there is a note following this,
# there is no newline between the description and the note.
if not self.in_table:
self.f.write(self.text)
if self.opened:
@ -404,11 +410,11 @@ class RTFDoc(BaseDoc,TextDoc):
self.f.write('\n')
index = index+1
self.f.write('}}\\par\n')
def write_styled_note(self, styledtext, format, style_name,
contains_html=False):
"""
Convenience function to write a styledtext to the latex doc.
Convenience function to write a styledtext to the RTF doc.
styledtext : assumed a StyledText object to write
format : = 0 : Flowed, = 1 : Preformatted
style_name : name of the style to use for default presentation
@ -418,41 +424,31 @@ class RTFDoc(BaseDoc,TextDoc):
a link is clickable. RTFDoc prints the html without handling it
"""
text = str(styledtext)
if format:
# Preformatted note
for line in text.split('\n'):
self.start_paragraph(style_name)
self.write_text(line)
self.start_paragraph(style_name)
linenb = 1
for line in text.split('\n'):
[line, sigcount] = process_spaces(line, format)
if sigcount == 0:
if self.in_table:
# # Add LF when in table as in indiv_complete report
self.write_text('\n')
self.end_paragraph()
else:
firstline = True
for line in text.split('\n\n'):
self.start_paragraph(style_name)
if len(line) > 0:
# Remember first char, can be a LF.
firstchar = line[0]
# Replace all LF's with space and reformat.
line = line.replace('\n',' ')
line = ' '.join(line.split())
# If remembered first char is LF, insert in front of lines
#This takes care of the case with even number of empty lines.
if firstchar == '\n':
line = firstchar + line
#Insert LF's if not first line.
if not firstline:
line = '\n\n' + line
else:
# If odd number of empty lines line will be empty.
line = '\n\n'
linenb = 1
else:
if ( linenb > 1 ):
self.write_text('\\line ')
self.write_text(line)
self.end_paragraph()
firstline = False
self.start_paragraph(style_name)
linenb += 1
# FIXME: I don't understand why these newlines are necessary.
# It may be related to the behaviour of end_paragraph inside tables, and
# write_text converting \n to end paragraph.
# This code prevents the whole document going wrong, but seems to produce an extra
# paragraph mark at the end of each table cell.
if self.in_table:
# # Add LF when in table as in indiv_complete report
self.write_text('\n')
self.end_paragraph()
self.end_paragraph()
def write_endnotes_ref(self,text,style_name):
"""
@ -497,3 +493,40 @@ class RTFDoc(BaseDoc,TextDoc):
self.text += '\\%s' % i
else:
self.text += i
def process_spaces(self, line, format):
    """
    Function to process spaces in text lines for flowed and pre-formatted notes.

    line : text to process
    format : = 0 : Flowed, = 1 : Preformatted

    If the text is flowed (format==0), then leading spaces and tabs are
    removed, and each later run of spaces/tabs is reduced to its first
    character.
    If the text is pre-formatted (format==1), then all spaces are preserved.
    For any other format value spaces and tabs are dropped.

    Note that xml is just treated like any other text, because it will be
    from the original note, and it is just printed, not interpreted.

    Returns a two-item list: the processed text, and the number of
    significant (i.e. non-white-space) chars.
    """
    # NOTE(review): `self` is never used here, and write_styled_note in the
    # same change calls process_spaces(line, format) with two arguments and
    # no instance -- confirm whether this was meant to be a module-level
    # function without `self`.
    pieces = []         # output fragments, joined once at the end
    prev_blank = False  # True when the last character kept was a blank
    sigcount = 0
    for char in line:
        if char == " " or char == "\t":
            if format == 1:
                pieces.append(char)
            elif format == 0 and sigcount and not prev_blank:
                # first blank after some text: keep it, drop the rest of
                # the run
                prev_blank = True
                pieces.append(char)
        else:
            sigcount += 1
            prev_blank = False
            pieces.append(char)
    return ["".join(pieces), sigcount]

View File

@ -1335,10 +1335,12 @@ class CairoDoc(BaseDoc, TextDoc, DrawDoc):
#FIXME: following split should be regex to match \n\s*\n instead?
for line in markuptext.split('\n\n'):
self.start_paragraph(style_name)
#flowed, make normal whitespace go away
line = line.replace('\n',' ')
line = ' '.join(line.split())
self.__write_text(line, markup=True)
#flowed, normal whitespace goes away, but we keep linebreak
lines = line.split('\n')
newlines = []
for singleline in lines:
newlines.append(' '.join(singleline.split()))
self.__write_text('\n'.join(newlines), markup=True)
self.end_paragraph()
def __write_text(self, text, mark=None, markup=False):

View File

@ -46,6 +46,106 @@ from gen.plug.docbackend import DocBackend
from libhtml import Html
from Utils import xml_lang
#------------------------------------------------------------------------
#
# Functions
#
#------------------------------------------------------------------------
def process_spaces(intext, format):
    """
    Function to process spaces in text lines for flowed and pre-formatted notes.

    intext : text to process
    format : = 0 : Flowed, = 1 : Preformatted

    If the text is pre-formatted (format==1), then leading spaces (after
    ignoring XML) are replaced by alternating non-breaking spaces and
    ordinary spaces.  After the first non-space character, single spaces
    are left but multiple spaces are replaced by alternating NBSP and space.
    If the text is flowed (or format has any other value), the text is
    unchanged.

    Everything from a '<' up to and including the next '>' is treated as
    XML and copied through untouched.

    Returns a two-item list: the processed text, and the number of
    significant (i.e. non-xml non-white-space) chars.  Spaces and tabs are
    the characters regarded as white space, so a line holding nothing but
    blanks and/or tags yields a count of 0.
    """
    # States of the scanner
    NORMAL = 1     # ordinary text
    SPACE = 2      # inside a run of spaces, an ordinary space was just output
    NBSP = 3       # inside a run of spaces, "&nbsp;" was just output
    XML = 4        # between '<' and the matching '>'
    SPACEHOLD = 5  # one space seen after text, not yet output

    sigcount = 0
    state = NORMAL
    pieces = []    # output fragments, joined once at the end
    if format == 1:
        # Pre-formatted
        for char in intext:
            if state == XML:
                if char == ">":
                    state = NORMAL
                pieces.append(char)
            elif state == SPACEHOLD:
                # The held space is output now: as "&nbsp; " if it turns
                # out to start a run of spaces, else as a plain space.
                if char == " ":
                    pieces.append("&nbsp; ")
                    state = NORMAL
                else:
                    pieces.append(" " + char)
                    if char == "<":
                        state = XML
                    else:
                        if char != "\t":
                            sigcount += 1
                        state = NORMAL
            elif char == " ":
                # state is NORMAL, SPACE or NBSP
                if state == NORMAL and sigcount:
                    # first space after some text: wait for the next char
                    state = SPACEHOLD
                elif state == NBSP:
                    state = SPACE
                    pieces.append(char)
                else:
                    # leading space, or the space after an ordinary space
                    state = NBSP
                    pieces.append("&nbsp;")
            elif char == "<":
                state = XML
                pieces.append(char)
            else:
                # a tab is white space, so it is output but not counted
                if char != "\t":
                    sigcount += 1
                state = NORMAL
                pieces.append(char)
        if state == SPACEHOLD:
            # the text ended on a single held space: do not lose it
            pieces.append(" ")
    else:
        # format == 0 flowed: text is copied as is, only counting is done
        for char in intext:
            if state == XML:
                if char == '>':
                    state = NORMAL
            elif char == '<':
                state = XML
            elif char != " " and char != "\t":
                sigcount += 1
            pieces.append(char)
    return ["".join(pieces), sigcount]
#------------------------------------------------------------------------
#
# Document Backend class for html pages

View File

@ -60,6 +60,7 @@ from cStringIO import StringIO
from textwrap import TextWrapper
from unicodedata import normalize
from collections import defaultdict
import re
import operator
from decimal import Decimal
@ -104,7 +105,7 @@ from libhtml import Html
# import styled notes from
# src/plugins/lib/libhtmlbackend.py
from libhtmlbackend import HtmlBackend
from libhtmlbackend import HtmlBackend, process_spaces
from libgedcom import make_gedcom_date
from PlaceUtils import conv_lat_lon
@ -488,13 +489,6 @@ class BasePage(object):
# return text of the note to its callers
return text
#################################################
#
# Will produce styled notes for NarrativeWeb by using:
# src/plugins/lib/libhtmlbackend.py
#
#################################################
def styled_note(self, styledtext, format, contains_html=False):
"""
styledtext : assumed a StyledText object to write
@ -507,23 +501,37 @@ class BasePage(object):
return ''
s_tags = styledtext.get_tags()
#FIXME: following split should be regex to match \n\s*\n instead?
markuptext = self._backend.add_markup_from_styled(text, s_tags,
split='\n\n')
split='\n')
htmllist = Html("div", id = "grampsstylednote")
if contains_html:
htmllist.extend((Html('p') + text))
elif format == 1:
#preformatted, retain whitespace.
#so use \n\n for paragraph detection
htmllist += Html("pre", indent=None) + markuptext.split('\n\n')
elif format == 0:
#flowed, use \n\n for paragraph detection
htmllist.extend(
(Html('p') + para.split('\n'))
for para in markuptext.split("\n\n"))
else:
linelist = []
linenb = 1
for line in markuptext.split('\n'):
[line, sigcount] = process_spaces(line, format)
if sigcount == 0:
# The rendering of an empty paragraph '<p></p>'
# is undefined so we use a non-breaking space
if linenb == 1:
linelist.append('&nbsp;')
htmllist.extend(Html('p') + linelist)
linelist = []
linenb = 1
else:
if linenb > 1:
linelist[-1] += '<br>'
linelist.append(line)
linenb += 1
if linenb > 1:
htmllist.extend(Html('p') + linelist)
# if the last line was blank, then as well as outputting the previous para,
# which we have just done,
# we also output a new blank para
if sigcount == 0:
linelist = ["&nbsp;"]
htmllist.extend(Html('p') + linelist)
return htmllist
def dump_notes(self, notelist):