# Copyright 2011 Marco Conti

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# sourced from https://github.com/marcoconti83/read-ods-with-odfpy
# further altered locally

from odf import opendocument
from odf.table import Table, TableRow, TableCell
from odf.text import P


class ODSReader(object):
    """Read every sheet of an ODS document into plain Python lists.

    After construction ``SHEETS`` maps each sheet name to a list of rows,
    each row being a list of cell text values.
    """

    def __init__(self, file=None, content=None, clonespannedcolumns=None):
        """Load the document and read all of its sheets.

        Args:
            file: passed to ``opendocument.load`` when ``content`` is falsy.
            content: an already loaded document; used as-is when truthy.
            clonespannedcolumns: when not ``None``, the text of a cell that
                spans several columns is repeated once per spanned column.
        """
        self.clonespannedcolumns = clonespannedcolumns
        self.doc = content if content else opendocument.load(file)
        self.SHEETS = {}
        for sheet in self.doc.spreadsheet.getElementsByType(Table):
            self.readSheet(sheet)

    @staticmethod
    def _cellText(cell):
        """Return the concatenated text of all paragraphs in ``cell``.

        Only text nodes directly inside a paragraph, or directly inside a
        ``text:span`` child of a paragraph, are collected.
        """
        parts = []
        for paragraph in cell.getElementsByType(P):
            for node in paragraph.childNodes:
                # nodeType 1 is an element node, 3 is a text node.
                if node.nodeType == 1 and node.tagName == "text:span":
                    # Read the text from the span's children, not from the
                    # span element itself.
                    parts.extend(
                        child.data
                        for child in node.childNodes
                        if child.nodeType == 3
                    )
                elif node.nodeType == 3:
                    parts.append(node.data)
        return u"".join(parts)

    def readSheet(self, sheet):
        """Read ``sheet`` into ``self.SHEETS`` under the sheet's name.

        The sheet is stored as a list (rows) of lists (columns). Cells whose
        text starts with ``#`` are treated as comments and contribute no
        column at all; rows that end up with no columns are dropped.
        """
        name = sheet.getAttribute("name")
        arrRows = []

        for row in sheet.getElementsByType(TableRow):
            arrCells = []
            cells = row.getElementsByType(TableCell)
            lastIndex = len(cells)

            for count, cell in enumerate(cells, start=1):
                # The repeat count of the last cell in a row is deliberately
                # ignored, so it is always emitted exactly once.
                repeat = 0
                if count != lastIndex:
                    repeat = cell.getAttribute("numbercolumnsrepeated")
                if not repeat:
                    repeat = 1
                    # Spanning is only considered for cells without a repeat
                    # count.
                    spanned = int(
                        cell.getAttribute('numbercolumnsspanned') or 0
                    )
                    if self.clonespannedcolumns is not None and spanned > 1:
                        repeat = spanned

                textContent = self._cellText(cell)
                if not textContent:
                    arrCells.extend([""] * int(repeat))
                elif not textContent.startswith("#"):  # ignore comment cells
                    arrCells.extend([textContent] * int(repeat))

            # Empty rows and rows made only of comment cells are skipped.
            if arrCells:
                arrRows.append(arrCells)

        self.SHEETS[name] = arrRows

    def getSheet(self, name):
        """Return the sheet called ``name`` as a list of rows of columns.

        Raises:
            KeyError: if no sheet with that name was read.
        """
        return self.SHEETS[name]

    def getFirstSheet(self):
        """Return the first sheet stored in ``SHEETS``.

        Uses ``values()`` so the call works on both Python 2 and Python 3.
        NOTE(review): "first" follows dict iteration order, which matches
        read order only where dicts preserve insertion order.

        Raises:
            StopIteration: if the document contained no sheets.
        """
        return next(iter(self.SHEETS.values()))