Enable MHTML reader (#44)
* Enable mhtml loader * Use default supported file types * Add tests and bump version
This commit is contained in:
parent
fbe983ccb3
commit
456f020caf
|
@ -9,7 +9,9 @@ from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
|
||||||
from kotaemon.loaders import (
|
from kotaemon.loaders import (
|
||||||
AdobeReader,
|
AdobeReader,
|
||||||
DirectoryReader,
|
DirectoryReader,
|
||||||
|
HtmlReader,
|
||||||
MathpixPDFReader,
|
MathpixPDFReader,
|
||||||
|
MhtmlReader,
|
||||||
OCRReader,
|
OCRReader,
|
||||||
PandasExcelReader,
|
PandasExcelReader,
|
||||||
UnstructuredReader,
|
UnstructuredReader,
|
||||||
|
@ -20,6 +22,13 @@ KH_DEFAULT_FILE_EXTRACTORS: dict[str, Type[BaseReader]] = {
|
||||||
".docx": UnstructuredReader,
|
".docx": UnstructuredReader,
|
||||||
".xls": UnstructuredReader,
|
".xls": UnstructuredReader,
|
||||||
".doc": UnstructuredReader,
|
".doc": UnstructuredReader,
|
||||||
|
".html": HtmlReader,
|
||||||
|
".mhtml": MhtmlReader,
|
||||||
|
".png": UnstructuredReader,
|
||||||
|
".jpeg": UnstructuredReader,
|
||||||
|
".jpg": UnstructuredReader,
|
||||||
|
".tiff": UnstructuredReader,
|
||||||
|
".tif": UnstructuredReader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ from .base import AutoReader, BaseReader
|
||||||
from .composite_loader import DirectoryReader
|
from .composite_loader import DirectoryReader
|
||||||
from .docx_loader import DocxReader
|
from .docx_loader import DocxReader
|
||||||
from .excel_loader import PandasExcelReader
|
from .excel_loader import PandasExcelReader
|
||||||
from .html_loader import HtmlReader
|
from .html_loader import HtmlReader, MhtmlReader
|
||||||
from .mathpix_loader import MathpixPDFReader
|
from .mathpix_loader import MathpixPDFReader
|
||||||
from .ocr_loader import ImageReader, OCRReader
|
from .ocr_loader import ImageReader, OCRReader
|
||||||
from .unstructured_loader import UnstructuredReader
|
from .unstructured_loader import UnstructuredReader
|
||||||
|
@ -19,5 +19,6 @@ __all__ = [
|
||||||
"UnstructuredReader",
|
"UnstructuredReader",
|
||||||
"DocxReader",
|
"DocxReader",
|
||||||
"HtmlReader",
|
"HtmlReader",
|
||||||
|
"MhtmlReader",
|
||||||
"AdobeReader",
|
"AdobeReader",
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
|
import email
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional
|
from typing import Optional
|
||||||
|
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
|
@ -33,7 +34,7 @@ class HtmlReader(BaseReader):
|
||||||
|
|
||||||
def load_data(
|
def load_data(
|
||||||
self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs
|
self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs
|
||||||
) -> List[Document]:
|
) -> list[Document]:
|
||||||
"""Load data using Html reader
|
"""Load data using Html reader
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -70,3 +71,78 @@ class HtmlReader(BaseReader):
|
||||||
]
|
]
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
|
||||||
|
class MhtmlReader(BaseReader):
    """Parse `MHTML` files with `BeautifulSoup`.

    An MHTML file is a MIME message; every `text/html` part found in it is
    converted to plain text and all parts are merged into a single document.
    """

    def __init__(
        self,
        open_encoding: Optional[str] = None,
        bs_kwargs: Optional[dict] = None,
        get_text_separator: str = "",
    ) -> None:
        """Configure how MHTML files are opened and parsed.

        The file to read is not given here; it is passed to `load_data`.

        Args:
            open_encoding: The encoding to use when opening the file.
            bs_kwargs: Any kwargs to pass to the BeautifulSoup object.
                Defaults to `{"features": "lxml"}` when not provided.
            get_text_separator: The separator to use when getting the text
                from the soup.

        Raises:
            ImportError: If the `beautifulsoup4` package is not installed.
        """
        try:
            import bs4  # noqa:F401
        except ImportError as exc:
            raise ImportError(
                "beautifulsoup4 package not found, please install it with "
                "`pip install beautifulsoup4`"
            ) from exc

        self.open_encoding = open_encoding
        if bs_kwargs is None:
            bs_kwargs = {"features": "lxml"}
        self.bs_kwargs = bs_kwargs
        self.get_text_separator = get_text_separator

    @staticmethod
    def _decode_html_payload(part) -> str:
        """Return the transfer-decoded payload of a MIME part as text."""
        raw = part.get_payload(decode=True)
        if raw is None:
            # get_payload(decode=True) returns None for multipart containers.
            return ""

        # UTF-8 is tried first so that every file which was readable before
        # keeps producing exactly the same text.
        try:
            return raw.decode()
        except UnicodeDecodeError:
            pass

        # Otherwise honour the charset declared in the part's Content-Type.
        charset = part.get_content_charset()
        if charset:
            try:
                return raw.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # Unknown codec name, or the declared charset is wrong.
                pass

        # Last resort: keep what can be read rather than failing the load.
        return raw.decode("utf-8", errors="replace")

    def load_data(
        self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Load MHTML document into document objects.

        Args:
            file_path: Path to the MHTML file to load.
            extra_info: Extra metadata merged into the document metadata;
                its keys take precedence over the generated `source` and
                `title` entries.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            A single-element list holding one `Document` whose text is the
            text of all `text/html` parts joined by blank lines. The `title`
            metadata comes from the last `text/html` part processed.
        """
        from bs4 import BeautifulSoup

        extra_info = extra_info or {}
        # Start from a complete metadata dict so that files without any
        # text/html part still report their source, and so the caller's
        # `extra_info` dict is never handed to the Document directly.
        metadata: dict = {"source": str(file_path), "title": "", **extra_info}
        page = []

        with open(file_path, "r", encoding=self.open_encoding) as f:
            message = email.message_from_string(f.read())

        # walk() yields the message itself and every sub-part depth-first,
        # covering both single-part files and nested multipart containers.
        for part in message.walk():
            if part.get_content_type() != "text/html":
                continue

            html = self._decode_html_payload(part)
            soup = BeautifulSoup(html, **self.bs_kwargs)
            text = soup.get_text(self.get_text_separator)

            # `.string` is None for an empty <title> or one with nested tags;
            # str(None) would otherwise store the literal text "None".
            title_text = soup.title.string if soup.title else None
            title = str(title_text) if title_text is not None else ""

            metadata = {
                "source": str(file_path),
                "title": title,
                **extra_info,
            }

            # Drop blank lines and separate the remaining ones by a blank line.
            lines = [line for line in text.split("\n") if line.strip()]
            text = "\n\n".join(lines)
            if text:
                page.append(text)

        return [Document(text="\n\n".join(page), metadata=metadata)]
|
||||||
|
|
|
@ -11,7 +11,7 @@ packages.find.exclude = ["tests*", "env*"]
|
||||||
# metadata and dependencies
|
# metadata and dependencies
|
||||||
[project]
|
[project]
|
||||||
name = "kotaemon"
|
name = "kotaemon"
|
||||||
version = "0.3.10"
|
version = "0.3.11"
|
||||||
requires-python = ">= 3.10"
|
requires-python = ">= 3.10"
|
||||||
description = "Kotaemon core library for AI development."
|
description = "Kotaemon core library for AI development."
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
@ -63,6 +63,7 @@ adv = [
|
||||||
"llama-cpp-python",
|
"llama-cpp-python",
|
||||||
"pdfservices-sdk @ git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements",
|
"pdfservices-sdk @ git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements",
|
||||||
"fastembed",
|
"fastembed",
|
||||||
|
"beautifulsoup4",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"ipython",
|
"ipython",
|
||||||
|
|
690
libs/kotaemon/tests/resources/dummy.mhtml
Normal file
690
libs/kotaemon/tests/resources/dummy.mhtml
Normal file
|
@ -0,0 +1,690 @@
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: multipart/related; boundary="----=_NextPart_01CF5AE5.5C24CD00"
|
||||||
|
|
||||||
|
This document is a Single File Web Page, also known as a Web Archive file. If you are seeing this message, your browser or editor doesn't support Web Archive files. Please download a browser that supports Web Archive, such as Windows® Internet Explorer®.
|
||||||
|
|
||||||
|
------=_NextPart_01CF5AE5.5C24CD00
|
||||||
|
Content-Location: file:///C:/D16BB227/testing.htm
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Content-Type: text/html; charset="us-ascii"
|
||||||
|
|
||||||
|
<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
|
||||||
|
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
|
||||||
|
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
|
||||||
|
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
|
||||||
|
xmlns=3D"http://www.w3.org/TR/REC-html40">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta http-equiv=3DContent-Type content=3D"text/html; charset=3Dus-ascii">
|
||||||
|
<meta name=3DProgId content=3DWord.Document>
|
||||||
|
<meta name=3DGenerator content=3D"Microsoft Word 12">
|
||||||
|
<meta name=3DOriginator content=3D"Microsoft Word 12">
|
||||||
|
<link rel=3DFile-List href=3D"testing_files/filelist.xml">
|
||||||
|
<!--[if gte mso 9]><xml>
|
||||||
|
<o:DocumentProperties>
|
||||||
|
<o:Author>dtobias</o:Author>
|
||||||
|
<o:Template>testing.mht</o:Template>
|
||||||
|
<o:LastAuthor>dtobias</o:LastAuthor>
|
||||||
|
<o:Revision>2</o:Revision>
|
||||||
|
<o:TotalTime>1</o:TotalTime>
|
||||||
|
<o:LastPrinted>2014-04-18T13:05:00Z</o:LastPrinted>
|
||||||
|
<o:Created>2014-04-18T13:05:00Z</o:Created>
|
||||||
|
<o:LastSaved>2014-04-18T13:05:00Z</o:LastSaved>
|
||||||
|
<o:Pages>1</o:Pages>
|
||||||
|
<o:Words>49</o:Words>
|
||||||
|
<o:Characters>280</o:Characters>
|
||||||
|
<o:Company>Microsoft</o:Company>
|
||||||
|
<o:Lines>2</o:Lines>
|
||||||
|
<o:Paragraphs>1</o:Paragraphs>
|
||||||
|
<o:CharactersWithSpaces>328</o:CharactersWithSpaces>
|
||||||
|
<o:Version>12.00</o:Version>
|
||||||
|
</o:DocumentProperties>
|
||||||
|
</xml><![endif]-->
|
||||||
|
<link rel=3DthemeData href=3D"testing_files/themedata.thmx">
|
||||||
|
<link rel=3DcolorSchemeMapping href=3D"testing_files/colorschememapping.xml=
|
||||||
|
">
|
||||||
|
<!--[if gte mso 9]><xml>
|
||||||
|
<w:WordDocument>
|
||||||
|
<w:SpellingState>Clean</w:SpellingState>
|
||||||
|
<w:TrackMoves>false</w:TrackMoves>
|
||||||
|
<w:TrackFormatting/>
|
||||||
|
<w:PunctuationKerning/>
|
||||||
|
<w:ValidateAgainstSchemas/>
|
||||||
|
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
||||||
|
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
||||||
|
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
||||||
|
<w:DoNotPromoteQF/>
|
||||||
|
<w:LidThemeOther>EN-US</w:LidThemeOther>
|
||||||
|
<w:LidThemeAsian>X-NONE</w:LidThemeAsian>
|
||||||
|
<w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>
|
||||||
|
<w:Compatibility>
|
||||||
|
<w:BreakWrappedTables/>
|
||||||
|
<w:SnapToGridInCell/>
|
||||||
|
<w:WrapTextWithPunct/>
|
||||||
|
<w:UseAsianBreakRules/>
|
||||||
|
<w:DontGrowAutofit/>
|
||||||
|
<w:SplitPgBreakAndParaMark/>
|
||||||
|
<w:DontVertAlignCellWithSp/>
|
||||||
|
<w:DontBreakConstrainedForcedTables/>
|
||||||
|
<w:DontVertAlignInTxbx/>
|
||||||
|
<w:Word11KerningPairs/>
|
||||||
|
<w:CachedColBalance/>
|
||||||
|
</w:Compatibility>
|
||||||
|
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
||||||
|
<m:mathPr>
|
||||||
|
<m:mathFont m:val=3D"Cambria Math"/>
|
||||||
|
<m:brkBin m:val=3D"before"/>
|
||||||
|
<m:brkBinSub m:val=3D"--"/>
|
||||||
|
<m:smallFrac m:val=3D"off"/>
|
||||||
|
<m:dispDef/>
|
||||||
|
<m:lMargin m:val=3D"0"/>
|
||||||
|
<m:rMargin m:val=3D"0"/>
|
||||||
|
<m:defJc m:val=3D"centerGroup"/>
|
||||||
|
<m:wrapIndent m:val=3D"1440"/>
|
||||||
|
<m:intLim m:val=3D"subSup"/>
|
||||||
|
<m:naryLim m:val=3D"undOvr"/>
|
||||||
|
</m:mathPr></w:WordDocument>
|
||||||
|
</xml><![endif]--><!--[if gte mso 9]><xml>
|
||||||
|
<w:LatentStyles DefLockedState=3D"false" DefUnhideWhenUsed=3D"true"
|
||||||
|
DefSemiHidden=3D"true" DefQFormat=3D"false" DefPriority=3D"99"
|
||||||
|
LatentStyleCount=3D"267">
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"0" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Normal"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"heading 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 7"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 8"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
|
||||||
|
heading 9"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 7"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 8"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 9"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"35" QFormat=3D"true" Name=3D=
|
||||||
|
"caption"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"10" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Title"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"1" Name=3D"Default Paragraph=
|
||||||
|
Font"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"11" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtitle"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"22" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Strong"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"20" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Emphasis"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"59" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Table Grid"/>
|
||||||
|
<w:LsdException Locked=3D"false" UnhideWhenUsed=3D"false" Name=3D"Placeho=
|
||||||
|
lder Text"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"1" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"No Spacing"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" UnhideWhenUsed=3D"false" Name=3D"Revisio=
|
||||||
|
n"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"34" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"List Paragraph"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"29" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Quote"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"30" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Quote"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 1"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 2"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 3"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 4"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 5"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light List Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 6"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"19" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtle Emphasis"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"21" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Emphasis"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"31" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtle Reference"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"32" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Reference"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"33" SemiHidden=3D"false"
|
||||||
|
UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Book Title"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"37" Name=3D"Bibliography"/>
|
||||||
|
<w:LsdException Locked=3D"false" Priority=3D"39" QFormat=3D"true" Name=3D=
|
||||||
|
"TOC Heading"/>
|
||||||
|
</w:LatentStyles>
|
||||||
|
</xml><![endif]-->
|
||||||
|
<style>
|
||||||
|
<!--
|
||||||
|
/* Font Definitions */
|
||||||
|
@font-face
|
||||||
|
{font-family:"Cambria Math";
|
||||||
|
panose-1:2 4 5 3 5 4 6 3 2 4;
|
||||||
|
mso-font-charset:1;
|
||||||
|
mso-generic-font-family:roman;
|
||||||
|
mso-font-format:other;
|
||||||
|
mso-font-pitch:variable;
|
||||||
|
mso-font-signature:0 0 0 0 0 0;}
|
||||||
|
@font-face
|
||||||
|
{font-family:Cambria;
|
||||||
|
panose-1:2 4 5 3 5 4 6 3 2 4;
|
||||||
|
mso-font-charset:0;
|
||||||
|
mso-generic-font-family:roman;
|
||||||
|
mso-font-pitch:variable;
|
||||||
|
mso-font-signature:-536870145 1073743103 0 0 415 0;}
|
||||||
|
@font-face
|
||||||
|
{font-family:Calibri;
|
||||||
|
panose-1:2 15 5 2 2 2 4 3 2 4;
|
||||||
|
mso-font-charset:0;
|
||||||
|
mso-generic-font-family:swiss;
|
||||||
|
mso-font-pitch:variable;
|
||||||
|
mso-font-signature:-536870145 1073786111 1 0 415 0;}
|
||||||
|
/* Style Definitions */
|
||||||
|
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
||||||
|
{mso-style-unhide:no;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-parent:"";
|
||||||
|
margin-top:0in;
|
||||||
|
margin-right:0in;
|
||||||
|
margin-bottom:10.0pt;
|
||||||
|
margin-left:0in;
|
||||||
|
line-height:115%;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
font-size:11.0pt;
|
||||||
|
font-family:"Calibri","sans-serif";
|
||||||
|
mso-fareast-font-family:Calibri;
|
||||||
|
mso-bidi-font-family:"Times New Roman";}
|
||||||
|
p.MsoTitle, li.MsoTitle, div.MsoTitle
|
||||||
|
{mso-style-priority:10;
|
||||||
|
mso-style-unhide:no;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-link:"Title Char";
|
||||||
|
mso-style-next:Normal;
|
||||||
|
margin-top:0in;
|
||||||
|
margin-right:0in;
|
||||||
|
margin-bottom:15.0pt;
|
||||||
|
margin-left:0in;
|
||||||
|
mso-add-space:auto;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
border:none;
|
||||||
|
mso-border-bottom-alt:solid #4F81BD 1.0pt;
|
||||||
|
padding:0in;
|
||||||
|
mso-padding-alt:0in 0in 4.0pt 0in;
|
||||||
|
font-size:26.0pt;
|
||||||
|
font-family:"Cambria","serif";
|
||||||
|
mso-fareast-font-family:"Times New Roman";
|
||||||
|
mso-bidi-font-family:"Times New Roman";
|
||||||
|
color:#17365D;
|
||||||
|
letter-spacing:.25pt;
|
||||||
|
mso-font-kerning:14.0pt;}
|
||||||
|
p.MsoTitleCxSpFirst, li.MsoTitleCxSpFirst, div.MsoTitleCxSpFirst
|
||||||
|
{mso-style-priority:10;
|
||||||
|
mso-style-unhide:no;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-link:"Title Char";
|
||||||
|
mso-style-next:Normal;
|
||||||
|
mso-style-type:export-only;
|
||||||
|
margin:0in;
|
||||||
|
margin-bottom:.0001pt;
|
||||||
|
mso-add-space:auto;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
border:none;
|
||||||
|
mso-border-bottom-alt:solid #4F81BD 1.0pt;
|
||||||
|
padding:0in;
|
||||||
|
mso-padding-alt:0in 0in 4.0pt 0in;
|
||||||
|
font-size:26.0pt;
|
||||||
|
font-family:"Cambria","serif";
|
||||||
|
mso-fareast-font-family:"Times New Roman";
|
||||||
|
mso-bidi-font-family:"Times New Roman";
|
||||||
|
color:#17365D;
|
||||||
|
letter-spacing:.25pt;
|
||||||
|
mso-font-kerning:14.0pt;}
|
||||||
|
p.MsoTitleCxSpMiddle, li.MsoTitleCxSpMiddle, div.MsoTitleCxSpMiddle
|
||||||
|
{mso-style-priority:10;
|
||||||
|
mso-style-unhide:no;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-link:"Title Char";
|
||||||
|
mso-style-next:Normal;
|
||||||
|
mso-style-type:export-only;
|
||||||
|
margin:0in;
|
||||||
|
margin-bottom:.0001pt;
|
||||||
|
mso-add-space:auto;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
border:none;
|
||||||
|
mso-border-bottom-alt:solid #4F81BD 1.0pt;
|
||||||
|
padding:0in;
|
||||||
|
mso-padding-alt:0in 0in 4.0pt 0in;
|
||||||
|
font-size:26.0pt;
|
||||||
|
font-family:"Cambria","serif";
|
||||||
|
mso-fareast-font-family:"Times New Roman";
|
||||||
|
mso-bidi-font-family:"Times New Roman";
|
||||||
|
color:#17365D;
|
||||||
|
letter-spacing:.25pt;
|
||||||
|
mso-font-kerning:14.0pt;}
|
||||||
|
p.MsoTitleCxSpLast, li.MsoTitleCxSpLast, div.MsoTitleCxSpLast
|
||||||
|
{mso-style-priority:10;
|
||||||
|
mso-style-unhide:no;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-link:"Title Char";
|
||||||
|
mso-style-next:Normal;
|
||||||
|
mso-style-type:export-only;
|
||||||
|
margin-top:0in;
|
||||||
|
margin-right:0in;
|
||||||
|
margin-bottom:15.0pt;
|
||||||
|
margin-left:0in;
|
||||||
|
mso-add-space:auto;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
border:none;
|
||||||
|
mso-border-bottom-alt:solid #4F81BD 1.0pt;
|
||||||
|
padding:0in;
|
||||||
|
mso-padding-alt:0in 0in 4.0pt 0in;
|
||||||
|
font-size:26.0pt;
|
||||||
|
font-family:"Cambria","serif";
|
||||||
|
mso-fareast-font-family:"Times New Roman";
|
||||||
|
mso-bidi-font-family:"Times New Roman";
|
||||||
|
color:#17365D;
|
||||||
|
letter-spacing:.25pt;
|
||||||
|
mso-font-kerning:14.0pt;}
|
||||||
|
span.TitleChar
|
||||||
|
{mso-style-name:"Title Char";
|
||||||
|
mso-style-priority:10;
|
||||||
|
mso-style-unhide:no;
|
||||||
|
mso-style-locked:yes;
|
||||||
|
mso-style-link:Title;
|
||||||
|
mso-ansi-font-size:26.0pt;
|
||||||
|
mso-bidi-font-size:26.0pt;
|
||||||
|
font-family:"Cambria","serif";
|
||||||
|
mso-ascii-font-family:Cambria;
|
||||||
|
mso-fareast-font-family:"Times New Roman";
|
||||||
|
mso-hansi-font-family:Cambria;
|
||||||
|
mso-bidi-font-family:"Times New Roman";
|
||||||
|
color:#17365D;
|
||||||
|
letter-spacing:.25pt;
|
||||||
|
mso-font-kerning:14.0pt;}
|
||||||
|
span.SpellE
|
||||||
|
{mso-style-name:"";
|
||||||
|
mso-spl-e:yes;}
|
||||||
|
.MsoChpDefault
|
||||||
|
{mso-style-type:export-only;
|
||||||
|
mso-default-props:yes;
|
||||||
|
font-size:10.0pt;
|
||||||
|
mso-ansi-font-size:10.0pt;
|
||||||
|
mso-bidi-font-size:10.0pt;
|
||||||
|
mso-ascii-font-family:Calibri;
|
||||||
|
mso-fareast-font-family:Calibri;
|
||||||
|
mso-hansi-font-family:Calibri;}
|
||||||
|
@page WordSection1
|
||||||
|
{size:8.5in 11.0in;
|
||||||
|
margin:1.0in 1.0in 1.0in 1.0in;
|
||||||
|
mso-header-margin:.5in;
|
||||||
|
mso-footer-margin:.5in;
|
||||||
|
mso-paper-source:0;}
|
||||||
|
div.WordSection1
|
||||||
|
{page:WordSection1;}
|
||||||
|
-->
|
||||||
|
</style>
|
||||||
|
<!--[if gte mso 10]>
|
||||||
|
<style>
|
||||||
|
/* Style Definitions */
|
||||||
|
table.MsoNormalTable
|
||||||
|
{mso-style-name:"Table Normal";
|
||||||
|
mso-tstyle-rowband-size:0;
|
||||||
|
mso-tstyle-colband-size:0;
|
||||||
|
mso-style-noshow:yes;
|
||||||
|
mso-style-priority:99;
|
||||||
|
mso-style-qformat:yes;
|
||||||
|
mso-style-parent:"";
|
||||||
|
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
||||||
|
mso-para-margin:0in;
|
||||||
|
mso-para-margin-bottom:.0001pt;
|
||||||
|
mso-pagination:widow-orphan;
|
||||||
|
font-size:10.0pt;
|
||||||
|
font-family:"Calibri","sans-serif";}
|
||||||
|
</style>
|
||||||
|
<![endif]--><!--[if gte mso 9]><xml>
|
||||||
|
<o:shapedefaults v:ext=3D"edit" spidmax=3D"2050"/>
|
||||||
|
</xml><![endif]--><!--[if gte mso 9]><xml>
|
||||||
|
<o:shapelayout v:ext=3D"edit">
|
||||||
|
<o:idmap v:ext=3D"edit" data=3D"1"/>
|
||||||
|
</o:shapelayout></xml><![endif]-->
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body lang=3DEN-US style=3D'tab-interval:.5in'>
|
||||||
|
|
||||||
|
<div class=3DWordSection1>
|
||||||
|
|
||||||
|
<div style=3D'mso-element:para-border-div;border:none;border-bottom:solid #=
|
||||||
|
4F81BD 1.0pt;
|
||||||
|
padding:0in 0in 4.0pt 0in'>
|
||||||
|
|
||||||
|
<p class=3DMsoTitle>This is a test.</p>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p class=3DMsoNormal>This is <b style=3D'mso-bidi-font-weight:normal'>bold,=
|
||||||
|
<i
|
||||||
|
style=3D'mso-bidi-font-style:normal'>italic, and <u>underlined.</u></i></b>=
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p class=3DMsoNormal><span class=3DSpellE>asdakl</span> <span class=3DSpell=
|
||||||
|
E>fskljf</span>
|
||||||
|
<span class=3DSpellE>sklf</span> <span class=3DSpellE>jkslaf</span>; <span
|
||||||
|
class=3DSpellE>djks</span> <span class=3DSpellE>dlkfa</span> <span class=3D=
|
||||||
|
SpellE>sk</span>
|
||||||
|
<span class=3DSpellE>sdjkl</span> <span class=3DSpellE>ksjkl</span> <span
|
||||||
|
class=3DSpellE>jsjk</span> <span class=3DSpellE>skdjjks</span> <span class=
|
||||||
|
=3DSpellE>i</span>
|
||||||
|
w <span class=3DSpellE>ie</span> <span class=3DSpellE>sjkfksd</span> <span
|
||||||
|
class=3DSpellE>fjisdf</span> <span class=3DSpellE>jks</span> <span class=3D=
|
||||||
|
SpellE>fjs</span>
|
||||||
|
<span class=3DSpellE>kdj</span> <span class=3DSpellE>fsk</span> <span class=
|
||||||
|
=3DSpellE>dfjskd</span>
|
||||||
|
<span class=3DSpellE>fjskd</span> <span class=3DSpellE>fjsd</span> <span
|
||||||
|
class=3DSpellE>kfjsk</span> f <span class=3DSpellE>jskdf</span> <span class=
|
||||||
|
=3DSpellE>jskd</span>
|
||||||
|
<span class=3DSpellE>fjsk</span> <span class=3DSpellE>dfjskdf</span> <span
|
||||||
|
class=3DSpellE>jsifj</span> <span class=3DSpellE>sifj</span> <span class=3D=
|
||||||
|
SpellE>sk</span>
|
||||||
|
<span class=3DSpellE>fjks</span> <span class=3DSpellE>fjksd</span> <span
|
||||||
|
class=3DSpellE>fjskdf</span> <span class=3DSpellE>kjs</span> <span class=3D=
|
||||||
|
SpellE>jdfksk</span>
|
||||||
|
<span class=3DSpellE>fdjs</span> <span class=3DSpellE>fksj</span> <span
|
||||||
|
class=3DSpellE>fks</span> <span class=3DSpellE>dfjs</span> <span class=3DSp=
|
||||||
|
ellE>dfks</span>
|
||||||
|
<span class=3DSpellE>fdjsk</span> <span class=3DSpellE>fjskdfjskdf</span> <=
|
||||||
|
span
|
||||||
|
class=3DSpellE>sjkf</span> <span class=3DSpellE>skjf</span> <span class=3DS=
|
||||||
|
pellE>sjkdf</span>
|
||||||
|
<span class=3DSpellE>skfjsfjk</span> s</p>
|
||||||
|
|
||||||
|
<p class=3DMsoNormal>The end.</p>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
|
|
||||||
|
------=_NextPart_01CF5AE5.5C24CD00
|
||||||
|
Content-Location: file:///C:/D16BB227/testing_files/themedata.thmx
|
||||||
|
Content-Transfer-Encoding: base64
|
||||||
|
Content-Type: application/vnd.ms-officetheme
|
||||||
|
|
||||||
|
UEsDBBQABgAIAAAAIQCCirwT+gAAABwCAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbKyRy2rDMBBF
|
||||||
|
94X+g9C22HK6KKXYzqJJd30s0g8Y5LEtao+ENAnJ33fsuFC6CC10IxBizpl7Va6P46AOGJPzVOlV
|
||||||
|
XmiFZH3jqKv0++4pu9cqMVADgyes9AmTXtfXV+XuFDApmaZU6Z45PBiTbI8jpNwHJHlpfRyB5Ro7
|
||||||
|
E8B+QIfmtijujPXESJzxxNB1+SoLRNegeoPILzCKx7Cg8Pv5DCSAmAtYq8czYVqi0hDC4CywRDAH
|
||||||
|
an7oM9+2zmLj7X4UaT6DF9jNBDO/XGD1P+ov5wZb2A+stkfp4lx/xCH9LdtSay6Tc/7Uu5AuGC6X
|
||||||
|
t7Rh5r+tPwEAAP//AwBQSwMEFAAGAAgAAAAhAKXWp+fAAAAANgEAAAsAAABfcmVscy8ucmVsc4SP
|
||||||
|
z2rDMAyH74W9g9F9UdLDGCV2L6WQQy+jfQDhKH9oIhvbG+vbT8cGCrsIhKTv96k9/q6L+eGU5yAW
|
||||||
|
mqoGw+JDP8to4XY9v3+CyYWkpyUIW3hwhqN727VfvFDRozzNMRulSLYwlRIPiNlPvFKuQmTRyRDS
|
||||||
|
SkXbNGIkf6eRcV/XH5ieGeA2TNP1FlLXN2Cuj6jJ/7PDMMyeT8F/ryzlRQRuN5RMaeRioagv41O9
|
||||||
|
kKhlqtQe0LW4+db9AQAA//8DAFBLAwQUAAYACAAAACEAa3mWFoMAAACKAAAAHAAAAHRoZW1lL3Ro
|
||||||
|
ZW1lL3RoZW1lTWFuYWdlci54bWwMzE0KwyAQQOF9oXeQ2TdjuyhFYrLLrrv2AEOcGkHHoNKf29fl
|
||||||
|
44M3zt8U1ZtLDVksnAcNimXNLoi38Hwspxuo2kgcxSxs4ccV5ul4GMm0jRPfSchzUX0j1ZCFrbXd
|
||||||
|
INa1K9Uh7yzdXrkkaj2LR1fo0/cp4kXrKyYKAjj9AQAA//8DAFBLAwQUAAYACAAAACEAlrWt4pYG
|
||||||
|
AABQGwAAFgAAAHRoZW1lL3RoZW1lL3RoZW1lMS54bWzsWU9v2zYUvw/YdyB0b2MndhoHdYrYsZst
|
||||||
|
TRvEboceaYmW2FCiQNJJfRva44ABw7phhxXYbYdhW4EW2KX7NNk6bB3Qr7BHUpLFWF6SNtiKrT4k
|
||||||
|
Evnj+/8eH6mr1+7HDB0SISlP2l79cs1DJPF5QJOw7d0e9i+teUgqnASY8YS0vSmR3rWN99+7itdV
|
||||||
|
RGKCYH0i13Hbi5RK15eWpA/DWF7mKUlgbsxFjBW8inApEPgI6MZsablWW12KMU08lOAYyN4aj6lP
|
||||||
|
0FCT9DZy4j0Gr4mSesBnYqBJE2eFwQYHdY2QU9llAh1i1vaAT8CPhuS+8hDDUsFE26uZn7e0cXUJ
|
||||||
|
r2eLmFqwtrSub37ZumxBcLBseIpwVDCt9xutK1sFfQNgah7X6/W6vXpBzwCw74OmVpYyzUZ/rd7J
|
||||||
|
aZZA9nGedrfWrDVcfIn+ypzMrU6n02xlsliiBmQfG3P4tdpqY3PZwRuQxTfn8I3OZre76uANyOJX
|
||||||
|
5/D9K63Vhos3oIjR5GAOrR3a72fUC8iYs+1K+BrA12oZfIaCaCiiS7MY80QtirUY3+OiDwANZFjR
|
||||||
|
BKlpSsbYhyju4ngkKNYM8DrBpRk75Mu5Ic0LSV/QVLW9D1MMGTGj9+r596+eP0XHD54dP/jp+OHD
|
||||||
|
4wc/WkLOqm2chOVVL7/97M/HH6M/nn7z8tEX1XhZxv/6wye//Px5NRDSZybOiy+f/PbsyYuvPv39
|
||||||
|
u0cV8E2BR2X4kMZEopvkCO3zGBQzVnElJyNxvhXDCNPyis0klDjBmksF/Z6KHPTNKWaZdxw5OsS1
|
||||||
|
4B0B5aMKeH1yzxF4EImJohWcd6LYAe5yzjpcVFphR/MqmXk4ScJq5mJSxu1jfFjFu4sTx7+9SQp1
|
||||||
|
Mw9LR/FuRBwx9xhOFA5JQhTSc/yAkArt7lLq2HWX+oJLPlboLkUdTCtNMqQjJ5pmi7ZpDH6ZVukM
|
||||||
|
/nZss3sHdTir0nqLHLpIyArMKoQfEuaY8TqeKBxXkRzimJUNfgOrqErIwVT4ZVxPKvB0SBhHvYBI
|
||||||
|
WbXmlgB9S07fwVCxKt2+y6axixSKHlTRvIE5LyO3+EE3wnFahR3QJCpjP5AHEKIY7XFVBd/lbobo
|
||||||
|
d/ADTha6+w4ljrtPrwa3aeiINAsQPTMR2pdQqp0KHNPk78oxo1CPbQxcXDmGAvji68cVkfW2FuJN
|
||||||
|
2JOqMmH7RPldhDtZdLtcBPTtr7lbeJLsEQjz+Y3nXcl9V3K9/3zJXZTPZy20s9oKZVf3DbYpNi1y
|
||||||
|
vLBDHlPGBmrKyA1pmmQJ+0TQh0G9zpwOSXFiSiN4zOq6gwsFNmuQ4OojqqJBhFNosOueJhLKjHQo
|
||||||
|
UcolHOzMcCVtjYcmXdljYVMfGGw9kFjt8sAOr+jh/FxQkDG7TWgOnzmjFU3grMxWrmREQe3XYVbX
|
||||||
|
Qp2ZW92IZkqdw61QGXw4rxoMFtaEBgRB2wJWXoXzuWYNBxPMSKDtbvfe3C3GCxfpIhnhgGQ+0nrP
|
||||||
|
+6hunJTHirkJgNip8JE+5J1itRK3lib7BtzO4qQyu8YCdrn33sRLeQTPvKTz9kQ6sqScnCxBR22v
|
||||||
|
1VxuesjHadsbw5kWHuMUvC51z4dZCBdDvhI27E9NZpPlM2+2csXcJKjDNYW1+5zCTh1IhVRbWEY2
|
||||||
|
NMxUFgIs0Zys/MtNMOtFKWAj/TWkWFmDYPjXpAA7uq4l4zHxVdnZpRFtO/ualVI+UUQMouAIjdhE
|
||||||
|
7GNwvw5V0CegEq4mTEXQL3CPpq1tptzinCVd+fbK4Ow4ZmmEs3KrUzTPZAs3eVzIYN5K4oFulbIb
|
||||||
|
5c6vikn5C1KlHMb/M1X0fgI3BSuB9oAP17gCI52vbY8LFXGoQmlE/b6AxsHUDogWuIuFaQgquEw2
|
||||||
|
/wU51P9tzlkaJq3hwKf2aYgEhf1IRYKQPShLJvpOIVbP9i5LkmWETESVxJWpFXtEDgkb6hq4qvd2
|
||||||
|
D0UQ6qaaZGXA4E7Gn/ueZdAo1E1OOd+cGlLsvTYH/unOxyYzKOXWYdPQ5PYvRKzYVe16szzfe8uK
|
||||||
|
6IlZm9XIswKYlbaCVpb2rynCObdaW7HmNF5u5sKBF+c1hsGiIUrhvgfpP7D/UeEz+2VCb6hDvg+1
|
||||||
|
FcGHBk0Mwgai+pJtPJAukHZwBI2THbTBpElZ02atk7ZavllfcKdb8D1hbC3ZWfx9TmMXzZnLzsnF
|
||||||
|
izR2ZmHH1nZsoanBsydTFIbG+UHGOMZ80ip/deKje+DoLbjfnzAlTTDBNyWBofUcmDyA5LcczdKN
|
||||||
|
vwAAAP//AwBQSwMEFAAGAAgAAAAhAA3RkJ+2AAAAGwEAACcAAAB0aGVtZS90aGVtZS9fcmVscy90
|
||||||
|
aGVtZU1hbmFnZXIueG1sLnJlbHOEj00KwjAUhPeCdwhvb9O6EJEm3YjQrdQDhOQ1DTY/JFHs7Q2u
|
||||||
|
LAguh2G+mWm7l53JE2My3jFoqhoIOumVcZrBbbjsjkBSFk6J2TtksGCCjm837RVnkUsoTSYkUigu
|
||||||
|
MZhyDidKk5zQilT5gK44o49W5CKjpkHIu9BI93V9oPGbAXzFJL1iEHvVABmWUJr/s/04GolnLx8W
|
||||||
|
Xf5RQXPZhQUoosbM4CObqkwEylu6usTfAAAA//8DAFBLAQItABQABgAIAAAAIQCCirwT+gAAABwC
|
||||||
|
AAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0AFAAGAAgAAAAhAKXW
|
||||||
|
p+fAAAAANgEAAAsAAAAAAAAAAAAAAAAAKwEAAF9yZWxzLy5yZWxzUEsBAi0AFAAGAAgAAAAhAGt5
|
||||||
|
lhaDAAAAigAAABwAAAAAAAAAAAAAAAAAFAIAAHRoZW1lL3RoZW1lL3RoZW1lTWFuYWdlci54bWxQ
|
||||||
|
SwECLQAUAAYACAAAACEAlrWt4pYGAABQGwAAFgAAAAAAAAAAAAAAAADRAgAAdGhlbWUvdGhlbWUv
|
||||||
|
dGhlbWUxLnhtbFBLAQItABQABgAIAAAAIQAN0ZCftgAAABsBAAAnAAAAAAAAAAAAAAAAAJsJAAB0
|
||||||
|
aGVtZS90aGVtZS9fcmVscy90aGVtZU1hbmFnZXIueG1sLnJlbHNQSwUGAAAAAAUABQBdAQAAlgoA
|
||||||
|
AAAA
|
||||||
|
|
||||||
|
------=_NextPart_01CF5AE5.5C24CD00
|
||||||
|
Content-Location: file:///C:/D16BB227/testing_files/colorschememapping.xml
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Content-Type: text/xml
|
||||||
|
|
||||||
|
<?xml version=3D"1.0" encoding=3D"UTF-8" standalone=3D"yes"?>
|
||||||
|
<a:clrMap xmlns:a=3D"http://schemas.openxmlformats.org/drawingml/2006/main"=
|
||||||
|
bg1=3D"lt1" tx1=3D"dk1" bg2=3D"lt2" tx2=3D"dk2" accent1=3D"accent1" accent=
|
||||||
|
2=3D"accent2" accent3=3D"accent3" accent4=3D"accent4" accent5=3D"accent5" a=
|
||||||
|
ccent6=3D"accent6" hlink=3D"hlink" folHlink=3D"folHlink"/>
|
||||||
|
------=_NextPart_01CF5AE5.5C24CD00
|
||||||
|
Content-Location: file:///C:/D16BB227/testing_files/filelist.xml
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Content-Type: text/xml; charset="utf-8"
|
||||||
|
|
||||||
|
<xml xmlns:o=3D"urn:schemas-microsoft-com:office:office">
|
||||||
|
<o:MainFile HRef=3D"../testing.htm"/>
|
||||||
|
<o:File HRef=3D"themedata.thmx"/>
|
||||||
|
<o:File HRef=3D"colorschememapping.xml"/>
|
||||||
|
<o:File HRef=3D"filelist.xml"/>
|
||||||
|
</xml>
|
||||||
|
------=_NextPart_01CF5AE5.5C24CD00--
|
|
@ -4,7 +4,13 @@ from langchain.schema import Document as LangchainDocument
|
||||||
from llama_index.node_parser import SimpleNodeParser
|
from llama_index.node_parser import SimpleNodeParser
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.loaders import AutoReader, DocxReader, HtmlReader, UnstructuredReader
|
from kotaemon.loaders import (
|
||||||
|
AutoReader,
|
||||||
|
DocxReader,
|
||||||
|
HtmlReader,
|
||||||
|
MhtmlReader,
|
||||||
|
UnstructuredReader,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_docx_reader():
|
def test_docx_reader():
|
||||||
|
@ -61,3 +67,12 @@ def test_unstructured_pdf_reader():
|
||||||
documents = reader.load_data(input_path, split_documents=True)
|
documents = reader.load_data(input_path, split_documents=True)
|
||||||
# check document reader output
|
# check document reader output
|
||||||
assert len(documents) == 1
|
assert len(documents) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_mhtml_reader():
|
||||||
|
reader = MhtmlReader()
|
||||||
|
input_path = Path(__file__).parent / "resources" / "dummy.mhtml"
|
||||||
|
docs = reader.load_data(input_path)
|
||||||
|
|
||||||
|
assert len(docs) == 1
|
||||||
|
assert docs[0].text.startswith("This is a test")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user