Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
T
traffic-front
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangxiaoming
traffic-front
Commits
52ba18e3
Commit
52ba18e3
authored
Apr 26, 2018
by
eddie.woo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
660c9f0d
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
341 additions
and
0 deletions
+341
-0
java-axp-1.0-SNAPSHOT.jar
atms-api/lib/java-axp-1.0-SNAPSHOT.jar
+0
-0
pom.xml
atms-api/pom.xml
+18
-0
PDFTest.java
atms-api/src/test/java/pwc/taxtech/atms/common/PDFTest.java
+34
-0
POITest.java
atms-api/src/test/java/pwc/taxtech/atms/common/POITest.java
+79
-0
TikaTest.java
atms-api/src/test/java/pwc/taxtech/atms/common/TikaTest.java
+38
-0
TmpFunction.java
...pi/src/test/java/pwc/taxtech/atms/common/TmpFunction.java
+34
-0
TmpXPSParser.java
...i/src/test/java/pwc/taxtech/atms/common/TmpXPSParser.java
+138
-0
No files found.
atms-api/lib/java-axp-1.0-SNAPSHOT.jar
0 → 100644
View file @
52ba18e3
File added
atms-api/pom.xml
View file @
52ba18e3
...
...
@@ -278,6 +278,24 @@
<version>
3.9.1
</version>
<!-- Stay on 1.7.1 to support Java 6 -->
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.apache.tika
</groupId>
<artifactId>
tika-core
</artifactId>
<version>
1.17
</version>
</dependency>
<dependency>
<groupId>
org.apache.tika
</groupId>
<artifactId>
tika-parsers
</artifactId>
<version>
1.17
</version>
</dependency>
<dependency>
<groupId>
com.javaaxp
</groupId>
<artifactId>
java-axp
</artifactId>
<version>
1.0
</version>
<scope>
system
</scope>
<systemPath>
${basedir}/lib/java-axp-1.0-SNAPSHOT.jar
</systemPath>
</dependency>
</dependencies>
<build>
<finalName>
atms-api
</finalName>
...
...
atms-api/src/test/java/pwc/taxtech/atms/common/PDFTest.java
0 → 100644
View file @
52ba18e3
package
pwc
.
taxtech
.
atms
.
common
;
import
org.apache.tika.metadata.Metadata
;
import
org.apache.tika.parser.ParseContext
;
import
org.apache.tika.parser.pdf.PDFParser
;
import
org.apache.tika.parser.pdf.PDFParserConfig
;
import
org.apache.tika.sax.BodyContentHandler
;
import
java.io.File
;
import
java.io.FileInputStream
;
/**
 * Scratch driver that extracts the text of a local PDF with Apache Tika's
 * {@link PDFParser} and prints it to standard output.
 */
public class PDFTest {

    /** Sample document to parse; point this at a PDF that exists on the local machine. */
    private static final String PDF_PATH = "C:\\woo\\test.pdf";

    /**
     * Parses {@link #PDF_PATH} and prints the extracted body text.
     * Any failure is reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // try-with-resources: the file handle is released even when parsing throws.
        try (FileInputStream inputstream = new FileInputStream(new File(PDF_PATH))) {
            BodyContentHandler handler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            ParseContext pcontext = new ParseContext();

            PDFParser pdfparser = new PDFParser();
            pdfparser.getPDFParserConfig().setSortByPosition(true);
            // See the underlying implementation for the other layout-tuning options, e.g.:
            // pdfparser.getPDFParserConfig().setEnableAutoSpace(false);
            // pdfparser.getPDFParserConfig().setAverageCharTolerance(1f);
            // pdfparser.getPDFParserConfig().setSpacingTolerance(20f);

            pdfparser.parse(inputstream, handler, metadata, pcontext);
            System.out.println(handler.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
atms-api/src/test/java/pwc/taxtech/atms/common/POITest.java
0 → 100644
View file @
52ba18e3
package
pwc
.
taxtech
.
atms
.
common
;
import
org.apache.poi.openxml4j.exceptions.InvalidFormatException
;
import
org.apache.poi.ss.formula.eval.ValueEval
;
import
org.apache.poi.ss.formula.functions.FreeRefFunction
;
import
org.apache.poi.ss.formula.udf.AggregatingUDFFinder
;
import
org.apache.poi.ss.formula.udf.DefaultUDFFinder
;
import
org.apache.poi.ss.formula.udf.UDFFinder
;
import
org.apache.poi.ss.usermodel.*
;
import
org.apache.poi.ss.util.CellReference
;
import
java.io.*
;
/**
 * Scratch driver for Apache POI: registers {@link TmpFunction} as a user-defined
 * function, changes one input cell, re-evaluates every formula and then replaces
 * each formula cell with its computed numeric value before saving the workbook
 * back to the same file.
 */
public class POITest {

    /** Workbook that is read, modified and overwritten in place. */
    private static final String WORKBOOK_PATH = "C:\\source\\test - Copy.xlsx";

    /**
     * Runs the load / evaluate / flatten / save round trip on {@link #WORKBOOK_PATH}.
     * I/O and format errors are reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File workbookFile = new File(WORKBOOK_PATH);
        try (Workbook workbook = loadWorkbook(workbookFile)) {
            registerTmpFunction(workbook);
            FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();

            setInputValue(workbook.getSheetAt(0));
            evaluator.evaluateAll();
            replaceFormulasWithValues(workbook);

            // The input stream is already closed here, so overwriting the source file is safe.
            try (FileOutputStream excelFileOutPutStream = new FileOutputStream(workbookFile)) {
                workbook.write(excelFileOutPutStream);
            }
        } catch (InvalidFormatException | IOException e) {
            e.printStackTrace();
        }
    }

    /** Reads the workbook from disk, closing the input stream as soon as loading finishes. */
    private static Workbook loadWorkbook(File workbookFile) throws IOException, InvalidFormatException {
        try (FileInputStream fis = new FileInputStream(workbookFile)) {
            return WorkbookFactory.create(fis);
        }
    }

    /** Makes the custom "TmpFunction" formula function available to the workbook's evaluator. */
    private static void registerTmpFunction(Workbook workbook) {
        String[] functionNames = {"TmpFunction"};
        FreeRefFunction[] functionImpls = {new TmpFunction()};
        UDFFinder udfs = new DefaultUDFFinder(functionNames, functionImpls);
        UDFFinder udfToolpack = new AggregatingUDFFinder(udfs);
        workbook.addToolPack(udfToolpack);
    }

    /** Writes the numeric test input 15 into the second row, first column, creating the cell if absent. */
    private static void setInputValue(Sheet sheet) {
        Row row = sheet.getRow(1);
        if (row == null) {
            row = sheet.createRow(1);
        }
        Cell cell = row.getCell(0);
        if (cell == null) {
            cell = row.createCell(0);
        }
        // Switch the type first so an existing formula in the cell is replaced, not just its cached result.
        cell.setCellType(CellType.NUMERIC);
        cell.setCellValue(15);
    }

    /** Walks every sheet and turns each formula cell into a plain numeric cell holding its evaluated value. */
    private static void replaceFormulasWithValues(Workbook workbook) {
        int sheetNum = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetNum; i++) {
            Sheet tmpSheet = workbook.getSheetAt(i);
            for (int r = tmpSheet.getFirstRowNum(); r <= tmpSheet.getLastRowNum(); r++) {
                Row row = tmpSheet.getRow(r);
                if (null == row) {
                    continue;
                }
                // getFirstCellNum() is -1 for a row without cells, and getLastCellNum() is
                // already "last index + 1", so clamp the start and use an exclusive bound.
                int firstCell = Math.max(row.getFirstCellNum(), 0);
                int endCell = row.getLastCellNum();
                for (int c = firstCell; c < endCell; c++) {
                    System.out.println("row :" + r + " cell: " + c);
                    Cell tmp = row.getCell(c);
                    if (null != tmp && tmp.getCellTypeEnum().equals(CellType.FORMULA)) {
                        // Capture the evaluated result before the cell type (and formula) is changed.
                        double evaluated = tmp.getNumericCellValue();
                        tmp.setCellType(CellType.NUMERIC);
                        tmp.setCellValue(evaluated);
                    }
                }
            }
        }
    }
}
atms-api/src/test/java/pwc/taxtech/atms/common/TikaTest.java
0 → 100644
View file @
52ba18e3
package
pwc
.
taxtech
.
atms
.
common
;
import
org.apache.tika.metadata.Metadata
;
import
org.apache.tika.parser.ParseContext
;
import
org.apache.tika.sax.BodyContentHandler
;
import
org.xml.sax.ContentHandler
;
import
java.io.*
;
/**
 * Scratch driver that runs {@link TmpXPSParser} over a local XPS document and
 * prints the extracted text to standard output.
 */
public class TikaTest {

    /** Sample XPS document; point this at a file that exists on the local machine. */
    private static final String XPS_PATH = "C:\\woo\\海关稽核结果.xps";

    /**
     * Parses {@link #XPS_PATH} with the custom XPS parser and prints the body text.
     * Any failure is reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File xpsFile = new File(XPS_PATH);
        // try-with-resources guarantees the file handle is released even when the
        // parser throws; a second close() on an already-closed stream is harmless.
        try (InputStream inputStream = new FileInputStream(xpsFile)) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new TmpXPSParser().parse(inputStream, handler, metadata, new ParseContext());

            String content = handler.toString();
            System.out.println(content);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
atms-api/src/test/java/pwc/taxtech/atms/common/TmpFunction.java
0 → 100644
View file @
52ba18e3
package
pwc
.
taxtech
.
atms
.
common
;
import
org.apache.poi.ss.formula.OperationEvaluationContext
;
import
org.apache.poi.ss.formula.eval.*
;
import
org.apache.poi.ss.formula.functions.FreeRefFunction
;
import
org.apache.poi.ss.util.CellReference
;
public
class
TmpFunction
implements
FreeRefFunction
{
@Override
public
ValueEval
evaluate
(
ValueEval
[]
valueEvals
,
OperationEvaluationContext
operationEvaluationContext
)
{
// if (valueEvals.length != 3) {
// return ErrorEval.VALUE_INVALID;
// }
try
{
ValueEval
v1
=
OperandResolver
.
getSingleValue
(
valueEvals
[
0
],
operationEvaluationContext
.
getRowIndex
(),
operationEvaluationContext
.
getColumnIndex
());
ValueEval
v2
=
OperandResolver
.
getSingleValue
(
valueEvals
[
1
],
operationEvaluationContext
.
getRowIndex
(),
operationEvaluationContext
.
getColumnIndex
());
String
val1
=
OperandResolver
.
coerceValueToString
(
v1
);
int
val2
=
OperandResolver
.
coerceValueToInt
(
v2
);
CellReference
reference
=
new
CellReference
(
val1
);
return
new
NumberEval
(
operationEvaluationContext
.
getWorkbook
().
getSheet
(
0
)
.
getCell
(
reference
.
getRow
(),
reference
.
getCol
()).
getNumericCellValue
()
+
val2
);
}
catch
(
EvaluationException
e
)
{
e
.
printStackTrace
();
}
return
null
;
}
}
atms-api/src/test/java/pwc/taxtech/atms/common/TmpXPSParser.java
0 → 100644
View file @
52ba18e3
package
pwc
.
taxtech
.
atms
.
common
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Set
;
import
javaaxp.core.service.IXPSAccess
;
import
javaaxp.core.service.IXPSPageAccess
;
import
javaaxp.core.service.XPSError
;
import
javaaxp.core.service.impl.XPSServiceImpl
;
import
javaaxp.core.service.impl.document.jaxb.CTCanvas
;
import
javaaxp.core.service.impl.document.jaxb.CTGlyphs
;
import
javaaxp.core.service.impl.document.jaxb.CTPath
;
import
javaaxp.core.service.model.document.page.IFixedPage
;
import
org.apache.tika.exception.TikaException
;
import
org.apache.tika.metadata.Metadata
;
import
org.apache.tika.mime.MediaType
;
import
org.apache.tika.parser.ParseContext
;
import
org.apache.tika.parser.Parser
;
import
org.apache.tika.sax.XHTMLContentHandler
;
import
org.xml.sax.ContentHandler
;
import
org.xml.sax.SAXException
;
public
class
TmpXPSParser
implements
Parser
{
private
double
currentXPosition
=
0
;
/**
*
*/
private
static
final
long
serialVersionUID
=
-
3528366722867144747L
;
private
static
final
Set
<
MediaType
>
SUPPORTED_TYPES
=
Collections
.
singleton
(
MediaType
.
application
(
"vnd.ms-xpsdocument"
));
private
static
final
String
XPS_MIME_TYPE
=
"application/vnd.ms-xpsdocument"
;
private
XHTMLContentHandler
fileXHTML
;
public
Set
<
MediaType
>
getSupportedTypes
(
ParseContext
context
)
{
return
SUPPORTED_TYPES
;
}
public
void
parse
(
InputStream
stream
,
ContentHandler
handler
,
Metadata
metadata
,
ParseContext
context
)
throws
IOException
,
SAXException
,
TikaException
{
metadata
.
set
(
Metadata
.
CONTENT_TYPE
,
XPS_MIME_TYPE
);
fileXHTML
=
new
XHTMLContentHandler
(
handler
,
metadata
);
try
{
parseXPS
(
stream
);
}
catch
(
XPSError
e
)
{
throw
new
IOException
(
e
);
}
stream
.
close
();
}
private
void
parseXPS
(
InputStream
inputStream
)
throws
XPSError
,
SAXException
{
IXPSAccess
xpsAccess
=
XPSServiceImpl
.
getInstance
().
getXPSAccess
(
inputStream
);
xhtmlStartDocument
();
int
firstDocNum
=
xpsAccess
.
getDocumentAccess
().
getFirstDocNum
();
int
lastDocNum
=
xpsAccess
.
getDocumentAccess
().
getLastDocNum
();
for
(
int
i
=
firstDocNum
;
i
<=
lastDocNum
;
i
++)
{
IXPSPageAccess
xpsPageAccess
=
xpsAccess
.
getPageAccess
(
i
);
int
firstPageNum
=
xpsPageAccess
.
getFirstPageNum
();
int
lastPageNum
=
xpsPageAccess
.
getLastPageNum
();
for
(
int
j
=
firstPageNum
;
j
<=
lastPageNum
;
j
++)
{
IFixedPage
fixedPage
=
xpsPageAccess
.
getPage
(
j
);
parseObjs
(
fixedPage
.
getPathOrGlyphsOrCanvas
());
}
}
xhtmlEndDocument
();
}
private
void
parseObjs
(
List
<
Object
>
objs
)
throws
XPSError
,
SAXException
{
for
(
Object
o
:
objs
)
parseObj
(
o
);
}
private
void
parseObj
(
Object
xpsObj
)
throws
XPSError
,
SAXException
{
if
(
xpsObj
instanceof
CTCanvas
)
{
CTCanvas
c
=
(
CTCanvas
)
xpsObj
;
xhtmlStartCanvas
();
parseObjs
(
c
.
getPathOrGlyphsOrCanvas
());
xhtmlEndCanvas
();
}
else
if
(
xpsObj
instanceof
CTGlyphs
)
{
CTGlyphs
c
=
(
CTGlyphs
)
xpsObj
;
if
(
c
.
getOriginX
()
<
currentXPosition
)
{
fileXHTML
.
startElement
(
"div"
);
fileXHTML
.
characters
(
" "
);
fileXHTML
.
endElement
(
"div"
);
}
String
text
=
c
.
getUnicodeString
();
xhtmlParagraph
(
text
);
currentXPosition
=
c
.
getOriginX
();
}
else
if
(
xpsObj
instanceof
CTPath
)
{
}
else
{
System
.
out
.
println
(
"Unhandled type : "
+
xpsObj
.
getClass
().
getCanonicalName
());
}
}
private
void
xhtmlStartDocument
()
throws
SAXException
{
fileXHTML
.
startDocument
();
}
private
void
xhtmlEndDocument
()
throws
SAXException
{
fileXHTML
.
endDocument
();
}
private
void
xhtmlStartCanvas
()
throws
SAXException
{
fileXHTML
.
startElement
(
"div"
);
}
private
void
xhtmlEndCanvas
()
throws
SAXException
{
fileXHTML
.
endElement
(
"div"
);
}
private
void
xhtmlParagraph
(
String
text
)
throws
SAXException
{
fileXHTML
.
startElement
(
"span"
);
fileXHTML
.
characters
(
text
);
fileXHTML
.
endElement
(
"span"
);
}
/**
* @deprecated This method will be removed in Apache Tika 1.0.
*/
public
void
parse
(
InputStream
stream
,
ContentHandler
handler
,
Metadata
metadata
)
throws
IOException
,
SAXException
,
TikaException
{
parse
(
stream
,
handler
,
metadata
,
new
ParseContext
());
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment