Commit ccd8c4b7 by mahaisong

fix:完毕

parent 96dc661a
This source diff could not be displayed because it is too large. You can view the blob instead.
<?xml version="1.0" encoding="utf-8" ?>
<log4net>
  <!--
    log4net configuration.
    The root logger fans out to three date-rolled files split by level
    (err / info / debug); the "Performance" logger writes only to its own file.
    Every appender writes UTF-8, appends to an existing file and rolls by date
    using the yyyyMMdd pattern while keeping the ".log" extension.
    NOTE(review): WARN events match none of the root appenders' filters and are
    therefore not written anywhere; confirm that this is intended.
  -->

  <!-- Error log: ERROR and FATAL events. -->
  <appender name="errorAppender" type="log4net.Appender.RollingFileAppender">
    <!--
      A LevelMatchFilter on ERROR alone let FATAL events fall through to the
      DenyAllFilter, so the most severe events were never persisted. The range
      filter accepts ERROR through FATAL and denies everything else.
    -->
    <filter type="log4net.Filter.LevelRangeFilter">
      <levelMin value="ERROR" />
      <levelMax value="FATAL" />
    </filter>
    <!-- Kept for symmetry with the other appenders; rejects anything not accepted above. -->
    <filter type="log4net.Filter.DenyAllFilter" />
    <file value="Logs\err.log" />
    <encoding value="utf-8" />
    <preserveLogFileNameExtension value="true" />
    <appendToFile value="true" />
    <rollingStyle value="Date" />
    <datePattern value="yyyyMMdd" />
    <layout type="log4net.Layout.PatternLayout">
      <conversionPattern value="%date [%thread] %-5level %logger - %message%newline" />
    </layout>
  </appender>

  <!-- Info log: INFO events only. -->
  <appender name="infoAppender" type="log4net.Appender.RollingFileAppender">
    <filter type="log4net.Filter.LevelMatchFilter">
      <levelToMatch value="INFO" />
    </filter>
    <filter type="log4net.Filter.DenyAllFilter" />
    <file value="Logs\info.log" />
    <encoding value="utf-8" />
    <preserveLogFileNameExtension value="true" />
    <appendToFile value="true" />
    <rollingStyle value="Date" />
    <datePattern value="yyyyMMdd" />
    <layout type="log4net.Layout.PatternLayout">
      <conversionPattern value="%date [%thread] %-5level %logger - %message%newline" />
    </layout>
  </appender>

  <!-- Debug log: DEBUG events only. -->
  <appender name="debugAppender" type="log4net.Appender.RollingFileAppender">
    <filter type="log4net.Filter.LevelMatchFilter">
      <levelToMatch value="DEBUG" />
    </filter>
    <filter type="log4net.Filter.DenyAllFilter" />
    <file value="Logs\debug.log" />
    <encoding value="utf-8" />
    <preserveLogFileNameExtension value="true" />
    <appendToFile value="true" />
    <rollingStyle value="Date" />
    <datePattern value="yyyyMMdd" />
    <layout type="log4net.Layout.PatternLayout">
      <conversionPattern value="%date [%thread] %-5level %logger - %message%newline" />
    </layout>
  </appender>

  <!-- Performance log: INFO events of the "Performance" logger; the layout omits thread and level. -->
  <appender name="perfAppender" type="log4net.Appender.RollingFileAppender">
    <filter type="log4net.Filter.LevelMatchFilter">
      <levelToMatch value="INFO" />
    </filter>
    <filter type="log4net.Filter.DenyAllFilter" />
    <file value="Logs\perf.log" />
    <encoding value="utf-8" />
    <preserveLogFileNameExtension value="true" />
    <appendToFile value="true" />
    <rollingStyle value="Date" />
    <datePattern value="yyyyMMdd" />
    <layout type="log4net.Layout.PatternLayout">
      <conversionPattern value="%date %logger - %message%newline" />
    </layout>
  </appender>

  <!-- The root logger passes every level on; each appender's filters decide what is written. -->
  <root>
    <level value="ALL" />
    <appender-ref ref="errorAppender" />
    <appender-ref ref="infoAppender" />
    <appender-ref ref="debugAppender" />
  </root>

  <!-- additivity="false" keeps Performance events out of the root appenders. -->
  <logger name="Performance" additivity="false">
    <level value="ALL" />
    <appender-ref ref="perfAppender" />
  </logger>
</log4net>
\ No newline at end of file
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
  <!-- Application configuration: section handlers, the Elasticsearch connection and logging bootstrap. -->
  <configSections>
    <section name="log4net" type="log4net.Config.Log4NetConfigurationSectionHandler, log4net" />
    <sectionGroup name="common">
      <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
    </sectionGroup>
  </configSections>

  <connectionStrings>
    <add name="ESDatabase" connectionString="host=mech.palaspom.com|tank.palaspom.com;port=19235;defaultIndex=palas" />
  </connectionStrings>

  <common>
    <logging>
      <factoryAdapter type="Common.Logging.Log4Net.Log4NetLoggerFactoryAdapter, Common.Logging.Log4Net208">
        <!--
          configType values:
            FILE
            FILE-WATCH  the log4net node lives in a separate, standalone file.
            INLINE      the log4net node lives inside this app.config.
            EXTERNAL
        -->
        <arg key="configType" value="FILE-WATCH" />
        <arg key="configFile" value="~/Config/log4net.config" />
      </factoryAdapter>
    </logging>
  </common>

  <appSettings />

  <startup>
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
  </startup>
</configuration>
\ No newline at end of file
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
  <!-- Application configuration: section handlers, the Elasticsearch connection and logging bootstrap. -->
  <configSections>
    <section name="log4net" type="log4net.Config.Log4NetConfigurationSectionHandler, log4net" />
    <sectionGroup name="common">
      <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
    </sectionGroup>
  </configSections>

  <connectionStrings>
    <add name="ESDatabase" connectionString="host=mech.palaspom.com|tank.palaspom.com;port=19235;defaultIndex=palas" />
  </connectionStrings>

  <common>
    <logging>
      <factoryAdapter type="Common.Logging.Log4Net.Log4NetLoggerFactoryAdapter, Common.Logging.Log4Net208">
        <!--
          configType values:
            FILE
            FILE-WATCH  the log4net node lives in a separate, standalone file.
            INLINE      the log4net node lives inside this app.config.
            EXTERNAL
        -->
        <arg key="configType" value="FILE-WATCH" />
        <arg key="configFile" value="~/Config/log4net.config" />
      </factoryAdapter>
    </logging>
  </common>

  <appSettings />

  <startup>
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
  </startup>
</configuration>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
  <!-- Application manifest: requests the asInvoker execution level with uiAccess disabled. -->
  <assemblyIdentity version="1.0.0.0" name="MyApplication.app" />
  <trustInfo xmlns="urn:schemas-microsoft-com:asm.v2">
    <security>
      <requestedPrivileges xmlns="urn:schemas-microsoft-com:asm.v3">
        <requestedExecutionLevel level="asInvoker" uiAccess="false" />
      </requestedPrivileges>
    </security>
  </trustInfo>
</assembly>
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
2018-05-07 13:34:22,122 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:开始执行:C:\Users\admin\Desktop\hive_export.csv
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:1.1开始: 读取并统计行数。
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:1.2初始化分块:单块10240B,文件共分为44979块。
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:1.3处理中:并行分块处理此文件…………
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:1.4处理完毕:
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: 处理行数:共8224718行!与文件行数相同,标题列1行不算。
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: 处理完成用时:6270毫秒!
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: 统计行数后:共32013行!
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:2.1开始: 并行调用ES,返回内容。
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:2.2初始化线程数:
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: 共360个线程并读取执行!
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: CPU单核心并行读取ES线程数:90个线程;CPU核心数:4
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:2.3处理中:并行调用ES………………注意本机CPU,适当增减单个CPU线程数
2018-05-07 13:34:22,123 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:2.4处理完毕:
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: ES全部处理完成用时:693404毫秒!
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: ES中找到的条数有:24719条
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: ES中找不到的条数有:7294条
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:3.1开始:写入JSON文件:
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:3.2初始化文件数量: 1个。
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:3.3并行写入文件中。
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:3.4写入文件完毕:
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2: 写入文件完成用时:1195毫秒!
2018-05-07 13:34:22,124 [1] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_2:完毕:程序执行完成。
2018-05-07 13:51:20,119 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:开始执行(2018-05-07 13:37:57.043):C:\Users\admin\Desktop\hive_export.csv
2018-05-07 13:51:20,119 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:1.1开始: 读取并统计行数。
2018-05-07 13:51:20,119 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:1.2初始化分块:单块10240B,文件共分为44979块。
2018-05-07 13:51:20,119 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:1.3处理中:并行分块处理此文件…………
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:1.4处理完毕:
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: 处理行数:共8224718行!与文件行数相同,标题列1行不算。
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: 处理完成用时:6953毫秒!
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: 统计行数后:共32013行!
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:2.1开始(2018-05-07 13:38:04.024): 并行调用ES,返回内容。
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:2.2初始化线程数:
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: 共240个线程并读取执行!
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: CPU单核心并行读取ES线程数:60个线程;CPU核心数:4
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:2.3处理中:并行调用ES………………注意本机CPU,适当增减单个CPU线程数
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:2.4处理完毕:
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: ES全部处理完成用时:769390毫秒!
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: ES中找到的条数有:24797条
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: ES中找不到的条数有:7216条
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:3.1开始(2018-05-07 13:50:54.239):写入JSON文件:
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:3.2初始化文件数量: 1个。
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:3.3并行写入文件中。
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:3.4写入文件完毕:
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2: 写入文件完成用时:989毫秒!
2018-05-07 13:51:20,120 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__16_2:完毕:程序执行完成。总用时:771208.3834毫秒
2018-05-07 14:52:29,956 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:开始执行(2018-05-07 14:38:54.460):C:\Users\admin\Desktop\hive_export.csv
2018-05-07 14:52:29,956 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:1.1开始: 读取并统计行数。
2018-05-07 14:52:29,956 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:1.2初始化分块:单块10240B,文件共分为44979块。
2018-05-07 14:52:29,956 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:1.3处理中:并行分块处理此文件…………
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:1.4处理完毕:
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: 处理行数:共8224718行!与文件行数相同,标题列1行不算。
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: 处理完成用时:7990毫秒!
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: 统计行数后:共32013行!
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:2.1开始(2018-05-07 14:39:02.481): 并行调用ES,返回内容。
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:2.2初始化线程数:
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: 共120个线程并读取执行!
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: CPU单核心并行读取ES线程数:30个线程;CPU核心数:4
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:2.3处理中:并行调用ES………………注意本机CPU,适当增减单个CPU线程数
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:2.4处理完毕:
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: ES全部处理完成用时:775546毫秒!
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: ES中找到的条数有:25042条
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: ES中找不到的条数有:6971条
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:3.1开始(2018-05-07 14:51:58.357):写入JSON文件:
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:3.2初始化文件数量: 3个。
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:3.3并行写入文件中。
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:3.4写入文件完毕:
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3: 写入文件完成用时:396毫秒!
2018-05-07 14:52:29,957 [9] INFO loginfo - CsvCount_ES.Form1.<ES_Complete>b__15_3:完毕:程序执行完成。总用时:776275.4973毫秒
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
//要求通过newsid=itemid(ES中的)一一对应。
//要求通过newsid=itemid(ES中的)一一对应。
//要求取得以下字段,并返回统一包装的JSON结构。(AnalyzeData只有1个,里面的数据是common+reader(两者其中有值,值也是一样的,任取其一。可能存在相互补充的情况,合并为1个。))
[
{
"ItemID": "文章ID",
"ClickCount": "点击次数",
"Url": "文章Url",
"CleanTitle": "标题",
"CleanText": "正文",
"PubDate": "页面显示发布时间",
"MediaName": "21世纪经济报道",
"DuplicationID": "相似转载ID(早先入库的相似文章ID,第一篇该字段为空,其他均为第一篇的ID)",
"AnalyzeData语义分析结果字段": {
"MarketIDs": "品类/市场编号common编号,reader编号:有值任取其一",
"BlockIDs": "主题/板块编号common编号,reader编号:有值任取其一",
"StockIDs": "股票编号common编号,reader编号:有值任取其一",
"IG": ""
}
}
]
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Config\log4net.config
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\OutPut\模板.json
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\CsvCount_ES.exe.config
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\CsvCount_ES.exe
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\CsvCount_ES.pdb
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Core.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Log4Net208.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Elasticsearch.Net.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\log4net.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\MongoDB.Bson.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Nest.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Newtonsoft.Json.dll
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.pdb
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Core.pdb
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Core.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Log4Net208.pdb
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Common.Logging.Log4Net208.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Elasticsearch.Net.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\log4net.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\MongoDB.Bson.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Nest.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\bin\Release\Newtonsoft.Json.xml
D:\smallproject\6.CsvCount_ES\CsvCount_ES\obj\Release\CsvCount_ES.Form1.resources
D:\smallproject\6.CsvCount_ES\CsvCount_ES\obj\Release\CsvCount_ES.Properties.Resources.resources
D:\smallproject\6.CsvCount_ES\CsvCount_ES\obj\Release\CsvCount_ES.csproj.GenerateResource.Cache
D:\smallproject\6.CsvCount_ES\CsvCount_ES\obj\Release\CsvCount_ES.exe
D:\smallproject\6.CsvCount_ES\CsvCount_ES\obj\Release\CsvCount_ES.pdb
1.建议网络带宽最低1M以上。(可以配置线程数30以上)
1.建议网络带宽最低1M以上。(可以配置线程数30以上)
2.建议CPU最低4核以上。(可以配置线程数为60~100以上)
3.建议原始文件小于1G的,输出1个文件即可。
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment