This commit is contained in:
SunYanCN 2018-10-08 16:21:20 +08:00
parent dbf49b6b6c
commit bf15de4119
11 changed files with 5103 additions and 0 deletions

11
HOSVD.py Normal file
View File

@ -0,0 +1,11 @@
def hosvd(X):
    """Compute a higher-order SVD (HOSVD) of the tensor ``X``.

    For every mode the tensor is unfolded with ``base.unfold`` and the left
    singular vectors of that unfolding become the factor matrix of the mode.
    The core tensor is built up iteratively by multiplying the running result
    with the transpose of each factor matrix along its mode.

    Args:
        X: Tensor object exposing an ``ndims()`` method and accepted by
            ``base.unfold`` / ``base.tensor_times_mat``.
            NOTE(review): presumably a tensor type from the library that
            provides ``base`` -- confirm against the caller.

    Returns:
        A ``(U, core)`` tuple: ``U`` is a list with one factor matrix per
        mode and ``core`` is the core tensor.
    """
    order = X.ndims()
    factors = [None] * order
    core = X
    for mode in range(order):
        unfolding = base.unfold(X, mode)  # mode-n unfolding of the input
        left_vectors = np.linalg.svd(unfolding)[0]  # left singular vectors
        # Fold this mode's factor into the running core tensor.
        core = base.tensor_times_mat(core, left_vectors.T, mode)
        factors[mode] = left_vectors
    return factors, core

17
Makefile Normal file
View File

@ -0,0 +1,17 @@
# The view-* recipes detach the viewer with "& disown".  disown is a bash
# builtin, so the recipes must not run under a plain POSIX /bin/sh.
SHELL := /bin/bash

# These targets name actions, not files on disk.
.PHONY: view-xpdf view-okular view-acroread clean

# slides.tex loads xeCJK and uses fontspec font commands, so it must be
# compiled with XeLaTeX (pdflatex cannot process it).  HOSVD.py and the
# figure are included by the slides, so a change to them triggers a rebuild.
#
# You need to run the xelatex command twice if you're having issues
# getting the TOC or the top progress bar to show up in the resulting pdf.
out/slides.pdf: slides.tex HOSVD.py figures/sustech.pdf
	mkdir -p out
	xelatex --output-directory=out slides.tex

# Open the built slides in a PDF viewer without blocking make.
view-xpdf: out/slides.pdf
	xpdf out/slides.pdf & disown

view-okular: out/slides.pdf
	okular out/slides.pdf & disown

view-acroread: out/slides.pdf
	acroread out/slides.pdf & disown

# Remove every build artefact.
clean:
	rm -rf out

19
README.md Normal file
View File

@ -0,0 +1,19 @@
# Beamer Template
这是一个学术性的 LaTeX Beamer 模板，优点是非常简洁，适用于多公式、多算法的应用场景；缺点是不够灵活。你也可以随意使用和修改这个模板，而不用经过我的同意，因为你本来就不需要。
# Usage
首先，你应当从[GitHub](https://github.com/YiZhiXiaoGuLI/Latex-Beamer-Template)上下载该模板，并确保你的电脑（Windows）上安装了 CTeX 中文套装和 TeXstudio 编辑器。不用担心费用，两者均是开源软件。使用 XeLaTeX 编译文档即可。
# Demo
敬请参阅`slides.pdf`.
# Others
这个模板是我参阅的一个南方科技大学的模板,原模板是英文的,我将其修改为中文并加入了常用的算法和表格等内容,在此感谢原作者。
# Me
如果你有任何问题或者有改进的意见，也可以联系我：sunyanqinyin@alinyin.com。

View File

@ -0,0 +1,17 @@
% Colour definitions and Beamer colour assignments.
% NOTE(review): presumably the file loaded by \usecolortheme{sustech} in
% slides.tex -- the file name is not visible here, confirm.
%
% Disabled alternative definition of sustechgreen in the CMYK model:
%\DefineNamedColor{named}{sustechgreen} {cmyk}{0,1,0.65,0.34}
% Named colours, all given in the RGB model.
\DefineNamedColor{named}{sustechdarkblue} {rgb}{0.13,0.28,0.53}
\DefineNamedColor{named}{sustechgreen} {rgb}{0.08,0.65,0.6}
\DefineNamedColor{named}{sustechblue} {rgb}{0.0,0.39,0.62}
\DefineNamedColor{named}{sustechpaleblue} {rgb}{0,0.56,0.83}
\DefineNamedColor{named}{sustechgold} {rgb}{0.87,0.7,0.36}
\DefineNamedColor{named}{darkgreen} {rgb}{0,0.16,0.14}
% The colour assignments below apply in presentation mode only.
\mode<presentation>
% Alerted text: a green/yellow mix.
\setbeamercolor{alerted text}{fg=green!80!yellow}
% The four Beamer palettes: white foreground on the background given per palette.
\setbeamercolor*{palette primary}{bg=sustechdarkblue,fg=white}
\setbeamercolor*{palette secondary}{fg=white,bg=sustechpaleblue}
\setbeamercolor*{palette tertiary}{fg=white,bg=sustechblue}
\setbeamercolor*{palette quaternary}{fg=white,bg=yellow}
% Structural elements are dark blue on white; frame titles are inverted.
\setbeamercolor*{structure}{fg=sustechdarkblue,bg=white}
\setbeamercolor{frametitle}{bg=sustechdarkblue,fg=white}
% Switch back so that anything that follows applies to all modes.
\mode<all>

259
build-daemon.py Normal file
View File

@ -0,0 +1,259 @@
#!/usr/bin/env python
"""Disk And Execution MONitor (Daemon)
Configurable daemon behaviors:
1.) The current working directory set to WORKDIR (by default the directory the script was started from).
2.) The current file creation mode mask set to 0.
3.) Close all open files (1024).
4.) Redirect standard I/O streams to "/dev/null".
A failed call to fork() now raises an exception.
References:
1) Advanced Programming in the Unix Environment: W. Richard Stevens
2) Unix Programming Frequently Asked Questions:
http://www.erlenstar.demon.co.uk/unix/faq_toc.html
"""
__author__ = "Chad J. Schroeder"
__copyright__ = "Copyright (C) 2005 Chad J. Schroeder"
__revision__ = "$Id$"
__version__ = "0.2"
# Standard Python modules.
import os # Miscellaneous OS interfaces.
import time
import sys # System-specific parameters and functions.
# Default daemon parameters.

# File mode creation mask installed by the daemon.
UMASK = 0

# Working directory of the daemon: the directory the script was started from.
WORKDIR = os.getcwd()

# Fallback for the number of file descriptors to close when the process
# has no finite limit on open files.
MAXFD = 1024

# File in which the daemon records its process parameters.
PIDFILE = "build-daemon.pid"

# Target of the standard I/O redirection; "/dev/null" is used when the os
# module does not provide a devnull attribute.
REDIRECT_TO = getattr(os, "devnull", "/dev/null")
def create_daemon():
    """Detach the process from the controlling terminal and daemonize it.

    Uses the classic double fork: the original process and the first child
    both leave through ``os._exit(0)``; only the second child returns from
    this function.  The daemon changes to ``WORKDIR``, installs ``UMASK``,
    closes every inherited file descriptor and redirects stdin, stdout and
    stderr to ``REDIRECT_TO``.

    Returns:
        0, in the daemon (second child) process only.

    Raises:
        Exception: if either ``os.fork()`` call fails; the message has the
            form ``"<strerror> [<errno>]"``.
    """
    try:
        # Fork a child so the parent can exit and hand control back to the
        # shell.  The child gets a new process ID but inherits the parent's
        # process group, so it cannot be a process group leader -- which is
        # what the os.setsid() call below requires.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        # Original process.  Use _exit() rather than exit(): it skips
        # atexit handlers, so stdio buffers are not flushed a second time
        # and temporary files are not removed unexpectedly.  This is the
        # recommended exit for the parent branches of a daemon.
        os._exit(0)

    # First child: become the leader of a new session and of a new process
    # group.  The process now has no controlling terminal.
    os.setsid()

    # SIGHUP is deliberately not ignored before the second fork.  SIGHUP is
    # sent on the death of a process in two cases only: (1) a session leader
    # attached to a terminal dies, which cannot apply because there is no
    # controlling terminal, and (2) a process group becomes orphaned while
    # containing STOPPED processes, which does not apply because the second
    # child is not stopped.  Ignoring the signal would be harmless:
    #
    #   import signal
    #   signal.signal(signal.SIGHUP, signal.SIG_IGN)

    try:
        # Fork again and let the first child exit immediately.  The second
        # child is orphaned, so init reaps it (no zombie), and because it is
        # not a session leader it can never acquire a controlling terminal
        # by opening one later (System V based systems).
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        os._exit(0)  # First child; see the note on _exit() above.

    # Second child: the daemon itself.
    # Run from the configured working directory.
    os.chdir(WORKDIR)
    # Do not inherit the parent's file mode creation mask; give the daemon
    # complete control over the permissions of the files it creates.
    os.umask(UMASK)

    # Close every file descriptor inherited from the parent.  The upper
    # bound is the hard limit on open files of this process; when that
    # limit is unlimited, fall back to the configurable default.
    # (os.sysconf("SC_OPEN_MAX") would be an alternative source.)
    import resource  # Resource usage information.
    maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
    if maxfd == resource.RLIM_INFINITY:
        maxfd = MAXFD

    # closerange() ignores descriptors that were not open to begin with,
    # and avoids a Python-level loop over a possibly very large limit.
    os.closerange(0, maxfd)

    # Redirect the standard I/O descriptors so that stray reads and writes
    # have no side effects.  open() returns the lowest free descriptor,
    # which is 0 (stdin) because everything was closed above.
    os.open(REDIRECT_TO, os.O_RDWR)  # standard input (0)

    # Duplicate standard input to standard output and standard error.
    os.dup2(0, 1)  # standard output (1)
    os.dup2(0, 2)  # standard error (2)

    return 0
def fork():
    """Daemonize the process and record its parameters in ``PIDFILE``.

    After ``create_daemon()`` returns (in the daemon process), an INI-style
    ``[process info]`` section is written to ``PIDFILE``.  It holds the
    return code, process ID, parent process ID, process group ID, session
    ID, user ID, effective user ID, real group ID and effective group ID.
    ``PIDFILE`` is a relative path, so the file is created in the daemon's
    working directory.

    Returns:
        The return code of ``create_daemon()``.
    """
    retcode = create_daemon()
    procParams = """
[process info]
return_code = %s
process_id = %s
parent_process_id = %s
process_group_id = %s
session_id = %s
user_id = %s
effective_user_id = %s
real_group_id = %s
effective_group_id = %s
""" % (retcode, os.getpid(), os.getppid(), os.getpgrp(), os.getsid(0),
       os.getuid(), os.geteuid(), os.getgid(), os.getegid())

    # Close the file deterministically instead of relying on garbage
    # collection, so the content is on disk before a --kill run reads it.
    with open(PIDFILE, "w") as pid_file:
        pid_file.write(procParams + "\n")

    return retcode
def load_pid_file():
    """Read the ``[process info]`` section of ``PIDFILE``.

    Returns:
        A dict mapping each option name to its value.  Values that parse as
        integers are converted to ``int``; all others stay strings.  An
        empty dict is returned when the file or the section is missing.
    """
    try:
        from configparser import ConfigParser  # Python 3
    except ImportError:
        from ConfigParser import ConfigParser  # Python 2

    cfg = ConfigParser()
    cfg.read(PIDFILE)  # read() silently skips files that do not exist
    if not cfg.has_section('process info'):
        return {}

    pinfo = {}
    # raw=True: return the values exactly as written, without interpolation.
    for key, value in cfg.items('process info', raw=True):
        try:
            pinfo[key] = int(value)
        except ValueError:
            pinfo[key] = value
    return pinfo
def run_daemon():
    """Run ``make -s`` once per second until interrupted.

    The loop never ends on its own; a ``KeyboardInterrupt`` stops it and
    prints a short shutdown message.
    """
    try:
        while True:
            os.system('make -s')
            time.sleep(1)
    except KeyboardInterrupt:
        # Parenthesised single-argument form works on Python 2 and 3 alike.
        print('shutting down...')
def main():
    """Command-line entry point.

    Options:
        -f / --fork: daemonize first, then run the build loop in the
            background and exit with the daemon's return code.
        -k / --kill: send SIGTERM to the process recorded in ``PIDFILE`` and
            remove the file.

    Without options the build loop runs in the foreground.
    """
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--fork", action="store_true", dest="fork",
                      default=False, help="launch daemon in background")
    parser.add_option("-k", "--kill", action="store_true", dest="kill",
                      default=False, help="terminate ")
    (options, args) = parser.parse_args()

    if options.kill:
        import signal
        # Without a pid file there is nothing to terminate or clean up.
        if not os.path.exists(PIDFILE):
            print('pid file %s not found' % PIDFILE)
            return
        pids = load_pid_file()
        pid = pids.get('process_id')
        if isinstance(pid, int):
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError:
                print('pid %s doesnt exist' % pid)
        else:
            print('no valid process_id in %s' % PIDFILE)
        os.unlink(PIDFILE)
        return

    if options.fork:
        retcode = fork()
        run_daemon()
        sys.exit(retcode)

    run_daemon()


if __name__ == "__main__":
    main()

671
figures/loss.pdf Normal file

File diff suppressed because one or more lines are too long

BIN
figures/sustech.pdf Normal file

Binary file not shown.

BIN
out/slides.pdf Normal file

Binary file not shown.

BIN
slides.pdf Normal file

Binary file not shown.

314
slides.tex Normal file
View File

@ -0,0 +1,314 @@
\documentclass[10pt,aspectratio=43,mathserif,table]{beamer}
% Beamer document class: 10pt base font, 4:3 aspect ratio (aspectratio=43),
% serif math fonts, and the "table" option passed on for coloured tables.
% Run non-interactively: errors go to the log instead of stopping for input.
\batchmode
\usepackage{graphicx}
\usepackage{animate}
\usepackage{hyperref}
% Load the remaining packages used by the slides.
\usepackage{amsmath,bm,amsfonts,amssymb,enumerate,epsfig,bbm,calc,color,ifthen,capt-of,multimedia,hyperref}
\usepackage{xeCJK} % CJK (Chinese) text support; requires XeLaTeX
\setCJKmainfont{SimHei} % main CJK font: SimHei (a Hei typeface); the original note also names Microsoft YaHei
\usetheme{Berlin} % presentation theme
\usecolortheme{sustech} % colour theme
\usepackage[ruled,linesnumbered]{algorithm2e}
\usepackage{fancybox}
\usepackage{xcolor}
\usepackage{times}
\usepackage{listings}
\usepackage{booktabs}
\usepackage{colortbl}
\newcommand{\Console}{Console}
% Default appearance of source-code listings.  The colours mygreen and
% mymauve referenced here are defined further down in this preamble.
\lstset{ %
backgroundcolor=\color{white}, % choose the background color
basicstyle=\footnotesize\rmfamily, % size of fonts used for the code
columns=fullflexible,
breaklines=true, % automatic line breaking only at whitespace
captionpos=b, % sets the caption-position to bottom
tabsize=4,
commentstyle=\color{mygreen}, % comment style
escapeinside={\%*}{*)}, % if you want to add LaTeX within your code
keywordstyle=\color{blue}, % keyword style
stringstyle=\color{mymauve}\ttfamily, % string literal style
numbers=left,
% frame=single,
rulesepcolor=\color{red!20!green!20!blue!20},
% identifierstyle=\color{red},
language=c
}
% Latin sans-serif and main fonts.
\setsansfont{Microsoft YaHei}
\setmainfont{Microsoft YaHei}
% Custom colours used by the listings and the slides.
\definecolor{mygreen}{rgb}{0,0.6,0}
\definecolor{mymauve}{rgb}{0.58,0,0.82}
\definecolor{mygray}{gray}{.9}
\definecolor{mypink}{rgb}{.99,.91,.95}
\definecolor{mycyan}{cmyk}{.3,0,0,0}
% Title, author, institute, date
\title{Reinventing the Wheel: Publishing High-quality Slides}
\subtitle{\fontsize{9pt}{14pt}\textbf{利用公共网关的SMS生态系统的安全性描述}}
\author{答辩人: 李易峰 \newline \newline 指导老师: 吴亦凡教授}
\institute{\fontsize{8pt}{14pt}中北大学英雄与联盟工程学院}
\date{\today}
% Institute logo (currently disabled)
%\pgfdeclareimage[height=0.5cm]{sustech-logo}{sustech-logo.pdf}
%\logo{\pgfuseimage{sustech-logo}\hspace*{0.3cm}}
% At the start of every section, insert a frame showing the table of
% contents with the current section highlighted.
\AtBeginSection[]
{
\begin{frame}<beamer>
\frametitle{\textbf{目录}}
\tableofcontents[currentsection]
\end{frame}
}
% Default overlay specification: reveal items one step at a time.
\beamerdefaultoverlayspecification{<+->}
% -----------------------------------------------------------------------------
\begin{document}
% -----------------------------------------------------------------------------
\frame{\titlepage}
\section[目录]{} %目录
\begin{frame}{目录}
\tableofcontents
\end{frame}
% -----------------------------------------------------------------------------
\section{引言} %引言
\subsection{研究背景}
\begin{frame}{研究背景}
\begin{columns}[T] % align columns
\begin{column}<0->{.40\textwidth}
\begin{figure}[thpb]
\centering
\resizebox{1\linewidth}{!}{
\includegraphics{figures/sustech.pdf}
}
%\includegraphics[scale=1.0]{figurefile}
\caption{SUSTech Campus}
\label{fig:campus}
\end{figure}
\end{column}%
\hfill%
\begin{column}<0->{.65\textwidth}
\begin{itemize}
\item<1-> 短信息(SMS)成为现代通讯的重要组成部分
\begin{itemize}
\item<1-> 很多组织或网站使用短信息作为身份验证的辅助通道
\end{itemize}
\item<2-> 现代短消息的发送,在抵达终端之前不接触蜂窝网络
\begin{itemize}
\item<2-> 短信息(SMS)成为现代通讯的重要组成部分
\end{itemize}
\end{itemize}
\end{column}%
\end{columns}
\end{frame}
\subsection{主要工作}
\begin{frame}{主要工作}
完成这项工作需要如下步骤
\begin{block}{具体步骤}
\begin{itemize}
\item<0-> 对SMS数据进行迄今为止最大的挖掘分析
\item<0-> 评估良性短消息服务的安全态势
\item<0-> 刻画通过SMS网关进行的恶意行为
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\frametitle{OTT服务}
\begin{figure}[!t]
\centering
\includegraphics[width=2in]{figures/sustech.pdf}
\caption{OTT服务}
\label{figure3_OTT}
\end{figure}
\begin{center}
OTT服务支持在数据网络上提供短信和语音等第三方服务。\\
OTT可以使用云服务来存储和同步SMS到用户的其他设备。
\end{center}
\end{frame}
\section{词表示模型} %自我介绍
\begin{frame}{词表示}
在NLP任务中，可以利用各种词表示模型，将“词”这种符号信息表示成数学上的向量形式。将语义信息表示成稠密、低维的实值向量，这样就可以用计算向量之间相似度的方法（如余弦相似度）来计算语义的相似度。词的向量表示可以作为各种深度学习模型的输入来使用。
\begin{block}{词表示模型分类}
直接表示模型
\begin{itemize}
\item<0-> One-Hot Representation
\end{itemize}
分布式表示模型
\begin{itemize}
\item<0-> 计数模型(基于共现矩阵)
\item<0-> 预测模型(基于神经网络)
\end{itemize}
\end{block}
\end{frame}
\section{直接表示模型}
\begin{frame}{One-Hot Representation}
最简单直接的词表示是One-Hot Representation。考虑一个词表$ \mathbb V $，里面的每一个词$ w_i $都有一个编号$ i\in \{1,\dots,n\} $，那么词$ w_i $的one-hot表示就是一个维度为$ n $的向量，其中第$ i $个元素值非零，其余元素全为0。例如：
\[ w_2=[0,1,0,...,0]^\top \]
\[ w_3=[0,0,1,...,0]^\top \]
\begin{block}{缺点}
\begin{itemize}
\item<0-> 彼此正交，不能反映词间的语义关系
\item<0-> 稀疏表示,维度很高,和词典大小成正比
\end{itemize}
\end{block}
\begin{center}
\textcolor{mymauve}{仅仅是为了区分词,不包含语义信息,语义信息应该从上下文中挖掘}
\end{center}
\end{frame}
\section{研究方法与数据集特征}
\begin{frame}{研究方法与数据集特征}
\begin{columns}[c] % align columns
\begin{column}<0->{.5\textwidth}
\vspace*{1cm}
\begin{itemize}
\item 使用Scrapy框架爬取公共网关
\end{itemize}
\begin{itemize}
\item 收集8个公共短信网关在14个月的数据
\end{itemize}
\begin{itemize}
\item 共抓取386,327条数据
\end{itemize}
\end{column}%
\hfill%
\begin{column}<0->{.40\textwidth}
\begin{table}
\caption{公共网关抓取的信息数}
\footnotesize
\rowcolors{1}{mygray}{white}
\begin{tabular}{|c|c|}
\hline
\textbf{Site} & \textbf{Messages}\\
\hline
receivesmsonline.net &81313\\
\hline
receive-sms-online.info &69389\\
\hline
receive-sms-now.com &63797\\
\hline
hs3x.com &55499\\
\hline
receivesmsonline.com &44640\\
\hline
receivefreesms.com &37485\\
\hline
receive-sms-online.com &27094\\
\hline
e-receivesms.com &7107\\
\hline
\end{tabular}
\end{table}
\end{column}%
\end{columns}
\end{frame}
\begin{frame}
\frametitle{消息聚类分析}
\begin{block}{\textbf{基本思路}}
\begin{itemize}
\item<0-> 使用编辑距离矩阵将类似的消息归于一张连通图中。
\item<0-> 使用固定值替换感兴趣的消息如代码、email地址。
\item<0-> 查找归一化距离小于阈值的消息,并确定聚类边界。
\end{itemize}
\end{block}
\begin{block}{\textbf{实现步骤}}
\begin{enumerate}
\item<0-> 加载所有消息。
\item<0-> 用固定的字符串替换数字、电子邮件和URL以预处理消息。
\item<0-> 将预处理后的信息按字母排序。
\item<0-> 通过使用编辑距离阈值(0.9)来确定聚类边界。
\item<0-> 手动标记各个聚类,以确定服务提供者、消息类别等。
\end{enumerate}
\end{block}
\end{frame}
\section{算法和代码}
\subsection{算法}
\begin{frame}{算法}
\begin{algorithm}[H]
\caption{HOSVD}
\small
\KwIn{HOSVD($\mathcal{X},R_{1},R_{2},\ldots,R_{N}$) }
\KwOut{ $\mathcal{G},A_{(1)},A_{(2)},\ldots,A_{(N)} $ }
\For{$n=1$ to $N$ }
{
$A_{(n)}\leftarrow$ $R_{n}$ leading left singular vectors of $X_{(n)}$
}
$\mathcal{G}\leftarrow \mathcal{X} \times_{1} A_{(1)}^{T} \times_{2} A_{(2)}^{T} \cdots \times_{N} A_{(N)}^{T}$\\
\Return $\mathcal{G},A_{(1)},A_{(2)},\ldots,A_{(N)} $
\end{algorithm}
\end{frame}
\subsection{代码}
\begin{frame}[fragile]{代码}
HOSVD在Python的代码实现和分析
\lstinputlisting[lastline=11,
language=Python,
frame=single,
caption=First 11 lines of HOSVD.py,
label=python]
{HOSVD.py}
\end{frame}
\section{Future Work}
\begin{frame}{Future Work} %将来可做的方向
\begin{itemize}
\item<0-> Get more people to try this
\item<0-> Benchmark the entire system in the wild
\item<0-> Profit!
\end{itemize}
\end{frame}
\begin{frame}{Thank you}
\begin{center}
\begin{minipage}{1\textwidth}
\setbeamercolor{mybox}{fg=white, bg=black!50!blue}
\begin{beamercolorbox}[wd=0.70\textwidth, rounded=true, shadow=true]{mybox}
\LARGE \centering Thank you for listening! %结束语
\end{beamercolorbox}
\end{minipage}
\end{center}
\end{frame}
\begin{frame}{Q\&A}
\begin{center}
\begin{minipage}{1\textwidth}
\setbeamercolor{mybox}{fg=white, bg=black!50!blue}
\begin{beamercolorbox}[wd=0.70\textwidth, rounded=true, shadow=true]{mybox}
\LARGE \centering Questions? %请求提问
\end{beamercolorbox}
\end{minipage}
\end{center}
\end{frame}
% -----------------------------------------------------------------------------
\end{document}
%文档结束

3795
sustech-logo.pdf Normal file

File diff suppressed because one or more lines are too long