From 1569adcffba1ad84f3bab25c59684d0d19e6c3d4 Mon Sep 17 00:00:00 2001 From: Liao Wenzhe <13253595285@163.com> Date: Mon, 11 Apr 2022 10:22:00 +0800 Subject: [PATCH] Reinitialize --- .idea/.gitignore | 8 + .idea/dataRisk-detection-resources.iml | 8 + .idea/inspectionProfiles/Project_Default.xml | 25 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + LICENSE | 661 +++++++++++++++ README.md | 293 +++++++ README.rst | 758 ++++++++++++++++++ README_CN.rst | 553 +++++++++++++ download.py | 32 + resource_urls/1 | 1 + resource_urls/papers.txt | 6 + url_checker.py | 47 ++ 15 files changed, 2416 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/dataRisk-detection-resources.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 README.rst create mode 100644 README_CN.rst create mode 100644 download.py create mode 100644 resource_urls/1 create mode 100644 resource_urls/papers.txt create mode 100644 url_checker.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/dataRisk-detection-resources.iml b/.idea/dataRisk-detection-resources.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/dataRisk-detection-resources.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 
0000000..0f92108 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,25 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..c3334de --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..d0e9274 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dbbe355 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. 
+ + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. 
+ + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/README.md b/README.md new file mode 100644 index 0000000..318ab2b --- /dev/null +++ b/README.md @@ -0,0 +1,293 @@ + +# dataRisk-detection-resources(数据安全智能风控学习资源) +2021年中国《数据安全法》的发布,意味着数据安全有望在中国形成新的风口。
+ +笔者有幸在2021年加入中国领先的数据安全创业公司之一,从事数据科学结合数据安全的前沿研究和落地工作。在探索过程中发现网络上专门针对数据安全的资料不多,遂诞生了自己整理相关资料和思考的想法,希望尽自己的微薄之力推动社区发展。
+ + +**拒绝白嫖,欢迎star!**

+笔者组建了个大数据安全技术交流的群,群友遍布硅谷,新加坡,腾讯,阿里,浙大等等,欢迎志同道合的朋友与我联系加入! + +最近更新日期为:2022/1

+## 入门综述 + +- [为什么机器学习解决网络安全问题总是失败:谈谈特征空间](https://toooold.com/2021/09/27/why_ml_fails_solving_sec_algo.html) +- [为什么机器学习解决网络安全问题总是失败:脆弱的系统工程](https://toooold.com/2021/10/11/why_ml_fails_security_frag_cn.html) +- [为什么机器学习解决网络安全问题总是失败:不合理的评估指标](https://toooold.com/2021/11/13/why_ml_fails_security_evaluation_cn.html) +- [为什么机器学习解决网络安全问题总是失败:机器学习不是万能灵药](https://toooold.com/2021/11/28/why_ml_fails_security_ml_is_not_everything_cn.html) + +## AI应用防御篇 +用AI来做应用安全防护 + +### OWASP10 +- https://salt.security/blog/what-is-the-owasp-api-security-top-10? + +### API风险发现系统 +- https://mp.weixin.qq.com/s/-9xkAROp7_A6gDjTLxfUsg + +### 风险业务 +- https://mp.weixin.qq.com/s/H9CoDtII37dKJYJ9HleY6w +- https://mp.weixin.qq.com/s/xGY1PxoH9Tlio2mWH7QLjw +- [杜跃进:数据安全治理的基本思路](https://www.secrss.com/articles/6420) +- [api安全治理思路](https://mp.weixin.qq.com/s/Q9fZrq51fLNf0itPj1p9Vw) +- 数据安全复合治理与实践白皮书 +- [参数篡改与流量重放](https://juejin.cn/post/6890798533473992717) +- [常见API攻击](https://zhuanlan.zhihu.com/p/472101085) +- [salt blog](https://salt.security/blog) +- [imperva blog](https://www.imperva.com/blog/) + + +### 恶意注册账户 +- 《Unveiling Fake Accounts at the Time of Registration: An Unsupervised Approach》 +- 《DeepScan: Exploiting Deep Learning for Malicious Account Detection in Location-Based Social Networks》 +- https://zhuanlan.zhihu.com/p/59666737 + + +### machine learning for UEBA +- 《AI2: Training a big data machine to defend》 +- 《Big Data Security Challenges: An Overview and Application of User Behavior Analytics》 +- 《Adaptive Intrusion Detection System via Online Learning》 +- 《A multi-model approach to the detection of web-based attacks》 +- 《McPAD : A Multiple Classifier System for Accurate Payload-based Anomaly Detection》 +- 《Using Generalization and Characterization Techniques in the Anomaly-based Detection of Web Attacks》 +- 《Anomaly-Based Web Attack Detection: A Deep Learning Approach》 +- 《A Big Data Analysis Framework for Model-Based Web User Behavior Analytics》 +- 《Anomalous Payload-based 
Network Intrusion Detection》 +- 《Data mining for security at Google》 +- 《User and Entity Behavior Analytics for Enterprise Security》 +- 《A Comprehensive Approach to Intrusion Detection Alert Correlation》 +- 《Traffic Anomaly Detection Using K-Means Clustering》 +- 《Calculation of the Behavior Utility of a Network System: Conception and Principle》 +- 《Spectrogram: A Mixture-of-Markov-Chains Model for Anomaly Detection in Web Traffic》 +- 《用户画像相关技术》 + + +### MLOPS +- https://mp.weixin.qq.com/s/rdOqndedCSs926GiQRs2Rg + +### 入侵检测 +- https://blog.cloudflare.com/api-abuse-detection/ +- [利用机器学习检测HTTP恶意外连流量](https://www.freebuf.com/column/170483.html) +- [ExecScent: Mining for New C&C Domains in Live Networks with Adaptive Control Protocol Templates](https://www.usenix.org/system/files/conference/usenixsecurity13/sec13-paper_nelms.pdf) +- [MADE: Security Analytics for Enterprise Threat Detection](http://www.ccs.neu.edu/home/alina/papers/MADE.pdf) +- [机器学习在互联网巨头公司实践](https://mp.weixin.qq.com/s/NFqUF824Rpr4g6wYWFpSNQ) +- [机器学习在入侵检测方面的应用 - 基于ADFA-LD训练集训练入侵检测判别模型](https://www.cnblogs.com/LittleHann/p/7806093.html#_lab2_0_1) +- [datacon比赛方向三-攻击源与攻击者分析writeup](https://github.com/ReAbout/datacon) +- [基于机器学习的恶意软件加密流量检测研究分享](https://blog.riskivy.com/%e5%9f%ba%e4%ba%8e%e6%9c%ba%e5%99%a8%e5%ad%a6%e4%b9%a0%e7%9a%84%e6%81%b6%e6%84%8f%e8%bd%af%e4%bb%b6%e5%8a%a0%e5%af%86%e6%b5%81%e9%87%8f%e6%a3%80%e6%b5%8b/?from=groupmessage&isappinstalled=0) +- [anomaly-detection-through-reinforcement-learning](https://zighra.com/blogs/anomaly-detection-through-reinforcement-learning/) + +### 恶意url检测 + +- [URLNet:通过深度学习学习URL表示以进行恶意URL检测](https://arxiv.org/abs/1802.03162v2) +- [我的AI安全检测学习笔记(一)](http://4o4notfound.org/index.php/archives/127/) +- 《Compromised or Attacker-Owned: A Large Scale Classification and Study of Hosting Domains of Malicious URLs》 + +### DDOS + +- [基于KDDCUP 99数据集预测DDoS攻击](https://github.com/aviraonepiece/machine_learning) +- 
[基于谱分析与统计机器学习的DDoS攻击检测技术研究](http://wap.cnki.net/lunwen-1013353778.html) +- [基于机器学习的分布式拒绝服务攻击检测方法研究](http://cdmd.cnki.com.cn/Article/CDMD-90002-2007140546.htm) +- [Defending DDoS Attacks Using Hidden Markov Models and Cooperative Reinforcement Learning](https://pdfs.semanticscholar.org/6363/b9f28a7e037abe626a2e88fac3393c04bfda.pdf) + + +### 僵尸网络检测 +- [Win the 0-Day Racing Game Against Botnet on Cloud](https://i.blackhat.com/asia-20/Friday/asia-20-Xu-Win-The-0-Day-Racing-Game-Against-Botnet-In-Public-Cloud.pdf) +- [datacon 2020 僵尸网络检测](https://zhuanlan.zhihu.com/p/186254809) + +## dga域名检测 +- [https://www.secrss.com/articles/14369] +- [https://www.cnblogs.com/networking/p/14788479.html] + + +### 机器人流量识别 +- https://blog.cloudflare.com/api-abuse-detection/ + +### 爬虫识别 +- https://blog.csdn.net/xiao_yi_xiao/article/details/101835176 +- https://blog.csdn.net/qq_25834767/article/details/103546251 + +### 应用日志与数据库日志IP-API-SQl三层关联支撑风险发现 + +### Web安全异常检测 ### +- [LSTM识别恶意HTTP请求](https://www.cdxy.me/?p=775) +- [基于URL异常检测的机器学习模型mini部署](http://4o4notfound.org/index.php/archives/84/) +- [我的AI安全检测学习笔记(一)](http://4o4notfound.org/index.php/archives/127/) +- [基于机器学习的WEB攻击分类检测模型](https://www.freebuf.com/news/184687.html) +- [基于机器学习的攻击检测系统](https://www.freebuf.com/column/189981.html) +- [WAF建设运营及AI应用实践](https://mp.weixin.qq.com/s/fTm1hUfRmm6ujmjvSHRLUA) +- [Web安全检测中机器学习的经验之谈](https://iami.xyz/ML-IN-Webshell-Detection-Advantages-And-Disadvantages/) +- [APT detection based on machine learning](https://mp.weixin.qq.com/s?__biz=MzU5MTM5MTQ2MA==&mid=2247484139&idx=1&sn=0da63a49f341eccc0bb48c954d8ebbb4&chksm=fe2efd60c95974767521fe6a6b7257a1d05e5482fc7ddeda281bdf0f0deb20add82d1a82d8ec&mpshare=1&scene=1&srcid=&pass_ticket=bjnNiDKomd79pQvRonW%2BXsTe6JrO%2FFs6oII12dZaLBPuQOtNK6Rzh9WSJ%2B%2F89ZUA#rd) +- [RSAC 2019 | 机器学习算法分析引擎助力安全威胁推理分析](http://blog.nsfocus.net/machine-learning-algorithms-analysis-engine-security-threat-reasoning/) +- [解决机器学习和安全运营之间的最后一公里问题](https://www.anquanke.com/post/id/163637) 
+- [RSAC 2019 | 采用NLP机器学习来进行自动化合规风险治理](http://blog.nsfocus.net/automated-compliance-risk-management-nlp-machine-learning/) + +### 渗透测试入门 ### +- [dvwa闯关教程](https://www.freebuf.com/articles/web/274058.html) + +## 数据集 + +1、[Samples of Security Related Data](http://link.zhihu.com/?target=http%3A//www.secrepo.com/) + +2、[DARPA Intrusion Detection Data Sets](http://link.zhihu.com/?target=https%3A//www.ll.mit.edu/ideval/data/) + +3、[Stratosphere IPS Data Sets](http://link.zhihu.com/?target=https%3A//stratosphereips.org/category/dataset.html) + +4、[Open Data Sets](http://link.zhihu.com/?target=http%3A//csr.lanl.gov/data/) + +5、[Data Capture from National Security Agency](http://link.zhihu.com/?target=http%3A//www.westpoint.edu/crc/SitePages/DataSets.aspx) + +6、[The ADFA Intrusion Detection Data Sets](http://link.zhihu.com/?target=https%3A//www.unsw.adfa.edu.au/australian-centre-for-cyber-security/cybersecurity/ADFA-IDS-Datasets/) + +7、[NSL-KDD Data Sets](http://link.zhihu.com/?target=https%3A//github.com/defcom17/NSL_KDD) + +8、[Malicious URLs Data Sets](http://link.zhihu.com/?target=http%3A//sysnet.ucsd.edu/projects/url/) + +9、[Multi-Source Cyber-Security Events](http://link.zhihu.com/?target=http%3A//csr.lanl.gov/data/cyber1/) + +10、[Malware Training Sets: A machine learning dataset for everyone](http://link.zhihu.com/?target=http%3A//marcoramilli.blogspot.cz/2016/12/malware-training-sets-machine-learning.html) + +11. [Collection of Security and Network Data Resources](http://www.covert.io/data-links/) +12. http://www.secrepo.com/ + +13. [Vulnbank_dataset](https://github.com/AnchoretY/AI_And_Web_Security_Library/tree/master/dataset/vulnbank_dataset). 
KDD大赛的一个竞赛项目,主要目的是使用机器学习得手段建立一个入侵检测器。其中的入侵行为主要包括:DDOS、密码暴力破解、缓冲区溢出、扫描等多种攻击行为。 + +## 优秀开源推荐 +- https://github.com/LiaoWenzhe +- https://github.com/yzhao062/pyod +- https://github.com/yzhao062/anomaly-detection-resources +- [网络安全中机器学习大合集](https://github.com/jivoi/awesome-ml-for-cybersecurity/blob/master/README_ch.md) +- [最终安全数据科学和机器学习指南](http://www.covert.io/the-definitive-security-datascience-and-machinelearning-guide/) +- [Machine Learning for Cyber Security](https://github.com/wtsxDev/Machine-Learning-for-Cyber-Security#-datasets) +- [404师傅的整理](https://github.com/404notf0und/AI-for-Security-Learning) +- [Awesome-AI-Security](https://github.com/RandomAdversary/Awesome-AI-Security) +- [awesome-ml-for-cybersecurity](https://github.com/jivoi/awesome-ml-for-cybersecurity#-datasets) +- [The Definitive Security Data Science and Machine Learning Guide](http://www.covert.io/the-definitive-security-datascience-and-machinelearning-guide/) +- https://github.com/0xMJ/AI-Security-Learning +- [乌云](https://wooyun.x10sec.org/search?keywords=aa&content_search_by=by_bugs) + +## 思维方式: +- [提出好的想法和方向](https://mp.weixin.qq.com/s/jajNXjNxfAvV-7SmLnVUAQ) +- [刘知远:好的研究想法从哪里来](https://zhuanlan.zhihu.com/p/93765082) +- [MIT人工智能实验室:如何做研究](https://blog.csdn.net/jinjinstudy/article/details/9413213) + + +## 实用工具 +- [ReadPaper论文阅读平台](https://mp.weixin.qq.com/s/iT3cCqw59707iN4-ChorFA) +- arxiv +- google scholar +- [百度rasp安全检测工具](https://rasp.baidu.com/doc/) + +## 优秀公众号 +- 安全学术圈 +- 阿里安全应急响应中心 +- 腾讯安全应急响应中心 +- 百度安全应急响应中心 +- dataFunTalk +- freebuf +- 404 Not F0und + +## 相关顶会 +- BlackHat / BlackHat Asia +- owasp +- botconf +- DEF-CON +- S&P +- CCS +- ICDFC +- USENIX Security +- PETS +- Wisec +- CODASPY +- ICSE +- NDSS +- Computer & Security +- TDSC +- RSAC + +## 相关公司 +- 全知科技 +- salt +- 绿盟科技 +- 安恒信息 +- 闪捷信息 +- 奇安信 +- imperva + +## 相关赛事 +- DataCon +- DataFountain + +## 优秀书籍 +- 《风控要略:互联网业务反欺诈之路》 +- 《web安全之机器学习入门》 +- 《web安全之深度学习实战》 +- 《web安全之强化学习与Gan》 +- 《白帽子讲web安全》 +- 《图解http》 + +## 相关博客 +- 
https://blog.csdn.net/Liao_Wenzhe/ +- http://iami.xyz +- https://www.cdxy.me/ +- https://blog.netlab.360.com/ +- 阿里云安全 + + +## BlackHat 上一些有意思的web攻防演讲 +- https://www.blackhat.com/docs/asia-17/materials/asia-17-Dong-Beyond-The-Blacklists-Detecting-Malicious-URL-Through-Machine-Learning.pdf +- https://i.blackhat.com/briefings/asia/2018/asia-18-Simakov-Marina-Breaking-The-Attack-Graph.pdf +- https://i.blackhat.com/asia-19/Fri-March-29/bh-asia-Pham-Automated-REST-API-Endpoint.pdf +- https://i.blackhat.com/asia-20/Friday/asia-20-Hao-Attacking-And-Defending-Machine-Learning-Applications-Of-Public-Cloud.pdf +- https://i.blackhat.com/eu-19/Wednesday/eu-19-Kettle-HTTP-Desync-Attacks-Request-Smuggling-Reborn.pdf +- https://www.blackhat.com/docs/us-17/wednesday/us-17-Gil-Web-Cache-Deception-Attack.pdf +- https://www.blackhat.com/docs/us-17/wednesday/us-17-Burnett-Ichthyology-Phishing-As-A-Science-wp.pdf +- https://i.blackhat.com/us-18/Thu-August-9/us-18-Kettle-Practical-Web-Cache-Poisoning-Redefining-Unexploitable.pdf +- https://i.blackhat.com/us-18/Thu-August-9/us-18-Kettle-Practical-Web-Cache-Poisoning-Redefining-Unexploitable.pdf +- https://i.blackhat.com/USA-19/Wednesday/us-19-Valenta-Monsters-In-The-Middleboxes-Building-Tools-For-Detecting-HTTPS-Interception.pdf +- https://i.blackhat.com/USA-20/Wednesday/us-20-Kettle-Web-Cache-Entanglement-Novel-Pathways-To-Poisoning.pdf +- https://www.163.com/dy/article/GPJBLI020511CJ6O.html +- https://i.blackhat.com/USA-20/Wednesday/us-20-Klein-HTTP-Request-Smuggling-In-2020-New-Variants-New-Defenses-And-New-Challenges.pdf +- https://i.blackhat.com/EU-21/Wednesday/EU-21-Thatcher-Practical-HTTP-Header-Smuggling.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Gavrichenkov-Breaking-HTTPS-With-BGP-Hijacking-wp.pdf +- https://www.blackhat.com/docs/us-16/materials/us-16-Sivakorn-HTTP-Cookie-Hijacking-In-The-Wild-Security-And-Privacy-Implications-wp.pdf +- 
https://towardsdatascience.com/deep-learning-for-specific-information-extraction-from-unstructured-texts-12c5b9dceada +- https://portswigger.net/research/cracking-the-lens-targeting-https-hidden-attack-surface +- https://www.botconf.eu/category/keynote/ +- https://www.botconf.eu/2016/getting-your-hands-dirty-how-to-analyze-the-behavior-of-malware-traffic-and-web-connections/ +- https://www.botconf.eu/2015/dga-clustering-and-analysis-mastering-modern-evolving-threats/ +- https://www.blackhat.com/us-16/briefings/schedule/#account-jumping-post-infection-persistency--lateral-movement-in-aws-4309 +- https://www.blackhat.com/us-16/briefings/schedule/#http-cookie-hijacking-in-the-wild-security-and-privacy-implications-3467 +- https://www.blackhat.com/docs/us-17/wednesday/us-17-Kettle-Cracking-The-Lens-Exploiting-HTTPs-Hidden-Attack-Surface.pdf +- https://www.blackhat.com/docs/us-17/wednesday/us-17-Kettle-Cracking-The-Lens-Exploiting-HTTPs-Hidden-Attack-Surface-wp.pdf +- https://www.blackhat.com/docs/us-17/thursday/us-17-Prandl-PEIMA-Harnessing-Power-Laws-To-Detect-Malicious-Activities-From-Denial-Of-Service-To-Intrusion-Detection-Traffic-Analysis-And-Beyond.pdf +- https://www.blackhat.com/docs/us-17/thursday/us-17-Prandl-PEIMA-Harnessing-Power-Laws-To-Detect-Malicious-Activities-From-Denial-Of-Service-To-Intrusion-Detection-Traffic-Analysis-And-Beyond-wp.pdf +- https://www.blackhat.com/docs/us-17/thursday/us-17-Hypponen-The-Epocholypse-2038-Whats-In-Store-For-The-Next-20-Years.pdf +- https://www.blackhat.com/docs/us-16/materials/us-16-Amiga-Account-Jumping-Post-Infection-Persistency-And-Lateral-Movement-In-AWS.pdf +- https://www.blackhat.com/docs/us-16/materials/us-16-Sivakorn-HTTP-Cookie-Hijacking-In-The-Wild-Security-And-Privacy-Implications.pdf +- https://www.blackhat.com/docs/us-16/materials/us-16-Gelernter-Timing-Attacks-Have-Never-Been-So-Practical-Advanced-Cross-Site-Search-Attacks.pdf +- 
https://www.blackhat.com/docs/us-16/materials/us-16-Ermishkin-Viral-Video-Exploiting-Ssrf-In-Video-Converters.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Gavrichenkov-Breaking-HTTPS-With-BGP-Hijacking.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Zadeh-From-False-Positives-To-Actionable-Analysis-Behavioral-Intrusion-Detection-Machine-Learning-And-The-SOC.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Zadeh-From-False-Positives-To-Actionable-Analysis-Behavioral-Intrusion-Detection-Machine-Learning-And-The-SOC-wp.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Wang-The-Applications-Of-Deep-Learning-On-Traffic-Identification.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Wang-The-Applications-Of-Deep-Learning-On-Traffic-Identification-wp.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Morgan-Web-Timing-Attacks-Made-Practical.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Morgan-Web-Timing-Attacks-Made-Practical-wp.pdf +- https://www.blackhat.com/docs/us-15/materials/us-15-Saxe-Why-Security-Data-Science-Matters-And-How-Its-Different.pdf +- https://www.blackhat.com/docs/us-14/materials/us-14-Pinto-Secure-Because-Math-A-Deep-Dive-On-Machine-Learning-Based-Monitoring-WP.pdf +- https://www.blackhat.com/docs/us-14/materials/us-14-Pinto-Secure-Because-Math-A-Deep-Dive-On-Machine-Learning-Based-Monitoring.pdf +- https://media.blackhat.com/us-13/US-13-Pinto-Defending-Networks-with-Incomplete-Information-A-Machine-Learning-Approach-Slides.pdf +- https://paper.bobylive.com/Meeting_Papers/BlackHat/USA-2013/US-13-Pinto-Defending-Networks-with-Incomplete-Information-A-Machine-Learning-Approach-Slides.pdf +- https://paper.bobylive.com/Meeting_Papers/BlackHat/USA-2013/US-13-Peck-Abusing-Web-APIs-Through-Scripted-Android-Applications-WP.pdf +- https://www.youtube.com/watch?v=RGqCZO3cgY8 +- https://www.youtube.com/watch?v=JUY4DQZ02o4 +- https://www.youtube.com/watch?v=D6MG2uBIfUI +- 
https://paper.bobylive.com/Meeting_Papers/BlackHat/USA-2011/BH_US_11_Balduzzi_HPP_Slides.pdf +- https://www.blackhat.com/html/bh-us-11/bh-us-11-archives.html +- https://www.blackhat.com/docs/eu-14/materials/eu-14-Hafif-Reflected-File-Download-A-New-Web-Attack-Vector.pdf +- https://www.madlab.it/slides/BHEU2011/whitepaper-bhEU2011.pdf +- https://infocon.org/cons/Black%20Hat/Black%20Hat%20Europe/Black%20Hat%20Europe%202011/Presentations/Raul_Siles/BlackHat_EU_2011_Siles_SAP_Session-WP.pdf +- https://www.blackhat.com/presentations/bh-europe-09/Zanero_Criscione/BlackHat-Europe-2009-Zanero-Criscione-Masibty-Web-App-Firewall-slides.pdf +- https://infocon.org/cons/Black%20Hat/Black%20Hat%20USA/Black%20Hat%20USA%202007/presentations/Bolzoni_and_Zambon/Whitepaper/bh-usa-07-bolzoni_and_zambon-WP.pdf +- https://www.blackhat.com/html/bh-media-archives/bh-archives-2007.html#eu_07 + diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..6fb7695 --- /dev/null +++ b/README.rst @@ -0,0 +1,758 @@ +DataRisk Detection Learning Resources +==================================== + +.. image:: https://img.shields.io/github/stars/liaowenzhe/dataRisk-detection-resources.svg + :target: https://github.com/yzhao062/anomaly-detection-resources/stargazers + :alt: GitHub stars + + +.. image:: https://img.shields.io/github/forks/liaowenzhe/dataRisk-detection-resources.svg?color=blue + :target: https://github.com/yzhao062/anomaly-detection-resources/network + :alt: GitHub forks + + +.. image:: https://img.shields.io/github/license/liaowenzhe/dataRisk-detection-resources.svg?color=blue + :target: https://github.com/yzhao062/anomaly-detection-resources/blob/master/LICENSE + :alt: License + + +.. 
image:: https://awesome.re/badge-flat2.svg + :target: https://awesome.re/badge-flat2.svg + :alt: Awesome + + +---- + +Data security: From the standpoint of a "data-centric security system", data security generally refers to the whole security system built around +data classification and the protection of sensitive data throughout its entire life cycle. + +DataRisk Detection: Data risk detection identifies possible causes of harm to the confidentiality, integrity, availability, etc. of data assets during data processing activities, and further analyzes the motive, frequency and likelihood of each threat. + +This repository collects: + +#. Books & Academic Papers +#. Online Courses and Videos +#. DataRisk Datasets +#. Open-source and Commercial Libraries/Toolkits +#. Key Conferences & Journals + + +**More items will be added to the repository**. +Please feel free to suggest other key resources by opening an issue report, +submitting a pull request, or dropping me an email @ (wenzhe.1.liao@gmail.com). +Enjoy reading! + +BTW, you may find my `[GitHub] `_ useful. + +---- + +Table of Contents +----------------- + + +* `1. Books & Tutorials <#1-books--tutorials>`_ + + * `1.1. Books <#11-books>`_ + * `1.2. Tutorials <#12-tutorials>`_ + +* `2. Courses/Seminars/Videos <#2-coursesseminarsvideos>`_ +* `3. Toolbox & Datasets <#3-toolbox--datasets>`_ + + * `3.1. Multivariate data risk detection <#31-multivariate-data>`_ + * `3.2. Time series outlier detection <#32-time-series-outlier-detection>`_ + * `3.3. Real-time Elasticsearch <#33-real-time-elasticsearch>`_ + * `3.4. Datasets <#34-datasets>`_ + +* `4. Papers <#4-papers>`_ + + * `4.1. Overview & Survey Papers <#41-overview--survey-papers>`_ + * `4.2. Key Algorithms <#42-key-algorithms>`_ + * `4.3. Graph & Network Outlier Detection <#43-graph--network-outlier-detection>`_ + * `4.4. Time Series Outlier Detection <#44-time-series-outlier-detection>`_ + * `4.5. 
Feature Selection in Outlier Detection <#45-feature-selection-in-outlier-detection>`_ + * `4.6. High-dimensional & Subspace Outliers <#46-high-dimensional--subspace-outliers>`_ + * `4.7. Outlier Ensembles <#47-outlier-ensembles>`_ + * `4.8. Outlier Detection in Evolving Data <#48-outlier-detection-in-evolving-data>`_ + * `4.9. Representation Learning in Outlier Detection <#49-representation-learning-in-outlier-detection>`_ + * `4.10. Interpretability <#410-interpretability>`_ + * `4.11. Outlier Detection with Neural Networks <#411-outlier-detection-with-neural-networks>`_ + * `4.12. Active Anomaly Detection <#412-active-anomaly-detection>`_ + * `4.13. Interactive Outlier Detection <#413-interactive-outlier-detection>`_ + * `4.14. Outlier Detection in Other fields <#414-outlier-detection-in-other-fields>`_ + * `4.15. Outlier Detection Applications <#415-outlier-detection-applications>`_ + * `4.16. Automated Outlier Detection <#416-automated-outlier-detection>`_ + * `4.17. Machine Learning Systems for Outlier Detection <#417-machine-learning-systems-for-outlier-detection>`_ + * `4.18. Fairness and Bias in Outlier Detection <#418-fairness-and-bias-in-outlier-detection>`_ + * `4.19. Isolation-based Methods <#419-isolation-based-methods>`_ + * `4.20. Emerging and Interesting Topics <#420-emerging-and-interesting-topics>`_ + +* `5. Key Conferences/Workshops/Journals <#5-key-conferencesworkshopsjournals>`_ + + * `5.1. Conferences & Workshops <#51-conferences--workshops>`_ + * `5.2. Journals <#52-journals>`_ + + +---- + + +1. Books & Tutorials +-------------------- + +1.1. Books +^^^^^^^^^^ + +`Outlier Analysis `_ +by Charu Aggarwal: Classical text book covering most of the outlier analysis techniques. +A **must-read** for people in the field of outlier detection. `[Preview.pdf] `_ + +`Outlier Ensembles: An Introduction `_ +by Charu Aggarwal and Saket Sathe: Great intro book for ensemble learning in outlier analysis. 
+ +`Data Mining: Concepts and Techniques (3rd) `_ +by Jiawei Han and Micheline Kamber and Jian Pei: Chapter 12 discusses outlier detection with many key points. `[Google Search] `_ + +1.2. Tutorials +^^^^^^^^^^^^^^ + +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== +Tutorial Title Venue Year Ref Materials +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== +Data mining for anomaly detection PKDD 2008 [#Lazarevic2008Data]_ `[Video] `_ +Outlier detection techniques ACM SIGKDD 2010 [#Kriegel2010Outlier]_ `[PDF] `_ +Anomaly Detection: A Tutorial ICDM 2011 [#Chawla2011Anomaly]_ `[PDF] `_ +Anomaly Detection in Networks KDD 2017 [#Mendiratta2017Anomaly]_ `[Page] `_ +Which Anomaly Detector should I use? ICDM 2018 [#Ting2018Which]_ `[PDF] `_ +Deep Learning for Anomaly Detection KDD 2020 [#Wang2020Deep]_ `[HTML] `_, `[Video] `_ +Deep Learning for Anomaly Detection WSDM 2021 [#Pang2021Deep]_ `[HTML] `_ +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== + +---- + +2. 
Courses/Seminars/Videos +-------------------------- + +**Coursera Introduction to Anomaly Detection (by IBM)**\ : +`[See Video] `_ + +**Coursera Real-Time Cyber Threat Detection and Mitigation partly covers the topic**\ : +`[See Video] `_ + +**Coursera Machine Learning by Andrew Ng also partly covers the topic**\ : + + +* `Anomaly Detection vs. Supervised Learning `_ +* `Developing and Evaluating an Anomaly Detection System `_ + +**Udemy Outlier Detection Algorithms in Data Mining and Data Science**\ : +`[See Video] `_ + +**Stanford Data Mining for Cyber Security** also covers part of anomaly detection techniques\ : +`[See Video] `_ + +---- + +3. Toolbox & Datasets +--------------------- + +3.1. Multivariate Data +^^^^^^^^^^^^^^^^^^^^^^ + +[**Python**] `Python Outlier Detection (PyOD) `_\ : PyOD is a comprehensive and scalable Python toolkit for detecting outlying objects in multivariate data. It contains more than 20 detection algorithms, including emerging deep learning models and outlier ensembles. + +[**Python**, **GPU**] `TOD: Tensor-based Outlier Detection (PyTOD) `_: A general GPU-accelerated framework for outlier detection. + +[**Python**] `Python Streaming Anomaly Detection (PySAD) `_\ : PySAD is a streaming anomaly detection framework in Python, which provides a complete set of tools for anomaly detection experiments. It currently contains more than 15 online anomaly detection algorithms and 2 different methods to integrate PyOD detectors to the streaming setting. + +[**Python**] `Scikit-learn Novelty and Outlier Detection `_. It supports some popular algorithms like LOF, Isolation Forest, and One-class SVM. + +[**Python**] `Scalable Unsupervised Outlier Detection (SUOD) `_\ : SUOD (Scalable Unsupervised Outlier Detection) is an acceleration framework for large-scale unsupervised outlier detector training and prediction, on top of PyOD. 
+ +[**Java**] `ELKI: Environment for Developing KDD-Applications Supported by Index-Structures `_\ : +ELKI is an open source (AGPLv3) data mining software written in Java. The focus of ELKI is research in algorithms, with an emphasis on unsupervised methods in cluster analysis and outlier detection. + +[**Java**] `RapidMiner Anomaly Detection Extension `_\ : The Anomaly Detection Extension for RapidMiner comprises the most well-known unsupervised anomaly detection algorithms, assigning individual anomaly scores to data rows of example sets. It allows you to find data, which is significantly different from the normal, without the need for the data being labeled. + +[**R**] `CRAN Task View: Anomaly Detection with R `_\ : This CRAN task view contains a list of packages that can be used for anomaly detection with R. + +[**R**] `outliers package `_\ : A collection of some tests commonly used for identifying outliers in R. + +[**Matlab**] `Anomaly Detection Toolbox - Beta `_\ : A collection of popular outlier detection algorithms in Matlab. + + +3.2. Time series outlier detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +[**Python**] `TODS `_\ : TODS is a full-stack automated machine learning system for outlier detection on multivariate time-series data. + +[**Python**] `skyline `_\ : Skyline is a near real time anomaly detection system. + +[**Python**] `banpei `_\ : Banpei is a Python package for anomaly detection. + +[**Python**] `telemanom `_\ : A framework for using LSTMs to detect anomalies in multivariate time series data. + +[**Python**] `DeepADoTS `_\ : A benchmarking pipeline for anomaly detection on time series data for multiple state-of-the-art deep learning methods. + +[**Python**] `NAB: The Numenta Anomaly Benchmark `_\ : NAB is a novel benchmark for evaluating algorithms for anomaly detection in streaming, real-time applications. + +[**Python**] `CueObserve `_\ : Anomaly detection on SQL data warehouses and databases. 
+ +[**R**] `CRAN Task View: Anomaly Detection with R `_\ : This CRAN task view contains a list of packages that can be used for anomaly detection with R. + +[**R**] `AnomalyDetection `_\ : AnomalyDetection is an open-source R package for detecting anomalies that is robust, from a statistical standpoint, in the presence of seasonality and an underlying trend. + +[**R**] `anomalize `_\ : The 'anomalize' package enables a "tidy" workflow for detecting anomalies in data. + +3.3. Real-time Elasticsearch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +[**Open Distro**] `Real Time Anomaly Detection in Open Distro for Elasticsearch by Amazon `_\ : A machine learning-based anomaly detection plugin for Open Distro for Elasticsearch. See `Real Time Anomaly Detection in Open Distro for Elasticsearch `_. + +[**Python**] `datastream.io `_\ : An open-source framework for real-time anomaly detection using Python, Elasticsearch and Kibana. + + +3.4. Datasets +^^^^^^^^^^^^^ + +**ELKI Outlier Datasets**\ : https://elki-project.github.io/datasets/outlier + +**Outlier Detection DataSets (ODDS)**\ : http://odds.cs.stonybrook.edu/#table1 + +**Unsupervised Anomaly Detection Dataverse**\ : https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/OPQMVF + +**Anomaly Detection Meta-Analysis Benchmarks**\ : https://ir.library.oregonstate.edu/concern/datasets/47429f155 + +**Skoltech Anomaly Benchmark (SKAB)**\ : https://github.com/waico/skab + + +---- + + +4. Papers +--------- + +4.1. Overview & Survey Papers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Papers are sorted by the publication year. 
+ +====================================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +====================================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +A survey of outlier detection methodologies ARTIF INTELL REV 2004 [#Hodge2004A]_ `[PDF] `_ +Anomaly detection: A survey CSUR 2009 [#Chandola2009Anomaly]_ `[PDF] `_ +A meta-analysis of the anomaly detection problem Preprint 2015 [#Emmott2015A]_ `[PDF] `_ +On the evaluation of unsupervised outlier detection: measures, datasets, and an empirical study DMKD 2016 [#Campos2016On]_ `[HTML] `_, `[SLIDES] `_ +A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data PLOS ONE 2016 [#Goldstein2016A]_ `[PDF] `_ +A comparative evaluation of outlier detection algorithms: Experiments and analyses Pattern Recognition 2018 [#Domingues2018A]_ `[PDF] `_ +Research Issues in Outlier Detection Book Chapter 2019 [#Suri2019Research]_ `[HTML] `_ +Quantitative comparison of unsupervised anomaly detection algorithms for intrusion detection SAC 2019 [#Falcao2019Quantitative]_ `[HTML] `_ +Progress in Outlier Detection Techniques: A Survey IEEE Access 2019 [#Wang2019Progress]_ `[PDF] `_ +Deep learning for anomaly detection: A survey Preprint 2019 [#Chalapathy2019Deep]_ `[PDF] `_ +Anomalous Instance Detection in Deep Learning: A Survey Tech Report 2020 [#Bulusu2020Deep]_ `[PDF] `_ +Anomaly detection in univariate time-series: 
A survey on the state-of-the-art Preprint 2020 [#Braei2020Anomaly]_ `[PDF] `_ +Deep Learning for Anomaly Detection: A Review CSUR 2021 [#Pang2020Deep]_ `[PDF] `_ +A Comprehensive Survey on Graph Anomaly Detection with Deep Learning Preprint 2021 [#Ma2021A]_ `[PDF] `_ +Revisiting Time Series Outlier Detection: Definitions and Benchmarks NeurIPS 2021 [#Lai2021Revisiting]_ `[PDF] `_, `[Code] `_ +A Unified Survey on Anomaly, Novelty, Open-Set, and Out-of-Distribution Detection: Solutions and Future Challenges Preprint 2021 [#Salehi2021A]_ `[PDF] `_ +====================================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + +4.2. Key Algorithms +^^^^^^^^^^^^^^^^^^^ + +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== +Abbreviation Paper Title Venue Year Ref Materials +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== +kNN Efficient algorithms for mining outliers from large data sets ACM SIGMOD Record 2000 [#Ramaswamy2000Efficient]_ `[PDF] `_ +KNN Fast outlier detection in high dimensional spaces PKDD 2002 [#Angiulli2002Fast]_ `[PDF] `_ +LOF LOF: 
identifying density-based local outliers ACM SIGMOD Record 2000 [#Breunig2000LOF]_ `[PDF] `_ +IForest Isolation forest ICDM 2008 [#Liu2008Isolation]_ `[PDF] `_ +OCSVM Estimating the support of a high-dimensional distribution Neural Computation 2001 [#Scholkopf2001Estimating]_ `[PDF] `_ +AutoEncoder Ensemble Outlier detection with autoencoder ensembles SDM 2017 [#Chen2017Outlier]_ `[PDF] `_ +COPOD COPOD: Copula-Based Outlier Detection ICDM 2020 [#Li2020COPOD]_ `[PDF] `_ +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== + +4.3. Graph & Network Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================= ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================= ===== ============================ ========================================================================================================================================================================== +Graph based anomaly detection and description: a survey DMKD 2015 [#Akoglu2015Graph]_ `[PDF] `_ +Anomaly detection in dynamic networks: a survey WIREs Computational Statistic 2015 [#Ranshous2015Anomaly]_ `[PDF] `_ +Outlier detection in graphs: On the impact of multiple graph models ComSIS 2019 [#Campos2019Outlier]_ `[PDF] `_ +A Comprehensive Survey on 
Graph Anomaly Detection with Deep Learning TKDE 2021 [#Ma2021A]_ `[PDF] `_ +================================================================================================= ============================= ===== ============================ ========================================================================================================================================================================== + + +4.4. Time Series Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Outlier detection for temporal data: A survey TKDE 2014 [#Gupta2014Outlier]_ `[PDF] `_ +Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding KDD 2018 [#Hundman2018Detecting]_ `[PDF] `_, `[Code] `_ +Time-Series Anomaly Detection Service at Microsoft KDD 2019 [#Ren2019Time]_ `[PDF] `_ +Revisiting Time Series Outlier Detection: Definitions and Benchmarks NeurIPS 2021 [#Lai2021Revisiting]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.5. 
Feature Selection in Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== +Unsupervised feature selection for outlier detection by modelling hierarchical value-feature couplings ICDM 2016 [#Pang2016Unsupervised]_ `[PDF] `_ +Learning homophily couplings from non-iid data for joint feature selection and noise-resilient outlier detection IJCAI 2017 [#Pang2017Learning]_ `[PDF] `_ +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== + + +4.6. 
High-dimensional & Subspace Outliers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= +A survey on unsupervised outlier detection in high-dimensional numerical data Stat Anal Data Min 2012 [#Zimek2012A]_ `[HTML] `_ +Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection SIGKDD 2018 [#Pang2018Learning]_ `[PDF] `_ +Reverse Nearest Neighbors in Unsupervised Distance-Based Outlier Detection TKDE 2015 [#Radovanovic2015Reverse]_ `[PDF] `_, `[SLIDES] `_ +Outlier detection for high-dimensional data Biometrika 2015 [#Ro2015Outlier]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= + + +4.7. 
Outlier Ensembles +^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Outlier ensembles: position paper SIGKDD Explorations 2013 [#Aggarwal2013Outlier]_ `[PDF] `_ +Ensembles for unsupervised outlier detection: challenges and research questions a position paper SIGKDD Explorations 2014 [#Zimek2014Ensembles]_ `[PDF] `_ +An Unsupervised Boosting Strategy for Outlier Detection Ensembles PAKDD 2018 [#Campos2018An]_ `[HTML] `_ +LSCP: Locally selective combination in parallel outlier ensembles SDM 2019 [#Zhao2019LSCP]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + +4.8. 
Outlier Detection in Evolving Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +A Survey on Anomaly detection in Evolving Data: [with Application to Forest Fire Risk Prediction] SIGKDD Explorations 2018 [#Salehi2018A]_ `[PDF] `_ +Unsupervised real-time anomaly detection for streaming data Neurocomputing 2017 [#Ahmad2017Unsupervised]_ `[PDF] `_ +Outlier Detection in Feature-Evolving Data Streams SIGKDD 2018 [#Manzoor2018Outlier]_ `[PDF] `_, `[Github] `_ +Evaluating Real-Time Anomaly Detection Algorithms--The Numenta Anomaly Benchmark ICMLA 2015 [#Lavin2015Evaluating]_ `[PDF] `_, `[Github] `_ +MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams AAAI 2020 [#Bhatia2020MIDAS]_ `[PDF] `_, `[Github] `_ +NETS: Extremely Fast Outlier Detection from a Data Stream via Set-Based Processing VLDB 2019 [#Yoon2019NETS]_ `[PDF] `_, `[Github] `_, `[Slide] `_ +Ultrafast Local Outlier Detection from a Data Stream with Stationary Region Skipping KDD 2020 [#Yoon2020STARE]_ `[PDF] `_, `[Github] `_, `[Slide] `_ +================================================================================================== ============================ ===== ============================ 
========================================================================================================================================================================== + + +4.9. Representation Learning in Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection SIGKDD 2018 [#Pang2018Learning]_ `[PDF] `_ +Learning representations for outlier detection on a budget Preprint 2015 [#Micenkova2015Learning]_ `[PDF] `_ +XGBOD: improving supervised outlier detection with unsupervised representation learning IJCNN 2018 [#Zhao2018Xgbod]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + + +4.10. 
Interpretability +^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Explaining Anomalies in Groups with Characterizing Subspace Rules DMKD 2018 [#Macha2018Explaining]_ `[PDF] `_ +Beyond Outlier Detection: LookOut for Pictorial Explanation ECML-PKDD 2018 [#Gupta2018Beyond]_ `[PDF] `_ +Contextual outlier interpretation IJCAI 2018 [#Liu2018Contextual]_ `[PDF] `_ +Mining multidimensional contextual outliers from categorical relational data IDA 2015 [#Tang2015Mining]_ `[PDF] `_ +Discriminative features for identifying and interpreting outliers ICDE 2014 [#Dang2014Discriminative]_ `[PDF] `_ +Sequential Feature Explanations for Anomaly Detection TKDD 2019 [#Siddiqui2019Sequential]_ `[HTML] `_ +Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network WWW 2021 [#Xu2021Beyond]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.11. 
Outlier Detection with Neural Networks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding KDD 2018 [#Hundman2018Detecting]_ `[PDF] `_, `[Code] `_ +MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks ICANN 2019 [#Li2019MAD]_ `[PDF] `_, `[Code] `_ +Generative Adversarial Active Learning for Unsupervised Outlier Detection TKDE 2019 [#Liu2019Generative]_ `[PDF] `_, `[Code] `_ +Deep Autoencoding Gaussian Mixture Model for Unsupervised Anomaly Detection ICLR 2018 [#Zong2018Deep]_ `[PDF] `_, `[Code] `_ +Deep Anomaly Detection with Outlier Exposure ICLR 2019 [#Hendrycks2019Deep]_ `[PDF] `_, `[Code] `_ +Unsupervised Anomaly Detection With LSTM Neural Networks TNNLS 2019 [#Ergen2019Unsupervised]_ `[PDF] `_, `[IEEE] `_, +Effective End-to-end Unsupervised Outlier Detection via Inlier Priority of Discriminative Network NeurIPS 2019 [#Wang2019Effective]_ `[PDF] `_ `[Code] `_ +================================================================================================= ============================ ===== ============================ 
========================================================================================================================================================================== + + +4.12. Active Anomaly Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Active learning for anomaly and rare-category detection NeurIPS 2005 [#Pelleg2005Active]_ `[PDF] `_ +Outlier detection by active learning SIGKDD 2006 [#Abe2006Outlier]_ `[PDF] `_ +Active Anomaly Detection via Ensembles: Insights, Algorithms, and Interpretability Preprint 2019 [#Das2019Active]_ `[PDF] `_ +Meta-AAD: Active Anomaly Detection with Deep Reinforcement Learning ICDM 2020 [#Zha2020Meta]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + + +4.13. 
Interactive Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Learning On-the-Job to Re-rank Anomalies from Top-1 Feedback SDM 2019 [#Lamba2019Learning]_ `[PDF] `_ +Interactive anomaly detection on attributed networks WSDM 2019 [#Ding2019Interactive]_ `[PDF] `_ +eX2: a framework for interactive anomaly detection IUI Workshop 2019 [#Arnaldo2019ex2]_ `[PDF] `_ +Tripartite Active Learning for Interactive Anomaly Discovery IEEE Access 2019 [#Zhu2019Tripartite]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.14. 
Outlier Detection in Other fields +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Field Paper Title Venue Year Ref Materials +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +**Text** Outlier detection for text data SDM 2017 [#Kannan2017Outlier]_ `[PDF] `_ +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.15. 
Outlier Detection Applications +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +======================== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Field Paper Title Venue Year Ref Materials +======================== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +**Security** A survey of distance and similarity measures used within network intrusion anomaly detection IEEE Commun. Surv. Tutor. 2015 [#WellerFahy2015A]_ `[PDF] `_ +**Security** Anomaly-based network intrusion detection: Techniques, systems and challenges Computers & Security 2009 [#GarciaTeodoro2009Anomaly]_ `[PDF] `_ +**Finance** A survey of anomaly detection techniques in financial domain Future Gener Comput Syst 2016 [#Ahmed2016A]_ `[PDF] `_ +**Traffic** Outlier Detection in Urban Traffic Data WIMS 2018 [#Djenouri2018Outlier]_ `[PDF] `_ +**Social Media** A survey on social media anomaly detection SIGKDD Explorations 2016 [#Yu2016A]_ `[PDF] `_ +**Social Media** GLAD: group anomaly detection in social media analysis TKDD 2015 [#Yu2015Glad]_ `[PDF] `_ +**Machine Failure** Detecting the Onset of Machine Failure Using Anomaly Detection Methods DAWAK 2019 [#Riazi2019Detecting]_ `[PDF] `_ +**Video Surveillance** AnomalyNet: An anomaly detection network for video surveillance TIFS 2019 [#Zhou2019AnomalyNet]_ `[IEEE] `_, `Code `_ +======================== 
================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.16. Automated Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +AutoOD: Automated Outlier Detection via Curiosity-guided Search and Self-imitation Learning ICDE 2020 [#Li2020AutoOD]_ `[PDF] `_ +Automatic Unsupervised Outlier Model Selection NeurIPS 2021 [#Zhao2020Automating]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.17. Machine Learning Systems for Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This section summarizes a list of systems for outlier detection, which may +overlap with the section of tools and libraries. 
+ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +PyOD: A Python Toolbox for Scalable Outlier Detection JMLR 2019 [#Zhao2019PYOD]_ `[PDF] `_, `[Code] `_ +SUOD: Accelerating Large-Scale Unsupervised Heterogeneous Outlier Detection MLSys 2021 [#Zhao2021SUOD]_ `[PDF] `_, `[Code] `_ +TOD: Tensor-based Outlier Detection Preprint 2021 [#Zhao2021TOD]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + + +4.18. 
Fairness and Bias in Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +A Framework for Determining the Fairness of Outlier Detection ECAI 2020 [#Davidson2020A]_ `[PDF] `_ +FAIROD: Fairness-aware Outlier Detection AIES 2021 [#Shekhar2021FAIROD]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + + +4.19. 
Isolation-Based Methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================= ============================================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================= ============================================================================================================================================================================================== +Isolation forest ICDM 2008 [#Liu2008Isolation]_ `[PDF] `_ +Isolation‐based anomaly detection using nearest‐neighbor ensembles Computational Intelligence 2018 [#Bandaragoda2018Isolation]_ `[PDF] `_, `[Code] `_ +Extended Isolation Forest TKDE 2019 [#Hariri2019Extended]_ `[PDF] `_, `[Code] `_ +Isolation Distributional Kernel: A New Tool for Kernel based Anomaly Detection KDD 2020 [#Ting2020Isolation]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================= ============================================================================================================================================================================================== + + + +4.20. 
Emerging and Interesting Topics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Clustering with Outlier Removal TKDE 2019 [#Liu2018Clustering]_ `[PDF] `_ +Real-World Anomaly Detection by using Digital Twin Systems and Weakly-Supervised Learning IEEE Trans. Ind. Informat. 2020 [#Castellani2020Siamese]_ `[PDF] `_ +SSD: A Unified Framework for Self-Supervised Outlier Detection ICLR 2021 [#Sehwag2021SSD]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +---- + +5. Key Conferences/Workshops/Journals +------------------------------------- + +5.1. Conferences & Workshops +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Key data mining conference **deadlines**, **historical acceptance rates**, and more +can be found `data-mining-conferences `_. + + +`ACM International Conference on Knowledge Discovery and Data Mining (SIGKDD) `_. **Note**: SIGKDD usually has an Outlier Detection Workshop (ODD), see `ODD 2021 `_. 
+ +`ACM International Conference on Management of Data (SIGMOD) `_ + +`The Web Conference (WWW) `_ + +`IEEE International Conference on Data Mining (ICDM) `_ + +`SIAM International Conference on Data Mining (SDM) `_ + +`IEEE International Conference on Data Engineering (ICDE) `_ + +`ACM International Conference on Information and Knowledge Management (CIKM) `_ + +`ACM International Conference on Web Search and Data Mining (WSDM) `_ + +`The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD) `_ + +`The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD) `_ + +5.2. Journals +^^^^^^^^^^^^^ + +`ACM Transactions on Knowledge Discovery from Data (TKDD) `_ + +`IEEE Transactions on Knowledge and Data Engineering (TKDE) `_ + +`ACM SIGKDD Explorations Newsletter `_ + +`Data Mining and Knowledge Discovery `_ + +`Knowledge and Information Systems (KAIS) `_ + +---- + +References +---------- + +.. [#Abe2006Outlier] Abe, N., Zadrozny, B. and Langford, J., 2006, August. Outlier detection by active learning. In *Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining*, pp. 504-509, ACM. + +.. [#Aggarwal2013Outlier] Aggarwal, C.C., 2013. Outlier ensembles: position paper. *ACM SIGKDD Explorations Newsletter*\ , 14(2), pp.49-58. + +.. [#Ahmed2016A] Ahmed, M., Mahmood, A.N. and Islam, M.R., 2016. A survey of anomaly detection techniques in financial domain. *Future Generation Computer Systems*\ , 55, pp.278-288. + +.. [#Ahmad2017Unsupervised] Ahmad, S., Lavin, A., Purdy, S. and Agha, Z., 2017. Unsupervised real-time anomaly detection for streaming data. *Neurocomputing*, 262, pp.134-147. + +.. [#Akoglu2015Graph] Akoglu, L., Tong, H. and Koutra, D., 2015. Graph based anomaly detection and description: a survey. *Data Mining and Knowledge Discovery*\ , 29(3), pp.626-688. + +.. [#Angiulli2002Fast] Angiulli, F. and Pizzuti, C., 2002, August.
Fast outlier detection in high dimensional spaces. In *European Conference on Principles of Data Mining and Knowledge Discovery*, pp. 15-27. + +.. [#Arnaldo2019ex2] Arnaldo, I., Veeramachaneni, K. and Lam, M., 2019. ex2: a framework for interactive anomaly detection. In *ACM IUI Workshop on Exploratory Search and Interactive Data Analytics (ESIDA)*. + +.. [#Bandaragoda2018Isolation] Bandaragoda, Tharindu R., Kai Ming Ting, David Albrecht, Fei Tony Liu, Ye Zhu, and Jonathan R. Wells. "Isolation‐based anomaly detection using nearest‐neighbor ensembles." *Computational Intelligence* 34, no. 4 (2018): 968-998. + +.. [#Bhatia2020MIDAS] Bhatia, S., Hooi, B., Yoon, M., Shin, K. and Faloutsos, C., 2020. MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams. In *AAAI Conference on Artificial Intelligence (AAAI)*. + +.. [#Braei2020Anomaly] Braei, M. and Wagner, S., 2020. Anomaly detection in univariate time-series: A survey on the state-of-the-art. arXiv preprint arXiv:2004.00433. + +.. [#Breunig2000LOF] Breunig, M.M., Kriegel, H.P., Ng, R.T. and Sander, J., 2000, May. LOF: identifying density-based local outliers. *ACM SIGMOD Record*\ , 29(2), pp. 93-104. + +.. [#Bulusu2020Deep] Bulusu, S., Kailkhura, B., Li, B., Varshney, P. and Song, D., 2020. Anomalous instance detection in deep learning: A survey (No. LLNL-CONF-808677). Lawrence Livermore National Lab. (LLNL), Livermore, CA (United States). + +.. [#Campos2016On] Campos, G.O., Zimek, A., Sander, J., Campello, R.J., Micenková, B., Schubert, E., Assent, I. and Houle, M.E., 2016. On the evaluation of unsupervised outlier detection: measures, datasets, and an empirical study. *Data Mining and Knowledge Discovery*\ , 30(4), pp.891-927. + +.. [#Campos2018An] Campos, G.O., Zimek, A. and Meira, W., 2018, June. An Unsupervised Boosting Strategy for Outlier Detection Ensembles. In *Pacific-Asia Conference on Knowledge Discovery and Data Mining (pp. 564-576)*. Springer, Cham. + +..
[#Campos2019Outlier] Campos, G.O., Moreira, E., Meira Jr, W. and Zimek, A., 2019. Outlier Detection in Graphs: A Study on the Impact of Multiple Graph Models. *Computer Science & Information Systems*, 16(2). + +.. [#Castellani2020Siamese] Castellani, A., Schmitt, S., Squartini, S., 2020. Real-World Anomaly Detection by using Digital Twin Systems and Weakly-Supervised Learning. In *IEEE Transactions on Industrial Informatics*. + +.. [#Chalapathy2019Deep] Chalapathy, R. and Chawla, S., 2019. Deep learning for anomaly detection: A survey. arXiv preprint arXiv:1901.03407. + +.. [#Chandola2009Anomaly] Chandola, V., Banerjee, A. and Kumar, V., 2009. Anomaly detection: A survey. *ACM computing surveys* , 41(3), p.15. + +.. [#Chawla2011Anomaly] Chawla, S. and Chandola, V., 2011, Anomaly Detection: A Tutorial. *Tutorial at ICDM 2011*. + +.. [#Chen2017Outlier] Chen, J., Sathe, S., Aggarwal, C. and Turaga, D., 2017, June. Outlier detection with autoencoder ensembles. *SIAM International Conference on Data Mining*, pp. 90-98. Society for Industrial and Applied Mathematics. + +.. [#Dang2014Discriminative] Dang, X.H., Assent, I., Ng, R.T., Zimek, A. and Schubert, E., 2014, March. Discriminative features for identifying and interpreting outliers. In *International Conference on Data Engineering (ICDE)*. IEEE. + +.. [#Das2019Active] Das, S., Islam, M.R., Jayakodi, N.K. and Doppa, J.R., 2019. Active Anomaly Detection via Ensembles: Insights, Algorithms, and Interpretability. arXiv preprint arXiv:1901.08930. + +.. [#Davidson2020A] Davidson, I. and Ravi, S.S., 2020. A framework for determining the fairness of outlier detection. In Proceedings of the 24th European Conference on Artificial Intelligence (ECAI2020) (Vol. 2029). + +.. [#Ding2019Interactive] Ding, K., Li, J. and Liu, H., 2019, January. Interactive anomaly detection on attributed networks. In *Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining*, pp. 357-365. ACM. + +.. 
[#Djenouri2018Outlier] Djenouri, Y. and Zimek, A., 2018, June. Outlier detection in urban traffic data. In *Proceedings of the 8th International Conference on Web Intelligence, Mining and Semantics*. ACM. + +.. [#Domingues2018A] Domingues, R., Filippone, M., Michiardi, P. and Zouaoui, J., 2018. A comparative evaluation of outlier detection algorithms: Experiments and analyses. *Pattern Recognition*, 74, pp.406-421. + +.. [#Emmott2015A] Emmott, A., Das, S., Dietterich, T., Fern, A. and Wong, W.K., 2015. A meta-analysis of the anomaly detection problem. arXiv preprint arXiv:1503.01158. + +.. [#Ergen2019Unsupervised] Ergen, T. and Kozat, S.S., 2019. Unsupervised Anomaly Detection With LSTM Neural Networks. *IEEE transactions on neural networks and learning systems*. + +.. [#Falcao2019Quantitative] Falcão, F., Zoppi, T., Silva, C.B.V., Santos, A., Fonseca, B., Ceccarelli, A. and Bondavalli, A., 2019, April. Quantitative comparison of unsupervised anomaly detection algorithms for intrusion detection. In *Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing*, (pp. 318-327). ACM. + +.. [#GarciaTeodoro2009Anomaly] Garcia-Teodoro, P., Diaz-Verdejo, J., Maciá-Fernández, G. and Vázquez, E., 2009. Anomaly-based network intrusion detection: Techniques, systems and challenges. *Computers & Security*\ , 28(1-2), pp.18-28. + +.. [#Goldstein2016A] Goldstein, M. and Uchida, S., 2016. A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data. *PloS one*\ , 11(4), p.e0152173. + +.. [#Gupta2014Outlier] Gupta, M., Gao, J., Aggarwal, C.C. and Han, J., 2014. Outlier detection for temporal data: A survey. *IEEE Transactions on Knowledge and Data Engineering*\ , 26(9), pp.2250-2267. + +.. [#Hariri2019Extended] Hariri, S., Kind, M.C. and Brunner, R.J., 2019. Extended Isolation Forest. *IEEE Transactions on Knowledge and Data Engineering*. + +.. [#Hendrycks2019Deep] Hendrycks, D., Mazeika, M. and Dietterich, T.G., 2019. 
Deep Anomaly Detection with Outlier Exposure. International Conference on Learning Representations (ICLR). + +.. [#Hodge2004A] Hodge, V. and Austin, J., 2004. A survey of outlier detection methodologies. *Artificial intelligence review*\ , 22(2), pp.85-126. + +.. [#Hundman2018Detecting] Hundman, K., Constantinou, V., Laporte, C., Colwell, I. and Soderstrom, T., 2018, July. Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding. In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, (pp. 387-395). ACM. + +.. [#Kannan2017Outlier] Kannan, R., Woo, H., Aggarwal, C.C. and Park, H., 2017, June. Outlier detection for text data. In *Proceedings of the 2017 SIAM International Conference on Data Mining*, pp. 489-497. Society for Industrial and Applied Mathematics. + +.. [#Kriegel2010Outlier] Kriegel, H.P., Kröger, P. and Zimek, A., 2010. Outlier detection techniques. *Tutorial at ACM SIGKDD 2010*. + +.. [#Lai2021Revisiting] Lai, K.H., Zha, D., Xu, J., Zhao, Y., Wang, G. and Hu, X., 2021. Revisiting Time Series Outlier Detection: Definitions and Benchmarks. *Advances in Neural Information Processing Systems (NeurIPS)*, Datasets and Benchmarks Track. + +.. [#Lamba2019Learning] Lamba, H. and Akoglu, L., 2019, May. Learning On-the-Job to Re-rank Anomalies from Top-1 Feedback. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 612-620. Society for Industrial and Applied Mathematics. + +.. [#Lavin2015Evaluating] Lavin, A. and Ahmad, S., 2015, December. Evaluating Real-Time Anomaly Detection Algorithms--The Numenta Anomaly Benchmark. In *2015 IEEE 14th International Conference on Machine Learning and Applications (ICMLA)* (pp. 38-44). IEEE. + +.. [#Lazarevic2008Data] Lazarevic, A., Banerjee, A., Chandola, V., Kumar, V. and Srivastava, J., 2008, September. Data mining for anomaly detection. *Tutorial at ECML PKDD 2008*. + +.. 
[#Li2019MAD] Li, D., Chen, D., Jin, B., Shi, L., Goh, J. and Ng, S.K., 2019, September. MAD-GAN: Multivariate anomaly detection for time series data with generative adversarial networks. In *International Conference on Artificial Neural Networks* (pp. 703-716). Springer, Cham. + +.. [#Li2020COPOD] Li, Z., Zhao, Y., Botta, N., Ionescu, C. and Hu, X. COPOD: Copula-Based Outlier Detection. *IEEE International Conference on Data Mining (ICDM)*, 2020. + +.. [#Liu2008Isolation] Liu, F.T., Ting, K.M. and Zhou, Z.H., 2008, December. Isolation forest. In *International Conference on Data Mining*\ , pp. 413-422. IEEE. + +.. [#Liu2018Clustering] Liu, H., Li, J., Wu, Y. and Fu, Y., 2019. Clustering with outlier removal. *IEEE transactions on knowledge and data engineering*. + +.. [#Liu2018Contextual] Liu, N., Shin, D. and Hu, X., 2017. Contextual outlier interpretation. In *International Joint Conference on Artificial Intelligence (IJCAI-18)*, pp.2461-2467. + +.. [#Liu2019Generative] Liu, Y., Li, Z., Zhou, C., Jiang, Y., Sun, J., Wang, M. and He, X., 2019. Generative Adversarial Active Learning for Unsupervised Outlier Detection. *IEEE transactions on knowledge and data engineering*. + +.. [#Li2020AutoOD] Li, Y., Chen, Z., Zha, D., Zhou, K., Jin, H., Chen, H. and Hu, X., 2020. AutoOD: Automated Outlier Detection via Curiosity-guided Search and Self-imitation Learning. *ICDE*. + +.. [#Ma2021A] Ma, X., Wu, J., Xue, S., Yang, J., Zhou, C., Sheng, Q.Z., Xiong, H. and Akoglu, L., 2021. A comprehensive survey on graph anomaly detection with deep learning. *IEEE Transactions on Knowledge and Data Engineering*. + +.. [#Macha2018Explaining] Macha, M. and Akoglu, L., 2018. Explaining anomalies in groups with characterizing subspace rules. Data Mining and Knowledge Discovery, 32(5), pp.1444-1480. + +.. [#Manzoor2018Outlier] Manzoor, E., Lamba, H. and Akoglu, L. Outlier Detection in Feature-Evolving Data Streams. 
In *24th ACM SIGKDD International Conference on Knowledge Discovery and Data mining (KDD)*. 2018. + +.. [#Mendiratta2017Anomaly] Mendiratta, B.V., 2017. Anomaly Detection in Networks. *Tutorial at ACM SIGKDD 2017*. + +.. [#Micenkova2015Learning] Micenková, B., McWilliams, B. and Assent, I., 2015. Learning representations for outlier detection on a budget. arXiv preprint arXiv:1507.08104. + +.. [#Gupta2018Beyond] Gupta, N., Eswaran, D., Shah, N., Akoglu, L. and Faloutsos, C., Beyond Outlier Detection: LookOut for Pictorial Explanation. *ECML PKDD 2018*. + +.. [#Pang2016Unsupervised] Pang, G., Cao, L., Chen, L. and Liu, H., 2016, December. Unsupervised feature selection for outlier detection by modelling hierarchical value-feature couplings. In Data Mining (ICDM), 2016 IEEE 16th International Conference on (pp. 410-419). IEEE. + +.. [#Pang2017Learning] Pang, G., Cao, L., Chen, L. and Liu, H., 2017, August. Learning homophily couplings from non-iid data for joint feature selection and noise-resilient outlier detection. In Proceedings of the 26th International Joint Conference on Artificial Intelligence (pp. 2585-2591). AAAI Press. + +.. [#Pang2018Learning] Pang, G., Cao, L., Chen, L. and Liu, H., 2018. Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection. In *24th ACM SIGKDD International Conference on Knowledge Discovery and Data mining (KDD)*. 2018. + +.. [#Pang2020Deep] Pang, G., Shen, C., Cao, L. and Hengel, A.V.D., 2021. Deep Learning for Anomaly Detection: A Review. ACM Computing Surveys (CSUR), 54(2), pp.1-38. + +.. [#Pang2021Deep] Pang, G., Cao, L. and Aggarwal, C., 2021. Deep Learning for Anomaly Detection. *Tutorial at WSDM 2021*. + +.. [#Pelleg2005Active] Pelleg, D. and Moore, A.W., 2005. Active learning for anomaly and rare-category detection. In *Advances in neural information processing systems*\, pp. 1073-1080. + +.. [#Radovanovic2015Reverse] Radovanović, M., Nanopoulos, A. and Ivanović, M., 2015. 
Reverse nearest neighbors in unsupervised distance-based outlier detection. *IEEE transactions on knowledge and data engineering*, 27(5), pp.1369-1382. + +.. [#Ramaswamy2000Efficient] Ramaswamy, S., Rastogi, R. and Shim, K., 2000, May. Efficient algorithms for mining outliers from large data sets. *ACM SIGMOD Record*\ , 29(2), pp. 427-438. + +.. [#Ranshous2015Anomaly] Ranshous, S., Shen, S., Koutra, D., Harenberg, S., Faloutsos, C. and Samatova, N.F., 2015. Anomaly detection in dynamic networks: a survey. Wiley Interdisciplinary Reviews: Computational Statistics, 7(3), pp.223-247. + +.. [#Ren2019Time] Ren, H., Xu, B., Wang, Y., Yi, C., Huang, C., Kou, X., Xing, T., Yang, M., Tong, J. and Zhang, Q., 2019. Time-Series Anomaly Detection Service at Microsoft. In *Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*. ACM. + +.. [#Riazi2019Detecting] Riazi, M., Zaiane, O., Takeuchi, T., Maltais, A., Günther, J. and Lipsett, M., Detecting the Onset of Machine Failure Using Anomaly Detection Methods. + +.. [#Ro2015Outlier] Ro, K., Zou, C., Wang, Z. and Yin, G., 2015. Outlier detection for high-dimensional data. *Biometrika*, 102(3), pp.589-599. + +.. [#Salehi2018A] Salehi, Mahsa & Rashidi, Lida. (2018). A Survey on Anomaly detection in Evolving Data: [with Application to Forest Fire Risk Prediction]. *ACM SIGKDD Explorations Newsletter*. 20. 13-23. + +.. [#Salehi2021A] Salehi, M., Mirzaei, H., Hendrycks, D., Li, Y., Rohban, M.H., Sabokrou, M., 2021. A Unified Survey on Anomaly, Novelty, Open-Set, and Out-of-Distribution Detection: Solutions and Future Challenges. arXiv preprint arXiv:2110.14051. + +.. [#Scholkopf2001Estimating] Schölkopf, B., Platt, J.C., Shawe-Taylor, J., Smola, A.J. and Williamson, R.C., 2001. Estimating the support of a high-dimensional distribution. *Neural Computation*, 13(7), pp.1443-1471. + +.. [#Sehwag2021SSD] Sehwag, V., Chiang, M., Mittal, P., 2021. 
SSD: A Unified Framework for Self-Supervised Outlier Detection. *International Conference on Learning Representations (ICLR)*. + +.. [#Shekhar2021FAIROD] Shekhar, S., Shah, N. and Akoglu, L., 2021. FAIROD: Fairness-aware Outlier Detection. AAAI/ACM Conference on AI, Ethics, and Society (AIES). + +.. [#Siddiqui2019Sequential] Siddiqui, M.A., Fern, A., Dietterich, T.G. and Wong, W.K., 2019. Sequential Feature Explanations for Anomaly Detection. *ACM Transactions on Knowledge Discovery from Data (TKDD)*, 13(1), p.1. + +.. [#Suri2019Research] Suri, N.R. and Athithan, G., 2019. Research Issues in Outlier Detection. In *Outlier Detection: Techniques and Applications*, pp. 29-51. Springer, Cham. + +.. [#Tang2015Mining] Tang, G., Pei, J., Bailey, J. and Dong, G., 2015. Mining multidimensional contextual outliers from categorical relational data. *Intelligent Data Analysis*, 19(5), pp.1171-1192. + +.. [#Ting2018Which] Ting, KM., Aryal, S. and Washio, T., 2018, Which Anomaly Detector should I use? *Tutorial at ICDM 2018*. + +.. [#Ting2020Isolation] Ting, Kai Ming, Bi-Cun Xu, Takashi Washio, and Zhi-Hua Zhou. "Isolation Distributional Kernel: A New Tool for Kernel based Anomaly Detection." In *Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, pp. 198-206. 2020. + +.. [#Wang2019Effective] Wang, S., Zeng, Y., Liu, X., Zhu, E., Yin, J., Xu, C. and Kloft, M., 2019. Effective End-to-end Unsupervised Outlier Detection via Inlier Priority of Discriminative Network. In *33rd Conference on Neural Information Processing Systems*. + +.. [#Wang2019Progress] Wang, H., Bah, M.J. and Hammad, M., 2019. Progress in Outlier Detection Techniques: A Survey. *IEEE Access*, 7, pp.107964-108000. + +.. [#Wang2020Deep] Wang, R., Nie, K., Chang, Y. J., Gong, X., Wang, T., Yang, Y., Long, B., 2020. Deep Learning for Anomaly Detection. *Tutorial at KDD 2020*. + +.. [#WellerFahy2015A] Weller-Fahy, D.J., Borghetti, B.J. and Sodemann, A.A., 2015. 
A survey of distance and similarity measures used within network intrusion anomaly detection. *IEEE Communications Surveys & Tutorials*\ , 17(1), pp.70-91. + +.. [#Xu2021Beyond] Xu, H., Wang, Y., Jian, S., Huang, Z., Wang, Y., Liu, N. and Li, F., 2021, April. Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network. In *Proceedings of the Web Conference* 2021 (pp. 1328-1339). + +.. [#Yoon2019NETS] Yoon, S., Lee, J. G., & Lee, B. S., 2019. NETS: extremely fast outlier detection from a data stream via set-based processing. Proceedings of the VLDB Endowment, 12(11), 1303-1315. + +.. [#Yoon2020STARE] Yoon, S., Lee, J. G., & Lee, B. S., 2020. Ultrafast local outlier detection from a data stream with stationary region skipping. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (pp. 1181-1191) + +.. [#Yu2015Glad] Yu, R., He, X. and Liu, Y., 2015. GLAD: group anomaly detection in social media analysis. *ACM Transactions on Knowledge Discovery from Data (TKDD)*\ , 10(2), p.18. + +.. [#Yu2016A] Yu, R., Qiu, H., Wen, Z., Lin, C. and Liu, Y., 2016. A survey on social media anomaly detection. *ACM SIGKDD Explorations Newsletter*\ , 18(1), pp.1-14. + +.. [#Zha2020Meta] Zha, D., Lai, K.H., Wan, M. and Hu, X., 2020. Meta-AAD: Active Anomaly Detection with Deep Reinforcement Learning. *ICDM*. + +.. [#Zhao2018Xgbod] Zhao, Y. and Hryniewicki, M.K., 2018, July. XGBOD: improving supervised outlier detection with unsupervised representation learning. In *2018 International Joint Conference on Neural Networks (IJCNN)*. IEEE. + +.. [#Zhao2019LSCP] Zhao, Y., Nasrullah, Z., Hryniewicki, M.K. and Li, Z., 2019, May. LSCP: Locally selective combination in parallel outlier ensembles. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 585-593. Society for Industrial and Applied Mathematics. + +.. [#Zhao2019PYOD] Zhao, Y., Nasrullah, Z. 
and Li, Z., 2019. PyOD: A Python Toolbox for Scalable Outlier Detection. *Journal of Machine Learning Research*, 20, pp.1-7. + +.. [#Zhao2020Automating] Zhao, Y., Rossi, R.A. and Akoglu, L., 2021. Automatic Unsupervised Outlier Model Selection. *Advances in Neural Information Processing Systems*. + +.. [#Zhao2021SUOD] Zhao, Y., Hu, X., Cheng, C., Wang, C., Wan, C., Wang, W., Yang, J., Bai, H., Li, Z., Xiao, C. and Wang, Y., 2021. SUOD: Accelerating Large-scale Unsupervised Heterogeneous Outlier Detection. *Proceedings of Machine Learning and Systems (MLSys)*. + +.. [#Zhao2021TOD] Zhao, Y., Chen, G.H. and Jia, Z., 2021. TOD: Tensor-based Outlier Detection. arXiv preprint arXiv:2110.14007. + +.. [#Zhou2019AnomalyNet] Zhou, J.T., Du, J., Zhu, H., Peng, X., Liu, Y. and Goh, R.S.M., 2019. AnomalyNet: An anomaly detection network for video surveillance. *IEEE Transactions on Information Forensics and Security*. + +.. [#Zhu2019Tripartite] Zhu, Y. and Yang, K., 2019. Tripartite Active Learning for Interactive Anomaly Discovery. *IEEE Access*. + +.. [#Zimek2012A] Zimek, A., Schubert, E. and Kriegel, H.P., 2012. A survey on unsupervised outlier detection in high‐dimensional numerical data. *Statistical Analysis and Data Mining: The ASA Data Science Journal*\ , 5(5), pp.363-387. + +.. [#Zimek2014Ensembles] Zimek, A., Campello, R.J. and Sander, J., 2014. Ensembles for unsupervised outlier detection: challenges and research questions a position paper. *ACM SIGKDD Explorations Newsletter*\ , 15(1), pp.11-22. + +.. [#Zong2018Deep] Zong, B., Song, Q., Min, M.R., Cheng, W., Lumezanu, C., Cho, D. and Chen, H., 2018. Deep autoencoding Gaussian mixture model for unsupervised anomaly detection. International Conference on Learning Representations (ICLR).
diff --git a/README_CN.rst b/README_CN.rst new file mode 100644 index 0000000..f846769 --- /dev/null +++ b/README_CN.rst @@ -0,0 +1,553 @@ +异常检测学习资源(Anomaly Detection Learning Resources) +==================================================== + +.. image:: https://img.shields.io/github/stars/yzhao062/anomaly-detection-resources.svg + :target: https://github.com/yzhao062/anomaly-detection-resources/stargazers + :alt: GitHub stars + + +.. image:: https://img.shields.io/github/forks/yzhao062/anomaly-detection-resources.svg?color=blue + :target: https://github.com/yzhao062/anomaly-detection-resources/network + :alt: GitHub forks + + +.. image:: https://img.shields.io/github/license/yzhao062/anomaly-detection-resources.svg?color=blue + :target: https://github.com/yzhao062/anomaly-detection-resources/blob/master/LICENSE + :alt: License + + +.. image:: https://img.shields.io/badge/link-996.icu-red.svg + :target: https://github.com/996icu/996.ICU + :alt: 996.ICU + + +---- + +`异常检测 (anomaly detection) `_ +(又名 Outlier Detection) 是一个重要但非常有挑战性的领域。异常检测的目标主要是找到数据中 +偏离于主要分布的案例--它在很多领域都有重要意义,包括「信用卡诈骗检测」、「网络入侵检测」、 +「机械故障检测」等。 + +这个仓库中收藏了关于异常检测的: + + +#. 专业书籍与学术论文 +#. 在线课程与视频 +#. 异常检测数据集 +#. 开源与商业工具库 +#. 重要的会议与期刊 + + +**更多内容会被陆续添加到当前仓库中来**。 +请建议/推荐相关资源,你可以选择提交issue report、pull request或者给我发邮件 (zhaoy@cmu.edu)。 +Enjoy reading! + +---- + +目录 +----------------- + + +* `1. 书籍 & 教程 <#1-书籍--教程>`_ + + * `1.1. 书籍 <#11-书籍>`_ + * `1.2. 教程 <#12-教程>`_ + +* `2. Courses/Seminars/Videos <#2-coursesseminarsvideos>`_ +* `3. Toolbox & Datasets <#3-toolbox--datasets>`_ + + * `3.1. Multivariate data outlier detection <#31-multivariate-data>`_ + * `3.2. Time series outlier detection <#32-time-series-outlier-detection>`_ + * `3.3. Datasets <#33-datasets>`_ + +* `4. Papers <#4-papers>`_ + + * `4.1. Overview & Survey Papers <#41-overview--survey-papers>`_ + * `4.2. Key Algorithms <#42-key-algorithms>`_ + * `4.3. Graph & Network Outlier Detection <#43-graph--network-outlier-detection>`_ + * `4.4. 
Time Series Outlier Detection <#44-time-series-outlier-detection>`_ + * `4.5. Feature Selection in Outlier Detection <#45-feature-selection-in-outlier-detection>`_ + * `4.6. High-dimensional & Subspace Outliers <#46-high-dimensional--subspace-outliers>`_ + * `4.7. Outlier Ensembles <#47-outlier-ensembles>`_ + * `4.8. Outlier Detection in Evolving Data <#48-outlier-detection-in-evolving-data>`_ + * `4.9. Representation Learning in Outlier Detection <#49-representation-learning-in-outlier-detection>`_ + * `4.10. Interpretability <#410-interpretability>`_ + * `4.11. Outlier Detection with Neural Networks <#411-outlier-detection-with-neural-networks>`_ + * `4.12. Active Anomaly Detection <#412-active-anomaly-detection>`_ + * `4.13. Interactive Outlier Detection <#413-interactive-outlier-detection>`_ + * `4.14. Outlier Detection in Other fields <#414-outlier-detection-in-other-fields>`_ + * `4.15. Outlier Detection Applications <#415-outlier-detection-applications>`_ + +* `5. Key Conferences/Workshops/Journals <#5-key-conferencesworkshopsjournals>`_ + + * `5.1. Conferences & Workshops <#51-conferences--workshops>`_ + * `5.2. Journals <#52-journals>`_ + + +---- + +1. 书籍 & 教程 +------------- + +1.1. 书籍 +^^^^^^^^ + +`Outlier Analysis `_ +作者: Charu Aggarwal: 经典异常检测教科书,内容涵盖了大部分相关算法与应用。异常检测领域人士必读。 +`[预览.pdf] `_ + +`Outlier Ensembles: An Introduction `_ +作者: Charu Aggarwal and Saket Sathe: 非常权威的集成异常检测教科书。 + +`Data Mining: Concepts and Techniques (3rd) `_ +作者: 韩家炜 (Jiawei Han) and Micheline Kamber and Jian Pei (裴健): 该书第十二章讨论了异常检测技术。 `[Google Search] `_ + +1.2. 
教程 +^^^^^^^^ + +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== +Tutorial Title Venue Year Ref Materials +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== +Outlier detection techniques ACM SIGKDD 2010 [#Kriegel2010Outlier]_ `[PDF] `_ +Anomaly Detection: A Tutorial ICDM 2011 [#Chawla2011Anomaly]_ `[PDF] `_ +Data mining for anomaly detection PKDD 2008 [#Lazarevic2008Data]_ `[Video] `_ +===================================================== ============================================ ===== ============================ ========================================================================================================================================================================== + +---- + +2. Courses/Seminars/Videos +-------------------------- + +**Coursera Introduction to Anomaly Detection (by IBM)**\ : +`[See Video] `_ + +**Coursera Real-Time Cyber Threat Detection and Mitigation partly covers the topic**\ : +`[See Video] `_ + +**Coursera Machine Learning by Andrew Ng also partly covers the topic**\ : + + +* `Anomaly Detection vs. Supervised Learning `_ +* `Developing and Evaluating an Anomaly Detection System `_ + +**Udemy Outlier Detection Algorithms in Data Mining and Data Science**\ : +`[See Video] `_ + +**Stanford Data Mining for Cyber Security** also covers part of anomaly detection techniques\ : +`[See Video] `_ + +---- + +3. Toolbox & Datasets +--------------------- + +3.1. 
Multivariate Data +^^^^^^^^^^^^^^^^^^^^^^ + +[**Python**] `Python Outlier Detection (PyOD) `_\ : PyOD is a comprehensive and scalable Python toolkit for detecting outlying objects in multivariate data. It contains more than 20 detection algorithms, including emerging deep learning models and outlier ensembles. + +[**Python**] `Scikit-learn Novelty and Outlier Detection `_. It supports some popular algorithms like LOF, Isolation Forest, and One-class SVM. + +[**Java**] `ELKI: Environment for Developing KDD-Applications Supported by Index-Structures `_\ : +ELKI is an open source (AGPLv3) data mining software written in Java. The focus of ELKI is research in algorithms, with an emphasis on unsupervised methods in cluster analysis and outlier detection. + +[**Java**] `RapidMiner Anomaly Detection Extension `_\ : The Anomaly Detection Extension for RapidMiner comprises the most well-known unsupervised anomaly detection algorithms, assigning individual anomaly scores to data rows of example sets. It allows you to find data, which is significantly different from the normal, without the need for the data being labeled. + +[**R**] `outliers package `_\ : A collection of some tests commonly used for identifying outliers in R. + +[**Matlab**] `Anomaly Detection Toolbox - Beta `_\ : A collection of popular outlier detection algorithms in Matlab. + + +3.2. Time series outlier detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +[**Python**] `datastream.io `_\ : An open-source framework for real-time anomaly detection using Python, Elasticsearch and Kibana. + +[**Python**] `skyline `_\ : Skyline is a near real time anomaly detection system. + +[**Python**] `banpei `_\ : Banpei is a Python package for anomaly detection. + +[**Python**] `telemanom `_\ : A framework for using LSTMs to detect anomalies in multivariate time series data.
+ +[**Python**] `DeepADoTS `_\ : A benchmarking pipeline for anomaly detection on time series data for multiple state-of-the-art deep learning methods. + +[**R**] `AnomalyDetection `_\ : AnomalyDetection is an open-source R package to detect anomalies which is robust, from a statistical standpoint, in the presence of seasonality and an underlying trend. + + +3.3. Datasets +^^^^^^^^^^^^^ + +**ELKI Outlier Datasets**\ : https://elki-project.github.io/datasets/outlier + +**Outlier Detection DataSets (ODDS)**\ : http://odds.cs.stonybrook.edu/#table1 + +**Unsupervised Anomaly Detection Dataverse**\ : https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/OPQMVF + +**Anomaly Detection Meta-Analysis Benchmarks**\ : https://ir.library.oregonstate.edu/concern/datasets/47429f155 + +---- + +4. Papers +--------- + +4.1. Overview & Survey Papers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +A survey of outlier detection methodologies ARTIF INTELL REV 2004 [#Hodge2004A]_ `[PDF] `_ +Anomaly detection: A survey CSUR 2009 [#Chandola2009Anomaly]_ `[PDF] `_ +A meta-analysis of the anomaly detection problem Preprint 2015 [#Emmott2015A]_ `[PDF] `_ +On the evaluation of unsupervised outlier detection: measures, datasets, and an empirical study DMKD 2016 [#Campos2016On]_ `[HTML] `_, 
`[SLIDES] `_ +A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data PLOS ONE 2016 [#Goldstein2016A]_ `[PDF] `_ +A comparative evaluation of outlier detection algorithms: Experiments and analyses Pattern Recognition 2018 [#Domingues2018A]_ `[PDF] `_ +Research Issues in Outlier Detection Book Chapter 2019 [#Suri2019Research]_ `[HTML] `_ +Quantitative comparison of unsupervised anomaly detection algorithms for intrusion detection SAC 2019 [#Falcao2019Quantitative]_ `[HTML] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + +4.2. Key Algorithms +^^^^^^^^^^^^^^^^^^^ + +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== +Abbreviation Paper Title Venue Year Ref Materials +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== +kNN Efficient algorithms for mining outliers from large data sets ACM SIGMOD Record 2000 [#Ramaswamy2000Efficient]_ `[PDF] `_ +KNN Fast outlier detection in high dimensional spaces PKDD 2002 [#Angiulli2002Fast]_ `[PDF] `_ +LOF LOF: identifying density-based local outliers ACM SIGMOD Record 
2000 [#Breunig2000LOF]_ `[PDF] `_ +IForest Isolation forest ICDM 2008 [#Liu2008Isolation]_ `[PDF] `_ +OCSVM Estimating the support of a high-dimensional distribution Neural Computation 2001 [#Scholkopf2001Estimating]_ `[PDF] `_ +AutoEncoder Ensemble Outlier detection with autoencoder ensembles SDM 2017 [#Chen2017Outlier]_ `[PDF] `_ +==================== ================================================================================================= ================================= ===== =========================== ============================================================================================================================================================================================== + +4.3. Graph & Network Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================= ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================= ===== ============================ ========================================================================================================================================================================== +Graph based anomaly detection and description: a survey DMKD 2015 [#Akoglu2015Graph]_ `[PDF] `_ +Anomaly detection in dynamic networks: a survey WIREs Computational Statistic 2015 [#Ranshous2015Anomaly]_ `[PDF] `_ +================================================================================================= ============================= ===== ============================ 
========================================================================================================================================================================== + + +4.4. Time Series Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Outlier detection for temporal data: A survey TKDE 2014 [#Gupta2014Outlier]_ `[PDF] `_ +Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding KDD 2018 [#Hundman2018Detecting]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.5. 
Feature Selection in Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== +Unsupervised feature selection for outlier detection by modelling hierarchical value-feature couplings ICDM 2016 [#Pang2016Unsupervised]_ `[PDF] `_ +Learning homophily couplings from non-iid data for joint feature selection and noise-resilient outlier detection IJCAI 2017 [#Pang2017Learning]_ `[PDF] `_ +================================================================================================================ ============================ ===== ============================ ========================================================================================================================================================================== + + +4.6. 
High-dimensional & Subspace Outliers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= +A survey on unsupervised outlier detection in high-dimensional numerical data Stat Anal Data Min 2012 [#Zimek2012A]_ `[HTML] `_ +Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection SIGKDD 2018 [#Pang2018Learning]_ `[PDF] `_ +Reverse Nearest Neighbors in Unsupervised Distance-Based Outlier Detection TKDE 2015 [#Radovanovic2015Reverse]_ `[PDF] `_, `[SLIDES] `_ +Outlier detection for high-dimensional data Biometrika 2015 [#Ro2015Outlier]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ======================================================================================================================================================================================================= + + +4.7. 
Outlier Ensembles +^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Outlier ensembles: position paper SIGKDD Explorations 2013 [#Aggarwal2013Outlier]_ `[PDF] `_ +Ensembles for unsupervised outlier detection: challenges and research questions a position paper SIGKDD Explorations 2014 [#Zimek2014Ensembles]_ `[PDF] `_ +An Unsupervised Boosting Strategy for Outlier Detection Ensembles PAKDD 2018 [#Campos2018An]_ `[HTML] `_ +LSCP: Locally selective combination in parallel outlier ensembles SDM 2019 [#Zhao2019LSCP]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + +4.8. 
Outlier Detection in Evolving Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +A Survey on Anomaly detection in Evolving Data: [with Application to Forest Fire Risk Prediction] SIGKDD Explorations 2018 [#Salehi2018A]_ `[PDF] `_ +Unsupervised real-time anomaly detection for streaming data Neurocomputing 2017 [#Ahmad2017Unsupervised]_ `[PDF] `_ +Outlier Detection in Feature-Evolving Data Streams SIGKDD 2018 [#Manzoor2018Outlier]_ `[PDF] `_, `[Github] `_ +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + + +4.9. 
Representation Learning in Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection SIGKDD 2018 [#Pang2018Learning]_ `[PDF] `_ +Learning representations for outlier detection on a budget Preprint 2015 [#Micenkova2015Learning]_ `[PDF] `_ +XGBOD: improving supervised outlier detection with unsupervised representation learning IJCNN 2018 [#Zhao2018Xgbod]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + + +4.10. 
Interpretability +^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Explaining Anomalies in Groups with Characterizing Subspace Rules DMKD 2018 [#Macha2018Explaining]_ `[PDF] `_ +Beyond Outlier Detection: LookOut for Pictorial Explanation ECML-PKDD 2018 [#Gupta2018Beyond]_ `[PDF] `_ +Contextual outlier interpretation IJCAI 2018 [#Liu2018Contextual]_ `[PDF] `_ +Mining multidimensional contextual outliers from categorical relational data IDA 2015 [#Tang2015Mining]_ `[PDF] `_ +Discriminative features for identifying and interpreting outliers ICDE 2014 [#Dang2014Discriminative]_ `[PDF] `_ +Sequential Feature Explanations for Anomaly Detection TKDD 2019 [#Siddiqui2019Sequential]_ `[HTML] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.11. 
Outlier Detection with Neural Networks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding KDD 2018 [#Hundman2018Detecting]_ `[PDF] `_, `[Code] `_ +MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks Preprint 2019 [#Li2019MAD]_ `[PDF] `_, `[Code] `_ +Generative Adversarial Active Learning for Unsupervised Outlier Detection TKDE 2019 [#Liu2019Generative]_ `[PDF] `_, `[Code] `_ +Deep Autoencoding Gaussian Mixture Model for Unsupervised Anomaly Detection ICLR 2018 [#Zong2018Deep]_ `[PDF] `_, `[Code] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.12. 
Active Anomaly Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== +Active learning for anomaly and rare-category detection NeurIPS 2005 [#Pelleg2005Active]_ `[PDF] `_ +Outlier detection by active learning SIGKDD 2006 [#Abe2006Outlier]_ `[PDF] `_ +Active Anomaly Detection via Ensembles: Insights, Algorithms, and Interpretability Preprint 2019 [#Das2019Active]_ `[PDF] `_ +================================================================================================== ============================ ===== ============================ ========================================================================================================================================================================== + + +4.13. 
Interactive Outlier Detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Paper Title Venue Year Ref Materials +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Learning On-the-Job to Re-rank Anomalies from Top-1 Feedback SDM 2019 [#Lamba2019Learning]_ `[PDF] `_ +Interactive anomaly detection on attributed networks WSDM 2019 [#Ding2019Interactive]_ `[PDF] `_ +eX2: a framework for interactive anomaly detection IUI Workshop 2019 [#Arnaldo2019ex2]_ `[PDF] `_ +Tripartite Active Learning for Interactive Anomaly Discovery IEEE Access 2019 [#Zhu2019Tripartite]_ `[PDF] `_ +================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.14. 
Outlier Detection in Other fields +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Field Paper Title Venue Year Ref Materials +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +**Text** Outlier detection for text data SDM 2017 [#Kannan2017Outlier]_ `[PDF] `_ +============== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== + + +4.15. 
Outlier Detection Applications +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +=================== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +Field Paper Title Venue Year Ref Materials +=================== ================================================================================================= ============================ ===== ============================ ========================================================================================================================================================================== +**Security** A survey of distance and similarity measures used within network intrusion anomaly detection IEEE Commun. Surv. Tutor. 2015 [#WellerFahy2015A]_ `[PDF] `_ +**Security** Anomaly-based network intrusion detection: Techniques, systems and challenges Computers & Security 2009 [#GarciaTeodoro2009Anomaly]_ `[PDF] `_ +**Finance** A survey of anomaly detection techniques in financial domain Future Gener Comput Syst 2016 [#Ahmed2016A]_ `[PDF] `_ +**Traffic** Outlier Detection in Urban Traffic Data WIMS 2018 [#Djenouri2018Outlier]_ `[PDF] `_ +**Social Media** A survey on social media anomaly detection SIGKDD Explorations 2016 [#Yu2016A]_ `[PDF] `_ +**Social Media** GLAD: group anomaly detection in social media analysis TKDD 2015 [#Yu2015Glad]_ `[PDF] `_ +**Machine Failure** Detecting the Onset of Machine Failure Using Anomaly Detection Methods DAWAK 2019 [#Riazi2019Detecting]_ `[PDF] `_ +=================== ================================================================================================= ============================ ===== ============================ 
========================================================================================================================================================================== + + +---- + +5. Key Conferences/Workshops/Journals +------------------------------------- + +5.1. Conferences & Workshops +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Key data mining conference **deadlines**, **historical acceptance rates**, and more +can be found at `data-mining-conferences `_. + + +`ACM International Conference on Knowledge Discovery and Data Mining (SIGKDD) `_. **Note**: SIGKDD usually has an Outlier Detection Workshop (ODD); see `ODD 2018 `_. + +`ACM International Conference on Management of Data (SIGMOD) `_ + +`The Web Conference (WWW) `_ + +`IEEE International Conference on Data Mining (ICDM) `_ + +`SIAM International Conference on Data Mining (SDM) `_ + +`IEEE International Conference on Data Engineering (ICDE) `_ + +`ACM International Conference on Information and Knowledge Management (CIKM) `_ + +`ACM International Conference on Web Search and Data Mining (WSDM) `_ + +`The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD) `_ + +`The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD) `_ + +5.2. Journals +^^^^^^^^^^^^^ + +`ACM Transactions on Knowledge Discovery from Data (TKDD) `_ + +`IEEE Transactions on Knowledge and Data Engineering (TKDE) `_ + +`ACM SIGKDD Explorations Newsletter `_ + +`Data Mining and Knowledge Discovery `_ + +`Knowledge and Information Systems (KAIS) `_ + +---- + +References +---------- + +.. [#Abe2006Outlier] Abe, N., Zadrozny, B. and Langford, J., 2006, August. Outlier detection by active learning. In *Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining*, pp. 504-509, ACM. + +.. [#Aggarwal2013Outlier] Aggarwal, C.C., 2013. Outlier ensembles: position paper. *ACM SIGKDD Explorations Newsletter*\ , 14(2), pp.49-58. + +.. 
[#Ahmed2016A] Ahmed, M., Mahmood, A.N. and Islam, M.R., 2016. A survey of anomaly detection techniques in financial domain. *Future Generation Computer Systems*\ , 55, pp.278-288. + +.. [#Ahmad2017Unsupervised] Ahmad, S., Lavin, A., Purdy, S. and Agha, Z., 2017. Unsupervised real-time anomaly detection for streaming data. *Neurocomputing*, 262, pp.134-147. + +.. [#Akoglu2015Graph] Akoglu, L., Tong, H. and Koutra, D., 2015. Graph based anomaly detection and description: a survey. *Data Mining and Knowledge Discovery*\ , 29(3), pp.626-688. + +.. [#Angiulli2002Fast] Angiulli, F. and Pizzuti, C., 2002, August. Fast outlier detection in high dimensional spaces. In *European Conference on Principles of Data Mining and Knowledge Discovery*, pp. 15-27. + +.. [#Arnaldo2019ex2] Arnaldo, I., Veeramachaneni, K. and Lam, M., 2019. ex2: a framework for interactive anomaly detection. In *ACM IUI Workshop on Exploratory Search and Interactive Data Analytics (ESIDA)*. + +.. [#Breunig2000LOF] Breunig, M.M., Kriegel, H.P., Ng, R.T. and Sander, J., 2000, May. LOF: identifying density-based local outliers. *ACM SIGMOD Record*\ , 29(2), pp. 93-104. + +.. [#Campos2016On] Campos, G.O., Zimek, A., Sander, J., Campello, R.J., Micenková, B., Schubert, E., Assent, I. and Houle, M.E., 2016. On the evaluation of unsupervised outlier detection: measures, datasets, and an empirical study. *Data Mining and Knowledge Discovery*\ , 30(4), pp.891-927. + +.. [#Campos2018An] Campos, G.O., Zimek, A. and Meira, W., 2018, June. An Unsupervised Boosting Strategy for Outlier Detection Ensembles. In *Pacific-Asia Conference on Knowledge Discovery and Data Mining*, pp. 564-576. Springer, Cham. + +.. [#Chandola2009Anomaly] Chandola, V., Banerjee, A. and Kumar, V., 2009. Anomaly detection: A survey. *ACM computing surveys*\ , 41(3), p.15. + +.. [#Chawla2011Anomaly] Chawla, S. and Chandola, V., 2011. Anomaly Detection: A Tutorial. *Tutorial at ICDM 2011*. + +.. 
[#Chen2017Outlier] Chen, J., Sathe, S., Aggarwal, C. and Turaga, D., 2017, June. Outlier detection with autoencoder ensembles. *SIAM International Conference on Data Mining*, pp. 90-98. Society for Industrial and Applied Mathematics. + +.. [#Dang2014Discriminative] Dang, X.H., Assent, I., Ng, R.T., Zimek, A. and Schubert, E., 2014, March. Discriminative features for identifying and interpreting outliers. In *International Conference on Data Engineering (ICDE)*. IEEE. + +.. [#Das2019Active] Das, S., Islam, M.R., Jayakodi, N.K. and Doppa, J.R., 2019. Active Anomaly Detection via Ensembles: Insights, Algorithms, and Interpretability. arXiv preprint arXiv:1901.08930. + +.. [#Ding2019Interactive] Ding, K., Li, J. and Liu, H., 2019, January. Interactive anomaly detection on attributed networks. In *Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining*, pp. 357-365. ACM. + +.. [#Djenouri2018Outlier] Djenouri, Y. and Zimek, A., 2018, June. Outlier detection in urban traffic data. In *Proceedings of the 8th International Conference on Web Intelligence, Mining and Semantics*. ACM. + +.. [#Domingues2018A] Domingues, R., Filippone, M., Michiardi, P. and Zouaoui, J., 2018. A comparative evaluation of outlier detection algorithms: Experiments and analyses. *Pattern Recognition*, 74, pp.406-421. + +.. [#Emmott2015A] Emmott, A., Das, S., Dietterich, T., Fern, A. and Wong, W.K., 2015. A meta-analysis of the anomaly detection problem. arXiv preprint arXiv:1503.01158. + +.. [#Falcao2019Quantitative] Falcão, F., Zoppi, T., Silva, C.B.V., Santos, A., Fonseca, B., Ceccarelli, A. and Bondavalli, A., 2019, April. Quantitative comparison of unsupervised anomaly detection algorithms for intrusion detection. In *Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing*, (pp. 318-327). ACM. + +.. [#GarciaTeodoro2009Anomaly] Garcia-Teodoro, P., Diaz-Verdejo, J., Maciá-Fernández, G. and Vázquez, E., 2009. 
Anomaly-based network intrusion detection: Techniques, systems and challenges. *Computers & Security*\ , 28(1-2), pp.18-28. + +.. [#Goldstein2016A] Goldstein, M. and Uchida, S., 2016. A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data. *PloS one*\ , 11(4), p.e0152173. + +.. [#Gupta2014Outlier] Gupta, M., Gao, J., Aggarwal, C.C. and Han, J., 2014. Outlier detection for temporal data: A survey. *IEEE Transactions on Knowledge and Data Engineering*\ , 26(9), pp.2250-2267. + +.. [#Hodge2004A] Hodge, V. and Austin, J., 2004. A survey of outlier detection methodologies. *Artificial intelligence review*\ , 22(2), pp.85-126. + +.. [#Hundman2018Detecting] Hundman, K., Constantinou, V., Laporte, C., Colwell, I. and Soderstrom, T., 2018, July. Detecting spacecraft anomalies using lstms and nonparametric dynamic thresholding. In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, (pp. 387-395). ACM. + +.. [#Kannan2017Outlier] Kannan, R., Woo, H., Aggarwal, C.C. and Park, H., 2017, June. Outlier detection for text data. In *Proceedings of the 2017 SIAM International Conference on Data Mining*, pp. 489-497. Society for Industrial and Applied Mathematics. + +.. [#Kriegel2010Outlier] Kriegel, H.P., Kröger, P. and Zimek, A., 2010. Outlier detection techniques. *Tutorial at ACM SIGKDD 2010*. + +.. [#Lazarevic2008Data] Lazarevic, A., Banerjee, A., Chandola, V., Kumar, V. and Srivastava, J., 2008, September. Data mining for anomaly detection. *Tutorial at ECML PKDD 2008*. + +.. [#Lamba2019Learning] Lamba, H. and Akoglu, L., 2019, May. Learning On-the-Job to Re-rank Anomalies from Top-1 Feedback. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 612-620. Society for Industrial and Applied Mathematics. + +.. [#Li2019MAD] Li, D., Chen, D., Shi, L., Jin, B., Goh, J. and Ng, S.K., 2019. 
MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks. arXiv preprint arXiv:1901.04997. + +.. [#Liu2008Isolation] Liu, F.T., Ting, K.M. and Zhou, Z.H., 2008, December. Isolation forest. In *International Conference on Data Mining*\ , pp. 413-422. IEEE. + +.. [#Liu2018Contextual] Liu, N., Shin, D. and Hu, X., 2017. Contextual outlier interpretation. In *International Joint Conference on Artificial Intelligence (IJCAI-18)*, pp.2461-2467. + +.. [#Liu2019Generative] Liu, Y., Li, Z., Zhou, C., Jiang, Y., Sun, J., Wang, M. and He, X., 2019. Generative Adversarial Active Learning for Unsupervised Outlier Detection. *IEEE transactions on knowledge and data engineering*. + +.. [#Macha2018Explaining] Macha, M. and Akoglu, L., 2018. Explaining anomalies in groups with characterizing subspace rules. Data Mining and Knowledge Discovery, 32(5), pp.1444-1480. + +.. [#Manzoor2018Outlier] Manzoor, E., Lamba, H. and Akoglu, L. Outlier Detection in Feature-Evolving Data Streams. In *24th ACM SIGKDD International Conference on Knowledge Discovery and Data mining (KDD)*. 2018. + +.. [#Micenkova2015Learning] Micenková, B., McWilliams, B. and Assent, I., 2015. Learning representations for outlier detection on a budget. arXiv preprint arXiv:1507.08104. + +.. [#Gupta2018Beyond] Gupta, N., Eswaran, D., Shah, N., Akoglu, L. and Faloutsos, C., Beyond Outlier Detection: LookOut for Pictorial Explanation. *ECML PKDD 2018*. + +.. [#Pang2016Unsupervised] Pang, G., Cao, L., Chen, L. and Liu, H., 2016, December. Unsupervised feature selection for outlier detection by modelling hierarchical value-feature couplings. In Data Mining (ICDM), 2016 IEEE 16th International Conference on (pp. 410-419). IEEE. + +.. [#Pang2017Learning] Pang, G., Cao, L., Chen, L. and Liu, H., 2017, August. Learning homophily couplings from non-iid data for joint feature selection and noise-resilient outlier detection. 
In Proceedings of the 26th International Joint Conference on Artificial Intelligence (pp. 2585-2591). AAAI Press. + +.. [#Pang2018Learning] Pang, G., Cao, L., Chen, L. and Liu, H., 2018. Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection. In *24th ACM SIGKDD International Conference on Knowledge Discovery and Data mining (KDD)*. 2018. + +.. [#Pelleg2005Active] Pelleg, D. and Moore, A.W., 2005. Active learning for anomaly and rare-category detection. In *Advances in neural information processing systems*\, pp. 1073-1080. + +.. [#Radovanovic2015Reverse] Radovanović, M., Nanopoulos, A. and Ivanović, M., 2015. Reverse nearest neighbors in unsupervised distance-based outlier detection. *IEEE transactions on knowledge and data engineering*, 27(5), pp.1369-1382. + +.. [#Ramaswamy2000Efficient] Ramaswamy, S., Rastogi, R. and Shim, K., 2000, May. Efficient algorithms for mining outliers from large data sets. *ACM SIGMOD Record*\ , 29(2), pp. 427-438. + +.. [#Ranshous2015Anomaly] Ranshous, S., Shen, S., Koutra, D., Harenberg, S., Faloutsos, C. and Samatova, N.F., 2015. Anomaly detection in dynamic networks: a survey. Wiley Interdisciplinary Reviews: Computational Statistics, 7(3), pp.223-247. + +.. [#Riazi2019Detecting] Riazi, M., Zaiane, O., Takeuchi, T., Maltais, A., Günther, J. and Lipsett, M., Detecting the Onset of Machine Failure Using Anomaly Detection Methods. + +.. [#Ro2015Outlier] Ro, K., Zou, C., Wang, Z. and Yin, G., 2015. Outlier detection for high-dimensional data. *Biometrika*, 102(3), pp.589-599. + +.. [#Salehi2018A] Salehi, Mahsa & Rashidi, Lida. (2018). A Survey on Anomaly detection in Evolving Data: [with Application to Forest Fire Risk Prediction]. *ACM SIGKDD Explorations Newsletter*. 20. 13-23. + +.. [#Scholkopf2001Estimating] Schölkopf, B., Platt, J.C., Shawe-Taylor, J., Smola, A.J. and Williamson, R.C., 2001. Estimating the support of a high-dimensional distribution. 
*Neural Computation*, 13(7), pp.1443-1471. + +.. [#Siddiqui2019Sequential] Siddiqui, M.A., Fern, A., Dietterich, T.G. and Wong, W.K., 2019. Sequential Feature Explanations for Anomaly Detection. *ACM Transactions on Knowledge Discovery from Data (TKDD)*, 13(1), p.1. + +.. [#Suri2019Research] Suri, N.R. and Athithan, G., 2019. Research Issues in Outlier Detection. In *Outlier Detection: Techniques and Applications*, pp. 29-51. Springer, Cham. + +.. [#Tang2015Mining] Tang, G., Pei, J., Bailey, J. and Dong, G., 2015. Mining multidimensional contextual outliers from categorical relational data. *Intelligent Data Analysis*, 19(5), pp.1171-1192. + +.. [#WellerFahy2015A] Weller-Fahy, D.J., Borghetti, B.J. and Sodemann, A.A., 2015. A survey of distance and similarity measures used within network intrusion anomaly detection. *IEEE Communications Surveys & Tutorials*\ , 17(1), pp.70-91. + +.. [#Yu2015Glad] Yu, R., He, X. and Liu, Y., 2015. GLAD: group anomaly detection in social media analysis. *ACM Transactions on Knowledge Discovery from Data (TKDD)*\ , 10(2), p.18. + +.. [#Yu2016A] Yu, R., Qiu, H., Wen, Z., Lin, C. and Liu, Y., 2016. A survey on social media anomaly detection. *ACM SIGKDD Explorations Newsletter*\ , 18(1), pp.1-14. + +.. [#Zhao2018Xgbod] Zhao, Y. and Hryniewicki, M.K., 2018, July. XGBOD: improving supervised outlier detection with unsupervised representation learning. In *2018 International Joint Conference on Neural Networks (IJCNN)*. IEEE. + +.. [#Zhao2019LSCP] Zhao, Y., Nasrullah, Z., Hryniewicki, M.K. and Li, Z., 2019, May. LSCP: Locally selective combination in parallel outlier ensembles. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 585-593. Society for Industrial and Applied Mathematics. + +.. [#Zhu2019Tripartite] Zhu, Y. and Yang, K., 2019. Tripartite Active Learning for Interactive Anomaly Discovery. *IEEE Access*. + +.. [#Zimek2012A] Zimek, A., Schubert, E. and Kriegel, H.P., 2012. 
# Text file with one "<title> | <url>" entry per line.
RESOURCE_LIST = pathlib.Path('resource_urls') / 'papers.txt'
# Directory the downloaded files are written to.
OUTPUT_DIR = pathlib.Path('resources')
# Extension appended to every downloaded file.
FILE_SUFFIX = '.pdf'
# Upper bound for a complete file name (suffix included); the common
# per-component limit on mainstream file systems.
MAX_FILE_NAME_LENGTH = 255


def parse_resource_line(line):
    """Split one ``<title> | <url>`` line into a file name stem and a URL.

    :param line: raw line read from the resource list; may end with a newline
    :return: ``(file_name, url)`` where ``file_name`` has the characters
        ``\\ / * ? : " < > |`` removed and is short enough that appending
        ``FILE_SUFFIX`` stays within ``MAX_FILE_NAME_LENGTH``; ``None`` when
        the line has no `` | `` separator, no URL or no usable title
    """
    title, separator, url = line.partition(' | ')
    # the URL is the tail of the line, so it still carries the line break
    url = url.strip()
    if not separator or not url:
        return None

    # remove all special chars that are not allowed in a file name
    file_name = re.sub(r'[\\/*?:"<>|]', "", title).strip()
    if not file_name:
        return None

    # leave room for the suffix, otherwise the final name exceeds the limit
    file_name = file_name[:MAX_FILE_NAME_LENGTH - len(FILE_SUFFIX)]
    return file_name, url


def download_all(resource_list=RESOURCE_LIST, output_dir=OUTPUT_DIR):
    """Download every resource listed in ``resource_list``.

    A failed download is reported and skipped so that one dead link does not
    abort the remaining downloads.

    :param resource_list: path of the ``<title> | <url>`` list file
    :param output_dir: directory to store the files in; created if missing
    :return: list of URLs that could not be downloaded
    """
    output_dir = pathlib.Path(output_dir)
    # initialize the output directory if it does not exist
    output_dir.mkdir(parents=True, exist_ok=True)

    failed_urls = []
    with open(resource_list, 'r', encoding='utf-8') as f:
        for line in f:
            parsed = parse_resource_line(line)
            if parsed is None:
                # blank or malformed line, nothing to download
                continue
            file_name, url = parsed

            print('Downloading', file_name, 'from', url)
            try:
                urllib.request.urlretrieve(
                    url, str(output_dir / (file_name + FILE_SUFFIX)))
            except (OSError, ValueError) as err:
                # URLError/HTTPError derive from OSError; ValueError is
                # raised for a URL without a recognised scheme
                print('Failed to download', url, '-', err)
                failed_urls.append(url)
    return failed_urls


if __name__ == '__main__':
    download_all()
# Markdown inline link target: "](http...)". The URL may contain one level
# of balanced parentheses (e.g. Wikipedia article names) but no whitespace.
LINK_PATTERN = re.compile(r'\]\((http(?:[^()\s]|\([^()\s]*\))*)\)')
# Links containing one of these words are not probed automatically; they
# are listed for manual inspection instead.
MANUAL_CHECK_KEYWORDS = ("github", "coursera")
# Seconds to wait for a server before treating the link as unreachable.
REQUEST_TIMEOUT = 10


def exists(path):
    """ Utility function to check whether a web file exists

    :param path: URL to probe with an HTTP HEAD request
    :return: True if the final response (after redirects) has status 200,
        False for any other status or if the request itself fails
    """
    try:
        # requests.head does not follow redirects unless asked to
        r = requests.head(path, allow_redirects=True, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # unreachable host, timeout, malformed URL, ...
        return False
    return r.status_code == requests.codes.ok


def exists_adv(path):
    """ Utility function to check whether a web file exists

    Currently identical to :func:`exists`.

    :param path: URL to probe
    :return: True if the URL answers with status 200, False otherwise
    """
    # TODO: use selenium
    return exists(path)


def extract_links(line):
    """ Return every http(s) markdown link target found in a line

    :param line: one line of markdown text
    :return: list of URLs in order of appearance (empty if there are none)
    """
    return LINK_PATTERN.findall(line)


def check_links(lines, checker=None):
    """ Sort the links found in ``lines`` into manual and broken ones

    :param lines: iterable of markdown text lines
    :param checker: callable taking a URL and returning True when it is
        reachable; defaults to :func:`exists`
    :return: tuple ``(manual_links, potential_broken_links)``
    """
    if checker is None:
        checker = exists

    manual_links = []
    potential_broken_links = []
    for line in lines:
        for link in extract_links(line):
            if any(keyword in link for keyword in MANUAL_CHECK_KEYWORDS):
                manual_links.append(link)
            elif not checker(link):
                potential_broken_links.append(link)
    return manual_links, potential_broken_links


def main(readme_path='README_11232019.md'):
    """ Print potentially broken links, a blank line, then manual links

    :param readme_path: markdown file to scan
    """
    with open(readme_path, encoding="utf-8") as f:
        manual_links, potential_broken_links = check_links(f)

    for link in potential_broken_links:
        print(link)

    print()
    for link in manual_links:
        print(link)


if __name__ == "__main__":
    import sys

    # optional first command-line argument overrides the default file
    main(*sys.argv[1:2])