<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">ResProt</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Res Protoc</journal-id>
      <journal-title>JMIR Research Protocols</journal-title>
      <issn pub-type="epub">1929-0748</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v7i11e10734</article-id>
    <article-id pub-id-type="pmid">30404769</article-id>
    <article-id pub-id-type="doi">10.2196/10734</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Protocol</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Protocol</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Patterns of Patients’ Interactions With a Health Care Organization and Their Impacts on Health Quality Measurements: Protocol for a Retrospective Cohort Study</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Allem</surname>
          <given-names>Jon-Patrick</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Mavragani</surname>
          <given-names>Amaryllis</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Benis</surname>
        <given-names>Arriel</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>Faculty of Technology Management</institution>
        <institution>Holon Institute of Technology</institution>
        <addr-line>POB 305</addr-line>
        <addr-line>52 Golomb Street</addr-line>
        <addr-line>Holon, 5810201</addr-line>
        <country>Israel</country>
        <phone>972 523404890</phone>
        <email>arrielb@hit.ac.il</email>
      </address>  
      <xref rid="aff2" ref-type="aff">2</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9125-8300</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Harel</surname>
          <given-names>Nissim</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6633-890X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Barak Barkan</surname>
          <given-names>Refael</given-names>
        </name>
        <degrees>MD, PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7119-6317</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Srulovici</surname>
          <given-names>Einav</given-names>
        </name>
        <degrees>RN, MHA, PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1291-8284</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Key</surname>
          <given-names>Calanit</given-names>
        </name>
        <degrees>RN, MHA</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7561-1496</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <label>1</label>
    <institution>Faculty of Technology Management</institution>
    <institution>Holon Institute of Technology</institution>  
    <addr-line>Holon</addr-line>
    <country>Israel</country></aff>
    <aff id="aff2">
    <label>2</label>
    <institution>Clalit Research Institute</institution>
    <institution>Clalit Health Services</institution>  
    <addr-line>Tel-Aviv</addr-line>
    <country>Israel</country></aff>
    <aff id="aff3">
    <label>3</label>
    <institution>Department of Computer Sciences</institution>
    <institution>Faculty of Sciences</institution>  
    <institution>HIT - Holon Institute of Technology</institution>  
    <addr-line>Holon</addr-line>
    <country>Israel</country></aff>
    <aff id="aff4">
    <label>4</label>
    <institution>School of Nursing</institution>
    <institution>University of Haifa</institution>  
    <addr-line>Haifa</addr-line>
    <country>Israel</country></aff>
    <aff id="aff5">
    <label>5</label>
    <institution>Clalit Community Division</institution>
    <institution>Clalit Health Services</institution>  
    <addr-line>Tel-Aviv</addr-line>
    <country>Israel</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Arriel Benis 
      <email>arrielb@hit.ac.il</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>11</month><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>07</day>
      <month>11</month>
      <year>2018</year>
    </pub-date>
    <volume>7</volume>
    <issue>11</issue>
    <elocation-id>e10734</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>11</day>
        <month>4</month>
        <year>2018</year>
      </date>
      <date date-type="rev-request">
        <day>22</day>
        <month>6</month>
        <year>2018</year>
      </date>
      <date date-type="rev-recd">
        <day>14</day>
        <month>8</month>
        <year>2018</year>
      </date>
      <date date-type="accepted">
        <day>20</day>
        <month>8</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Arriel Benis, Nissim Harel, Refael Barak Barkan, Einav Srulovici, Calanit Key. Originally published in JMIR Research Protocols (http://www.researchprotocols.org), 07.11.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Research Protocols, is properly cited. The complete bibliographic information, a link to the original publication on http://www.researchprotocols.org, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.researchprotocols.org/2018/11/e10734/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Data collected by health care organizations consist of medical information and documentation of interactions with patients through different communication channels. This enables the health care organization to measure various features of its performance such as activity, efficiency, adherence to a treatment, and different quality indicators. This information can be linked to sociodemographic, clinical, and communication data with the health care providers and administrative teams. Analyzing all these measurements together may provide insights into the different types of patient behaviors or more accurately to the different types of interactions patients have with the health care organizations.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The primary aim of this study is to characterize usage profiles of the available communication channels with the health care organization. The main objective is to suggest new ways to encourage the usage of the most appropriate communication channel based on the patient’s profile. The first hypothesis is that the patient’s follow-up and clinical outcomes are influenced by the patient’s preferred communication channels with the health care organization. The second hypothesis is that the adoption of newly introduced communication channels between the patient and the health care organization is influenced by the patient’s sociodemographic or clinical profile. The third hypothesis is that the introduction of a new communication channel influences the usage of existing communication channels.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>All relevant data will be extracted from the data warehouse of Clalit Health Services, the largest health care management organization in Israel. The data analysis process will use a data mining approach to discover new knowledge, processing the extracted data with statistical methods, machine learning algorithms, and information visualization tools. More specifically, we will mainly use the k-means clustering algorithm for discretization purposes and patients’ profile building, a hierarchical clustering algorithm, and heat maps for generating a visualization of the different communication profiles. In addition, patient interviews will be conducted to complement the information drawn from the data analysis phase with the aim of suggesting ways to optimize existing communication flows.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>The project was funded in 2016. Data analysis is currently under way and the results are expected to be submitted for publication in 2019. Identification of patient profiles will allow the health care organization to improve its accessibility to patients and their engagement, which in turn will achieve a better treatment adherence, quality of care, and patient experience.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Defining solutions that increase patient accessibility to the health care organization by matching the communication channels to the patient’s profile, and that change the health care organization’s communication with the patient to a highly proactive one, will increase the patient’s engagement according to his or her profile.</p>
      </sec>
      <sec sec-type="registered-report">
        <title>International Registered Report Identifier (IRRID)</title>
        <p>RR1-10.2196/10734</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>health communication</kwd>
      <kwd>population characteristics</kwd>
      <kwd>eHealth</kwd>
      <kwd>mHealth</kwd>
      <kwd>telehealth</kwd>
      <kwd>health information systems</kwd>
      <kwd>consumer health informatics</kwd>
      <kwd>delivery of health care</kwd>
      <kwd>machine learning</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Health care organizations and patients communicate with each other using various communication channels [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Some of these communication channels are traditional: face-to-face meetings with a physician or a nurse, face-to-face interactions with the administrative staff, and phone calls. However, in the past decade, many health care organizations introduced novel methods of digital communication with patients such as text messages, emails, video calls, websites, and mobile apps. The communication channels between the health care organization and its patients have been examined and analyzed in previous studies [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        <p>Data mining and machine learning methodologies have been used to define or redefine clusters of patients according to their state of health and other sociodemographic data [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Recently, process mining has been used to try to improve communication between consumers and health care providers [<xref ref-type="bibr" rid="ref13">13</xref>]. However, no studies attempting to cluster patients by combining medical, sociodemographic, or communication characteristics have been conducted and certainly not in a population as large as the one proposed in this study. We expect that such research will improve communication between patients, service providers, and medical organizations and will improve the quality of treatment and treatment effectiveness and responsiveness.</p>
      </sec>
      <sec>
        <title>Aims and Objectives</title>
        <p>Finding the circumstances and the extent to which different population segments use different communication channels, and specifically, the extent to which usage of newly introduced channels replaces the usage of more traditional channels, will help us learn about the effectiveness of these new channels. Tying these population segments’ communication behavior with their sociodemographic profiles and health outcomes will help us establish the association between the 3, and it may help drive the hypotheses as to the causation. In addition, identifying communication-based population segments may help health care providers to use the most appropriate channels with each population segment, leading to more efficient and targeted communications. For example, identifying and quantifying the early adopters group will help the health care organization to estimate the usage level of a newly developed communication channel, its effectiveness in driving the intended message, and to some extent, its effect on health outcomes. Accordingly, this will also make it possible to improve the quality of treatment, treatment effectiveness, and responsiveness.</p>
        <p>The aims of this retrospective data study are to assist health care policy makers to improve and personalize the communication between patients and health care professionals (eg, physicians and nurses). Communication improvement includes enhancing the accessibility of health care professionals by expanding the capabilities of current communication channels and introducing new ones. These communications will help to improve patient engagement with the treatment process, increase patient responsiveness to follow-up requirements and treatment, and improve patient experience with health care services. More specifically, the primary aim of this study is to characterize usage profiles of the available communication channels in Clalit Health Services (Clalit), each one of them without considering the others and then all of them together. The second aim is to establish relationships between patients’ communication profiles and their sociodemographic and medical profiles. The main objective is to suggest new ways to encourage the usage of the most appropriate communication channel based on the patient’s profile. A secondary objective is to suggest ways for improving communication between the patient and the health care organization mainly through technological means.</p>
      </sec>
      <sec>
        <title>Hypotheses</title>
        <p>The first hypothesis is that the patient’s follow-up and clinical outcomes are influenced by the patient’s preferred channel(s) of communication with the health care organization. If this hypothesis is validated, the research will quantify the phenomenon.</p>
        <p>The second hypothesis is that the adoption of newly introduced communication channels between the patient and the health care organization is influenced by the patient’s sociodemographic and/or clinical profile. If this hypothesis is validated, the research will identify sociodemographic and/or clinical attributes that affect the adoption of newly introduced communication channels.</p>
        <p>The third hypothesis is that the introduction of a new communication channel influences the usage of existing communication channels. If this hypothesis is validated, the research will characterize the changes in usage of existing communication channels once a new communication channel is introduced.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Materials</title>
        <p>This is a data-based study that analyzes information stored in Clalit electronic medical records (EMRs) and in logs documenting access to various communication channels between patients and Clalit, such as internet personal health records and telephone logs. Researchers have full access to Clalit EMRs and logs on the entire insured population of 4.53 million patients in 2015, which constitutes 54% of the Israeli population of 8.38 million as of 2015. Data collected include demographic, clinical, and pharmacological information. In addition, we plan to conduct interviews with a representative sample of the patients to learn directly about the patients’ perceptions, their relationship with the various means of communication, patterns of use, and suggestions for improvement. We hope that this survey will provide information supplementary to what we will obtain from analyzing the data.</p>
        <p>Clinical data from community and hospital settings and pharmacological data are routinely collected in the data warehouses (DWHs) of the health maintenance organization (HMO) and classified into the appropriate data world (eg, appointment scheduling, consultation with a physician, appointment with a specialist, diagnosis during hospitalization, medical services, and prescriptions). The information recorded includes sociodemographic data (gender, marital status, number of children at home, age, origin, socioeconomic status (SES), and place of residence), medical information (dates of specialist appointment, physician license number and the corresponding specialization, diagnoses, date of each diagnosis, prescriptions, acquisition of prescriptions, laboratory results, and imaging), and communication data (appointment date, date the appointment occurred, time elapsed between the scheduled appointment and the actual appointment, and the way the appointment was scheduled—through a medical secretary, call center, website, or mobile app). All relevant pieces of information include a patient identifier, which allows compiling all data relevant to a specific patient into a single record.</p>
        <p>The information to be analyzed is extracted from the EHR DWH of Clalit and includes data collected between 2008 and 2016 for all relevant patients. The long duration of the study will allow us to identify changes in the ways patients interact with the HMO as a function of time and as a function of new communication channels the HMO introduced (eg, website, mobile apps, and the use of the short message service [SMS] text messaging). Accordingly, the patient can start or stop using 1 or more channels to interact with the HMO. The patients included in this study are aged 21 years and over and are members of Clalit for at least 1 year before 2008 and are still alive in 2016. We will focus our study on patients with chronic disease because we want to examine long-term adherence and efficacy. In addition, patients who suffer from 1 chronic disease or more have a high rate of resource consumption. In the United States, for example, 86% of health care spending is devoted to patients with chronic diseases [<xref ref-type="bibr" rid="ref14">14</xref>]. In particular, we will examine diabetic patients, who in 2001 accounted for about 20% of the patient population [<xref ref-type="bibr" rid="ref15">15</xref>]. We hope that the study will help optimize the processes in which these patients participate. The incidence of chronic diseases in general and of diabetes in particular is increasing over the years due to several factors, most notably the aging of the Israeli population. According to Clalit data, as of the end of 2014, more than 40% of the insured population had at least 1 diagnosis that is defined as chronic (eg, diabetes, asthma, heart disease, mental illness, and cancer). Patients with diabetes constitute more than 300,000 individuals with our inclusion criteria [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. 
The profiles that will be found will help define the recommendations and policies that will improve communication with specific subpopulation groups and will increase the effectiveness of treatment and patient adherence. Chronic diseases are not spread uniformly by age; however, given the high cost of treating patients with chronic diseases, we believe it is more useful to concentrate on these patients despite this bias.</p>
      </sec>
      <sec>
        <title>Ethics</title>
        <p>Ethical approval for the study was granted by the Clalit ethical committee (147-15-COM2; January 26, 2016).</p>
      </sec>
      <sec>
        <title>Methodologies</title>
        <p>The communication between health care providers (ie, physicians, nurses, hospitals, and more globally, HMOs) and patients is generally studied by focusing on only 1 or 2 of the channels [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. To fulfill our research aims and objectives, our analysis will consist of characterizing the usage profiles of existing nontechnological and technological communication channels over a period of 9 years, taking into account that Clalit has, over time, added and changed the methods by which patients contact health care professionals (eg, the introduction of Web and mobile apps). Then, the sociodemographic and clinical profiles of each one of the different communication channels’ usage profiles will be defined. This will allow us to qualitatively evaluate the influence of the communication profile on patients’ engagement and follow-up quality.</p>
        <p>As part of the analysis, we will evaluate impacts of new communication channels introduced over the research period. This will allow us to suggest future improvements to the communication between the patient and physician or nurse, with the aim of improving the work processes of the health organization.</p>
        <p>This research is based on knowledge discovery in databases (KDD) methodologies [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. KDD is an interdisciplinary discipline that deals with methodologies for the extraction and identification of valid, new, nontrivial patterns of data that have the potential to be useful and understandable [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. The continued increase in the amounts of data available, a product of the unprecedented development of computer and communications technologies over the past two decades, created a unique opportunity to implement KDD methodologies. Data science experts from different disciplines are therefore challenged to find new and effective ways to extract and generate new knowledge from existing data.</p>
        <p>In the analysis phase, we will use one-dimensional and multidimensional statistical methods as well as different data mining algorithms. The data mining stage is part of the KDD process and focuses mainly on the discovery of unknown patterns. For this purpose, we will use and tune, if necessary, data mining [<xref ref-type="bibr" rid="ref21">21</xref>] and machine learning [<xref ref-type="bibr" rid="ref22">22</xref>] algorithms for dealing with the multidimensional dataset (ie, sociodemographics, bio-clinical, and communication-related data over time), which will be explored in this study. The patterns found in this stage are then evaluated and interpreted to form the knowledge extracted from the KDD process.</p>
        <p>The KDD process that will be developed and implemented in this research includes data collection and integration, early processing and cleaning of data, development and implementation of data mining algorithms to discover new knowledge, and qualitative research [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <sec>
          <title>Data Acquisition</title>
          <p>Clalit DWH is the main source of information the research uses, and a replication for research purposes is updated on a weekly basis. The data extracted from Clalit DWH for each patient comprise the following information:</p>
          <list list-type="order">
            <list-item>
              <p>Sociodemographic data</p>
              <list list-type="bullet">
                <list-item>
                  <p>Date and country of birth and date of immigration when relevant</p>
                </list-item>
                <list-item>
                  <p>Date of death (allowing exclusion)</p>
                </list-item>
                <list-item>
                  <p>Start and end date of membership (allowing exclusion)</p>
                </list-item>
                <list-item>
                  <p>Gender</p>
                </list-item>
                <list-item>
                  <p>Ethnic sector (general Jewish, Arab, and ultra-orthodox Jewish)—the ethnic sector is determined according to the clinic at which the member receives primary care medicine. It is computed by the Clalit computer services unit by integrating geostatistical data from the Israeli Central Bureau of Statistics</p>
                </list-item>
                <list-item>
                  <p>Clinic-level SES (3 categories: low, mid, and high)—the SES is determined according to the clinic at which the member receives primary care medicine. It is computed by the Clalit computer services unit by integrating geostatistical data from the Israeli Central Bureau of Statistics</p>
                </list-item>
              </list>
            </list-item>
            <list-item>
              <p>Bio-clinical</p>
              <list list-type="bullet">
                <list-item>
                  <p>Body mass index (BMI) category (underweight, normal, overweight, obese, or unknown) [<xref ref-type="bibr" rid="ref23">23</xref>]</p>
                </list-item>
                <list-item>
                  <p>Smoking status (current, past, never, or unknown)</p>
                </list-item>
                <list-item>
                  <p>Last available glycated hemoglobin (HbA<sub>1c</sub>) measurement reflecting the level of blood sugar control in patients with diabetes</p>
                </list-item>
                <list-item>
                  <p>Last available lipidemic profiling (high-density lipoprotein, low-density lipoprotein, triglycerides, and total cholesterol)</p>
                </list-item>
                <list-item>
                  <p>Adjusted clinical groups (ACG) [<xref ref-type="bibr" rid="ref24">24</xref>]</p>
                </list-item>
                <list-item>
                  <p>Comorbidities according to the Clalit chronic diseases registry [<xref ref-type="bibr" rid="ref15">15</xref>]</p>
                </list-item>
                <list-item>
                  <p>Proportion of days covered by treatment of diabetes when relevant based on purchase of drugs used in diabetes and more particularly by blood glucose lowering drugs excluding insulin (Anatomical Therapeutic Chemical Classification System codes starting with A10B) [<xref ref-type="bibr" rid="ref25">25</xref>]</p>
                </list-item>
              </list>
            </list-item>
            <list-item>
              <p>Communication or contacts with the HMO data</p>
              <list list-type="bullet">
                <list-item>
                  <p>Appointments scheduling (through a medical secretary—data available since 2009, call center—data available since 2009, website—since 2011, or mobile app—since 2012)</p>
                </list-item>
                <list-item>
                  <p>Consultations with a physician or a nurse</p>
                </list-item>
                <list-item>
                  <p>Hospitalizations</p>
                </list-item>
                <list-item>
                  <p>Consultations at an emergency department</p>
                </list-item>
                <list-item>
                  <p>Nonqueue requests (eg, request for periodic checks, prescription renewal, and sick leave certificate) done without visiting but only by sending a request to a physician through a call to a medical secretary or a nurse or by completing a paper or an electronic form</p>
                </list-item>
                <list-item>
                  <p>Any purchases in a pharmacy of the HMO or purchase related to a prescription in other pharmacies having an agreement with the HMO</p>
                </list-item>
                <list-item>
                  <p>Prescription renewals by SMS—since 2015.</p>
                </list-item>
              </list>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Data Preprocessing</title>
          <sec>
            <title>Data Cleansing</title>
            <p>After integrating the data collected and extracted from Clalit’s DWH, we will prepare it for analysis. This stage includes cleansing of the data collected by Clalit’s DWH when necessary. The main objective of this phase is to reduce noise by detecting and removing or correcting outliers [<xref ref-type="bibr" rid="ref26">26</xref>] in the dataset by evaluating the quality of the data [<xref ref-type="bibr" rid="ref21">21</xref>]. An outlier is a data measurement that is inconsistent with other historical measurement data of the same individual (eg, an outlying height value, an exceptionally high number of consultations with a physician—a few hundred per year). When a measurement-specific (eg, BMI) algorithm has been developed in-house by Clalit Research Institute (CRI) for epidemiological studies, outlier detection and data correction will be processed using it. For example, an algorithm screens data on BMI, weight, and height, to detect and handle outliers in the recording of 1 of these 3 measurements (eg, due to mistyping). When the CRI algorithms are not relevant, outliers will be detected with statistical approaches such as median absolute deviation (a nonparametric approach, chosen due to lack of knowledge regarding the data distribution) [<xref ref-type="bibr" rid="ref27">27</xref>] and/or machine learning algorithms such as k-means [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
            <p>Data related to communication between patients and Clalit have not previously been fully processed and cleansed, and accordingly, we may need to develop special cleaning and correction algorithms for these data. If data correction algorithms and/or algorithms that deal with cases of missing information do not exist for any given data in our database [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>], we will use appropriate machine learning algorithms and/or statistical approaches [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] to correct and/or deal with missing data where needed. Examples of potential problems that we might encounter are identifying irrelevant entries (eg, entries related to quality assurance traffic and testing and entries that are not the result of human activity) and lack of full documentation. In addition, the interface exposed to the user is a <italic>breathing</italic> interface and changes over time depending on the services that the HMO chooses to provide through the Web-based and app services. A new version of the website, for example, is released every 6 months. Data processing and analysis should reflect these changes.</p>
          </sec>
          <sec>
            <title>Data Transformation</title>
            <p>Many methods of machine learning and data mining require, as part of the preprocessing phase, a data reformulation such as a new categorization or a new grouping of numerical, categorical, or textual data to reduce the number of values each attribute has [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
            <p>This step involves the use of techniques for reducing the number of dimensions or transduction methods to reduce the number of variables for analysis or to find invariant representations of the data [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>].</p>
            <p>For example, if we consider attributes with continuous values such as laboratory tests or clinical measurement having existing and defined scales in the literature, we will reformulate them into categorical values as a part of the dataset dimension reduction. For example, HbA<sub>1c</sub> values may be divided into 5 categories: excellent control (&#60;6.5%), good control (6.5% to 7.5%), moderate control (7.5% to 8.9%), poor control (<bold>≥</bold>9%), and not available [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p>
            <p>However, for attributes that do not have predefined scales in the literature or which are specific to Clalit, such as the number of appointments by using the HMO website or the number of visits to a physician per year, we will use the k-means clustering algorithm for discretization purposes in 6 groups of resource consumption: “No” (meaning no consumption of the related resource, so excluded from the k-means run and assigned to this group), “Small,” “Small-Moderate,” “Moderate,” “Moderate-Large,” and “Large.” The cluster bounds are validated, if necessary, by a domain expert (ie, a public health practitioner having some experience with the Clalit data).</p>
          </sec>
        </sec>
        <sec>
          <title>Data Mining</title>
          <p>For identifying population clusters, different machine learning methods and algorithms must be used. The main aim is to characterize usage profiles in the available communication channels. Considering the fact that we do not have prior knowledge on the data, we will use unsupervised machine learning algorithms [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref43">43</xref>] and will more particularly focus on k-means [<xref ref-type="bibr" rid="ref38">38</xref>] and hierarchical clustering [<xref ref-type="bibr" rid="ref37">37</xref>]. We choose to use these specific algorithms because they are relatively simple to communicate with people having less technical knowledge, such as decision and policy makers of the HMO, which will get the final analysis report and will need to implement its recommendations.</p>
          <p>The first data mining goal is to find the number of hidden k clusters in the “Communication/contacts with the HMO data” or in other words, the number of different types of patient communication profiles. This will be performed on the available data of the year 2016 because by that time, data cleansing will be fully performed. As communication channels constantly evolve, we chose the most recent year to be the reference point to which previous years, with fewer communication channels, are compared. The “Communication/contacts with the HMO data” of 2016 will be clustered as follows:</p>
          <list list-type="order">
            <list-item>
              <p>For each k between 2 and 100, 100 randomly selected samples of 20% of the cohort will be generated</p>
            </list-item>
            <list-item>
              <p>For each sample, k-means will be run</p>
            </list-item>
            <list-item>
              <p>For each run, the Ray-Turi criterion [<xref ref-type="bibr" rid="ref44">44</xref>] will be computed</p>
            </list-item>
            <list-item>
              <p>The results of the overall Ray-Turi criterion computation will be plotted on a graph</p>
            </list-item>
            <list-item>
              <p>The elbow will be manually defined on the previously built plot for finding the relevant k.</p>
            </list-item>
          </list>
          <p>Each cluster relates to a type of patient communication. This step allows reducing the patient communication profiles from the number of patients included in the cohort (more than 300,000 if we consider patients with diabetes) to a small one (at most a few dozen).</p>
          <p>The second data mining goal is to generate a hierarchical clustering of the previously discovered clusters to allow understanding the similarities and dissimilarities between the communication patterns.</p>
          <p>Descriptive statistics of sociodemographic, bio-medical, and communication data will be generated for each cluster.</p>
          <p>On the basis of the previously built k clusters of “Communication/contacts with the HMO data” of 2016 and the related hierarchical clustering, we will generate descriptive statistics for each patient communication profile (ie, cluster or set of patients) over the years (2008-2015).</p>
        </sec>
        <sec>
          <title>Information Visualization</title>
          <p>To provide user-friendly tools to decision and policy makers [<xref ref-type="bibr" rid="ref45">45</xref>], allowing them to understand the different patient communication profiles and the strengths and weaknesses of each one, we will build heat maps for each year between 2008 and 2016 based on the previously generated hierarchical clustering of 2016 data.</p>
        </sec>
        <sec>
          <title>Process Mining</title>
          <p>Furthermore, we plan to implement algorithms and approaches from the field of process mining [<xref ref-type="bibr" rid="ref46">46</xref>] to identify the changes in communication profiles over time, which may be the cause of treatment adherence changes. For example, process mining will allow us to model how patients with a similar communication profile (ie, patients within the same cluster) have changed their communication patterns with the HMO using the following channels:</p>
          <list list-type="order">
            <list-item>
              <p>Consulting with physicians and/or nurses</p>
            </list-item>
            <list-item>
              <p>Scheduling appointments by using 1 or more of the following channels: through a medical secretary—data available since 2009, call center—data available since 2009, website—since 2011, or mobile app—since 2012</p>
            </list-item>
            <list-item>
              <p>Overall interaction with the HMO (using the overall services).</p>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Qualitative Research</title>
          <p>Qualitative research of focus groups is the most effective means to fully understand factors that encourage or delay the use of communication interfaces with the health care organization. Focus groups enable the collection of information from a multicultural population [<xref ref-type="bibr" rid="ref47">47</xref>] and discussion of new ideas that do not arise during personal interviews [<xref ref-type="bibr" rid="ref48">48</xref>]. We designed the qualitative part of the proposed study based on the guidelines presented by King et al [<xref ref-type="bibr" rid="ref49">49</xref>]. The qualitative part of the research will include between 1 and 8 focus groups depending on their usage level of the communications channels with Clalit. Each one of the focus groups will include up to 8 patients from the same area. Participants in the focus groups will be asked to complete a short sociodemographic questionnaire and sign an informed consent form. During the focus group meeting, the group facilitator will record the discussion and make important notes related to the participants’ nonverbal communication.</p>
          <p>A guideline questionnaire for the focus groups will be constructed with the assistance of experts in the field and relevant literature. This questionnaire will evaluate factors that encourage or delay the use of communication channels with Clalit. The guiding questionnaire will include up to 10 open questions that will facilitate responses providing critical information, for example, “What factors contribute or will contribute to your use of the communication channel X?”; “What factors delay or will delay your usage of communication channel X?”; or “How do you think that communication channel X can be improved?”. The guiding questionnaire will be used to explore aspects that are relevant for better understanding the topic and will facilitate expanding the discussion to areas that the participants consider to be most significant.</p>
          <p>The discussions in the focus groups will be recorded and transcribed. The transcripts of the focus group discussion will be analyzed in a phenomenological approach that emphasizes the patient’s unique and subjective perception through qualitative content analysis [<xref ref-type="bibr" rid="ref50">50</xref>]. The coding process will begin with open coding (ie, identification of major categories), followed by axial coding that results from 1 core phenomenon. Next, the data will be categorized according to this core phenomenon [<xref ref-type="bibr" rid="ref51">51</xref>] and will be reviewed by external domain experts to ensure objectivity [<xref ref-type="bibr" rid="ref49">49</xref>]. Sandelowski [<xref ref-type="bibr" rid="ref52">52</xref>] notes that through qualitative content analysis, researchers can add new information to the existing one and gain new insights. The encoding and analysis will be performed by the principal investigators and the associate investigators, with the same encoding rules for guaranteeing homogeneous and consistent encoding [<xref ref-type="bibr" rid="ref49">49</xref>]. In cases of disagreement regarding the encoding, an expanded forum will be held in which the majority decision prevails.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>This project was funded in 2016, and the research project is scheduled to be completed in 2019.</p>
      <p>A preliminary analysis has been performed on the data of the year 2015 related to 309,460 patients with diabetes in 2015, aged 32 years and above, having the disease treated by Clalit for more than 7 years. Overall, 7 main communication patterns have been discovered.</p>
      <p>The first 2 clusters are of patients with relatively low contacts with the HMO in comparison with the rest of the population. Patients in these 2 groups tend to be relatively young (median age: 64 years) and less morbid (ACG between 3 and 4). Patients in the first group tend to have a poor follow-up quality: 21.21% (18,779/88,524) of the patients were missing BMI measurement and 23.09% (20,436/88,524) were missing their HbA<sub>1c</sub> measurement in 2015. In contrast, patients in the second cluster have an average follow-up quality: only 7.72% (6228/80,714) of the patients did not perform a BMI measurement and only 10.56% (8527/80,714) did not perform a HbA<sub>1c</sub> measurement. A possible explanation for this difference may be related to the tendency of the patients in the second group to resort mainly to human contact (face-to-face or by phone).</p>
      <p>The next 2 clusters are of early adopters of technology. These diabetic patients interacted in 2015 with Clalit mainly through new digital platforms: the website (first group) or the mobile app (the second group). These patients also tend to use fewer medical services compared with the rest of the population, and their follow-up quality was better than the rest of the population: only 4.64% (1212/26,098) and 6.10% (1593/26,098) of the first group did not perform BMI and HbA<sub>1c</sub> tests in 2015, respectively, whereas 5.05% (603/11,945) and 6.93% (826/11,945) of the second group did not perform BMI and HbA<sub>1c</sub> tests in 2015, respectively.</p>
      <p>The patients included in the fifth cluster are mainly using nursing services. They also tend not to schedule appointments. This subpopulation has a low SES (40.79%, 14,531/35,624). However, the follow-up of these patients is quite good (with 3.17% [1128/35,624] and 6.05% [2155/35,624] of these patients missing their BMI and HbA<sub>1c</sub> measurements, respectively). This is a clear effect of the nursing personnel involvement.</p>
      <p>Patients in the last 2 clusters tend to be older than the rest of the patient population (aged more than 70 years) and with relatively high morbidity (ACG=5). Patients in the sixth cluster tend to be consumers of medical services that involve access to a human being, whereas patients in the seventh cluster tend to be heavy users of all medical services. They also tend to have one of the best follow-up rates: only 1.64% (825/38,070) and 4.38% (1668/38,070) of the patients in the sixth cluster have missed their BMI and HbA<sub>1c</sub> measurements, respectively, in 2015, whereas only 4.22% (1203/28,485) and 6.40% (1822/28,485) of the patients in the seventh cluster have missed their BMI and HbA<sub>1c</sub> measurements, respectively, in 2015.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>This research protocol deals with the identification of patient communication profiles. This knowledge will help the health care organization to increase the accessibility of patients to the services the health care organization provides and to improve patients’ engagement with the treatment process. This, in turn, may motivate the patient to achieve a better treatment adherence, improve quality of care, and generate better patient experience.</p>
      </sec>
      <sec>
        <title>Expected Results and Future Directions</title>
        <p>Analysis of communication patterns over time may reveal long-term behavior patterns as well as identify patterns at a higher abstraction level (eg, early adopters of technology and early adopters of services). It should be noted that the research is planned to be performed on data from a period that witnessed a significant yet gradual change in the communication channels Clalit provides its patients. Analyzing the response of the patient population to these changes will hopefully help improve the available communication channels as well as assist in formulating realistic expectations from the introduction of new communication channels, taking into consideration also the sociodemographic characteristics and clinical constraints as well as their previous communication patterns with the HMO.</p>
        <p>By tuning its communication tools to patients’ preferences (eg, by translating the user interfaces of the electronic communications tools—website or apps—from Hebrew to other languages such as Arabic, English, Russian, Amharic, French, and Spanish), the health organization would (1) improve and increase accessibility to health care services, achieve better patient engagement and responsiveness to treatment, and improve quality of treatment and treatment experience within existing budgetary constraints and (2) increase patients’ engagement with the treatment process by transforming the communication scheme with each patient to a more proactive scheme, so as to better fit their profile.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>Clalit insured and provided medical services to approximately 4.53 million patients in 2015 and is the largest health care provider in Israel. The data available spans all treatment providers including hospitals and emergency units. Nevertheless, overall ethnic distribution of the Clalit population does not fully reflect the overall Israeli demographic composition. The Clalit members comprise, in comparison with the Israeli general population, (1) a higher proportion of Arabs and a lower proportion of ultra-orthodox members and (2) a higher proportion of members having a low SES.</p>
        <p>Another potential limitation is the decision to analyze only patients with diabetes. These patients may exhibit behaviors that are unique to this specific chronic disease and may not be shared by other chronic patients. Nevertheless, diabetes is 1 of the most common chronic diseases, with prevalence of approximately 7% within Clalit’s insured population.</p>
        <p>Finally, this research is conducted on data of Israeli patients. The structure of the Israeli health care system as well as Israeli culture and norms may affect patients’ behavior and may not apply to patients in other geographical locations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACG</term>
          <def>
            <p>adjusted clinical groups</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BMI</term>
          <def>
            <p>body mass index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Clalit</term>
          <def>
            <p>Clalit Health Services</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRI</term>
          <def>
            <p>Clalit Research Institute</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DWH</term>
          <def>
            <p>data warehouse</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">HMO</term>
          <def>
            <p>health maintenance organization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">HbA<sub>1c</sub></term>
          <def>
            <p>glycated hemoglobin</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">KDD</term>
          <def>
            <p>knowledge discovery in databases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">SES</term>
          <def>
            <p>socioeconomic status</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SMS</term>
          <def>
            <p>short message service</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The research was supported by a grant from the Israel National Institute for Health Policy (#188-15).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Axén</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Bodin</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Bergström</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Halasz</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Lange</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Lövgren</surname>
            <given-names>PW</given-names>
          </name>
          <name name-style="western">
            <surname>Rosenbaum</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Leboeuf-Yde</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Jensen</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Clustering patients on the basis of their individual course of low back pain over a six month period</article-title>
        <source>BMC Musculoskelet Disord</source>  
        <year>2011</year>  
        <month>05</month>  
        <day>17</day>  
        <volume>12</volume>  
        <fpage>99</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmusculoskeletdisord.biomedcentral.com/articles/10.1186/1471-2474-12-99"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2474-12-99</pub-id>
        <pub-id pub-id-type="medline">21586117</pub-id>
        <pub-id pub-id-type="pii">1471-2474-12-99</pub-id>
        <pub-id pub-id-type="pmcid">PMC3125255</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rai</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Pye</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Baird</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Understanding determinants of consumer mobile health usage intentions, assimilation, and channel preferences</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <month>08</month>  
        <day>2</day>  
        <volume>15</volume>  
        <issue>8</issue>  
        <fpage>e149</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2013/8/e149/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2635</pub-id>
        <pub-id pub-id-type="medline">23912839</pub-id>
        <pub-id pub-id-type="pii">v15i8e149</pub-id>
        <pub-id pub-id-type="pmcid">PMC3742412</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hoffman</surname>
            <given-names>AS</given-names>
          </name>
          <name name-style="western">
            <surname>Volk</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Saarimaki</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Stirling</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>LC</given-names>
          </name>
          <name name-style="western">
            <surname>Härter</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kamath</surname>
            <given-names>GR</given-names>
          </name>
          <name name-style="western">
            <surname>Llewellyn-Thomas</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Delivering patient decision aids on the internet: definitions, theories, current evidence, and emerging research areas</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2013</year>  
        <volume>13 Suppl 2</volume>  
        <fpage>S13</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-13-S2-S13"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-13-S2-S13</pub-id>
        <pub-id pub-id-type="medline">24625064</pub-id>
        <pub-id pub-id-type="pii">1472-6947-13-S2-S13</pub-id>
        <pub-id pub-id-type="pmcid">PMC4043476</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Beck</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Richard</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen-Thanh</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Montagni</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Parizot</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Renahy</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Use of the internet as a health information resource among French young adults: results from a nationally representative survey</article-title>
        <source>J Med Internet Res</source>  
        <year>2014</year>  
        <month>05</month>  
        <day>13</day>  
        <volume>16</volume>  
        <issue>5</issue>  
        <fpage>e128</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2014/5/e128/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2934</pub-id>
        <pub-id pub-id-type="medline">24824164</pub-id>
        <pub-id pub-id-type="pii">v16i5e128</pub-id>
        <pub-id pub-id-type="pmcid">PMC4051740</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Moick</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Terlutter</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Physicians' motives for professional internet use and differences in attitudes toward the internet-informed patient, physician-patient communication, and prescribing behavior</article-title>
        <source>Med 2 0</source>  
        <year>2012</year>  
        <month>07</month>  
        <day>6</day>  
        <volume>1</volume>  
        <issue>2</issue>  
        <fpage>e2</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.medicine20.com/2012/2/e2"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/med20.1996</pub-id>
        <pub-id pub-id-type="medline">25075230</pub-id>
        <pub-id pub-id-type="pii">v1i2e2</pub-id>
        <pub-id pub-id-type="pmcid">PMC4084769</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kritz</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Gschwandtner</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Stefanov</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Hanbury</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Samwald</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Utilization and perceived problems of online medical resources and search tools among different groups of European physicians</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <month>06</month>  
        <day>26</day>  
        <volume>15</volume>  
        <issue>6</issue>  
        <fpage>e122</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2013/6/e122/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2436</pub-id>
        <pub-id pub-id-type="medline">23803299</pub-id>
        <pub-id pub-id-type="pii">v15i6e122</pub-id>
        <pub-id pub-id-type="pmcid">PMC3713956</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dugdale</surname>
            <given-names>DC</given-names>
          </name>
          <name name-style="western">
            <surname>Epstein</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Pantilat</surname>
            <given-names>SZ</given-names>
          </name>
        </person-group>
        <article-title>Time and the patient-physician relationship</article-title>
        <source>J Gen Intern Med</source>  
        <year>1999</year>  
        <month>01</month>  
        <volume>14 Suppl 1</volume>  
        <fpage>S34</fpage>  
        <lpage>40</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1496869/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1046/j.1525-1497.1999.00263.x</pub-id>
        <pub-id pub-id-type="medline">9933493</pub-id>
        <pub-id pub-id-type="pmcid">PMC1496869</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Weiner</surname>
            <given-names>JP</given-names>
          </name>
        </person-group>
        <article-title>Doctor-patient communication in the e-health era</article-title>
        <source>Isr J Health Policy Res</source>  
        <year>2012</year>  
        <month>08</month>  
        <day>28</day>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>33</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ijhpr.org/content/1/1/33"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/2045-4015-1-33</pub-id>
        <pub-id pub-id-type="medline">22929000</pub-id>
        <pub-id pub-id-type="pii">2045-4015-1-33</pub-id>
        <pub-id pub-id-type="pmcid">PMC3461429</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Peleg</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Avdalimov</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Freud</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Providing cell phone numbers and email addresses to patients: the physician's perspective</article-title>
        <source>BMC Res Notes</source>  
        <year>2011</year>  
        <month>03</month>  
        <day>23</day>  
        <volume>4</volume>  
        <fpage>76</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/1756-0500-4-76"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1756-0500-4-76</pub-id>
        <pub-id pub-id-type="medline">21426591</pub-id>
        <pub-id pub-id-type="pii">1756-0500-4-76</pub-id>
        <pub-id pub-id-type="pmcid">PMC3076270</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Peleg</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Nazarenko</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Providing cell phone numbers and e-mail addresses to patients: the patient's perspective, a cross sectional study</article-title>
        <source>Isr J Health Policy Res</source>  
        <year>2012</year>  
        <month>08</month>  
        <day>28</day>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>32</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://ijhpr.biomedcentral.com/articles/10.1186/2045-4015-1-32"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/2045-4015-1-32</pub-id>
        <pub-id pub-id-type="medline">22929801</pub-id>
        <pub-id pub-id-type="pii">2045-4015-1-32</pub-id>
        <pub-id pub-id-type="pmcid">PMC3441808</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Henao</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Murray</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ginsburg</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Carin</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Lucas</surname>
            <given-names>JE</given-names>
          </name>
        </person-group>
        <article-title>Patient clustering with uncoded text in electronic medical records</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2013</year>  
        <volume>2013</volume>  
        <fpage>592</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24551361"/>
        </comment>  
        <pub-id pub-id-type="medline">24551361</pub-id>
        <pub-id pub-id-type="pmcid">PMC3900202</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sewitch</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Leffondré</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Dobkin</surname>
            <given-names>PL</given-names>
          </name>
        </person-group>
        <article-title>Clustering patients according to health perceptions: relationships to psychosocial characteristics and medication nonadherence</article-title>
        <source>J Psychosom Res</source>  
        <year>2004</year>  
        <month>03</month>  
        <volume>56</volume>  
        <issue>3</issue>  
        <fpage>323</fpage>  
        <lpage>32</lpage>  
        <pub-id pub-id-type="doi">10.1016/S0022-3999(03)00508-7</pub-id>
        <pub-id pub-id-type="medline">15046970</pub-id>
        <pub-id pub-id-type="pii">S0022399903005087</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Webster</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>EHR business process management: from process mining to process improvement to process usability</article-title>
        <year>2012</year>  
        <month>02</month>  
        <day>20</day>  
        <conf-name>Healthcare Systems Process Improvement Conference</conf-name>
        <conf-date>February 20, 2012</conf-date>
        <conf-loc>Las Vegas, USA</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://wareflo.com/HSPI2012/ehr-bpm-process-mining-webster-2012-shs-conf.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gerteis</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Izrael</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Deitz</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>LeRoy</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Ricciardi</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Miller</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Basu</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Healthcare Utilization and Costs</article-title>
        <source>Multiple Chronic Conditions Chartbook</source>  
        <year>2014</year>  
        <month>04</month>  
        <publisher-loc>Rockville, MD</publisher-loc>
        <publisher-name>Agency for Healthcare Research and Quality</publisher-name>
        <fpage>7</fpage>  
        <lpage>14</lpage> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rennert</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Peterburg</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Prevalence of selected chronic diseases in Israel</article-title>
        <source>Isr Med Assoc J</source>  
        <year>2001</year>  
        <month>06</month>  
        <volume>3</volume>  
        <issue>6</issue>  
        <fpage>404</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ima.org.il/IMAJ/ViewArticle.aspx?year=2001&#38;month=06&#38;page=404"/>
        </comment>  
        <pub-id pub-id-type="medline">11433630</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Karpati</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Cohen-Stavi</surname>
            <given-names>CJ</given-names>
          </name>
          <name name-style="western">
            <surname>Leibowitz</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Hoshen</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Feldman</surname>
            <given-names>BS</given-names>
          </name>
          <name name-style="western">
            <surname>Balicer</surname>
            <given-names>RD</given-names>
          </name>
        </person-group>
        <article-title>Towards a subsiding diabetes epidemic: trends from a large population-based study in Israel</article-title>
        <source>Popul Health Metr</source>  
        <year>2014</year>  
        <month>10</month>  
        <day>30</day>  
        <volume>12</volume>  
        <issue>1</issue>  
        <fpage>32</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://pophealthmetrics.biomedcentral.com/articles/10.1186/s12963-014-0032-y"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12963-014-0032-y</pub-id>
        <pub-id pub-id-type="medline">25400512</pub-id>
        <pub-id pub-id-type="pii">32</pub-id>
        <pub-id pub-id-type="pmcid">PMC4233034</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web"> <source>Israel Center for Disease Control</source> <year>2017</year> <publisher-loc>Israel</publisher-loc> <publisher-name>Ministry of Health</publisher-name> <comment>Highlights of Health in Israel 2016<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.health.gov.il/publicationsfiles/highlights_of_health_in_israel2016.pdf">https://www.health.gov.il/publicationsfiles/highlights_of_health_in_israel2016.pdf</ext-link> <ext-link ext-link-type="webcite" xlink:href="735U5lE3I"/> </comment> </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fayyad</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Piatetsky-Shapiro</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Smyth</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>From data mining to knowledge discovery: an overview</article-title>
        <source>Advances in knowledge discovery and data mining</source>  
        <year>1996</year>  
        <publisher-loc>CA, USA</publisher-loc>
        <publisher-name>American Association for Artificial Intelligence</publisher-name></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fayyad</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Piatetsky-Shapiro</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Smyth</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>The KDD process for extracting useful knowledge from volumes of data</article-title>
        <source>Communications of the ACM</source>  
        <year>1996</year>  
        <month>11</month>  
        <volume>39</volume>  
        <issue>11</issue>  
        <fpage>27</fpage>  
        <lpage>34</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.27.2315&#38;rep=rep1&#38;type=pdf"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1145/240455.240464</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Frawley</surname>
            <given-names>WJ</given-names>
          </name>
          <name name-style="western">
            <surname>Piatetsky-Shapiro</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Matheus</surname>
            <given-names>CJ</given-names>
          </name>
        </person-group>
        <article-title>Knowledge discovery in databases: an overview</article-title>
        <source>AI Mag</source>  
        <year>1992</year>  
        <month>09</month>  
        <volume>13</volume>  
        <issue>3</issue>  
        <fpage>57</fpage>  
        <lpage>70</lpage>  
        <pub-id pub-id-type="doi">10.1609/aimag.v13i3.1011</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zaki</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Meira Jr</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <source>Data Mining and Analysis: Fundamental Concepts and Algorithms</source>  
        <year>2014</year>  
        <month>05</month>  
        <publisher-loc>Cambridge</publisher-loc>
        <publisher-name>Cambridge University Press</publisher-name></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mitchell</surname>
            <given-names>TM</given-names>
          </name>
        </person-group>
        <source>Machine Learning</source>  
        <year>1997</year>  
        <publisher-loc>New York, NY, USA</publisher-loc>
        <publisher-name>McGraw-Hill, Inc</publisher-name></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <source>National Heart, Lung, and Blood Institute</source>  
        <access-date>2018-10-11</access-date>
        <comment>The Practical Guide: Identification, Evaluation, and Treatment of Overweight and Obesity in Adults 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nhlbi.nih.gov/files/docs/guidelines/prctgd_c.pdf">https://www.nhlbi.nih.gov/files/docs/guidelines/prctgd_c.pdf</ext-link>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Reid</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Roos</surname>
            <given-names>NP</given-names>
          </name>
          <name name-style="western">
            <surname>MacWilliam</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Frohlich</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Black</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Assessing population health care need using a claims-based ACG morbidity measure: a validation analysis in the Province of Manitoba</article-title>
        <source>Health Serv Res</source>  
        <year>2002</year>  
        <month>10</month>  
        <volume>37</volume>  
        <issue>5</issue>  
        <fpage>1345</fpage>  
        <lpage>64</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/12479500"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1111/1475-6773.01029</pub-id>
        <pub-id pub-id-type="medline">12479500</pub-id>
        <pub-id pub-id-type="pmcid">PMC1464032</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
        <source>WHO Collaborating Centre for Drug Statistics Methodology</source>  
        <year>2017</year>  
        <access-date>2018-10-11</access-date>
        <comment>Guidelines for ATC classification and DDD assignment 2018 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.whocc.no/filearchive/publications/guidelines.pdf">https://www.whocc.no/filearchive/publications/guidelines.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="735UGNsOl"/></comment> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ben-Gal</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Maimon</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Rokach</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Outlier Detection</article-title>
        <source>Data mining and knowledge discovery handbook</source>  
        <year>2010</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>117</fpage>  
        <lpage>30</lpage> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Leys</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ley</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Klein</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Bernard</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Licata</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Detecting outliers: do not use standard deviation around the mean, use absolute deviation around the median</article-title>
        <source>J Exp Soc Psychol</source>  
        <year>2013</year>  
        <month>07</month>  
        <volume>49</volume>  
        <issue>4</issue>  
        <fpage>764</fpage>  
        <lpage>66</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jesp.2013.03.013</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bansal</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Gaur</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Outlier Detection: Applications and techniques in Data Mining</article-title>
        <year>2016</year>  
        <conf-name>The 6th International Conference - Cloud System and Big Data Engineering (Confluence)</conf-name>
        <conf-date>January 14-15, 2016</conf-date>
        <conf-loc>Noida, India</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/7508146"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1109/CONFLUENCE.2016.7508146</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rahm</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Do</surname>
            <given-names>HH</given-names>
          </name>
        </person-group>
        <article-title>Data cleaning: problems and current approaches</article-title>
        <source>Bull Tech Comm Data Eng</source>  
        <year>2000</year>  
        <month>01</month>  
        <volume>23</volume>  
        <issue>4</issue>  
        <fpage>3</fpage>  
        <lpage>13</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/6046/770d1c3e08edfdd39bdb57fccaca84f5139c.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Kamber</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Pei</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>Data Mining: Concepts and Techniques</source>  
        <year>2011</year>  
        <publisher-loc>Canada</publisher-loc>
        <publisher-name>Elsevier Science</publisher-name></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Escalante</surname>
            <given-names>HJ</given-names>
          </name>
        </person-group>
        <source>Semanticscholar</source>  
        <year>2005</year>  
        <access-date>2018-10-11</access-date>
        <comment>A Comparison of Outlier Detection Algorithms for Machine Learning 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/cf06/9b7460ce1b5a0434a6a19f420544a780f35d.pdf">https://pdfs.semanticscholar.org/cf06/9b7460ce1b5a0434a6a19f420544a780f35d.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="735M1NrEO"/></comment> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chandola</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Banerjee</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>Anomaly detection: a survey</article-title>
        <source>ACM Comput Surv</source>  
        <year>2009</year>  
        <month>07</month>  
        <day>1</day>  
        <volume>41</volume>  
        <issue>3</issue>  
        <fpage>1</fpage>  
        <lpage>58</lpage>  
        <pub-id pub-id-type="doi">10.1145/1541880.1541882</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ahmad</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Pencina</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Schulte</surname>
            <given-names>PJ</given-names>
          </name>
          <name name-style="western">
            <surname>O'Brien</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Whellan</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Piña</surname>
            <given-names>IL</given-names>
          </name>
          <name name-style="western">
            <surname>Kitzman</surname>
            <given-names>DW</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>KL</given-names>
          </name>
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Felker</surname>
            <given-names>GM</given-names>
          </name>
        </person-group>
        <article-title>Clinical implications of chronic heart failure phenotypes defined by cluster analysis</article-title>
        <source>J Am Coll Cardiol</source>  
        <year>2014</year>  
        <month>10</month>  
        <day>28</day>  
        <volume>64</volume>  
        <issue>17</issue>  
        <fpage>1765</fpage>  
        <lpage>74</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0735-1097(14)05897-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jacc.2014.07.979</pub-id>
        <pub-id pub-id-type="medline">25443696</pub-id>
        <pub-id pub-id-type="pii">S0735-1097(14)05897-5</pub-id>
        <pub-id pub-id-type="pmcid">PMC4254424</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fodor</surname>
            <given-names>IK</given-names>
          </name>
        </person-group>
        <source>A survey of dimension reduction techniques</source>  
        <year>2002</year>  
        <month>05</month>  
        <day>9</day>  
        <publisher-loc>United States of America</publisher-loc>
        <publisher-name>Lawrence Livermore National Lab, CA (US)</publisher-name></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cunningham</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <source>University College Dublin</source>  
        <year>2007</year>  
        <comment>Dimension Reduction 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.98.1478&#38;rep=rep1&#38;type=pdf">http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.98.1478&#38;rep=rep1&#38;type=pdf</ext-link></comment> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <collab>World Health Organization</collab>
        </person-group>
        <source>Use of Glycated Haemoglobin (HbA1c) in the Diagnosis of Diabetes Mellitus: Abbreviated Report of a WHO Consultation</source>  
        <year>2011</year>  
        <publisher-loc>Geneva</publisher-loc>
        <publisher-name>World Health Organization</publisher-name></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rokach</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Maimon</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Rokach</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>A survey of clustering algorithms</article-title>
        <source>Data mining and knowledge discovery handbook, 2nd ed</source>  
        <year>2010</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>269</fpage>  
        <lpage>98</lpage> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>MacQueen</surname>
            <given-names>JB</given-names>
          </name>
        </person-group>
        <article-title>Some methods for classification and analysis of multivariate observations</article-title>
        <year>1967</year>  
        <conf-name>Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability</conf-name>
        <conf-date>1967</conf-date>
        <conf-loc>Berkeley, California</conf-loc>
        <fpage>281</fpage>  
        <lpage>97</lpage> </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ester</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kriegel</surname>
            <given-names>HP</given-names>
          </name>
          <name name-style="western">
            <surname>Sander</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <article-title>A density-based algorithm for discovering clusters in large spatial databases with noise</article-title>
        <source>Proceedings of the Second International Conference on Knowledge Discovery and Data Mining</source>  
        <year>1996</year>  
        <conf-name>Proceedings of the Second International Conference on Knowledge Discovery and Data Mining</conf-name>
        <conf-date>August 2-4, 1996</conf-date>
        <conf-loc>Portland, Oregon</conf-loc>
        <fpage>226</fpage>  
        <lpage>31</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/Library/KDD/kdd96contents.php"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ankerst</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Breunig</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kriegel</surname>
            <given-names>HP</given-names>
          </name>
          <name name-style="western">
            <surname>Sander</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>OPTICS: ordering points to identify the clustering structure</article-title>
        <source>SIGMOD '99 Proceedings of the ACM SIGMOD international conference on Management of data</source>  
        <year>1999</year>  
        <conf-name>ACM SIGMOD international conference on Management of data</conf-name>
        <conf-date>May 31-June 3, 1999</conf-date>
        <conf-loc>Philadelphia, Pennsylvania</conf-loc>
        <publisher-loc>New York, NY, USA</publisher-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>49</fpage>  
        <lpage>60</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/citation.cfm?id=304187"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Madeira</surname>
            <given-names>SC</given-names>
          </name>
          <name name-style="western">
            <surname>Oliveira</surname>
            <given-names>AL</given-names>
          </name>
        </person-group>
        <article-title>Biclustering algorithms for biological data analysis: a survey</article-title>
        <source>IEEE/ACM Trans Comput Biol Bioinform</source>  
        <year>2004</year>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>24</fpage>  
        <lpage>45</lpage>  
        <pub-id pub-id-type="doi">10.1109/TCBB.2004.2</pub-id>
        <pub-id pub-id-type="medline">17048406</pub-id></nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Liew</surname>
            <given-names>AWC</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>DZ</given-names>
          </name>
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Biclustering analysis for pattern discovery: current techniques, comparative studies and applications</article-title>
        <source>Curr Bioinform</source>  
        <year>2012</year>  
        <month>03</month>  
        <day>1</day>  
        <volume>7</volume>  
        <issue>1</issue>  
        <fpage>43</fpage>  
        <lpage>55</lpage>  
        <pub-id pub-id-type="doi">10.2174/157489312799304413</pub-id></nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Aggarwal</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Han</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>Frequent Pattern Mining</source>  
        <year>2014</year>  
        <publisher-loc>Switzerland</publisher-loc>
        <publisher-name>Springer International Publishing</publisher-name></nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ray</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Turi</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Determination of number of clusters in k-means clustering and application in colour image segmentation</article-title>
        <year>1999</year>  
        <conf-name>The 4th international conference on advances in pattern recognition and digital techniques</conf-name>
        <conf-date>1999</conf-date>
        <conf-loc>Calcutta, India</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.587.3517"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Card</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mackinlay</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Shneiderman</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <source>Readings in Information Visualization: Using Vision to Think</source>  
        <year>1999</year>  
        <publisher-loc>USA</publisher-loc>
        <publisher-name>Elsevier Science</publisher-name></nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>van der Aalst</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <source>Process Mining</source>  
        <year>2016</year>  
        <publisher-loc>Berlin Heidelberg</publisher-loc>
        <publisher-name>Springer</publisher-name></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Culley</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Hudson</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Rapport</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Using focus groups with minority ethnic communities: researching infertility in British South Asian communities</article-title>
        <source>Qual Health Res</source>  
        <year>2007</year>  
        <month>01</month>  
        <volume>17</volume>  
        <issue>1</issue>  
        <fpage>102</fpage>  
        <lpage>12</lpage>  
        <pub-id pub-id-type="doi">10.1177/1049732306296506</pub-id>
        <pub-id pub-id-type="medline">17170248</pub-id></nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kidd</surname>
            <given-names>PS</given-names>
          </name>
          <name name-style="western">
            <surname>Parshall</surname>
            <given-names>MB</given-names>
          </name>
        </person-group>
        <article-title>Getting the focus and the group: enhancing analytical rigor in focus group research</article-title>
        <source>Qual Health Res</source>  
        <year>2000</year>  
        <month>05</month>  
        <volume>10</volume>  
        <issue>3</issue>  
        <fpage>293</fpage>  
        <lpage>308</lpage>  
        <pub-id pub-id-type="doi">10.1177/104973200129118453</pub-id>
        <pub-id pub-id-type="medline">10947477</pub-id></nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>King</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Verba</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Keohane</surname>
            <given-names>RO</given-names>
          </name>
        </person-group>
        <source>Designing Social Inquiry: Scientific Inference in Qualitative Research</source>  
        <year>1994</year>  
        <publisher-loc>Princeton</publisher-loc>
        <publisher-name>Princeton University Press</publisher-name></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>DW</given-names>
          </name>
          <name name-style="western">
            <surname>Shamdasani</surname>
            <given-names>PN</given-names>
          </name>
        </person-group>
        <source>Focus Groups: Theory and Practice (Applied Social Research Methods), 3rd ed</source>  
        <year>2018</year>  
        <publisher-loc>USA</publisher-loc>
        <publisher-name>SAGE Publications</publisher-name></nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lewis</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Qualitative inquiry and research design: choosing among five approaches</article-title>
        <source>Health Promot Pract</source>  
        <year>2015</year>  
        <month>04</month>  
        <day>2</day>  
        <volume>16</volume>  
        <issue>4</issue>  
        <fpage>473</fpage>  
        <lpage>75</lpage>  
        <pub-id pub-id-type="doi">10.1177/1524839915580941</pub-id></nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sandelowski</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Whatever happened to qualitative description?</article-title>
        <source>Res Nurs Health</source>  
        <year>2000</year>  
        <month>08</month>  
        <volume>23</volume>  
        <issue>4</issue>  
        <fpage>334</fpage>  
        <lpage>40</lpage>  
        <pub-id pub-id-type="medline">10940958</pub-id>
        <pub-id pub-id-type="doi">10.1002/1098-240X(200008)23:4&#60;334::AID-NUR9&#62;3.0.CO;2-G</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
