<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i7e14455</article-id>
      <article-id pub-id-type="pmid">32729844</article-id>
      <article-id pub-id-type="doi">10.2196/14455</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Gender, Soft Skills, and Patient Experience in Online Physician Reviews: A Large-Scale Text Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Reblin</surname>
            <given-names>Maija</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Reynolds</surname>
            <given-names>Malaika</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Motoki</surname>
            <given-names>Yoko</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Dunivin</surname>
            <given-names>Zackary</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1860-7199</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zadunayski</surname>
            <given-names>Lindsay</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6989-356X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Baskota</surname>
            <given-names>Ujjwal</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1349-2975</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Siek</surname>
            <given-names>Katie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8632-2411</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Mankoff</surname>
            <given-names>Jennifer</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>University of Washington</institution>
            <addr-line>Bill &#38; Melinda Gates Center for Computer Science and Engineering</addr-line>
            <addr-line>3800 E Stevens Way NE</addr-line>
            <addr-line>Seattle, WA, 98112</addr-line>
            <country>United States</country>
            <phone>1 4125677720</phone>
            <email>jmankoff@acm.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9235-5324</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Indiana University</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Rensselaer Polytechnic Institute</institution>
        <addr-line>Troy, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Jackson State University</institution>
        <addr-line>Jackson, MS</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>University of Washington</institution>
        <addr-line>Seattle, WA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jennifer Mankoff <email>jmankoff@acm.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>7</issue>
      <elocation-id>e14455</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>4</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>1</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>4</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Zackary Dunivin, Lindsay Zadunayski, Ujjwal Baskota, Katie Siek, Jennifer Mankoff. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 30.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2020/7/e14455" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Online physician reviews are an important source of information for prospective patients. In addition, they represent an untapped resource for studying the effects of gender on the doctor-patient relationship. Understanding gender differences in online reviews is important because it may impact the value of those reviews to patients. Documenting gender differences in patient experience may also help to improve the doctor-patient relationship. This is the first large-scale study of physician reviews to extensively investigate gender bias in online reviews or offer recommendations for improvements to online review systems to correct for gender bias and aid patients in selecting a physician.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study examines 154,305 reviews from across the United States for all medical specialties. Our analysis includes a qualitative and quantitative examination of review content and physician rating with regard to doctor and reviewer gender.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A total of 154,305 reviews were sampled from Google Place reviews. Reviewer and doctor gender were inferred from names. Reviews were coded for overall patient experience (negative or positive) by collapsing a 5-star scale and coded for general categories (process, positive/negative soft skills), which were further subdivided into themes. Computational text processing methods were employed to apply this codebook to the entire data set, rendering it tractable to quantitative methods. Specifically, we estimated binary regression models to examine relationships between physician rating, patient experience themes, physician gender, and reviewer gender.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Female reviewers wrote 60% more reviews than men. Male reviewers were more likely to give negative reviews (odds ratio [OR] 1.15, 95% CI 1.10-1.19; <italic>P</italic>&#60;.001). Reviews of female physicians were considerably more negative than those of male physicians (OR 1.99, 95% CI 1.94-2.14; <italic>P</italic>&#60;.001). Soft skills were more likely to be mentioned in the reviews written by female reviewers and about female physicians. Negative reviews of female doctors were more likely to mention candor (OR 1.61, 95% CI 1.42-1.82; <italic>P</italic>&#60;.001) and amicability (OR 1.63, 95% CI 1.47-1.90; <italic>P</italic>&#60;.001). Disrespect was associated with both female physicians (OR 1.42, 95% CI 1.35-1.51; <italic>P</italic>&#60;.001) and female reviewers (OR 1.27, 95% CI 1.19-1.35; <italic>P</italic>&#60;.001). Female patients were less likely to report disrespect from female doctors than expected from the base ORs (OR 1.19, 95% CI 1.04-1.32; <italic>P</italic>=.008), but this effect overrode only the effect for female reviewers.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This work reinforces findings in the extensive literature on gender differences and gender bias in patient-physician interaction. Its novel contribution lies in highlighting gender differences in online reviews. These reviews inform patients’ choice of doctor and thus affect both patients and physicians. The evidence of gender bias documented here suggests review sites may be improved by providing information about gender differences, controlling for gender when presenting composite ratings for physicians, and helping users write less biased reviews.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>reviews</kwd>
        <kwd>physician-patient relationship</kwd>
        <kwd>gender</kwd>
        <kwd>soft-skills</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Physician review sites are relatively new and were initially greeted with concern by some in the medical community. In particular, some physicians were critical of the lack of transparency in composite statistics [<xref ref-type="bibr" rid="ref1">1</xref>] and were concerned that online reviews could harm their careers [<xref ref-type="bibr" rid="ref2">2</xref>]—perhaps unfairly [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Although ratings are generally high [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], negative ratings undoubtedly influence patient behavior [<xref ref-type="bibr" rid="ref7">7</xref>] and impact doctors [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Some doctors have attempted to <italic>gag</italic> patients by contractually prohibiting them from writing online reviews [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>Most studies of online physician reviews have focused on portals such as HealthGrades [<xref ref-type="bibr" rid="ref10">10</xref>], RateMDs [<xref ref-type="bibr" rid="ref11">11</xref>], Vitals [<xref ref-type="bibr" rid="ref12">12</xref>], and Yelp [<xref ref-type="bibr" rid="ref13">13</xref>]. Studies tend to have a small sample size, analyzing approximately 5400 reviews [<xref ref-type="bibr" rid="ref6">6</xref>]. Many studies aim to understand the factors that influence quantitative physician ratings. The qualitative analysis of 712 reviews by López et al [<xref ref-type="bibr" rid="ref5">5</xref>] established thematic categories that tended to appear in reviews. Paul et al [<xref ref-type="bibr" rid="ref14">14</xref>] replicated and expanded this work with a natural language processing (NLP) approach, which they applied to the text of 50,000 online reviews downloaded from RateMDs [<xref ref-type="bibr" rid="ref11">11</xref>]. Their novel joint topic–sentiment modeling approach found that certain textual accounts of interpersonal skills such as <italic>rude</italic>, <italic>arrogant</italic>, and <italic>condescending</italic> are strongly associated with negative reviews, and drew attention to the role of patient experience of bureaucratic process in reviews, noting that these experiences were often reflected in reviews. Wallace et al [<xref ref-type="bibr" rid="ref15">15</xref>] expanded on the work by López et al [<xref ref-type="bibr" rid="ref5">5</xref>] and Paul et al [<xref ref-type="bibr" rid="ref14">14</xref>] by analyzing 60,000 reviews to identify relationships between overall rating, health outcomes, and cost of care. To date, the only study to investigate the relationship between physician reviews and gender is that by Nwachukwu et al [<xref ref-type="bibr" rid="ref16">16</xref>] on surgeon quality in sports medicine. 
They found that communication style influenced the valence of ratings for top- and bottom-tier surgeons and that female surgeons typically had higher ratings [<xref ref-type="bibr" rid="ref16">16</xref>]. These and other studies of online reviews endeavor to understand how clinical experiences influence patient satisfaction and health outcomes. However, they tend to overlook or minimize questions about whether online review data reflect real experiences of medical care. Reviews may not be representative of the public or reflect demographic variation in health care utilization; indeed, doctor reviews are typically written by educated, younger, affluent, and healthier people [<xref ref-type="bibr" rid="ref5">5</xref>]. However, a study comparing ratings of over 3000 physicians with licensing data showed a clear relationship between doctor quality and ratings [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>Little research has studied the impact of gender or other demographic factors on the content and ratings of online physician reviews. Although qualitative studies of doctor-patient relationships have considered both negative and positive experiences [<xref ref-type="bibr" rid="ref5">5</xref>], including the impact of demographics [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>], the nature of these studies makes it difficult to estimate the size or scope of gender and other demographic variation in online physician reviews. Furthermore, gender differences, in particular biased interpretations of clinical experiences based on gender stereotypes, may impact online review content, which in turn may negatively impact both patients and physicians and perpetuate false gender stereotypes. The large-scale systematic study we present here documents gender differences in patient reviews with respect to both patients and doctors and proposes improvements for online review systems that could help reduce these disparities, thus improving information quality.</p>
      </sec>
      <sec>
        <title>Gender and Health Care</title>
        <p>Although we know little about gender in the context of online reviews, gender has been studied extensively in the social sciences for over half a century. Much of this work investigates the role of gender in medical care and health systems more generally.</p>
        <p>Gender is a cultural construct that affects people’s expectations and actions [<xref ref-type="bibr" rid="ref20">20</xref>]. In social contexts and practices [<xref ref-type="bibr" rid="ref21">21</xref>], gender is <italic>assessed</italic> independently of one’s identity [<xref ref-type="bibr" rid="ref22">22</xref>]. Thus, any name appearing in online text is likely to be interpreted in terms of the man/woman binary, which is reflected in the use of gender in the current doctor review literature.</p>
        <p>The expectations of one’s behavior differ depending on one’s assessed gender. Indeed, leadership traits praised in men are penalized in women, while traditional <italic>feminine</italic> behavior is seen as ineffective [<xref ref-type="bibr" rid="ref23">23</xref>]. In written references, men are described with more standout and ability-based words and fewer <italic>grindstone</italic> words (eg, <italic>hardworking</italic>, <italic>conscientious</italic>) [<xref ref-type="bibr" rid="ref24">24</xref>], and women are described with more communal words [<xref ref-type="bibr" rid="ref25">25</xref>]. Even when all factors are controlled, people rated teachers differently on hard skills (eg, <italic>promptness</italic>, <italic>fairness</italic>) and soft skills depending on the gender portrayed by the instructor [<xref ref-type="bibr" rid="ref26">26</xref>]. Thus, bias may influence review content even when performance is identical.</p>
        <p>In health care, gender differences influence doctors’ communication with patients [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>], doctor and patient trust [<xref ref-type="bibr" rid="ref30">30</xref>], and even diagnosis error rates [<xref ref-type="bibr" rid="ref27">27</xref>]. Female <italic>doctors</italic> are seen as partners and more involved in the patient-doctor relationship, whereas female <italic>patients</italic> are treated with more condescension [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], have their concerns dismissed [<xref ref-type="bibr" rid="ref29">29</xref>] and credibility doubted [<xref ref-type="bibr" rid="ref27">27</xref>]. Conversely, patient satisfaction is dependent on more caring communication styles for women than for men [<xref ref-type="bibr" rid="ref32">32</xref>]. However, many studies documenting these trends are small in scale or have weak evidence [<xref ref-type="bibr" rid="ref33">33</xref>]. One exception is a study of over 10,000 people experiencing long-term illness in Sweden, where women reported being blamed, interrupted, disbelieved, doubted, and regarded as stupid [<xref ref-type="bibr" rid="ref19">19</xref>]. These gender differences are likely to impact review scores, as lower patient satisfaction is correlated with high physician dominance, which can manifest itself in gendered actions (eg, poor information sharing and use of medical jargon) [<xref ref-type="bibr" rid="ref34">34</xref>]. In this study, we investigate how these gender biases are represented in online reviews, which affect patients, physicians, and people using the reviews.</p>
      </sec>
      <sec>
        <title>Study Design and Motivation</title>
        <p>The goal of this study was to broaden and deepen our understanding of the impact of gender bias and other gender differences on online physician reviews. We leveraged reports of patient sentiments about their doctors through a large-scale analysis of online reviews. The Google Place review data analyzed here allowed us to identify physician and reviewer gender and characterize patient sentiment or experience in terms of both overall quality (a reviewer-entered Likert-type scale) and thematic content. Specifically, we formulated the following hypotheses (H):</p>
        <list list-type="bullet">
          <list-item>
            <p>H1a—Physician ratings and physician gender: Female physicians are more likely to receive negative reviews than male physicians.</p>
          </list-item>
          <list-item>
            <p>H1b—Physician ratings and reviewer gender: Female reviewers are more likely to report negative experiences with doctors.</p>
          </list-item>
          <list-item>
            <p>H2a—Soft skills and physician gender: Female physicians are more likely to receive criticism mentioning soft skills than male physicians.</p>
          </list-item>
          <list-item>
            <p>H2b—Soft skills and reviewer gender: Female reviewers are more likely to mention soft (interpersonal) skills in negative reviews.</p>
          </list-item>
          <list-item>
            <p>H3—Reviewer gender and physician gender: Female reviewers are more likely to report negative experiences with male doctors.</p>
          </list-item>
        </list>
        <p>Hypotheses H1a and H2a relate to physician gender and reflect the findings of prior work on gender inequalities in reviewing in other fields [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. Hypotheses H1b, H2b, and H3 are based on prior work documenting gender differences in clinical encounters [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>].</p>
        <p>Our approach comprised both qualitative and quantitative perspectives on review content that mutually informed one another throughout the research process. We collected and analyzed a corpus of 154,305 reviews of doctors that constitutes a large nationally representative sample of physician reviews across all medical subfields and clinical contexts. Our focus was specifically on characterizing the differences in experience quality and content as they relate to reviewer and doctor gender.</p>
        <p>This study contributes to the larger body of work on the impact of gender on clinical interactions and provides insight into what patients value in their doctors. Furthermore, we add to the small but growing body of literature that seeks to develop a general understanding of online reviews and the systems that collect and display them. Our results must be interpreted with caution due to the unstructured and often short nature of the patient narratives in reviews, the relative crudeness of the NLP techniques employed (as compared with human interpretation), and selection biases introduced by nonrepresentative variation in demographic characteristics of reviewers and the types of experiences that motivate patients to write reviews. Such selection biases remain largely uncharacterized, although we make a novel contribution toward understanding them here.</p>
        <p>Online reviews provide an opportunity to learn, at scale, about patients’ perceptions of their doctors. Our findings have direct implications for the design of review sites and the presentation of search results. We argue that there are a number of useful ways in which gender differences can be reflected and potentially corrected in the presentation of information on the internet.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This study is a large-scale (<italic>N</italic>&#62;10<sup>5</sup>) text analysis of reviews of US physicians in the form of social media trace data. Social media trace data have the advantage of feature richness, (often) wide availability, and relation to genuine human social behavior outside of an experimental or survey context. The analytic approach we pursued in this study followed a similar process to the hybrid ethnographic and NLP approach advocated by Nelson [<xref ref-type="bibr" rid="ref35">35</xref>]. The methodological framework is a recursive process whereby qualitative text analysis (sometimes called <italic>deep reading</italic> or <italic>content analysis</italic>) informs computational feature extraction, which is then evaluated through further qualitative analysis. This refinement process continues until the patterns in the computationally derived features match the intuitions and examples accumulated through qualitative analysis fairly well. Ultimately, a quantitative analysis, in this case regression modeling, is applied to validate large-scale patterns in the data. The rest of this section details the specifics of this approach, the review sampling process, inference of reviewer and physician gender, and modeling of associations and interactions among the variables of interest.</p>
      <sec>
        <title>Physician Review Collection</title>
        <p>To examine how gender influences patient experience at scale, we sought a representative sample of reviews of US physicians. As gender and other demographic variables are rare in social media, we sought data that contained physician and reviewer names, which we used as a proxy for gender.</p>
      </sec>
      <sec>
        <title>Review Collection Application Programming Interface</title>
        <p>After exploring possible sources of physician reviews considering various application programming interface (API) features and use in prior work, we selected the Google <italic>Places</italic> API [<xref ref-type="bibr" rid="ref36">36</xref>]. The API provides access to patient and physician names, which we leveraged to infer gender, as well as a broad range of areas and specialties. The Google Places API has a 5-review limit for any particular doctor. Unfortunately, the API documentation does not provide information on how these reviews are selected. We can be fairly confident, however, that reviewer gender is not a factor. Thus, it is unlikely that this introduces bias into the sample with regard to the variables of interest. Furthermore, we took measures to ensure that our personal search histories did not influence review collection.</p>
      </sec>
      <sec>
        <title>Geographical Sampling</title>
        <p>Reviews, physicians, and practices are likely to vary by location. Differences in locale, such as ruralness or urbanity, can influence health outcomes and care options, as can regional differences. For instance, in the United States, mortality rates of particular conditions have been shown to differ greatly from state to state [<xref ref-type="bibr" rid="ref37">37</xref>]. Samples were taken across states from multiple regions of the United States using the Google Places API to control for the effect of locale. We steadily increased latitude and longitude intervals throughout each state with a 10,000-m radius to capture both urban and rural regions.</p>
      </sec>
      <sec>
        <title>Data Summary</title>
        <p>Reviews returned by the Google Places API were either for a <italic>place</italic> (such as a practice with multiple doctors) or a <italic>physician</italic>. The API provided additional <italic>review-specific</italic> data for each review: a 5-point Likert-type rating assigned by the reviewer to the doctor or practice, doctor name, reviewer name, the location of the practice, and the text content of the review. The reviewer and physician gender were determined automatically using third-party software described in the following section.</p>
        <p>The collection strategy described in this section yielded 154,305 reviews of physicians across the United States. These reviews spanned 2007 to 2017. Doctor ratings were highly polarized, exhibiting a U-shaped distribution (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Of the reviews collected, 46,605 were rated 1 star or 2 stars (<italic>negative</italic> reviews) and 107,700 were rated 4 stars or 5 stars (<italic>positive</italic> reviews). Another 3208 reviews were rated 3 stars and were omitted from our analyses. We did not screen for particular specialties.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Left: Gender distribution in the complete data set (N=154,305). The unknown category represents clinics and names that were androgynous or unknown to the gender classifier. Middle: Distribution of physician ratings by physician gender (N=137,329). Right: Distribution of physician ratings by reviewer gender (N=129,985).</p>
          </caption>
          <graphic xlink:href="jmir_v22i7e14455_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The mean length of positive reviews was 50 words, while the mean length of negative reviews was 100 words (both follow heavy-tailed distributions). The distribution of inferred gender for both doctors and reviewers is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Note that we expected to see a fairly high rate of unknown gender in these data because some reviews are for medical practices that include multiple physicians. In addition, some physicians’ gender could not be identified. Given the presence of nonperson entities among <italic>physicians</italic>, it is perhaps surprising that reviewers have a greater rate of gender ambiguity. This likely reflects typos and pseudonyms among reviewers, who have a weaker incentive to use the correct name. However, the high rate of gender detection suggests that this should not be a great concern, as almost all reviewers use <italic>real</italic> names, not screen names. We were less certain about the rate of pseudonymous users but proceeded on the assumption that even when pseudonyms are used, they accurately reflect the reviewer’s gender.</p>
      </sec>
      <sec>
        <title>Qualitative Coding</title>
        <sec>
          <title>Sampling Strategy</title>
          <p>To guide our quantitative analysis and support the validation of our approach, we additionally selected a small sample of reviews for hand coding. A total of 200 reviews were selected for hand coding using stratified sampling for a distribution of 60% negative (3/5 from each state) and 40% positive (2/5 from each state) reviews because our initial read-throughs indicated that negative verbiage was less prevalent than positive verbiage in our sample.</p>
          <p>As this sample was relatively small in comparison with the number of total reviews collected, we used search terms intended to select for specialties that would help us focus on specific patient genders. We selected 50 reviews by mostly female reviewers using <italic>maternal</italic>, <italic>fetal</italic>, <italic>fertility</italic>, <italic>natal</italic> as search terms for clinic name and <italic>maternity</italic>, <italic>fetal</italic>, <italic>miscarriage</italic>, <italic>trimester</italic>, <italic>fertility</italic>, <italic>natal</italic>, <italic>birth</italic>, <italic>pregnancy</italic>, <italic>delivery</italic>, <italic>baby</italic>, <italic>midwife</italic>, <italic>Ob/Gyn</italic> in the review. We also selected 50 reviews by mostly male reviewers using <italic>cancer</italic>, <italic>prostate</italic> in the review itself, along with 100 reviews of both male and female urology reviewers. All of these reviews were manually assessed to ensure that they represented the assigned group. Although pregnancy and prostate cancer are not comparable medically, we chose them because they selected for patients by biological sex. They are the only common conditions that affect only one biological sex. This reduces uncertainty in our interpretation of the qualitative data.</p>
          <p>We analyzed these reviews to construct a codebook (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and develop an intuition for the patterns of thematic content and gendered interaction in physician reviews. Throughout this paper, we reflect on these patterns or illustrate particular situations by quoting largely from the reviews in this sample. The intuitions we developed through our qualitative and quantitative analyses have led us to conclude that situations reported in exemplar quotations generalize beyond the women’s health or urological contexts. Accordingly, our quantitative analysis and some follow-up qualitative investigations span the entire data set.</p>
          <p>When we quote reviews, they will be cited with the following descriptors: physician gender (Male; Female; Unknown), star rating (1-star, 2-star,..., 5-star), doctor type (O=OBGYN; U=UROLOGY). For example, (Male; 5-star; O5654) is an OBGYN review of a male physician with a 5-star rating. Quotes are exemplary of many similar statements found in the reviews, with some synthesis and paraphrasing to support anonymization.</p>
        </sec>
        <sec>
          <title>Codebook Construction</title>
          <p>We developed a codebook to regulate our analysis of the reviews. These codes were divided into 2 parts: regular codes and context codes. Context codes relate to demographic information, doctor gender, or specialty, while regular codes reflect the content of the review, for example, <italic>professional</italic> or <italic>rude</italic>.</p>
          <p>We began with codes identified by López et al [<xref ref-type="bibr" rid="ref5">5</xref>]. We also used iterative open coding to identify common categories of statements in the data. After coding each of the qualitative samples in Text Analysis Markup System [<xref ref-type="bibr" rid="ref38">38</xref>], we took each coded section and created an affinity diagram, grouping similar segments independent of the initial codebook to represent the content of the reviews most accurately. These were then grouped into overarching categories that could be used for the analysis of the full data set. The process resulted in 7 main thematic areas: <italic>process</italic>, <italic>candor</italic>, <italic>trust</italic>, <italic>investment</italic>, <italic>amicability</italic>, <italic>indifference</italic>, and <italic>disrespect</italic>. The themes represented 2 general categories: those pertaining to nursing and administrative <italic>process</italic> (process) and those pertaining to soft skills. The latter group was further subdivided into <italic>positive</italic> (candor, trust, investment, amicability) and <italic>negative</italic> (indifference, disrespect) soft skills. After developing the codebook, 3 authors coded 20 reviews to assess interrater reliability using Cohen kappa (<xref ref-type="table" rid="table1">Table 1</xref>) and then refined the categories to improve agreement.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Themes that emerged in affinity diagramming and examples of the associated terms in the dictionaries used in the quantitative analysis.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="150"/>
              <col width="0"/>
              <col width="300"/>
              <col width="0"/>
              <col width="100"/>
              <col width="0"/>
              <col width="80"/>
              <col width="0"/>
              <col width="120"/>
              <col width="0"/>
              <col width="120"/>
              <col width="0"/>
              <col width="100"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Theme</td>
                  <td colspan="2">Sample terms</td>
                  <td colspan="2">Kappa<sup>a</sup></td>
                  <td colspan="2">Count</td>
                  <td colspan="2">Accuracy<sup>b</sup></td>
                  <td colspan="2">Precision</td>
                  <td>Recall</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Positive soft skills</bold>
                  </td>
                  <td colspan="2">0.61</td>
                  <td colspan="2">113</td>
                  <td colspan="2">0.77</td>
                  <td colspan="2">0.74</td>
                  <td colspan="2">0.90</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td colspan="2">Honest, explain, answer, direct</td>
                  <td colspan="2">0.95</td>
                  <td colspan="2">41</td>
                  <td colspan="2">0.84</td>
                  <td colspan="2">0.57</td>
                  <td colspan="2">0.76</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td colspan="2">Support, safe, reassure, comfort</td>
                  <td colspan="2">0.98</td>
                  <td colspan="2">27</td>
                  <td colspan="2">0.42</td>
                  <td colspan="2">0.41</td>
                  <td colspan="2">0.63</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td colspan="2">Respect, care, compassion, listen</td>
                  <td colspan="2">0.93</td>
                  <td colspan="2">65</td>
                  <td colspan="2">0.42</td>
                  <td colspan="2">0.60</td>
                  <td colspan="2">0.74</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td colspan="2">Warm, friendly, personable, funny</td>
                  <td colspan="2">0.94</td>
                  <td colspan="2">31</td>
                  <td colspan="2">0.82</td>
                  <td colspan="2">0.46</td>
                  <td colspan="2">0.84</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Negative soft skills</bold>
                  </td>
                  <td colspan="2">0.53</td>
                  <td colspan="2">25</td>
                  <td colspan="2">0.87</td>
                  <td colspan="2">0.44</td>
                  <td colspan="2">0.16</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Indifference</td>
                  <td colspan="2">Cold, dismiss, ignored, abandoned</td>
                  <td colspan="2">0.74</td>
                  <td colspan="2">16</td>
                  <td colspan="2">0.87</td>
                  <td colspan="2">0.50</td>
                  <td colspan="2">0.16</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Disrespect</td>
                  <td colspan="2">Rude, harass, condescending, arrogant</td>
                  <td colspan="2">0.40</td>
                  <td colspan="2">25</td>
                  <td colspan="2">0.92</td>
                  <td colspan="2">0.50</td>
                  <td colspan="2">0.44</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">Process</td>
                  <td colspan="2">Cost, nurse, staff, wait</td>
                  <td colspan="2">0.83</td>
                  <td colspan="2">115</td>
                  <td colspan="2">0.87</td>
                  <td colspan="2">0.84</td>
                  <td>0.96</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>Kappa represents interrater agreement (on 20 reviews).</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>Accuracy, precision, and recall, respectively, on a random sample (<italic>N</italic>=100) of 200 total reviews. A review is labeled as pertaining to a theme if at least one of the words in the theme’s dictionary is present in the review. Note the infrequency of negative soft skills (16 and 25 for indifference and disrespect, respectively), contributing to low precision and recall.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Computational Feature Extraction</title>
        <sec>
          <title>Gender Detection</title>
          <p>A third-party Python library [<xref ref-type="bibr" rid="ref39">39</xref>] was used to infer the probable gender of physicians and reviewers based on their name. Although not everyone identifies within the female-male gender binary [<xref ref-type="bibr" rid="ref22">22</xref>], currently, gender is typically assessed and reacted to with respect to this binary [<xref ref-type="bibr" rid="ref21">21</xref>], so a binarized gender of reviewers and physicians was extracted using the names provided. Although a binary definition of gender does not capture the spectrum of gender and gender relations, capturing a more complex understanding of gender is infeasible given the scope of our data and the lack of identifying information for reviewers and doctors beyond name.</p>
          <p>To verify the accuracy of the gender inference procedure, we took a random sample of 200 reviews and compared the automatically inferred gender with our human-coded gender determinations, which were informed by a close reading of the reviews, including names and gender pronouns. Automated physician gender inference was 98% accurate. The accuracy of reviewer gender was not examined because the only available measure was the reviewer’s name.</p>
          <p>The distribution of genders is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref> (left). Gender could not be inferred for 12.8% of physicians and 17.5% of patients. A logistic regression model (not shown) estimated with high confidence that female physicians are 1.41 times more likely to be reviewed by female patients (<italic>P</italic>&#60;.001). A second model (also not shown) indicates that physicians are 4.36 times more likely to be reviewed by patients of the same gender (<italic>P</italic>&#60;.001). These figures do not necessarily represent the actual gender distribution of patients <italic>seen</italic> by doctors, as there may be selection bias for or against intragender reviews.</p>
        </sec>
        <sec>
          <title>Thematic Content of Reviews</title>
          <p>Informed by the qualitative analysis, dictionaries were developed relating to the 7 themes (<italic>process</italic>, <italic>candor</italic>, <italic>trust</italic>, <italic>investment</italic>, <italic>amicability</italic>, <italic>indifference</italic>, and <italic>disrespect</italic>) identified in the qualitative coding. Review text was stemmed using Porter stemmer and tagged with a binary label for each theme if the review mentioned a word in the theme’s dictionary. Working separately, we coded 200 reviews to assess the ability of the codebook to identify each of the themes. <xref ref-type="table" rid="table1">Table 1</xref> presents summaries of the themes, their kappa statistics, and the performance of the binary variables when applied as a single-feature classifier against a random test set of 100 hand-coded reviews that were not used to inform the codebook. <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref> display the frequencies with which the terms appear in the corpus and the proportion of reviews for each combination of gender/physician rating for physician gender and patient gender separately.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Prevalence of themes by physician gender.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="130"/>
              <col width="0"/>
              <col width="220"/>
              <col width="0"/>
              <col width="210"/>
              <col width="0"/>
              <col width="210"/>
              <col width="0"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Theme</td>
                  <td colspan="7">Doctors (female, n=36,847; male, n=74,189)<sup>a,b</sup></td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <break/>
                  </td>
                  <td colspan="2">Female (negative; n=13,874), n (%)<sup>c</sup></td>
                  <td colspan="2">Female (positive; n=22,973), n (%)<sup>c</sup></td>
                  <td colspan="2">Male (negative; n=17,906), n (%)<sup>c</sup></td>
                  <td>Male (positive; n=56,283), n (%)<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Positive soft skills</bold>
                  </td>
                  <td colspan="2">7403 (53.36)</td>
                  <td colspan="2">16,984 (73.93)</td>
                  <td colspan="2">7530 (42.05)</td>
                  <td>37,993 (67.50)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td colspan="2">2756 (19.86)</td>
                  <td colspan="2">3531 (15.37)</td>
                  <td colspan="2">2227 (12.44)</td>
                  <td colspan="2">8431 (14.98)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td colspan="2">754 (5.43)</td>
                  <td colspan="2">2725 (11.86)</td>
                  <td colspan="2">745 (4.16)</td>
                  <td colspan="2">5566 (9.89)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td colspan="2">4724 (34.05)</td>
                  <td colspan="2">12,266 (53.39)</td>
                  <td colspan="2">4830 (26.97)</td>
                  <td colspan="2">26,032 (46.25)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td colspan="2">1928 (13.90)</td>
                  <td colspan="2">7481 (32.56)</td>
                  <td colspan="2">1621 (9.05)</td>
                  <td colspan="2">16,047 (28.51)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Negative soft skills</bold>
                  </td>
                  <td colspan="2">4629 (33.36)</td>
                  <td colspan="2">488 (2.12)</td>
                  <td colspan="2">4505 (25.16)</td>
                  <td>721 (1.28)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Indifference</td>
                  <td colspan="2">868 (6.26)</td>
                  <td colspan="2">163 (0.71)</td>
                  <td colspan="2">836 (4.67)</td>
                  <td colspan="2">298 (0.53)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Disrespect</td>
                  <td colspan="2">4112 (29.64)</td>
                  <td colspan="2">343 (1.49)</td>
                  <td colspan="2">3942 (22.01)</td>
                  <td colspan="2">442 (0.79)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">Process</td>
                  <td colspan="2">9330 (67.25)</td>
                  <td colspan="2">10,099 (43.96)</td>
                  <td colspan="2">9981 (55.74)</td>
                  <td>23,837 (42.35)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>Many reviews contain multiple themes, so the overall rows (bold) have smaller numbers than the sum of themes would indicate. This table includes only those reviews for which a gender was assigned (n=111,036).</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>The physician rating is denoted as negative/positive.</p>
              </fn>
              <fn id="table2fn3">
                <p><sup>c</sup>Percentages represent the proportion of reviews containing the theme for that particular gender/rating combination.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Prevalence of themes by reviewer gender.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="130"/>
              <col width="0"/>
              <col width="220"/>
              <col width="0"/>
              <col width="210"/>
              <col width="0"/>
              <col width="210"/>
              <col width="0"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Theme</td>
                  <td colspan="7">Reviewers (female, n=67,857; male, n=43,179)<sup>a,b</sup></td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <break/>
                  </td>
                  <td colspan="2">Female (negative; n=18,780), n (%)<sup>c</sup></td>
                  <td colspan="2">Female (positive; n=49,077), n (%)<sup>c</sup></td>
                  <td colspan="2">Male (negative; n=13,000), n (%)<sup>c</sup></td>
                  <td>Male (positive; n=30,179), n (%)<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Positive soft skills</bold>
                  </td>
                  <td colspan="2">9102 (48.47)</td>
                  <td colspan="2">35,334 (72.00)</td>
                  <td colspan="2">5831 (44.85)</td>
                  <td>19,643 (65.09)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td colspan="2">3104 (16.52)</td>
                  <td colspan="2">7815 (15.92)</td>
                  <td colspan="2">1879 (14.45)</td>
                  <td colspan="2">4147 (13.74)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td colspan="2">906 (4.82)</td>
                  <td colspan="2">5636 (11.48)</td>
                  <td colspan="2">593 (4.56)</td>
                  <td colspan="2">2655 (8.80)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td colspan="2">5834 (31.06)</td>
                  <td colspan="2">24,994 (50.93)</td>
                  <td colspan="2">3720 (28.62)</td>
                  <td colspan="2">13,304 (44.08)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td colspan="2">2231 (11.88)</td>
                  <td colspan="2">15,630 (31.85)</td>
                  <td colspan="2">1318 (10.14)</td>
                  <td colspan="2">7898 (26.17)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Negative soft skills</bold>
                  </td>
                  <td colspan="2">5868 (31.25)</td>
                  <td colspan="2">790 (1.61)</td>
                  <td colspan="2">3266 (25.12)</td>
                  <td>419 (1.39)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Indifference</td>
                  <td colspan="2">1101 (5.86)</td>
                  <td colspan="2">305 (0.62)</td>
                  <td colspan="2">603 (4.64)</td>
                  <td colspan="2">156 (0.52)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Disrespect</td>
                  <td colspan="2">5178 (27.57)</td>
                  <td colspan="2">513 (1.05)</td>
                  <td colspan="2">2876 (22.12)</td>
                  <td colspan="2">272 (0.90)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">Process</td>
                  <td colspan="2">11,469 (61.07)</td>
                  <td colspan="2">21,728 (44.27)</td>
                  <td colspan="2">7842 (60.32)</td>
                  <td>12,208 (40.45)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>Many reviews contain multiple themes, so the overall rows (bold) have smaller numbers than the sum of themes would indicate. This table includes only those reviews for which a gender was assigned (N=111,036).</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>The physician rating is denoted as negative/positive.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>Percentages represent the proportion of reviews containing the theme for that particular gender/rating combination.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p>Dictionary-based text analysis is crude in that it cannot determine valence, that is, the dictionary approach cannot distinguish between the phrase “Dr. X listens,” for example, “Great bedside manner. She was kind and listened to everything I had to say” (Female; 5-star; 15180), and the many variants of its negation, for example, “I never felt like she truly listened” (Female; 1-star; 25068). Positive soft skills are more likely to be negated than negative ones, largely because double negatives are far less common than single negatives in English. This is borne out by the associations between each theme and <italic>negative reviews</italic> in the set of models summarized in <xref ref-type="table" rid="table4">Table 4</xref>. Determining valence is further complicated by constructs that contradict a negative, such as “If you want a doctor who knows what’s best after not listening to you for 5 minutes, don’t see him. This quality of care is almost impossible to find” (Male; 5-star; 40558). As reviewers have assigned a general valence to their experience, we leveraged physician ratings to distinguish positive from negative sentiment. However, this applies only in analyses where positive reviews are considered in isolation from negative reviews and vice versa. In the quantitative analyses below, we typically controlled for interactions between gender and physician rating. Therefore, when considering a soft skill in the context of reviews with ratings of the opposite valence, the soft skill should be interpreted as the negation of that interpersonal trait. For instance, when <italic>amicability</italic> appears in a negative review, the reader should interpret this as the <italic>absence</italic> of amicability, whereas <italic>disrespect</italic> in the same review should be regarded as the <italic>presence</italic> of disrespect.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Logistic regression on the presence of a theme in review (n=106,325).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="100"/>
              <col width="90"/>
              <col width="80"/>
              <col width="100"/>
              <col width="100"/>
              <col width="150"/>
              <col width="180"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Model<sup>a</sup></td>
                  <td>Intercept</td>
                  <td>Doctor<sub>F</sub><sup>b</sup></td>
                  <td>Reviewer<sub>F</sub><sup>b</sup></td>
                  <td>Rating<sub>Neg</sub></td>
                  <td>Doctor<sub>F</sub>×Rating<sub>Neg</sub></td>
                  <td>Reviewer<sub>F</sub>×Rating<sub>Neg</sub></td>
                  <td>Doctor<sub>F</sub>×Reviewer<sub>F</sub></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="9">
                    <bold>No interactions</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td>−1.92<sup>c</sup></td>
                  <td>0.15<sup>c</sup></td>
                  <td>0.16<sup>c</sup></td>
                  <td>−0.10<sup>c</sup></td>
                  <td>—<sup>d</sup></td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td>−2.40<sup>c</sup></td>
                  <td>0.19<sup>c</sup></td>
                  <td>0.27<sup>c</sup></td>
                  <td>−1.01<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td>−0.40<sup>c</sup></td>
                  <td>0.27<sup>c</sup></td>
                  <td>0.21<sup>c</sup></td>
                  <td>−0.88<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td>−1.57<sup>c</sup></td>
                  <td>0.25<sup>c</sup></td>
                  <td>0.23<sup>c</sup></td>
                  <td>−1.20<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Indifference</td>
                  <td>−5.26<sup>c</sup></td>
                  <td>0.29<sup>c</sup></td>
                  <td>0.18<sup>c</sup></td>
                  <td>2.19<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Disrespect</td>
                  <td>−5.30<sup>c</sup></td>
                  <td>0.35<sup>c</sup></td>
                  <td>0.24<sup>c</sup></td>
                  <td>3.39<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td colspan="9"><bold>Doctor</bold><sub>F</sub>×<bold>Rating</bold><sub>Neg</sub></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td>−1.88<sup>c</sup></td>
                  <td>0.01</td>
                  <td>0.16<sup>c</sup></td>
                  <td>−0.31<sup>c</sup></td>
                  <td>0.47<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td>−2.39<sup>c</sup></td>
                  <td>0.18<sup>c</sup></td>
                  <td>0.27<sup>c</sup></td>
                  <td>−1.03<sup>c</sup></td>
                  <td>0.04</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td>−0.40<sup>c</sup></td>
                  <td>0.26<sup>c</sup></td>
                  <td>0.21<sup>c</sup></td>
                  <td>−0.89<sup>c</sup></td>
                  <td>0.01</td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td>−1.56<sup>c</sup></td>
                  <td>0.20<sup>c</sup></td>
                  <td>0.23<sup>c</sup></td>
                  <td>−1.34<sup>c</sup></td>
                  <td>0.29<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td colspan="9"><bold>Reviewer</bold><sub>F</sub>×<bold>Rating</bold><sub>Neg</sub></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Candor</td>
                  <td>−1.92<sup>c</sup></td>
                  <td>0.15<sup>c</sup></td>
                  <td>0.16<sup>c</sup></td>
                  <td>−0.10<sup>c</sup></td>
                  <td>—</td>
                  <td>−0.00</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trust</td>
                  <td>−2.42<sup>c</sup></td>
                  <td>0.19<sup>c</sup></td>
                  <td>0.31<sup>c</sup></td>
                  <td>−0.86<sup>c</sup></td>
                  <td>—</td>
                  <td>−0.23<sup>c</sup></td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Investment</td>
                  <td>−0.42<sup>c</sup></td>
                  <td>0.27<sup>c</sup></td>
                  <td>0.25<sup>c</sup></td>
                  <td>−0.80<sup>c</sup></td>
                  <td>—</td>
                  <td>−0.14<sup>c</sup></td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Amicability</td>
                  <td>−1.58<sup>c</sup></td>
                  <td>0.25<sup>c</sup></td>
                  <td>0.25<sup>c</sup></td>
                  <td>−1.12<sup>c</sup></td>
                  <td>—</td>
                  <td>−0.13<sup>e</sup></td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td colspan="9"><bold>Doctor</bold><sub>F</sub>×<bold>Reviewer</bold><sub>F</sub></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Disrespect</td>
                  <td>−5.34<sup>c</sup></td>
                  <td>0.46<sup>c</sup></td>
                  <td>0.31<sup>c</sup></td>
                  <td>3.39<sup>c</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>−0.16<sup>e</sup></td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>Rows represent distinct logit models for each of the 7 themes. Each cell reports the log-likelihood that a variable is associated with the given theme. Sentences containing terms related to the process have been removed from the reviews.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>Female=1, male=0.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup><italic>P</italic>&#60;.001.</p>
              </fn>
              <fn id="table4fn4">
                <p><sup>d</sup>Missing value indicates that no coefficient was estimated for the given endogenous variable.</p>
              </fn>
              <fn id="table4fn5">
                <p><sup>e</sup><italic>P</italic>&#60;.01.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Quantitative Analysis</title>
        <p>A total of 3 sets of logistic regression models were fitted to these data. Reviews for which either reviewer or physician gender could not be identified were removed from the analysis in all of the models presented in the Results section. This reduced the data set by 28% from 154,305 reviews to 111,036.</p>
        <p>The first set of models we present investigated the association between negative reviews and reviewer/doctor gender. The second set comprised models examining the likelihood that a review mentions a soft skill. As our primary variables of interest are binary, and we are interested in interactions among those binary variables, the interpretation of the logistic regression variables is complicated. All interaction terms disrupt the interpretation of their component variables, but this interpretation is even more difficult when estimating all pairwise interactions of a set of variables (in this case, 3). Effectively, this decomposes each main effect coefficient into different components, which must be carefully interpreted and summed to construct odds ratios (ORs) for various conditions. We present one set of regressions on each review theme for each interaction rather than estimating all 3 interactions in a single model for readability. Through these sets, we tested <italic>physician gender</italic>, <italic>reviewer gender</italic>, and <italic>physician rating</italic> for pairwise interactions. The intercept and noninteracting effects are only marginally altered between these models, if at all. An additional set of models estimates the main effects of each dependent variable. When reporting these results, models are grouped by these model classes rather than by the dependent variable, as the relation between the independent variables takes priority over the particulars of the review themes. Finally, a pair of models estimates how administrative process correlates with gender, physician ratings, and soft skills.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>This section discusses the statistical models fit to the review data. The report and discussion of these results is supplemented with excerpts of real reviews examined in the qualitative component of this study. We present these reviews to illustrate and contextualize the quantitative findings and the computational method of feature extraction.</p>
      <sec>
        <title>Physician Ratings and Gender</title>
        <p>Owing to the possibility that reviews could be influenced by clinical processes outside of physician control, we first fit a pair of models with and without mentions of bureaucratic <italic>process</italic>. The logit models summarized in <xref ref-type="table" rid="table5">Table 5</xref> indicate that physician ratings are extensively influenced by gender, irrespective of mentions of <italic>process</italic>. Four models examine the correlation between sets of independent variables and the probability of a <italic>negative review</italic>. The <italic>A</italic> models were fit on all reviews for which we were able to infer both doctor and reviewer gender (<italic>N</italic>=111,036). In the data set on which we regressed the <italic>B</italic> models, we stringently controlled for mentions of <italic>process</italic>, which refer not to the physician but instead to the clinical aspects beyond the patient-physician relationship. The data set for the <italic>B</italic> models is the result of filtering sentences that mention terms associated with <italic>process</italic> from the reviews and then removing any reviews that were left without text. This process yielded a slightly smaller data set (<italic>N</italic>=106,325). Models 1A and 1B present a base model that includes only physician and reviewer gender and an interaction term. Models 2A and 2B control for soft skills, with Model 2A controlling for <italic>process</italic>. The overlapping coefficient estimates do not differ substantially between the 4 models, suggesting that mentions of <italic>process</italic> do not substantially alter the correlations captured by the variables of interest in these data. However, we conservatively controlled for them in the remainder of this section, except in models that consider <italic>process</italic> explicitly.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Logistic regression on rating negative (A: n=111,036; B: n=106,325). Models 1 and 2 differ in the inclusion of review content themes. The B variants show the effects of filtering sentences mentioning process from each review.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="130"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Variable</td>
                <td>Model 1A</td>
                <td>Model 1B (no process)</td>
                <td>Model 2A</td>
                <td>Model 2B (no process)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Intercept</td>
                <td>−1.05</td>
                <td>−1.08</td>
                <td>−1.12</td>
                <td>−0.80</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Doctor<sub>F</sub><sup>a</sup></td>
                <td>0.67</td>
                <td>0.69</td>
                <td>0.63</td>
                <td>0.71</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Reviewer<sub>F</sub><sup>a</sup></td>
                <td>−0.16</td>
                <td>−0.14</td>
                <td>−0.14</td>
                <td>−0.11</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Doctor<sub>F</sub>×Reviewer<sub>F</sub></td>
                <td>−0.03</td>
                <td>−0.05</td>
                <td>−0.02</td>
                <td>−0.03</td>
                <td>NS<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Candor</td>
                <td>N/A<sup>c</sup></td>
                <td>N/A</td>
                <td>0.06</td>
                <td>0.05</td>
                <td>&#60;.05</td>
              </tr>
              <tr valign="top">
                <td>Trust</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>−0.95</td>
                <td>−0.92</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Investment</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>−0.89</td>
                <td>−0.90</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Amicability</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>−1.49</td>
                <td>−1.18</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Indifference</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>2.35</td>
                <td>2.28</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Disrespect</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>3.42</td>
                <td>3.45</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Process</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>0.84</td>
                <td>N/A</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Female=1, male=0.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>NS: not significant.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p/>
        <p>Although the dictionaries that contain terms related to review themes were developed in conjunction with our qualitative analysis and were thus thoroughly vetted, there remained concerns that these term lists do not adequately capture the themes they purportedly represent. As discussed in the Methods section, dictionary-based or <italic>bag of words</italic> (presence or absence of terms) approaches to natural language understanding often struggle to overcome or capture nuance in word use, notably suffering an inability to distinguish positive use from negation. We fit 2 models (2A and 2B) to verify that the soft skill dictionaries are correlated with negative reviews, as expected. The coefficients for the themes indicate that our dictionaries capture the basic tendency we anticipated: 3 of 4 positive soft skills, <italic>trust</italic>, <italic>investment</italic>, and <italic>amicability</italic>, are correlated with positive reviews (negative coefficients; <italic>P</italic>&#60;.001), and 2 negative soft skills, <italic>indifference</italic> and <italic>disrespect</italic>, are correlated with negative reviews (<italic>P</italic>&#60;.001). Notably, <italic>candor</italic> is not associated with either positive or negative reviews. This suggests the possibility that <italic>candor</italic> was mischaracterized by its dictionary. However, the intercoder agreement and classifier performance in <xref ref-type="table" rid="table1">Table 1</xref> imply that the dictionary for <italic>candor</italic> captures the theme equally well as the other positive soft skills. Rather, it seems that <italic>candor</italic> was misclassified as a <italic>positive</italic> soft skill and, as defined by its dictionary, is perhaps better understood as a <italic>neutral</italic> soft skill, appearing equally in positive and negative reviews. The models estimate large absolute effects for the three other positive soft skills. 
A negative soft skill, <italic>indifference</italic>, fits a considerably greater effect than any positive soft skill, and <italic>disrespect</italic> fits an even greater effect. This is consistent with the descriptive statistics in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>, which suggest that although positive soft skills are more strongly associated with positive reviews, they also appear often in negative reviews. Negative soft skills by contrast occur overwhelmingly in negative reviews. Generally, these findings further suggest that our dictionaries accurately represent the themes they attempt to capture.</p>
        <sec>
          <title>H1a: Female Physicians Are More Likely to Receive Negative Reviews Than Male Physicians</title>
          <disp-quote>
            <p>She was harsh and short. I always felt rushed and uncomfortable... it was like she was just making sure she did what was required. No sympathy at all.</p>
            <attrib>Female; 1-star review; O6198</attrib>
          </disp-quote>
          <p>The logit models on physician ratings (<xref ref-type="table" rid="table5">Table 5</xref>) indicate that female doctors are considerably more likely to receive negative reviews. Model 1B, which includes only physician and reviewer gender and an interaction term, estimates that female physicians’ reviews are 2.00 (95% CI 1.90-2.10) times as likely to be negative as the reviews of male physicians (log OR 0.69, 95% CI 0.65-0.74; <italic>P</italic>&#60;.001). Model 2B, which controls for mentions of soft skills, estimates a slightly larger coefficient.</p>
        </sec>
        <sec>
          <title>H1b: Female Reviewers Are More Likely to Report Negative Experiences With Doctors</title>
          <p>Contrary to our hypothesis, Model 1B estimates that men write negative reviews at 1.15 (95% CI 1.08-1.16) times the rate of women (<italic>P</italic>&#60;.001). Controlling for review content themes (Model 2B) fit a slightly smaller estimate. There was no interaction between physician gender and reviewer gender, indicating that female patients are no more likely to give a doctor of a particular gender a negative review than men are.</p>
        </sec>
        <sec>
          <title>Patient Experience, Physician Ratings, and Gender</title>
          <p>Several batteries of logistic regression models were fit to investigate how specific aspects of the patient experience (review themes) interact with gender and overall patient experience (physician rating). The coefficients estimated by these models are listed in <xref ref-type="table" rid="table4">Table 4</xref>. As described in the Methods section, we fit separate models for each of the interaction terms, as the interpretation of multiple interaction terms is complicated, and separating them into distinct models does not significantly alter the results. We fit models that interact for gender and physician ratings for positive soft skills only, as negative soft skills are almost exclusively found in negative reviews. Furthermore, we report the gender×gender interaction model for disrespect only, as no other model estimated a significant interaction.</p>
          <p>As comments about <italic>process</italic> may be wrongly ascribed to a doctor’s soft skills, the model for each theme controls for mentions of <italic>process</italic>. We found that <italic>process</italic> was significantly associated with all soft skills (<italic>P</italic>&#60;.001), including a strong correlation with <italic>amicability</italic> and <italic>disrespect</italic> (see <xref ref-type="table" rid="table6">Table 6</xref> and <italic>Process and Gender</italic> section for a more detailed treatment of this model). The model estimates that <italic>process</italic> is 2.73 (95% CI 2.65-2.86) times as likely to co-occur with <italic>amicability</italic> and 2.02 (95% CI 1.92-2.13) times as likely to co-occur with <italic>disrespect</italic>. This is unsurprising given our qualitative investigation, which found that the reviewers commonly commented on the friendliness or rudeness of the staff. For instance, when positive reviews mentioned both <italic>process</italic> and <italic>disrespect</italic>, it almost always contrasted a positive experience with a physician with a negative process experience. Reviewers seemed to be fairly capable of separating feelings about bureaucratic process from their experience with a physician, setting their dissatisfaction with, for example, staff, insurance, or booking aside when assigning a rating to a doctor who otherwise provided a good clinical experience.</p>
        </sec>
      </sec>
      <sec>
        <title>Physician Ratings and Soft Skills</title>
        <p>In our qualitative analysis, we observed that reviews mentioning positive soft skills were primarily associated with high scores for male and female doctors. Reviewers wrote positively about physicians who were candid and direct, “Ladies this doctor listens and responds with respect, she does not talk down to you either” (Female; 5-star; U940); who were trustworthy and supportive, “The delivery would’ve been terrifying without him” (Male; 4-star; O7390); invested, “She asks questions and listens. She makes me feel like I am important” (Female; 5-star; U933); and amicable, for example, “She is an amazing doctor. Kind, caring, empathetic, warm, knowledgeable, quick thinking, funny, and honest” (Female; 5-star; O679). As stated in the discussion of the logit models in <xref ref-type="table" rid="table5">Table 5</xref>, positive soft skills are more likely to appear in positive reviews (<italic>P</italic>&#60;.001). These models also estimate large effects for positive soft skills except <italic>candor</italic>, being at least 2.4 times as likely to appear in positive reviews than negative ones.</p>
        <p>We also coded for 2 negative soft skills, <italic>indifference</italic> and <italic>disrespect</italic>. <italic>Indifference</italic> was relatively rare, appearing in only 2% of the reviews. <italic>Disrespect</italic> was more common, occurring in 8% of all reviews. Unlike positive soft skills, which appeared in both positive and negative reviews, negative soft skills were almost exclusively found in negative reviews. When they did appear in positive reviews, it almost always referred to bureaucratic process, not the physician. Typical reviewer comments coded for negative soft skills relate experiences with doctors who lack courtesy, patience, and warmth toward their patients, for example, “I could not believe how condescending and snippy she was!” (Female; 1-star; O8376); “He was very rude, condescending, arrogant, and appeared angry” (Male; 1-star; U1122). Reviewers also described feeling ignored, “I felt passed around and ignored” (Female; 1-star; O10061), or that their concerns were dismissed, “...brushed it off” (Female; 1-star; O10100) and “I was in tears because he was too stubborn to listen” (Male; 1-star; U1047). Other complaints included ignoring patients’ understanding of their own medical condition and lack of inclusion in decision-making. As reviewers mentioned, “The doctor does not listen to you and forces his opinion down your throat without considering your view. Do not visit here” (Male; 1-star; U398) and “He did not want to listen to anything I had to say and he definitely didn’t want me getting a second opinion. He got defensive and standoffish at the mention of any other opinion which says SHADY all over it” (Male; 1-star; U198).</p>
        <p>The logit models for negative soft skills indicate that negative soft skills are far more likely to appear in negative reviews than positive ones. The log-likelihood coefficients are considerably stronger for negative than for positive soft skills. This is because positive soft skills can be negated to note the absence of a positive quality, whereas negative soft skills are rarely negated to indicate a positive quality.</p>
        <sec>
          <title>H2a: Female Physicians Are More Likely to Receive Criticism Mentioning Soft Skills Than Male Physicians</title>
          <disp-quote>
            <p>I feel 100 percent comfortable telling her anything because I know she holds no judgment and treats everybody with fairness and kindness.</p>
            <attrib>Female; 5-star review; O8982</attrib>
          </disp-quote>
          <p>The coefficients in the <italic>Doctor</italic><sub>F</sub> × <italic>Rating</italic><sub>Neg</sub> models indicate that positive soft skills are more likely to occur in reviews of female physicians. In positive reviews, <italic>trust</italic>, <italic>investment</italic>, and <italic>amicability</italic> were more strongly associated with the reviews of female doctors than those of male doctors (<italic>P</italic>&#60;.001). Trust and investment show no significant interaction between physician gender and physician rating, indicating that all reviews of female physicians are more likely to mention <italic>trust</italic> and <italic>investment</italic> than those of men. The models estimate that <italic>trust</italic> occurs 1.20 (95% CI 1.15-1.26) times and <italic>investment</italic> 1.31 (95% CI 1.27-1.34) times as often in reviews of female physicians as in those of male physicians. <italic>Amicability</italic> is estimated to have a significant effect on positive reviews and an additional amplifying effect in negative reviews. Positive reviews of female physicians reported <italic>amicability</italic> more often than those of male physicians (log OR 0.20, 95% CI 0.17-0.24; <italic>P</italic>&#60;.001). To calculate the probability of mentions of <italic>amicability</italic> in female doctors’ negative reviews, we summed the <italic>base</italic> (that of positive reviews) log OR (0.20) with the log OR (0.29, 95% CI 0.19-0.38; <italic>P</italic>&#60;.001) from the interaction term. The model estimates that <italic>amicability</italic> is much more likely to be mentioned in negative reviews of female physicians than in reviews of male physicians (log OR 0.49, 95% CI 0.36-0.62). Similarly, <italic>candor</italic> is much more likely to appear in negative reviews of female physicians (log OR 0.48, 95% CI 0.35-0.60; <italic>P</italic>&#60;.001), although it is equally likely to appear in male and female physicians’ positive reviews.</p>
          <p>Negative soft skills are more easily interpreted than positive ones, as they are less likely to be negated and thus occur predominately with negative valence. As discussed earlier, this is supported by the stronger associations of negative soft skills with negative reviews than those of positive soft skills and positive reviews. Qualitative analysis of reviews indicated that <italic>disrespect</italic>, when it occurs in positive reviews, usually refers to <italic>process</italic>. However, occasionally, reviewers will contradict or justify negative soft skills when referring to positive experiences with physicians, for example, “Some may misinterpret her candor as rudeness, but I appreciate that about her - she always gets right to the point” (Male; 5-star; 951) and “He can seem somewhat arrogant, but I’ve been seeing him for a while now, and he really knows his stuff and takes his patients very seriously. He has a right to think highly of himself!” (Male; 5-star; 7181).</p>
          <p>Given their overwhelmingly negative valence, it is sufficient to model negative soft skills without interactions for gender and overall review quality (the <italic>No Interactions</italic> section of <xref ref-type="table" rid="table4">Table 4</xref>). However, it is important to control for physician ratings given the much higher rate of negative reviews in women’s reviews. Both <italic>indifference</italic> (log OR 0.29, 95% CI 0.20-0.37; <italic>P</italic>&#60;.001) and <italic>disrespect</italic> (log OR 0.35, 95% CI 0.30-0.41; <italic>P</italic>&#60;.001) are more likely to be mentioned in reviews of female physicians than in reviews of male physicians.</p>
        </sec>
        <sec>
          <title>H2b: Female Reviewers Are More Likely to Mention Soft Skills in Negative Reviews</title>
          <p>Our qualitative analysis did not reveal consistent patterns of association between mentions of soft skills and reviewer gender. As this investigation was limited to a small sample of reviews, we expected the quantitative results to yield patterns consistent with observations of gender differences in patients’ clinical experience reported in the literature. The logit models of soft skills offer 2 advantages in detecting gender bias in clinical settings. First, the high volume of observations may detect a pattern that was too rare to emerge from the qualitative analysis. Second, it may be that gender bias is not explicitly identified in most reviews but rather emerges when looking at the reviews in aggregate. The logistic models of soft skills demonstrate several associations between reviewer gender and soft skills, including interesting interaction effects.</p>
          <p>The <italic>Reviewer</italic><sub>F</sub> × <italic>Rating</italic><sub>Neg</sub> models in <xref ref-type="table" rid="table4">Table 4</xref> find that all positive soft skills are more likely to occur in reviews written by women than those written by men (<italic>P</italic>&#60;.001). <italic>Candor</italic> is roughly 1.17 (95% CI 1.13-1.21) times more likely to appear in all reviews written by women. The other 3 positive soft skills demonstrate a higher rate among women in positive reviews and a compensatory effect in women’s negative reviews. However, this effect merely dampens the greater probability of occurring in reviews written by women, not equalizing it. In the models that did not fit coefficients for gender and rating interactions (<italic>No Interactions</italic>), both <italic>indifference</italic> (log OR 0.18, 95% CI 0.09-0.27; <italic>P</italic>&#60;.001) and <italic>disrespect</italic> (log OR 0.24, 95% CI 0.18-0.30; <italic>P</italic>&#60;.001) were more likely to appear in reviews written by women.</p>
        </sec>
        <sec>
          <title>H3: Female Reviewers Are More Likely to Report Negative Experiences With Male Doctors</title>
          <disp-quote>
            <p>I have hunted for a female Urologist for several years. I was dealing with a male doctor who kept blowing off my concerns as a woman and telling me what women think they feel or know.</p>
            <attrib>Female; 5-star review; U940</attrib>
          </disp-quote>
          <p>When women mentioned soft skills, they occasionally related difficulties with their doctor to physician gender. However, female reviewers rarely attributed poor treatment to their womanhood or to male physicians treating women poorly. It was also rare that women commented on the absence of bias in settings where they might have expected it, for example, “While he treats women and men, I think his sensitivity makes him especially good with women” (Male; 5-star; U1081).</p>
          <p>To examine whether women or men report differential treatment depending on the gender of their doctor, each model tested an interaction effect between physician gender and patient gender. Only <italic>disrespect</italic> produced a highly significant gender×gender interaction. The model for <italic>disrespect</italic> estimates log OR of −0.16 (95% CI −0.28-−0.04; <italic>P</italic>=.008) for women who review female doctors. The reader may be inclined to interpret the negative coefficient for gender×gender interaction as evidence that women are less likely to report <italic>disrespect</italic> when seeing a female physician. This is true, but it must be qualified when we ask the question, <italic>less likely relative to what</italic>?</p>
          <p>The odds ratio for the gender×gender interaction indicates that the discrepancy in reports of disrespect between women and men is not so great when seeing a female doctor as we would have expected given the difference between women and men when seeing a male doctor. Female reviewers would seem to benefit from seeing female doctors, as we cannot reject the null that men and women report <italic>disrespect</italic> from a female doctor at equal probability. The <italic>Doctor</italic><sub>F</sub> × <italic>Reviewer</italic><sub>F</sub> model of <italic>disrespect</italic> estimates women to be 1.37 (95% CI 1.26-1.49) times as likely (log OR 0.31, 95% CI 0.23-0.40; <italic>P</italic>&#60;.001) as men to report <italic>disrespect</italic> when seeing a male doctor (the <italic>Reviewer</italic><sub>F</sub> column). Given the interaction term, this probability represents the OR that a woman (compared with a man) reports <italic>disrespect</italic> from a <italic>male doctor</italic>. Summing this <italic>base</italic> probability with the interaction coefficient (log OR −0.16) estimates that female reviewers are 1.16 times (log OR 0.15, 95% CI −0.05-0.36) more likely to report <italic>disrespect</italic> when seen by a female doctor than a <italic>man</italic> seeing a female doctor. As the 95% confidence interval overlaps 0, we cannot reject the null hypothesis, that men and women report disrespect with the same probability when seeing women doctors. Similarly, when a man reviews a female physician, he is 1.58 (95% CI 1.43-1.74; log OR 0.46, 95% CI 0.36-0.55) times more likely to associate her with <italic>disrespect</italic> than he would a male doctor. The compensatory effect of the gender×gender interaction coefficient diminishes, but does not dissolve, the probability that a female doctor is reported to be disrespectful. 
When reviewed by female patients, female doctors are 1.35 (95% CI 1.08-1.67) times more likely to be associated with <italic>disrespect</italic> than a male doctor.</p>
        </sec>
      </sec>
      <sec>
        <title>Process and Gender</title>
        <p>Although we made no predictions about administrative <italic>process</italic>, it is worth noting several patterns that emerged from the logistic models on mentions of <italic>process</italic>. We fit 2 models of <italic>process</italic> reported in <xref ref-type="table" rid="table6">Table 6</xref> in the reviews for which gender could be inferred and which did not filter out mentions of <italic>process</italic> (<italic>N</italic>=111,036). Model 1 parallels the models of themes in <xref ref-type="table" rid="table4">Table 4</xref> and accordingly reports interaction effects as separate models. Model 2 estimates correlations between each theme and <italic>process</italic>, controlling for rating and gender.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Logistic regression on the presence of process in review (N=111,036).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="190"/>
            <col width="220"/>
            <col width="250"/>
            <col width="0"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Variable</td>
                <td colspan="3">Model 1</td>
                <td colspan="2">Model 2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Doctor<sub>F</sub><sup>a</sup>×Rating<sub>Neg</sub></td>
                <td>Reviewer<sub>F</sub><sup>a</sup>×Rating<sub>Neg</sub></td>
                <td colspan="2">Doctor<sub>F</sub>×Reviewer<sub>F</sub></td>
                <td>Themes</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Intercept</td>
                <td>−0.37<sup>b</sup></td>
                <td>−0.41<sup>b</sup></td>
                <td colspan="2">−0.43<sup>b</sup></td>
                <td>−0.84<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Doctor<sub>F</sub></td>
                <td>0.06<sup>b</sup></td>
                <td>0.20<sup>b</sup></td>
                <td colspan="2">0.19<sup>b</sup></td>
                <td>0.12<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Reviewer<sub>F</sub></td>
                <td>0.11<sup>b</sup></td>
                <td>0.11<sup>b</sup></td>
                <td colspan="2">0.14<sup>b</sup></td>
                <td>0.04<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>Rating<sub>Neg</sub></td>
                <td>0.54<sup>b</sup></td>
                <td>0.70<sup>b</sup></td>
                <td colspan="2">0.78<sup>b</sup></td>
                <td>0.83<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Doctor<sub>F</sub> × Rating<sub>Neg</sub></td>
                <td>0.42<sup>b</sup></td>
                <td>—<sup>d</sup></td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Reviewer<sub>F</sub> × Rating<sub>Neg</sub></td>
                <td>—</td>
                <td>−0.02</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Doctor<sub>F</sub> × Reviewer<sub>F</sub></td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">−0.13<sup>b</sup></td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Candor</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>0.26<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Trust</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>0.34<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Investment</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>0.22<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Amicability</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>1.01<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Indifference</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>0.00</td>
              </tr>
              <tr valign="top">
                <td>Disrespect</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">—</td>
                <td>0.70<sup>b</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Female=1, male=0.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup><italic>P</italic>&#60;.001.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup><italic>P</italic>&#60;.01.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>Missing values indicate that no coefficient was estimated for the given endogenous variable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><italic>Process</italic> is much more likely to be mentioned in negative reviews (<italic>P</italic>&#60;.001). This is consistent across all models. This may be because when process is smooth, it is more likely to go unnoticed, whereas poor experiences with process are more likely to color the overall experience. Parallel to the trend observed in soft skills, <italic>process</italic> more often occurs in reviews written by and about women (<italic>P</italic>&#60;.001). The <italic>Doctor</italic><sub>F</sub> × <italic>Rating</italic><sub>Neg</sub> model estimates that negative reviews of female physicians are 2.61 (95% CI 2.41-2.88) times as likely to mention process as negative reviews of male physicians (log OR 0.96, 95% CI 0.88-1.06; <italic>P</italic>&#60;.001). By contrast, female and male reviewers mention process in negative reviews at equal rates. Women who see female doctors are less likely to mention <italic>process</italic>, which produces an equalizing effect that offsets the greater rate of reports of <italic>process</italic> for both female reviewers and physicians.</p>
        <p>We also examined the association of <italic>process</italic> with soft skills. Model 2 demonstrates a positive correlation between mentions of <italic>process</italic> and soft skills. The correlations with <italic>amicability</italic> and <italic>disrespect</italic> are sizable (log OR 1.01, 95% CI 0.98-1.04 and 0.70, 95% CI 0.65-0.76, respectively; <italic>P</italic>&#60;.001), indicating that patients value the ease of interpersonal interactions with staff, that is, whether they are <italic>friendly</italic> or <italic>rude</italic>, and likely interpret bureaucratic competence through the framework of how <italic>nice</italic> the staff are. In our qualitative analysis, we found that positive reviews expressing <italic>disrespect</italic> overwhelmingly do so with regard to <italic>process</italic>, indicating that reviewers are able to separate relationships with their doctor and the overall clinical experience as reflected in the final review. This compartmentalization is well illustrated by a patient who reported a positive experience with a doctor but faced problems with poor administration such that the patient ultimately severed their relationship with the clinic:</p>
        <disp-quote>
          <p>Terrific bedside manner!! Really dedicates time to patients and will even follow up by phone. The staff are rude and incompetent though. They repeatedly failed to file paperwork with my insurance. I got fed up with it and had to find a new doctor.</p>
          <attrib>Unknown; 5-star review; 3686</attrib>
        </disp-quote>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Interpreting These Results</title>
        <p>Our results provide compelling evidence for a number of effects of gender on patient experience, as reported in physician reviews. These findings may be interpreted through 2 distinct frames. First, a <italic>patient experience</italic> frame attempts to interpret gender dynamics in the context of the patient-physician relationship. This frame should be familiar to readers versed in the literature on gender and clinical experience. The second frame, the <italic>online review system</italic> frame, seeks not to understand or improve the clinical aspects of health care but rather considers how gender differences may subvert or be leveraged to improve reviews as a valuable public resource that informs decisions about care-seeking.</p>
        <p>The following discussion of the results in the context of the hypotheses of this study assumes the <italic>patient experience</italic> perspective. The Summary and Recommendations section, however, largely reflects the <italic>online review system</italic> perspective, which is less concerned with controlled statistical inference than it is with how the descriptive statistical patterns in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref> might affect public perception of physician quality and how we might design online review systems to improve physician-patient matching and offset bias.</p>
      </sec>
      <sec>
        <title>H1a: Physician Ratings and Physician Gender</title>
        <p>We hypothesized that female physicians would be more likely to receive lower ratings. This hypothesis was supported by our study. These data indicate that there is considerable reviewer bias against female physicians. This is consistent with well-documented patterns of bias against women in other fields, notably when reviewing instructor performance in a controlled online classroom [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
      </sec>
      <sec>
        <title>H1b: Reviewer Gender and Rating</title>
        <p>We hypothesized that female patients would report overall worse clinical experiences than men. The results here support the opposite hypothesis. We found that men are slightly more likely to report negative experiences than women.</p>
        <p>There are several valid interpretations of these findings. Men may receive worse care than women, as captured by patient experiences (rather than health outcomes). Alternatively, it could be that men have higher expectations for care than women or are less competent at navigating the clinical setting. Both are plausible given that men less frequently utilize health services [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. Finally, we might attribute the discrepancy to women’s greater propensity for agreeableness [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], and for forgiveness [<xref ref-type="bibr" rid="ref44">44</xref>] and compromise [<xref ref-type="bibr" rid="ref45">45</xref>] in interpersonal conflict. These interpretations are not mutually exclusive, and further research is warranted to account for this trend.</p>
      </sec>
      <sec>
        <title>H2a: Soft Skills and Physician Gender</title>
        <p>We hypothesized that reviews of female physicians would be more likely to critique their soft skills. Our results indicate that this is true of all soft skills. All soft skills were more often mentioned in the reviews of female physicians. In negative reviews of female physicians, reviewers were considerably more likely to mention <italic>candor</italic> and <italic>amicability</italic>.</p>
        <p>The results supporting this hypothesis indicate that female physicians’ soft skills are more likely to be critiqued and that female physicians are much more likely to be associated with <italic>disrespect</italic>. We also present evidence that women may be penalized for lacking <italic>candor</italic> and <italic>amicability</italic> to a much greater degree than men. This may be attributed to failure to live up to a positive stereotype, as women are generally expected to be more open and personable, and female physicians in particular are expected to be more caring [<xref ref-type="bibr" rid="ref46">46</xref>]. Furthermore, the physician role is one of authority, and research has extensively documented that women are punished for leadership styles that men are rewarded for [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>].</p>
      </sec>
      <sec>
        <title>H2b: Soft Skills and Reviewer Gender</title>
        <p>We hypothesized that female reviewers are more likely to reflect on their doctor’s soft skills. This hypothesis was supported by our study.</p>
        <p>In positive reviews, women mentioned all positive soft skills with greater probability. However, the magnitude of these effects was diminished in negative reviews. These patterns suggest that women may be more inclined to value a physician’s soft skills. However, they also indicate that men may be more sensitive to a lack of positive soft skills than their presence.</p>
        <p>We found that women were more likely to mention the negative soft skills, <italic>indifference</italic> and <italic>disrespect</italic>. This likely reflects the wealth of literature documenting the tendency for physicians to take women’s concerns less seriously and treat them with condescension [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
      </sec>
      <sec>
        <title>H3: Reviewer Gender and Physician Gender</title>
        <p>We expected that female patients would report more negative experiences with male doctors. Our results support this hypothesis. Although there was no significant interaction effect of reviewer and physician gender on the probability of a negative experience, we found a significant interaction between reviewer and physician gender on the likelihood of reporting <italic>disrespect</italic>, which overwhelmingly occurs in negative reviews. Female reviewers apparently benefit from seeing female doctors, as they are less likely to mention <italic>disrespect</italic> when reviewing female physicians than when they review men. This compensating effect neutralized the overall greater association of <italic>disrespect</italic> with female reviewers, but not female doctors.</p>
        <p>Given the literature and our previous finding that female physicians are subject to biased reviews, these results suggest that even women harbor bias against female physicians. However, this bias is considerably smaller among women than it is among men and is complicated by physician-gender/patient-gender preferences for different communication styles [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
      </sec>
      <sec>
        <title>Process and Physician Gender</title>
        <p>We made no predictions regarding the relationship between <italic>process</italic> and soft skills or reviewer/physician gender. However, we found that both <italic>amicability</italic> and <italic>disrespect</italic> were highly correlated with <italic>process</italic>, suggesting that the ease of social interaction with staff is important to reviewers. Importantly, negative reviews of female physicians are considerably more likely to mention aspects of the clinical experience beyond experiences with the doctor.</p>
        <p>The strong association between <italic>process</italic> and negative reviews of female doctors may reflect a tendency for patients to assess male doctors “on their own merits,” whereas women are more likely to be held accountable for poor process. This reflects a <italic>bias against women</italic> interpretation. A <italic>realist</italic> account might hypothesize that female doctors are more likely to work in clinics with less competent or accommodating staff.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This work is complementary to previous qualitative studies on the influence of gender on the doctor-patient relationship. We acknowledge that it is unclear whether gender differences reflect patient perception or the reality of physician behavior. For example, given the ample evidence in other contexts on bias against women, the high rate of negative reviews for female physicians likely reflects reviewer bias against physicians rather than genuine differences in treatment. However, our approach does not allow certainty in this regard. The Discussion section provides a more detailed explanation of the interpretation of gender differences in the context of the findings of previous studies. We also recognize that the data do not contain information related to patients’ health outcomes. Although the health outcome of each patient is not represented in our data, other studies have shown that reviews can reflect real health consequences [<xref ref-type="bibr" rid="ref14">14</xref>]. Similarly, treatment noncompliance, unwarranted recalcitrance, and other patient characteristics beyond the reviewer’s narrative are not captured in the reviews.</p>
        <p>In this paper and similar research, gender representation is reported as binary, which does not capture the full spectrum of gender or gendered interaction. Even though gender is likely to be interpreted in a binary fashion by most review readers [<xref ref-type="bibr" rid="ref21">21</xref>], the doctor-patient relationship is more complex, and other data could offer more nuanced and richer perspectives. Furthermore, our work does not consider the intersection of gender and other identities, such as race [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p>
        <p>As noted in the Methods section, the Google Places API limits data collection to 5 reviews per physician or practice. Google provides no documentation on how these reviews are chosen from all the reviews written. We acknowledge that the small sample may not be fully representative of a doctor or practice; however, our contribution is more focused on the biases within the reviews and not on the doctors themselves. We also note that the Google Places reviews are subject to selection biases. Demographics undoubtedly play a role in determining who writes physician reviews (eg, consider the high proportion of female reviewers in our data set). However, the data likely suffer from selection biases, irrespective of demographic differences. For instance, it is probable that the U-shaped distribution of physician ratings is both a product of overall polarized attitudes and strong experiences providing greater motivation to write a review. It seems likely that other such selection biases were present in these data but were unknown to us as we performed our analysis.</p>
      </sec>
      <sec>
        <title>Summary and Recommendations</title>
        <p>The increasing prevalence of online reviews of physicians affects both medical practices and patient choices. However, little is known about biases that may be present in these reviews or whether they reflect the real biases documented in doctor-patient interactions [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Conversely, most studies of gender bias in doctor-patient interactions to date have been limited to qualitative analyses, smaller-scale data sets, or specific medical conditions.</p>
        <p>This study is the first to provide evidence that gender biases and other gender differences are observable at scale in physician reviews. We provide extensive evidence of differences in physician ratings and review content with respect to both physician and reviewer gender. Our statistical inference indicates that these differences are robust when controlling for possible confounding relationships and therefore are likely to reflect gender differences and biases in the patient-physician relationship.</p>
        <p>It is difficult to disentangle which aspects of these gender differences may be attributed to review selection bias, gender bias, or gender behavioral variation. However, these patterns undeniably affect prospective patients as they peruse online reviews to select a doctor. Thus, it is important to consider how we might educate the public about the effects of gender bias on physician ratings and how online review systems could be improved to control for bias. We propose several concrete steps that could be taken to better support patients.</p>
        <p>Review systems can draw attention to gender differences in reviews to aid prospective patients in building their own understanding of a physician’s potential gender biases. One way to do this is to organize reviews by gender. This could either be the default presentation or a special gender-separated view. Alternatively, reviewer gender might be indicated explicitly only in automatically generated summaries of physician reviews. Additionally, prospective patients might benefit from a panel that provides a sense of how a particular physician compares with other physicians on gender. For example, if there is a discrepancy between men’s and women’s ratings for a physician that differs greatly from the gender discrepancy of other similar physicians, a prospective patient might benefit from reading their reviews with this information in mind.</p>
        <p>An online review system could also help to correct for gender differences that generally affect reviews. As female physicians receive many more negative reviews on average, a prospective patient might find it easier to select among physicians if ratings are adjusted to control for the physician’s gender or if ratings are reported relative to physicians of the same gender. On the other hand, online review systems could implement measures to reduce bias in the reviews. For instance, when writing a review and using a word that is commonly used to critique female physicians, the system could prompt the reviewer about gender stereotyping in word choice or alternative terms that are gender neutral. This might encourage a more balanced approach to review writing and help reviewers recognize their own biases. Alternatively, information can be solicited from the reviewer in such a way that greatly reduces the effects of gender stereotypes on performance evaluation [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
        <p>These approaches would ideally lead to reviews that more accurately reflect the quality of care provided by physicians. Finally, this study draws attention to several important areas for future work. We advocate that researchers adopt mixed methods approaches similar to the one presented here when pursuing quantitative analyses of text. Furthermore, this study raises questions specifically related to online review systems as objects of study in their own right. Little is known about how readers interpret online reviews, notably in the context of health care and gender. It also highlights the need to study how review systems can be designed to improve review accuracy and inform review readers and writers on gender bias in online reviews. We propose that experimental studies in review cognition and system design will be most fruitful to these ends.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Codebook.</p>
        <media xlink:href="jmir_v22i7e14455_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 41 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">OR</term>
          <def>
            <p>odds ratio</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>JM and KS designed the study. LZ and UB collected the data. LZ performed the initial analysis. ZD performed the statistical analysis. All authors assisted with writing the paper.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenbaum</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scoring no goal—further adventures in transparency</article-title>
          <source>N Engl J Med</source>
          <year>2015</year>
          <month>10</month>
          <day>8</day>
          <volume>373</volume>
          <issue>15</issue>
          <fpage>1385</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1510094</pub-id>
          <pub-id pub-id-type="medline">26332360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kadry</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Kadry</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gammas</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Macario</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Analysis of 4999 online physician ratings indicates that most patients give physicians a favorable rating</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>11</month>
          <day>16</day>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>e95</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2011/4/e95/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1960</pub-id>
          <pub-id pub-id-type="medline">22088924</pub-id>
          <pub-id pub-id-type="pii">v13i4e95</pub-id>
          <pub-id pub-id-type="pmcid">PMC3222200</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jha</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Are Doctors Created Equal? An Investigation of Online Ratings by Patients</article-title>
          <source>Proceedings of the Workshop on Information Systems and Economics</source>
          <year>2010</year>
          <conf-name>WISE'10</conf-name>
          <conf-date>June 10-11, 2010</conf-date>
          <conf-loc>St Louis, Missouri</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Are-doctors-created-equal-An-investigation-of-by-Gordon-McCullough/59225be0be8ea64b249f701b6f6bc4749a6e0f93"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ellimoottil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hart</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Greco</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Quek</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Farooq</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Online reviews of 500 urologists</article-title>
          <source>J Urol</source>
          <year>2013</year>
          <month>06</month>
          <volume>189</volume>
          <issue>6</issue>
          <fpage>2269</fpage>
          <lpage>73</lpage>
          <pub-id pub-id-type="doi">10.1016/j.juro.2012.12.013</pub-id>
          <pub-id pub-id-type="medline">23228385</pub-id>
          <pub-id pub-id-type="pii">S0022-5347(12)05837-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Detz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ratanawongsa</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>What patients say about their doctors online: a qualitative content analysis</article-title>
          <source>J Gen Intern Med</source>
          <year>2012</year>
          <month>06</month>
          <volume>27</volume>
          <issue>6</issue>
          <fpage>685</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22215270"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-011-1958-4</pub-id>
          <pub-id pub-id-type="medline">22215270</pub-id>
          <pub-id pub-id-type="pmcid">PMC3358396</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Radcliff</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Wigfall</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Street</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>What do patients say about doctors online? A systematic review of studies on patient online reviews</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>04</month>
          <day>8</day>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>e12521</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/4/e12521/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12521</pub-id>
          <pub-id pub-id-type="medline">30958276</pub-id>
          <pub-id pub-id-type="pii">v21i4e12521</pub-id>
          <pub-id pub-id-type="pmcid">PMC6475821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Emmert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Meier</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pisch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Physician choice making and characteristics associated with using physician-rating websites: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>08</month>
          <day>28</day>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>e187</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/8/e187/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2702</pub-id>
          <pub-id pub-id-type="medline">23985220</pub-id>
          <pub-id pub-id-type="pii">v15i8e187</pub-id>
          <pub-id pub-id-type="pmcid">PMC3758064</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Googling ourselves—what physicians can learn from online rating sites</article-title>
          <source>N Engl J Med</source>
          <year>2010</year>
          <month>01</month>
          <day>7</day>
          <volume>362</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp0903473</pub-id>
          <pub-id pub-id-type="medline">20054044</pub-id>
          <pub-id pub-id-type="pii">362/1/6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>'I hate my doctor': reputation, defamation, and physician-review websites</article-title>
          <source>Health Matrix Clevel</source>
          <year>2013</year>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>573</fpage>
          <lpage>606</lpage>
          <pub-id pub-id-type="medline">24341082</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <source>Healthgrades</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.healthgrades.com/">http://www.healthgrades.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <source>RateMDs</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ratemds.com/">http://www.ratemds.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <source>Vitals</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.vitals.com/">http://www.vitals.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <source>Yelp</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.yelp.com/">http://www.yelp.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>What Affects Patient (Dis)satisfaction? Analyzing Online Doctor Ratings with a Joint Topic-Sentiment Model</article-title>
          <source>AAAI Press Technical Reports</source>
          <year>2013</year>
          <conf-name>AAAI'13</conf-name>
          <conf-date>July 14-18, 2013</conf-date>
          <conf-loc>Bellevue, Washington</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/What-Affects-Patient-(Dis)satisfaction-Analyzing-a-Paul-Wallace/a23628e9d88cacafc35454ba77047f3de2e69f86"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Trikalinos</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A large-scale quantitative analysis of latent factors and sentiment in online doctor reviews</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>1098</fpage>
          <lpage>103</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24918109"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2014-002711</pub-id>
          <pub-id pub-id-type="medline">24918109</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2014-002711</pub-id>
          <pub-id pub-id-type="pmcid">PMC4215053</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nwachukwu</surname>
              <given-names>BU</given-names>
            </name>
            <name name-style="western">
              <surname>Adjei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Trehan</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Amoo-Achampong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>McCormick</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ranawat</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Rating a sports medicine surgeon's 'quality' in the modern era: an analysis of popular physician online rating websites</article-title>
          <source>HSS J</source>
          <year>2016</year>
          <month>10</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>272</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27703422"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11420-016-9520-x</pub-id>
          <pub-id pub-id-type="medline">27703422</pub-id>
          <pub-id pub-id-type="pii">9520</pub-id>
          <pub-id pub-id-type="pmcid">PMC5026665</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Roter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Powe</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Patient race/ethnicity and quality of patient-physician communication during medical visits</article-title>
          <source>Am J Public Health</source>
          <year>2004</year>
          <month>12</month>
          <volume>94</volume>
          <issue>12</issue>
          <fpage>2084</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.94.12.2084</pub-id>
          <pub-id pub-id-type="medline">15569958</pub-id>
          <pub-id pub-id-type="pii">94/12/2084</pub-id>
          <pub-id pub-id-type="pmcid">PMC1448596</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Masnick</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Doctor Sues Patients Over Bad Yelp Reviews</article-title>
          <source>Techdirt</source>
          <year>2010</year>
          <access-date>2020-06-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.techdirt.com/articles/20101110/19053611809/doctor-sues-patients-over-bad-yelp-reviews.shtml">https://www.techdirt.com/articles/20101110/19053611809/doctor-sues-patients-over-bad-yelp-reviews.shtml</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Upmark</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Borg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Alexanderson</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in experiencing negative encounters with healthcare: a study of long-term sickness absentees</article-title>
          <source>Scand J Public Health</source>
          <year>2007</year>
          <volume>35</volume>
          <issue>6</issue>
          <fpage>577</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1080/14034940701362194</pub-id>
          <pub-id pub-id-type="medline">17852974</pub-id>
          <pub-id pub-id-type="pii">779795711</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohoon</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Aspray</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Women and Information Technology: Research on Underrepresentation</source>
          <year>2008</year>
          <publisher-loc>Cambridge, Massachusetts</publisher-loc>
          <publisher-name>The MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>West</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>DH</given-names>
            </name>
          </person-group>
          <article-title>Accounting for doing gender</article-title>
          <source>Gend Soc</source>
          <year>2009</year>
          <month>02</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>112</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1177/0891243208326529</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bouman</surname>
              <given-names>WP</given-names>
            </name>
            <name name-style="western">
              <surname>Seal</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nieder</surname>
              <given-names>TO</given-names>
            </name>
            <name name-style="western">
              <surname>T'Sjoen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Non-binary or genderqueer genders</article-title>
          <source>Int Rev Psychiatry</source>
          <year>2016</year>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>95</fpage>
          <lpage>102</lpage>
          <pub-id pub-id-type="doi">10.3109/09540261.2015.1106446</pub-id>
          <pub-id pub-id-type="medline">26753630</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rojo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Esteban</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Lazar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The gender of power: the female style in labour organizations</article-title>
          <source>Feminist Critical Discourse Analysis: Gender, Power and Ideology in Discourse</source>
          <year>2005</year>
          <publisher-loc>Houndmills, United Kingdom</publisher-loc>
          <publisher-name>Palgrave Macmillan</publisher-name>
          <fpage>61</fpage>
          <lpage>89</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmader</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Whitehead</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wysocki</surname>
              <given-names>VH</given-names>
            </name>
          </person-group>
          <article-title>A linguistic comparison of letters of recommendation for male and female chemistry and biochemistry job applicants</article-title>
          <source>Sex Roles</source>
          <year>2007</year>
          <volume>57</volume>
          <issue>7-8</issue>
          <fpage>509</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18953419"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11199-007-9291-4</pub-id>
          <pub-id pub-id-type="medline">18953419</pub-id>
          <pub-id pub-id-type="pmcid">PMC2572075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Madera</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Hebl</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Gender and letters of recommendation for academia: agentic and communal differences</article-title>
          <source>J Appl Psychol</source>
          <year>2009</year>
          <month>11</month>
          <volume>94</volume>
          <issue>6</issue>
          <fpage>1591</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1037/a0016539</pub-id>
          <pub-id pub-id-type="medline">19916666</pub-id>
          <pub-id pub-id-type="pii">2009-21033-018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacNell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Driscoll</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hunt</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>What’s in a name: exposing gender bias in student ratings of teaching</article-title>
          <source>Innov High Educ</source>
          <year>2014</year>
          <month>12</month>
          <day>5</day>
          <volume>40</volume>
          <issue>4</issue>
          <fpage>291</fpage>
          <lpage>303</lpage>
          <pub-id pub-id-type="doi">10.1007/s10755-014-9313-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elderkin-Thompson</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Waitzkin</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Differences in clinical communication by gender</article-title>
          <source>J Gen Intern Med</source>
          <year>1999</year>
          <month>02</month>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>112</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/resolve/openurl?genre=article&#38;sid=nlm:pubmed&#38;issn=0884-8734&#38;date=1999&#38;volume=14&#38;issue=2&#38;spage=112"/>
          </comment>
          <pub-id pub-id-type="doi">10.1046/j.1525-1497.1999.00296.x</pub-id>
          <pub-id pub-id-type="medline">10051782</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmittdiel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grumbach</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Selby</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Quesenberry</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>Effect of physician and patient gender concordance on patient satisfaction and preventive care practices</article-title>
          <source>J Gen Intern Med</source>
          <year>2000</year>
          <month>11</month>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>761</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/resolve/openurl?genre=article&#38;sid=nlm:pubmed&#38;issn=0884-8734&#38;date=2000&#38;volume=15&#38;issue=11&#38;spage=761"/>
          </comment>
          <pub-id pub-id-type="doi">10.1046/j.1525-1497.2000.91156.x</pub-id>
          <pub-id pub-id-type="medline">11119167</pub-id>
          <pub-id pub-id-type="pii">jgi91156</pub-id>
          <pub-id pub-id-type="pmcid">PMC1495609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Street</surname>
              <given-names>RL</given-names>
              <suffix>Jr</suffix>
            </name>
          </person-group>
          <article-title>Gender differences in health care provider-patient communication: are they due to style, stereotypes, or accommodation?</article-title>
          <source>Patient Educ Couns</source>
          <year>2002</year>
          <month>12</month>
          <volume>48</volume>
          <issue>3</issue>
          <fpage>201</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/s0738-3991(02)00171-4</pub-id>
          <pub-id pub-id-type="medline">12477604</pub-id>
          <pub-id pub-id-type="pii">S0738399102001714</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Halbert</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gandy</surname>
              <given-names>OH</given-names>
            </name>
            <name name-style="western">
              <surname>Shaker</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Racial differences in trust in health care providers</article-title>
          <source>Arch Intern Med</source>
          <year>2006</year>
          <month>04</month>
          <day>24</day>
          <volume>166</volume>
          <issue>8</issue>
          <fpage>896</fpage>
          <lpage>901</lpage>
          <pub-id pub-id-type="doi">10.1001/archinte.166.8.896</pub-id>
          <pub-id pub-id-type="medline">16636216</pub-id>
          <pub-id pub-id-type="pii">166/8/896</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>National Survey of Women's Health</article-title>
          <source>The Commonwealth Fund</source>
          <year>1993</year>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.commonwealthfund.org/publications/publication/1993/jul/national-survey-womens-health">https://www.commonwealthfund.org/publications/publication/1993/jul/national-survey-womens-health</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mast</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Roter</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Disentangling physician sex and physician communication style: their effects on patient satisfaction in a virtual medical visit</article-title>
          <source>Patient Educ Couns</source>
          <year>2007</year>
          <month>09</month>
          <volume>68</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2007.03.020</pub-id>
          <pub-id pub-id-type="medline">17482418</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(07)00114-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sandhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Clark-Carter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The impact of gender dyads on doctor-patient communication: a systematic review</article-title>
          <source>Patient Educ Couns</source>
          <year>2009</year>
          <month>09</month>
          <volume>76</volume>
          <issue>3</issue>
          <fpage>348</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2009.07.010</pub-id>
          <pub-id pub-id-type="medline">19647969</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(09)00264-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mast</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Dominance and gender in the physician-patient interaction</article-title>
          <source>J Mens Health Gend</source>
          <year>2004</year>
          <month>12</month>
          <volume>1</volume>
          <issue>4</issue>
          <fpage>354</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jmhg.2004.10.013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>LK</given-names>
            </name>
          </person-group>
          <article-title>Computational grounded theory: a methodological framework</article-title>
          <source>Sociol Methods Res</source>
          <year>2017</year>
          <month>11</month>
          <day>21</day>
          <volume>49</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1177/0049124117729703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>Overview: Introducing the API</article-title>
          <source>Google Developers</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developers.google.com/places/web-service/intro">https://developers.google.com/places/web-service/intro</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Prostate Cancer Rates by State</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2014</year>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/cancer/prostate/statistics/state.htm">https://www.cdc.gov/cancer/prostate/statistics/state.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weinstein</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>TAMS Analyzer for Macintosh OS X</article-title>
          <source>TAMS Analyzer - SourceForge</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://tamsys.sourceforge.net/">http://tamsys.sourceforge.net/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elmas</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>SexMachine 0.1.1</article-title>
          <source>The Python Package Index</source>
          <access-date>2020-05-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pypi.org/project/SexMachine/">https://pypi.org/project/SexMachine/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ladwig</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Marten-Mittag</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Formanek</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dammann</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Gender differences of symptom reporting and medical health care utilization in the German population</article-title>
          <source>Eur J Epidemiol</source>
          <year>2000</year>
          <month>06</month>
          <volume>16</volume>
          <issue>6</issue>
          <fpage>511</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1023/a:1007629920752</pub-id>
          <pub-id pub-id-type="medline">11049093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bertakis</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Azari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Helms</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in the utilization of health care services</article-title>
          <source>J Fam Pract</source>
          <year>2000</year>
          <month>02</month>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>147</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="medline">10718692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feingold</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in personality: a meta-analysis</article-title>
          <source>Psychol Bull</source>
          <year>1994</year>
          <month>11</month>
          <volume>116</volume>
          <issue>3</issue>
          <fpage>429</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1037/0033-2909.116.3.429</pub-id>
          <pub-id pub-id-type="medline">7809307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>PT</given-names>
              <suffix>Jr</suffix>
            </name>
            <name name-style="western">
              <surname>Terracciano</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McCrae</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in personality traits across cultures: robust and surprising findings</article-title>
          <source>J Pers Soc Psychol</source>
          <year>2001</year>
          <month>08</month>
          <volume>81</volume>
          <issue>2</issue>
          <fpage>322</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1037/0022-3514.81.2.322</pub-id>
          <pub-id pub-id-type="medline">11519935</pub-id>
          <pub-id pub-id-type="pii">2001-01642-012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Worthington</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>McDaniel</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Gender and forgiveness: a meta-analytic review and research agenda</article-title>
          <source>J Soc Clin Psychol</source>
          <year>2008</year>
          <month>10</month>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>843</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1521/jscp.2008.27.8.843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gayle</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Preiss</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A meta-analytic interpretation of intimate and nonintimate interpersonal conflict</article-title>
          <source>Interpersonal Communication Research: Advances Through Meta-analysis</source>
          <year>2002</year>
          <publisher-loc>Abingdon, United Kingdom</publisher-loc>
          <publisher-name>Routledge</publisher-name>
          <fpage>345</fpage>
          <lpage>68</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fennema</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Owen</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Sex of physician: patients' preferences and stereotypes</article-title>
          <source>J Fam Pract</source>
          <year>1990</year>
          <month>04</month>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>441</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="medline">2324696</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eagly</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Mladinic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Otto</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Are women evaluated more favorably than men?: an analysis of attitudes, beliefs, and emotions</article-title>
          <source>Psychol Women Q</source>
          <year>1991</year>
          <month>06</month>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>203</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1471-6402.1991.tb00792.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eagly</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Karau</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Role congruity theory of prejudice toward female leaders</article-title>
          <source>Psychol Rev</source>
          <year>2002</year>
          <month>07</month>
          <volume>109</volume>
          <issue>3</issue>
          <fpage>573</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1037/0033-295x.109.3.573</pub-id>
          <pub-id pub-id-type="medline">12088246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crenshaw</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Demarginalizing the intersection of race and sex: a black feminist critique of antidiscrimination doctrine, feminist theory and antiracist politics</article-title>
          <source>Univ Chic Leg Forum</source>
          <year>1989</year>
          <volume>1989</volume>
          <issue>1</issue>
          <fpage>139</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.4324/9780429500480-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <source>Justice and the Politics of Difference</source>
          <year>2011</year>
          <publisher-loc>Princeton, New Jersey</publisher-loc>
          <publisher-name>Princeton University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Baltes</surname>
              <given-names>BB</given-names>
            </name>
          </person-group>
          <article-title>Reducing the effects of gender stereotypes on performance evaluations</article-title>
          <source>Sex Roles</source>
          <year>2002</year>
          <volume>47</volume>
          <issue>9-10</issue>
          <fpage>465</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1021652527696</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
